kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 05db6b973f7e1398455d363ebd3ba815c3f82d53
parent 6fcad2bdde71cb0814a384cf7a415bf6328688eb
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Thu,  7 May 2026 11:15:29 -0700

make compiler-rt templates re-includable; add build.sh

Each impl/*.inc and the fp_lib.h header is parameterized so multiple
inclusions in one TU (different precisions, src/dst pairs, or callsites)
emit uniquely-suffixed static helpers. Bare-name aliases let existing
per-op .c files compile unchanged. fp_lib_undef.h clears the aliases
between sections of a future consolidated TU.

build.sh exhaustively enumerates the (target, mode) tuples cfree
documents and builds libcfree_rt-<variant>.a with clang for each
(13 variants).

Diffstat:
A lib/build.sh | 225 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M lib/fp/divdf3.c | 3 +++
M lib/fp/divsf3.c | 4 ++++
M lib/fp/extendsfdf2.c | 3 +++
M lib/fp/fixdfdi.c | 9 +++++++--
M lib/fp/fixdfsi.c | 9 +++++++--
M lib/fp/fixsfdi.c | 9 +++++++--
M lib/fp/fixsfsi.c | 9 +++++++--
M lib/fp/fixunsdfdi.c | 6 +++++-
M lib/fp/fixunsdfsi.c | 6 +++++-
M lib/fp/fixunssfdi.c | 6 +++++-
M lib/fp/fixunssfsi.c | 6 +++++-
M lib/fp/floatdidf.c | 3 +++
M lib/fp/floatdisf.c | 3 +++
M lib/fp/floatundidf.c | 3 +++
M lib/fp/floatundisf.c | 3 +++
M lib/fp/truncdfsf2.c | 3 +++
M lib/fp_tf/divtf3.c | 3 +++
M lib/fp_tf/extenddftf2.c | 3 +++
M lib/fp_tf/extendsftf2.c | 3 +++
M lib/fp_tf/fixtfdi.c | 9 +++++++--
M lib/fp_tf/fixtfsi.c | 9 +++++++--
M lib/fp_tf/fixtfti.c | 9 +++++++--
M lib/fp_tf/fixunstfdi.c | 6 +++++-
M lib/fp_tf/fixunstfsi.c | 6 +++++-
M lib/fp_tf/fixunstfti.c | 6 +++++-
M lib/fp_tf/floattitf.c | 3 +++
M lib/fp_tf/floatuntitf.c | 3 +++
M lib/fp_tf/trunctfdf2.c | 3 +++
M lib/fp_tf/trunctfsf2.c | 3 +++
M lib/fp_ti/fixdfti.c | 9 +++++++--
M lib/fp_ti/fixsfti.c | 9 +++++++--
M lib/fp_ti/fixunsdfti.c | 6 +++++-
M lib/fp_ti/fixunssfti.c | 6 +++++-
M lib/fp_ti/floattidf.c | 3 +++
M lib/fp_ti/floattisf.c | 3 +++
M lib/fp_ti/floatuntidf.c | 3 +++
M lib/fp_ti/floatuntisf.c | 3 +++
M lib/impl/fp_add_impl.inc | 18 ++++++++++++++++++
M lib/impl/fp_compare_impl.inc | 43 ++++++++++++++++++++++++++++++++++++-------
M lib/impl/fp_div_impl.inc | 22 ++++++++++++++++++++++
M lib/impl/fp_extend.h | 141 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------
M lib/impl/fp_extend_impl.inc | 18 ++++++++++++++++++
M lib/impl/fp_fixint_impl.inc | 20 ++++++++++++++++++++
M lib/impl/fp_fixuint_impl.inc | 16 ++++++++++++++++
M lib/impl/fp_mul_impl.inc | 18 ++++++++++++++++++
M lib/impl/fp_trunc.h | 135 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------
M lib/impl/fp_trunc_impl.inc | 18 ++++++++++++++++++
M lib/impl/int_div_impl.inc | 54 +++++++++++++++++++++++++++++++++++++++++++++++-------
M lib/impl/int_to_fp.h | 118 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------
M lib/impl/int_to_fp_impl.inc | 39 +++++++++++++++++++++++++++++++++++++++
M lib/include/common/fp_lib.h | 155 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------
A lib/include/common/fp_lib_undef.h | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M lib/int/divdi3.c | 5 ++++-
M lib/int/moddi3.c | 5 ++++-
M lib/int/udivdi3.c | 11 ++++++-----
M lib/int/umoddi3.c | 11 ++++++-----
M lib/int64/divti3.c | 5 ++++-
M lib/int64/modti3.c | 5 ++++-
59 files changed, 1205 insertions(+), 138 deletions(-)

diff --git a/lib/build.sh b/lib/build.sh @@ -0,0 +1,225 @@ +#!/usr/bin/env bash +# +# build.sh — exhaustive build of libcfree_rt.a for every (target, mode) tuple +# cfree supports. Compiles every required source file with clang for each +# variant; on success, archives the objects with llvm-ar (if present) into +# $OUT/libcfree_rt-<variant>.a. +# +# Reports OK / FAIL per variant; exits non-zero if any fail. +# Override defaults via env: CC=clang OUT=/tmp/cfree_rt_build AR=llvm-ar. +# +# Run from anywhere — the script self-locates and cd's to lib/ first. + +set -uo pipefail + +cd "$(dirname "$0")" || exit 1 + +CC=${CC:-clang} +AR=${AR:-llvm-ar} +OUT=${OUT:-/tmp/cfree_rt_build} + +# Freestanding, warnings on, common headers reachable. -fno-builtin keeps the +# compiler from rewriting our memcpy/memset/etc. into recursive calls. +COMMON_FLAGS=(-c -ffreestanding -fno-builtin -Wall -Wextra + -Iinclude/common -Iimpl) + +mkdir -p "$OUT" +OK=0 +FAIL=0 +FAILED_VARIANTS=() + +#------------------------------------------------------------------------------- +# build_variant <name> "<extra cc flags>" <source files...> +#------------------------------------------------------------------------------- +build_variant() { + local name=$1 + local flags_str=$2 + shift 2 + local sources=("$@") + # shellcheck disable=SC2206 + local flags=( $flags_str ) + + local out="$OUT/$name" + mkdir -p "$out" + local log="$out/build.log" + : >"$log" + + local objs=() + local nfail=0 + for src in "${sources[@]}"; do + local obj="$out/$(echo "$src" | tr / _).o" + if "$CC" "${COMMON_FLAGS[@]}" "${flags[@]}" -o "$obj" "$src" >>"$log" 2>&1; then + objs+=("$obj") + else + nfail=$((nfail+1)) + echo "FAIL: $src" >>"$log" + fi + done + + local total=${#sources[@]} + if [ $nfail -ne 0 ]; then + printf " %-32s FAIL (%d/%d sources failed; see %s)\n" \ + "$name" "$nfail" "$total" "$log" + FAIL=$((FAIL+1)) + FAILED_VARIANTS+=("$name") + return + fi + + # Archive (best-effort; not fatal if AR is missing). 
+ if command -v "$AR" >/dev/null 2>&1; then + local archive="$OUT/libcfree_rt-$name.a" + if "$AR" rcs "$archive" "${objs[@]}" 2>>"$log"; then + printf " %-32s OK (%d objs → %s)\n" \ + "$name" "$total" "$(basename "$archive")" + else + printf " %-32s OK (%d objs; %s failed to archive)\n" \ + "$name" "$total" "$AR" + fi + else + printf " %-32s OK (%d objs; %s not found, no archive)\n" \ + "$name" "$total" "$AR" + fi + OK=$((OK+1)) +} + +#------------------------------------------------------------------------------- +# Source sets +#------------------------------------------------------------------------------- + +INT_C=( int/*.c ) +INT32_C=( int32/*.c ) +INT64_C=( int64/*.c ) +FP_C=( fp/*.c ) +FP_TF_C=( fp_tf/*.c ) +FP_TI_C=( fp_ti/*.c ) +MEM_C=( mem/mem.c ) +ATOMIC_C=( atomic/atomic_freestanding.c ) + +# ARM AEABI: 6 base files have a *_thumb1.S companion (idivmod, uidivmod, +# memcpy, memmove, memset, memcmp). The Thumb2 build uses base files only; +# the Thumb1 build uses *_thumb1.S in place of those base files, plus the +# ISA-agnostic ones (ldivmod, uldivmod, dcmp, fcmp, drsub, frsub). +ARM_AEABI_THUMB2=( arm/aeabi_*.S arm/aeabi_*.c ) +# Filter out *_thumb1.S from the Thumb2 list. +_t2=() +for f in "${ARM_AEABI_THUMB2[@]}"; do + case "$f" in *_thumb1.S) ;; *) _t2+=("$f") ;; esac +done +ARM_AEABI_THUMB2=("${_t2[@]}") + +# Build the Thumb1 list: every *_thumb1.S, plus any base file whose stem has +# no *_thumb1.S partner. 
+_thumb1_stems=() +for f in arm/aeabi_*_thumb1.S; do + _thumb1_stems+=( "$(basename "$f" _thumb1.S)" ) +done +ARM_AEABI_THUMB1=( arm/aeabi_*_thumb1.S ) +for f in "${ARM_AEABI_THUMB2[@]}"; do + stem=$(basename "$f" .S); stem=$(basename "$stem" .c) + has_t1=0 + for s in "${_thumb1_stems[@]}"; do + [ "$stem" = "$s" ] && { has_t1=1; break; } + done + [ $has_t1 -eq 0 ] && ARM_AEABI_THUMB1+=("$f") +done + +RV32_SR=( riscv/save_rv32.S riscv/restore_rv32.S ) +RV64_SR=( riscv/save_rv64.S riscv/restore_rv64.S ) + +#------------------------------------------------------------------------------- +# Variants — each combination of (data model, target, mode) cfree supports. +#------------------------------------------------------------------------------- +echo "Building libcfree_rt for every supported (target, mode) tuple" +echo " CC=$CC, AR=$AR, OUT=$OUT" +echo + +# ---- LP64 little-endian ------------------------------------------------------ + +# x86_64 Linux / Darwin / RV64 / aarch64 base: int + int64 + fp + atomic + mem. +# binary64 long double (no fp_tf). +LP64_BASE=( "${INT_C[@]}" "${INT64_C[@]}" "${FP_C[@]}" "${MEM_C[@]}" "${ATOMIC_C[@]}" ) + +build_variant x86_64-linux \ + "--target=x86_64-linux-gnu -Iinclude/lp64_le -DHAS_INT128=1" \ + "${LP64_BASE[@]}" + +build_variant x86_64-apple-darwin \ + "--target=x86_64-apple-darwin -Iinclude/lp64_le -DHAS_INT128=1" \ + "${LP64_BASE[@]}" + +# aarch64-linux: long double is binary128 by default (no flag to change), +# so the build pulls in fp_tf + fp_ti and pre-includes tf_supplement.h. +build_variant aarch64-linux \ + "--target=aarch64-linux-gnu \ + -Iinclude/lp64_le_ldbl128 -Iinclude/lp64_le -DHAS_INT128=1 \ + -include include/lp64_le_ldbl128/tf_supplement.h" \ + "${INT_C[@]}" "${INT64_C[@]}" "${FP_C[@]}" "${FP_TF_C[@]}" "${FP_TI_C[@]}" \ + "${MEM_C[@]}" "${ATOMIC_C[@]}" + +# aarch64-apple-darwin: long double is binary64 (no fp_tf needed). 
+build_variant aarch64-apple-darwin \ + "--target=aarch64-apple-darwin -Iinclude/lp64_le -DHAS_INT128=1" \ + "${LP64_BASE[@]}" + +# RISC-V 64 (soft-float) — with and without -msave-restore. +build_variant riscv64-elf \ + "--target=riscv64-unknown-elf -mabi=lp64 -march=rv64imafd \ + -Iinclude/lp64_le -DHAS_INT128=1" \ + "${LP64_BASE[@]}" + +build_variant riscv64-elf-save-restore \ + "--target=riscv64-unknown-elf -mabi=lp64 -march=rv64imafd -msave-restore \ + -Iinclude/lp64_le -DHAS_INT128=1" \ + "${LP64_BASE[@]}" "${RV64_SR[@]}" + +# ---- LLP64 little-endian (Win64) -------------------------------------------- + +build_variant x86_64-pc-windows \ + "--target=x86_64-pc-windows-msvc -Iinclude/llp64_le -DHAS_INT128=1" \ + "${INT_C[@]}" "${INT64_C[@]}" "${FP_C[@]}" "${MEM_C[@]}" "${ATOMIC_C[@]}" + +# ---- ILP32 little-endian ----------------------------------------------------- + +ILP32_BASE=( "${INT_C[@]}" "${INT32_C[@]}" "${FP_C[@]}" "${MEM_C[@]}" "${ATOMIC_C[@]}" ) + +build_variant i386-linux \ + "--target=i386-linux-gnu -Iinclude/ilp32_le -DHAS_INT128=0" \ + "${ILP32_BASE[@]}" + +build_variant wasm32 \ + "--target=wasm32-unknown-unknown -Iinclude/ilp32_le -DHAS_INT128=0" \ + "${ILP32_BASE[@]}" + +build_variant riscv32-elf \ + "--target=riscv32-unknown-elf -mabi=ilp32 -march=rv32imafd \ + -Iinclude/ilp32_le -DHAS_INT128=0" \ + "${ILP32_BASE[@]}" + +build_variant riscv32-elf-save-restore \ + "--target=riscv32-unknown-elf -mabi=ilp32 -march=rv32imafd -msave-restore \ + -Iinclude/ilp32_le -DHAS_INT128=0" \ + "${ILP32_BASE[@]}" "${RV32_SR[@]}" + +# ARM32 ARMv7+/Thumb2 (AEABI). The .S files use the AEABI base PCS regardless +# of FPU presence, so one variant covers soft- and hard-float targets. +build_variant arm-eabi-thumb2 \ + "--target=arm-none-eabi -march=armv7-a -mthumb -mfloat-abi=soft \ + -Iinclude/ilp32_le -DHAS_INT128=0" \ + "${ILP32_BASE[@]}" "${ARM_AEABI_THUMB2[@]}" + +# ARM32 ARMv6-M / Cortex-M0/M0+/M1 (Thumb1). 
+build_variant arm-eabi-thumb1 \ + "--target=arm-none-eabi -march=armv6-m -mthumb -mfloat-abi=soft \ + -Iinclude/ilp32_le -DHAS_INT128=0" \ + "${ILP32_BASE[@]}" "${ARM_AEABI_THUMB1[@]}" + +#------------------------------------------------------------------------------- +echo +echo "Summary: $OK ok, $FAIL failed" +if [ $FAIL -ne 0 ]; then + echo "Failed variants:" + for v in "${FAILED_VARIANTS[@]}"; do + echo " - $v ($OUT/$v/build.log)" + done + exit 1 +fi diff --git a/lib/fp/divdf3.c b/lib/fp/divdf3.c @@ -20,3 +20,6 @@ COMPILER_RT_ABI fp_t __divdf3(fp_t a, fp_t b) { return __divXf3__(a, b); } + +#undef NUMBER_OF_HALF_ITERATIONS +#undef NUMBER_OF_FULL_ITERATIONS diff --git a/lib/fp/divsf3.c b/lib/fp/divsf3.c @@ -21,3 +21,7 @@ COMPILER_RT_ABI fp_t __divsf3(fp_t a, fp_t b) { return __divXf3__(a, b); } + +#undef NUMBER_OF_HALF_ITERATIONS +#undef NUMBER_OF_FULL_ITERATIONS +#undef USE_NATIVE_FULL_ITERATIONS diff --git a/lib/fp/extendsfdf2.c b/lib/fp/extendsfdf2.c @@ -12,3 +12,6 @@ COMPILER_RT_ABI double __extendsfdf2(float a) { return __extendXfYf2__(a); } + +#undef SRC_SINGLE +#undef DST_DOUBLE diff --git a/lib/fp/fixdfdi.c b/lib/fp/fixdfdi.c @@ -13,9 +13,14 @@ // flags to set, and we don't want to code-gen to an unknown soft-float // implementation. 
-typedef di_int fixint_t; -typedef du_int fixuint_t; +#define fixint_t di_int +#define fixuint_t du_int +#define FP_FIX_SUFFIX fixdfdi #include "fp_fixint_impl.inc" COMPILER_RT_ABI di_int __fixdfdi(fp_t a) { return __fixint(a); } + +#undef fixint_t +#undef fixuint_t +#undef FP_FIX_SUFFIX diff --git a/lib/fp/fixdfsi.c b/lib/fp/fixdfsi.c @@ -8,9 +8,14 @@ #define DOUBLE_PRECISION #include "fp_lib.h" -typedef si_int fixint_t; -typedef su_int fixuint_t; +#define fixint_t si_int +#define fixuint_t su_int +#define FP_FIX_SUFFIX fixdfsi #include "fp_fixint_impl.inc" COMPILER_RT_ABI si_int __fixdfsi(fp_t a) { return __fixint(a); } + +#undef fixint_t +#undef fixuint_t +#undef FP_FIX_SUFFIX diff --git a/lib/fp/fixsfdi.c b/lib/fp/fixsfdi.c @@ -13,9 +13,14 @@ // flags to set, and we don't want to code-gen to an unknown soft-float // implementation. -typedef di_int fixint_t; -typedef du_int fixuint_t; +#define fixint_t di_int +#define fixuint_t du_int +#define FP_FIX_SUFFIX fixsfdi #include "fp_fixint_impl.inc" COMPILER_RT_ABI di_int __fixsfdi(fp_t a) { return __fixint(a); } + +#undef fixint_t +#undef fixuint_t +#undef FP_FIX_SUFFIX diff --git a/lib/fp/fixsfsi.c b/lib/fp/fixsfsi.c @@ -8,9 +8,14 @@ #define SINGLE_PRECISION #include "fp_lib.h" -typedef si_int fixint_t; -typedef su_int fixuint_t; +#define fixint_t si_int +#define fixuint_t su_int +#define FP_FIX_SUFFIX fixsfsi #include "fp_fixint_impl.inc" COMPILER_RT_ABI si_int __fixsfsi(fp_t a) { return __fixint(a); } + +#undef fixint_t +#undef fixuint_t +#undef FP_FIX_SUFFIX diff --git a/lib/fp/fixunsdfdi.c b/lib/fp/fixunsdfdi.c @@ -13,8 +13,12 @@ // flags to set, and we don't want to code-gen to an unknown soft-float // implementation. 
-typedef du_int fixuint_t; +#define fixuint_t du_int +#define FP_FIX_SUFFIX fixunsdfdi #include "fp_fixuint_impl.inc" COMPILER_RT_ABI du_int __fixunsdfdi(fp_t a) { return __fixuint(a); } + +#undef fixuint_t +#undef FP_FIX_SUFFIX diff --git a/lib/fp/fixunsdfsi.c b/lib/fp/fixunsdfsi.c @@ -8,8 +8,12 @@ #define DOUBLE_PRECISION #include "fp_lib.h" -typedef su_int fixuint_t; +#define fixuint_t su_int +#define FP_FIX_SUFFIX fixunsdfsi #include "fp_fixuint_impl.inc" COMPILER_RT_ABI su_int __fixunsdfsi(fp_t a) { return __fixuint(a); } + +#undef fixuint_t +#undef FP_FIX_SUFFIX diff --git a/lib/fp/fixunssfdi.c b/lib/fp/fixunssfdi.c @@ -13,8 +13,12 @@ // flags to set, and we don't want to code-gen to an unknown soft-float // implementation. -typedef du_int fixuint_t; +#define fixuint_t du_int +#define FP_FIX_SUFFIX fixunssfdi #include "fp_fixuint_impl.inc" COMPILER_RT_ABI du_int __fixunssfdi(fp_t a) { return __fixuint(a); } + +#undef fixuint_t +#undef FP_FIX_SUFFIX diff --git a/lib/fp/fixunssfsi.c b/lib/fp/fixunssfsi.c @@ -12,8 +12,12 @@ #define SINGLE_PRECISION #include "fp_lib.h" -typedef su_int fixuint_t; +#define fixuint_t su_int +#define FP_FIX_SUFFIX fixunssfsi #include "fp_fixuint_impl.inc" COMPILER_RT_ABI su_int __fixunssfsi(fp_t a) { return __fixuint(a); } + +#undef fixuint_t +#undef FP_FIX_SUFFIX diff --git a/lib/fp/floatdidf.c b/lib/fp/floatdidf.c @@ -30,3 +30,6 @@ COMPILER_RT_ABI double __floatdidf(di_int a) { return __floatXiYf__(a); } + +#undef SRC_I64 +#undef DST_DOUBLE diff --git a/lib/fp/floatdisf.c b/lib/fp/floatdisf.c @@ -25,3 +25,6 @@ COMPILER_RT_ABI float __floatdisf(di_int a) { return __floatXiYf__(a); } + +#undef SRC_I64 +#undef DST_SINGLE diff --git a/lib/fp/floatundidf.c b/lib/fp/floatundidf.c @@ -30,3 +30,6 @@ COMPILER_RT_ABI double __floatundidf(du_int a) { return __floatXiYf__(a); } + +#undef SRC_U64 +#undef DST_DOUBLE diff --git a/lib/fp/floatundisf.c b/lib/fp/floatundisf.c @@ -25,3 +25,6 @@ COMPILER_RT_ABI float __floatundisf(du_int a) { return 
__floatXiYf__(a); } + +#undef SRC_U64 +#undef DST_SINGLE diff --git a/lib/fp/truncdfsf2.c b/lib/fp/truncdfsf2.c @@ -12,3 +12,6 @@ COMPILER_RT_ABI float __truncdfsf2(double a) { return __truncXfYf2__(a); } + +#undef SRC_DOUBLE +#undef DST_SINGLE diff --git a/lib/fp_tf/divtf3.c b/lib/fp_tf/divtf3.c @@ -22,3 +22,6 @@ COMPILER_RT_ABI fp_t __divtf3(fp_t a, fp_t b) { return __divXf3__(a, b); } + +#undef NUMBER_OF_HALF_ITERATIONS +#undef NUMBER_OF_FULL_ITERATIONS diff --git a/lib/fp_tf/extenddftf2.c b/lib/fp_tf/extenddftf2.c @@ -15,3 +15,6 @@ COMPILER_RT_ABI dst_t __extenddftf2(src_t a) { return __extendXfYf2__(a); } + +#undef SRC_DOUBLE +#undef DST_QUAD diff --git a/lib/fp_tf/extendsftf2.c b/lib/fp_tf/extendsftf2.c @@ -15,3 +15,6 @@ COMPILER_RT_ABI dst_t __extendsftf2(src_t a) { return __extendXfYf2__(a); } + +#undef SRC_SINGLE +#undef DST_QUAD diff --git a/lib/fp_tf/fixtfdi.c b/lib/fp_tf/fixtfdi.c @@ -9,8 +9,13 @@ #define QUAD_PRECISION #include "fp_lib.h" -typedef di_int fixint_t; -typedef du_int fixuint_t; +#define fixint_t di_int +#define fixuint_t du_int +#define FP_FIX_SUFFIX fixtfdi #include "fp_fixint_impl.inc" COMPILER_RT_ABI di_int __fixtfdi(fp_t a) { return __fixint(a); } + +#undef fixint_t +#undef fixuint_t +#undef FP_FIX_SUFFIX diff --git a/lib/fp_tf/fixtfsi.c b/lib/fp_tf/fixtfsi.c @@ -9,8 +9,13 @@ #define QUAD_PRECISION #include "fp_lib.h" -typedef si_int fixint_t; -typedef su_int fixuint_t; +#define fixint_t si_int +#define fixuint_t su_int +#define FP_FIX_SUFFIX fixtfsi #include "fp_fixint_impl.inc" COMPILER_RT_ABI si_int __fixtfsi(fp_t a) { return __fixint(a); } + +#undef fixint_t +#undef fixuint_t +#undef FP_FIX_SUFFIX diff --git a/lib/fp_tf/fixtfti.c b/lib/fp_tf/fixtfti.c @@ -9,8 +9,13 @@ #define QUAD_PRECISION #include "fp_lib.h" -typedef ti_int fixint_t; -typedef tu_int fixuint_t; +#define fixint_t ti_int +#define fixuint_t tu_int +#define FP_FIX_SUFFIX fixtfti #include "fp_fixint_impl.inc" COMPILER_RT_ABI ti_int __fixtfti(fp_t a) { return 
__fixint(a); } + +#undef fixint_t +#undef fixuint_t +#undef FP_FIX_SUFFIX diff --git a/lib/fp_tf/fixunstfdi.c b/lib/fp_tf/fixunstfdi.c @@ -9,7 +9,11 @@ #define QUAD_PRECISION #include "fp_lib.h" -typedef du_int fixuint_t; +#define fixuint_t du_int +#define FP_FIX_SUFFIX fixunstfdi #include "fp_fixuint_impl.inc" COMPILER_RT_ABI du_int __fixunstfdi(fp_t a) { return __fixuint(a); } + +#undef fixuint_t +#undef FP_FIX_SUFFIX diff --git a/lib/fp_tf/fixunstfsi.c b/lib/fp_tf/fixunstfsi.c @@ -9,7 +9,11 @@ #define QUAD_PRECISION #include "fp_lib.h" -typedef su_int fixuint_t; +#define fixuint_t su_int +#define FP_FIX_SUFFIX fixunstfsi #include "fp_fixuint_impl.inc" COMPILER_RT_ABI su_int __fixunstfsi(fp_t a) { return __fixuint(a); } + +#undef fixuint_t +#undef FP_FIX_SUFFIX diff --git a/lib/fp_tf/fixunstfti.c b/lib/fp_tf/fixunstfti.c @@ -9,7 +9,11 @@ #define QUAD_PRECISION #include "fp_lib.h" -typedef tu_int fixuint_t; +#define fixuint_t tu_int +#define FP_FIX_SUFFIX fixunstfti #include "fp_fixuint_impl.inc" COMPILER_RT_ABI tu_int __fixunstfti(fp_t a) { return __fixuint(a); } + +#undef fixuint_t +#undef FP_FIX_SUFFIX diff --git a/lib/fp_tf/floattitf.c b/lib/fp_tf/floattitf.c @@ -31,3 +31,6 @@ COMPILER_RT_ABI fp_t __floattitf(ti_int a) { return __floatXiYf__(a); } + +#undef SRC_I128 +#undef DST_QUAD diff --git a/lib/fp_tf/floatuntitf.c b/lib/fp_tf/floatuntitf.c @@ -31,3 +31,6 @@ COMPILER_RT_ABI fp_t __floatuntitf(tu_int a) { return __floatXiYf__(a); } + +#undef SRC_U128 +#undef DST_QUAD diff --git a/lib/fp_tf/trunctfdf2.c b/lib/fp_tf/trunctfdf2.c @@ -15,3 +15,6 @@ COMPILER_RT_ABI dst_t __trunctfdf2(src_t a) { return __truncXfYf2__(a); } + +#undef SRC_QUAD +#undef DST_DOUBLE diff --git a/lib/fp_tf/trunctfsf2.c b/lib/fp_tf/trunctfsf2.c @@ -15,3 +15,6 @@ COMPILER_RT_ABI dst_t __trunctfsf2(src_t a) { return __truncXfYf2__(a); } + +#undef SRC_QUAD +#undef DST_SINGLE diff --git a/lib/fp_ti/fixdfti.c b/lib/fp_ti/fixdfti.c @@ -11,9 +11,14 @@ #define DOUBLE_PRECISION #include 
"fp_lib.h" -typedef ti_int fixint_t; -typedef tu_int fixuint_t; +#define fixint_t ti_int +#define fixuint_t tu_int +#define FP_FIX_SUFFIX fixdfti #include "fp_fixint_impl.inc" COMPILER_RT_ABI ti_int __fixdfti(fp_t a) { return __fixint(a); } + +#undef fixint_t +#undef fixuint_t +#undef FP_FIX_SUFFIX diff --git a/lib/fp_ti/fixsfti.c b/lib/fp_ti/fixsfti.c @@ -11,9 +11,14 @@ #define SINGLE_PRECISION #include "fp_lib.h" -typedef ti_int fixint_t; -typedef tu_int fixuint_t; +#define fixint_t ti_int +#define fixuint_t tu_int +#define FP_FIX_SUFFIX fixsfti #include "fp_fixint_impl.inc" COMPILER_RT_ABI ti_int __fixsfti(fp_t a) { return __fixint(a); } + +#undef fixint_t +#undef fixuint_t +#undef FP_FIX_SUFFIX diff --git a/lib/fp_ti/fixunsdfti.c b/lib/fp_ti/fixunsdfti.c @@ -10,7 +10,11 @@ #define DOUBLE_PRECISION #include "fp_lib.h" -typedef tu_int fixuint_t; +#define fixuint_t tu_int +#define FP_FIX_SUFFIX fixunsdfti #include "fp_fixuint_impl.inc" COMPILER_RT_ABI tu_int __fixunsdfti(fp_t a) { return __fixuint(a); } + +#undef fixuint_t +#undef FP_FIX_SUFFIX diff --git a/lib/fp_ti/fixunssfti.c b/lib/fp_ti/fixunssfti.c @@ -13,7 +13,11 @@ #define SINGLE_PRECISION #include "fp_lib.h" -typedef tu_int fixuint_t; +#define fixuint_t tu_int +#define FP_FIX_SUFFIX fixunssfti #include "fp_fixuint_impl.inc" COMPILER_RT_ABI tu_int __fixunssfti(fp_t a) { return __fixuint(a); } + +#undef fixuint_t +#undef FP_FIX_SUFFIX diff --git a/lib/fp_ti/floattidf.c b/lib/fp_ti/floattidf.c @@ -27,3 +27,6 @@ COMPILER_RT_ABI double __floattidf(ti_int a) { return __floatXiYf__(a); } + +#undef SRC_I128 +#undef DST_DOUBLE diff --git a/lib/fp_ti/floattisf.c b/lib/fp_ti/floattisf.c @@ -26,3 +26,6 @@ COMPILER_RT_ABI float __floattisf(ti_int a) { return __floatXiYf__(a); } + +#undef SRC_I128 +#undef DST_SINGLE diff --git a/lib/fp_ti/floatuntidf.c b/lib/fp_ti/floatuntidf.c @@ -27,3 +27,6 @@ COMPILER_RT_ABI double __floatuntidf(tu_int a) { return __floatXiYf__(a); } + +#undef SRC_U128 +#undef DST_DOUBLE diff --git 
a/lib/fp_ti/floatuntisf.c b/lib/fp_ti/floatuntisf.c @@ -26,3 +26,6 @@ COMPILER_RT_ABI float __floatuntisf(tu_int a) { return __floatXiYf__(a); } + +#undef SRC_U128 +#undef DST_SINGLE diff --git a/lib/impl/fp_add_impl.inc b/lib/impl/fp_add_impl.inc @@ -14,6 +14,22 @@ #include "fp_lib.h" #include "fp_mode.h" +#define __addXf3__ _FP_NAME(__addXf3__) + +#if defined SINGLE_PRECISION && !defined FP_ADD_SF_EMITTED +#define FP_ADD_SF_EMITTED +#define _FP_ADD_EMIT 1 +#elif defined DOUBLE_PRECISION && !defined FP_ADD_DF_EMITTED +#define FP_ADD_DF_EMITTED +#define _FP_ADD_EMIT 1 +#elif defined QUAD_PRECISION && !defined FP_ADD_TF_EMITTED +#define FP_ADD_TF_EMITTED +#define _FP_ADD_EMIT 1 +#endif + +#ifdef _FP_ADD_EMIT +#undef _FP_ADD_EMIT + static __inline fp_t __addXf3__(fp_t a, fp_t b) { rep_t aRep = toRep(a); rep_t bRep = toRep(b); @@ -170,3 +186,5 @@ static __inline fp_t __addXf3__(fp_t a, fp_t b) { __fe_raise_inexact(); return fromRep(result); } + +#endif // _FP_ADD_EMIT diff --git a/lib/impl/fp_compare_impl.inc b/lib/impl/fp_compare_impl.inc @@ -8,6 +8,12 @@ #include "fp_lib.h" +// CMP_RESULT and the LE_*/GE_* sentinels are precision-independent; emit +// them once per TU. The static __inline comparators (__leXf2__ etc.) are +// per-precision and gated below. +#ifndef FP_COMPARE_COMMON_EMITTED +#define FP_COMPARE_COMMON_EMITTED + // GCC uses long (at least for x86_64) as the return type of the comparison // functions. We need to ensure that the return value is sign-extended in the // same way as GCC expects (since otherwise GCC-generated __builtin_isinf @@ -41,6 +47,34 @@ enum { LE_UNORDERED = 1, }; +enum { + GE_LESS = -1, + GE_EQUAL = 0, + GE_GREATER = 1, + GE_UNORDERED = -1 // Note: different from LE_UNORDERED +}; + +#endif // FP_COMPARE_COMMON_EMITTED + +// Bare-name aliases (re-set every inclusion, suffix-renamed via fp_lib). 
+#define __leXf2__ _FP_NAME(__leXf2__) +#define __geXf2__ _FP_NAME(__geXf2__) +#define __unordXf2__ _FP_NAME(__unordXf2__) + +#if defined SINGLE_PRECISION && !defined FP_COMPARE_SF_EMITTED +#define FP_COMPARE_SF_EMITTED +#define _FP_COMPARE_EMIT 1 +#elif defined DOUBLE_PRECISION && !defined FP_COMPARE_DF_EMITTED +#define FP_COMPARE_DF_EMITTED +#define _FP_COMPARE_EMIT 1 +#elif defined QUAD_PRECISION && !defined FP_COMPARE_TF_EMITTED +#define FP_COMPARE_TF_EMITTED +#define _FP_COMPARE_EMIT 1 +#endif + +#ifdef _FP_COMPARE_EMIT +#undef _FP_COMPARE_EMIT + static inline CMP_RESULT __leXf2__(fp_t a, fp_t b) { const srep_t aInt = toRep(a); const srep_t bInt = toRep(b); @@ -78,13 +112,6 @@ static inline CMP_RESULT __leXf2__(fp_t a, fp_t b) { } } -enum { - GE_LESS = -1, - GE_EQUAL = 0, - GE_GREATER = 1, - GE_UNORDERED = -1 // Note: different from LE_UNORDERED -}; - static inline CMP_RESULT __geXf2__(fp_t a, fp_t b) { const srep_t aInt = toRep(a); const srep_t bInt = toRep(b); @@ -117,3 +144,5 @@ static inline CMP_RESULT __unordXf2__(fp_t a, fp_t b) { const rep_t bAbs = toRep(b) & absMask; return aAbs > infRep || bAbs > infRep; } + +#endif // _FP_COMPARE_EMIT diff --git a/lib/impl/fp_div_impl.inc b/lib/impl/fp_div_impl.inc @@ -13,6 +13,22 @@ #include "fp_lib.h" +#define __divXf3__ _FP_NAME(__divXf3__) + +#if defined SINGLE_PRECISION && !defined FP_DIV_SF_EMITTED +#define FP_DIV_SF_EMITTED +#define _FP_DIV_EMIT 1 +#elif defined DOUBLE_PRECISION && !defined FP_DIV_DF_EMITTED +#define FP_DIV_DF_EMITTED +#define _FP_DIV_EMIT 1 +#elif defined QUAD_PRECISION && !defined FP_DIV_TF_EMITTED +#define FP_DIV_TF_EMITTED +#define _FP_DIV_EMIT 1 +#endif + +#ifdef _FP_DIV_EMIT +#undef _FP_DIV_EMIT + // The __divXf3__ function implements Newton-Raphson floating point division. // It uses 3 iterations for float32, 4 for float64 and 5 for float128, // respectively. 
Due to number of significant bits being roughly doubled @@ -417,3 +433,9 @@ static __inline fp_t __divXf3__(fp_t a, fp_t b) { #endif return fromRep(absResult | quotientSign); } + +#undef HW +#undef loMask +#undef RECIPROCAL_PRECISION + +#endif // _FP_DIV_EMIT diff --git a/lib/impl/fp_extend.h b/lib/impl/fp_extend.h @@ -1,5 +1,4 @@ -//===-lib/fp_extend.h - low precision -> high precision conversion -*- C -//-*-===// +//===-lib/fp_extend.h - low precision -> high precision conversion -*- C -*-=// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -7,19 +6,134 @@ // //===----------------------------------------------------------------------===// // -// Set source and destination setting +// Source/destination type setup for the extending FP conversions. Caller +// defines SRC_<X> (SINGLE/DOUBLE/80/HALF) and DST_<Y> (SINGLE/DOUBLE/QUAD) +// before each inclusion. // +// Re-includable. Names that depend on the (src, dst) pair are emitted with +// a `_<src><dst>` suffix exactly once per (TU, pair); bare-name #define +// aliases are set on every inclusion so the body in `fp_extend_impl.inc` +// (and the caller) uses bare names that resolve to the right suffixed +// entity. Use the umbrella `fp_lib_undef.h` between sections in one TU to +// clear the bare-name aliases. //===----------------------------------------------------------------------===// -#ifndef FP_EXTEND_HEADER -#define FP_EXTEND_HEADER - #include "int_lib.h" +// Self-clean any prior inclusion's per-(src, dst) state. May coexist with +// fp_trunc.h / int_to_fp.h which define overlapping bare-name aliases. 
+#undef _FP_EXT_SRC_SUF +#undef _FP_EXT_DST_SUF +#undef SRC_REP_C +#undef DST_REP_C +#undef src_t +#undef src_rep_t +#undef dst_t +#undef dst_rep_t +#undef srcBits +#undef srcSigFracBits +#undef srcExpBits +#undef dstBits +#undef dstSigFracBits +#undef dstExpBits +#undef src_rep_t_clz +#undef src_rep_t_clz_impl +#undef srcToRep +#undef dstFromRep +#undef extract_sign_from_src +#undef extract_exp_from_src +#undef extract_sig_frac_from_src +#undef clz_in_sig_frac +#undef construct_dst_rep + +#if defined SRC_SINGLE +#define _FP_EXT_SRC_SUF sf +#elif defined SRC_DOUBLE +#define _FP_EXT_SRC_SUF df +#elif defined SRC_80 +#define _FP_EXT_SRC_SUF xf +#elif defined SRC_HALF +#define _FP_EXT_SRC_SUF hf +#else +#error Source should be half, single, or double precision! +#endif + +#if defined DST_SINGLE +#define _FP_EXT_DST_SUF sf +#elif defined DST_DOUBLE +#define _FP_EXT_DST_SUF df +#elif defined DST_QUAD +#define _FP_EXT_DST_SUF tf +#else +#error Destination should be single, double, or quad precision! +#endif + +#define _FP_EXT_PASTE4_(a, b, c, d) a##b##c##d +#define _FP_EXT_PASTE4(a, b, c, d) _FP_EXT_PASTE4_(a, b, c, d) +#define _FP_EXT_PAIR(stem) _FP_EXT_PASTE4(stem, _, _FP_EXT_SRC_SUF, _FP_EXT_DST_SUF) + +// ---- Bare-name aliases (re-set every inclusion). 
------------------------ + +#define src_t _FP_EXT_PAIR(src_t) +#define src_rep_t _FP_EXT_PAIR(src_rep_t) +#define dst_t _FP_EXT_PAIR(dst_t) +#define dst_rep_t _FP_EXT_PAIR(dst_rep_t) +#define srcBits _FP_EXT_PAIR(srcBits) +#define srcSigFracBits _FP_EXT_PAIR(srcSigFracBits) +#define srcExpBits _FP_EXT_PAIR(srcExpBits) +#define dstBits _FP_EXT_PAIR(dstBits) +#define dstSigFracBits _FP_EXT_PAIR(dstSigFracBits) +#define dstExpBits _FP_EXT_PAIR(dstExpBits) +#define src_rep_t_clz_impl _FP_EXT_PAIR(src_rep_t_clz_impl) +#define srcToRep _FP_EXT_PAIR(srcToRep) +#define dstFromRep _FP_EXT_PAIR(dstFromRep) +#define extract_sign_from_src _FP_EXT_PAIR(extract_sign_from_src) +#define extract_exp_from_src _FP_EXT_PAIR(extract_exp_from_src) +#define extract_sig_frac_from_src _FP_EXT_PAIR(extract_sig_frac_from_src) +#define clz_in_sig_frac _FP_EXT_PAIR(clz_in_sig_frac) +#define construct_dst_rep _FP_EXT_PAIR(construct_dst_rep) + +// SRC_REP_C / DST_REP_C: textual macros (UINT32_C etc.); same body each +// inclusion within a precision. + +#if defined SRC_SINGLE +#define SRC_REP_C UINT32_C +#elif defined SRC_DOUBLE +#define SRC_REP_C UINT64_C +#elif defined SRC_80 +#define SRC_REP_C (__uint128_t) +#elif defined SRC_HALF +#define SRC_REP_C UINT16_C +#endif + +#if defined DST_SINGLE +#define DST_REP_C UINT32_C +#elif defined DST_DOUBLE +#define DST_REP_C UINT64_C +#elif defined DST_QUAD +#define DST_REP_C (__uint128_t) +#endif + +// ---- One-time emission per (TU, src+dst pair). -------------------------- +// Enumerate the pairs cfree actually uses (sf→df, sf→tf, df→tf). 
+ +#if defined SRC_SINGLE && defined DST_DOUBLE && !defined FP_EXT_SFDF_EMITTED +#define FP_EXT_SFDF_EMITTED +#define _FP_EXT_EMIT 1 +#elif defined SRC_SINGLE && defined DST_QUAD && !defined FP_EXT_SFTF_EMITTED +#define FP_EXT_SFTF_EMITTED +#define _FP_EXT_EMIT 1 +#elif defined SRC_DOUBLE && defined DST_QUAD && !defined FP_EXT_DFTF_EMITTED +#define FP_EXT_DFTF_EMITTED +#define _FP_EXT_EMIT 1 +#endif + +#ifdef _FP_EXT_EMIT +#undef _FP_EXT_EMIT + #if defined SRC_SINGLE typedef float src_t; typedef uint32_t src_rep_t; -#define SRC_REP_C UINT32_C static const int srcBits = sizeof(src_t) * CHAR_BIT; static const int srcSigFracBits = 23; // -1 accounts for the sign bit. @@ -30,7 +144,6 @@ static const int srcExpBits = 8; #elif defined SRC_DOUBLE typedef double src_t; typedef uint64_t src_rep_t; -#define SRC_REP_C UINT64_C static const int srcBits = sizeof(src_t) * CHAR_BIT; static const int srcSigFracBits = 52; // -1 accounts for the sign bit. @@ -52,7 +165,6 @@ static inline int src_rep_t_clz_impl(src_rep_t a) { #elif defined SRC_80 typedef xf_float src_t; typedef __uint128_t src_rep_t; -#define SRC_REP_C (__uint128_t) // sign bit, exponent and significand occupy the lower 80 bits. static const int srcBits = 80; static const int srcSigFracBits = 63; @@ -68,7 +180,6 @@ typedef _Float16 src_t; typedef uint16_t src_t; #endif typedef uint16_t src_rep_t; -#define SRC_REP_C UINT16_C static const int srcBits = sizeof(src_t) * CHAR_BIT; static const int srcSigFracBits = 10; // -1 accounts for the sign bit. @@ -78,17 +189,13 @@ static const int srcExpBits = 5; static inline int src_rep_t_clz_impl(src_rep_t a) { return __builtin_clz(a) - 16; } - #define src_rep_t_clz src_rep_t_clz_impl -#else -#error Source should be half, single, or double precision! 
#endif // end source precision #if defined DST_SINGLE typedef float dst_t; typedef uint32_t dst_rep_t; -#define DST_REP_C UINT32_C static const int dstBits = sizeof(dst_t) * CHAR_BIT; static const int dstSigFracBits = 23; // -1 accounts for the sign bit. @@ -98,7 +205,6 @@ static const int dstExpBits = 8; #elif defined DST_DOUBLE typedef double dst_t; typedef uint64_t dst_rep_t; -#define DST_REP_C UINT64_C static const int dstBits = sizeof(dst_t) * CHAR_BIT; static const int dstSigFracBits = 52; // -1 accounts for the sign bit. @@ -108,15 +214,12 @@ static const int dstExpBits = 11; #elif defined DST_QUAD typedef tf_float dst_t; typedef __uint128_t dst_rep_t; -#define DST_REP_C (__uint128_t) static const int dstBits = sizeof(dst_t) * CHAR_BIT; static const int dstSigFracBits = 112; // -1 accounts for the sign bit. // dstBits - dstSigFracBits - 1 static const int dstExpBits = 15; -#else -#error Destination should be single, double, or quad precision! #endif // end destination precision // End of specialization parameters. @@ -171,4 +274,4 @@ static inline dst_t dstFromRep(dst_rep_t x) { } // End helper routines. Conversion implementation follows. -#endif // FP_EXTEND_HEADER +#endif // _FP_EXT_EMIT diff --git a/lib/impl/fp_extend_impl.inc b/lib/impl/fp_extend_impl.inc @@ -37,6 +37,22 @@ #include "fp_extend.h" +#define __extendXfYf2__ _FP_EXT_PAIR(__extendXfYf2__) + +#if defined SRC_SINGLE && defined DST_DOUBLE && !defined FP_EXT_IMPL_SFDF_EMITTED +#define FP_EXT_IMPL_SFDF_EMITTED +#define _FP_EXT_IMPL_EMIT 1 +#elif defined SRC_SINGLE && defined DST_QUAD && !defined FP_EXT_IMPL_SFTF_EMITTED +#define FP_EXT_IMPL_SFTF_EMITTED +#define _FP_EXT_IMPL_EMIT 1 +#elif defined SRC_DOUBLE && defined DST_QUAD && !defined FP_EXT_IMPL_DFTF_EMITTED +#define FP_EXT_IMPL_DFTF_EMITTED +#define _FP_EXT_IMPL_EMIT 1 +#endif + +#ifdef _FP_EXT_IMPL_EMIT +#undef _FP_EXT_IMPL_EMIT + // The source type may use a usual IEEE-754 interchange format or Intel 80-bit // format. 
In particular, for the source type srcSigFracBits may be not equal to // srcSigBits. The destination type is assumed to be one of IEEE-754 standard @@ -106,3 +122,5 @@ static __inline dst_t __extendXfYf2__(src_t a) { const dst_rep_t result = construct_dst_rep(dstSign, dstExp, dstSigFrac); return dstFromRep(result); } + +#endif // _FP_EXT_IMPL_EMIT diff --git a/lib/impl/fp_fixint_impl.inc b/lib/impl/fp_fixint_impl.inc @@ -13,6 +13,20 @@ #include "fp_lib.h" +#ifndef FP_FIX_SUFFIX +#error "fp_fixint_impl.inc: FP_FIX_SUFFIX must be defined before #include" +#endif + +#ifndef FP_FIX_IMPL_PASTE_ +#define FP_FIX_IMPL_PASTE_(a, b) a##_##b +#define FP_FIX_IMPL_PASTE(a, b) FP_FIX_IMPL_PASTE_(a, b) +#endif + +#ifdef __fixint +#undef __fixint +#endif +#define __fixint FP_FIX_IMPL_PASTE(__fixint, FP_FIX_SUFFIX) + static __inline fixint_t __fixint(fp_t a) { const fixint_t fixint_max = (fixint_t)((~(fixuint_t)0) / 2); const fixint_t fixint_min = -fixint_max - 1; @@ -38,3 +52,9 @@ static __inline fixint_t __fixint(fp_t a) { else return sign * ((fixuint_t)significand << (exponent - significandBits)); } + +// FP_FIX_SUFFIX, fixint_t, fixuint_t intentionally left defined: the +// caller's `return __fixint(a);` line is parsed AFTER this include, and +// the `__fixint` macro re-expands FP_FIX_SUFFIX at that point. The +// caller (per-op .c) is responsible for #undef'ing them at its bottom +// so consolidated builds can stack multiple per-op snippets in one TU. 
diff --git a/lib/impl/fp_fixuint_impl.inc b/lib/impl/fp_fixuint_impl.inc @@ -13,6 +13,20 @@ #include "fp_lib.h" +#ifndef FP_FIX_SUFFIX +#error "fp_fixuint_impl.inc: FP_FIX_SUFFIX must be defined before #include" +#endif + +#ifndef FP_FIX_IMPL_PASTE_ +#define FP_FIX_IMPL_PASTE_(a, b) a##_##b +#define FP_FIX_IMPL_PASTE(a, b) FP_FIX_IMPL_PASTE_(a, b) +#endif + +#ifdef __fixuint +#undef __fixuint +#endif +#define __fixuint FP_FIX_IMPL_PASTE(__fixuint, FP_FIX_SUFFIX) + static __inline fixuint_t __fixuint(fp_t a) { // Break a into sign, exponent, significand parts. const rep_t aRep = toRep(a); @@ -36,3 +50,5 @@ static __inline fixuint_t __fixuint(fp_t a) { else return (fixuint_t)significand << (exponent - significandBits); } + +// FP_FIX_SUFFIX, fixuint_t intentionally left defined; see fp_fixint_impl.inc. diff --git a/lib/impl/fp_mul_impl.inc b/lib/impl/fp_mul_impl.inc @@ -13,6 +13,22 @@ #include "fp_lib.h" +#define __mulXf3__ _FP_NAME(__mulXf3__) + +#if defined SINGLE_PRECISION && !defined FP_MUL_SF_EMITTED +#define FP_MUL_SF_EMITTED +#define _FP_MUL_EMIT 1 +#elif defined DOUBLE_PRECISION && !defined FP_MUL_DF_EMITTED +#define FP_MUL_DF_EMITTED +#define _FP_MUL_EMIT 1 +#elif defined QUAD_PRECISION && !defined FP_MUL_TF_EMITTED +#define FP_MUL_TF_EMITTED +#define _FP_MUL_EMIT 1 +#endif + +#ifdef _FP_MUL_EMIT +#undef _FP_MUL_EMIT + static __inline fp_t __mulXf3__(fp_t a, fp_t b) { const unsigned int aExponent = toRep(a) >> significandBits & maxExponent; const unsigned int bExponent = toRep(b) >> significandBits & maxExponent; @@ -126,3 +142,5 @@ static __inline fp_t __mulXf3__(fp_t a, fp_t b) { productHi += productHi & 1; return fromRep(productHi); } + +#endif // _FP_MUL_EMIT diff --git a/lib/impl/fp_trunc.h b/lib/impl/fp_trunc.h @@ -6,19 +6,131 @@ // //===----------------------------------------------------------------------===// // -// Set source and destination precision setting +// Source/destination precision setup for the truncating FP conversions. 
+// Caller defines SRC_<X> (SINGLE/DOUBLE/QUAD) and DST_<Y> +// (SINGLE/DOUBLE/80/HALF/BFLOAT) before each inclusion. // +// Re-includable. Names that depend on the (src, dst) pair are emitted with +// a `_<src><dst>` suffix exactly once per (TU, pair); bare-name #define +// aliases are set every inclusion. Use the umbrella `fp_lib_undef.h` to +// clear the bare-name aliases between sections in one TU. //===----------------------------------------------------------------------===// -#ifndef FP_TRUNC_HEADER -#define FP_TRUNC_HEADER - #include "int_lib.h" +// Self-clean any prior inclusion's per-(src, dst) state. May coexist with +// fp_extend.h / int_to_fp.h which define overlapping bare-name aliases. +#undef _FP_TRUNC_SRC_SUF +#undef _FP_TRUNC_DST_SUF +#undef SRC_REP_C +#undef DST_REP_C +#undef src_t +#undef src_rep_t +#undef dst_t +#undef dst_rep_t +#undef srcBits +#undef srcSigFracBits +#undef srcExpBits +#undef dstBits +#undef dstSigFracBits +#undef dstExpBits +#undef srcToRep +#undef dstFromRep +#undef extract_sign_from_src +#undef extract_exp_from_src +#undef extract_sig_frac_from_src +#undef construct_dst_rep + +#if defined SRC_SINGLE +#define _FP_TRUNC_SRC_SUF sf +#elif defined SRC_DOUBLE +#define _FP_TRUNC_SRC_SUF df +#elif defined SRC_QUAD +#define _FP_TRUNC_SRC_SUF tf +#else +#error Source should be double precision or quad precision! +#endif + +#if defined DST_SINGLE +#define _FP_TRUNC_DST_SUF sf +#elif defined DST_DOUBLE +#define _FP_TRUNC_DST_SUF df +#elif defined DST_80 +#define _FP_TRUNC_DST_SUF xf +#elif defined DST_HALF +#define _FP_TRUNC_DST_SUF hf +#elif defined DST_BFLOAT +#define _FP_TRUNC_DST_SUF bf +#else +#error Destination should be single precision or double precision! 
+#endif + +#define _FP_TRUNC_PASTE4_(a, b, c, d) a##b##c##d +#define _FP_TRUNC_PASTE4(a, b, c, d) _FP_TRUNC_PASTE4_(a, b, c, d) +#define _FP_TRUNC_PAIR(stem) _FP_TRUNC_PASTE4(stem, _, _FP_TRUNC_SRC_SUF, _FP_TRUNC_DST_SUF) + +// ---- Bare-name aliases (re-set every inclusion). ------------------------ +// fp_extend.h uses identical bare names; the aliases here suffix-rename +// to fp_trunc-specific symbols (different pair tokens), so the two +// headers can coexist in one TU without colliding. + +#define src_t _FP_TRUNC_PAIR(src_t) +#define src_rep_t _FP_TRUNC_PAIR(src_rep_t) +#define dst_t _FP_TRUNC_PAIR(dst_t) +#define dst_rep_t _FP_TRUNC_PAIR(dst_rep_t) +#define srcBits _FP_TRUNC_PAIR(srcBits) +#define srcSigFracBits _FP_TRUNC_PAIR(srcSigFracBits) +#define srcExpBits _FP_TRUNC_PAIR(srcExpBits) +#define dstBits _FP_TRUNC_PAIR(dstBits) +#define dstSigFracBits _FP_TRUNC_PAIR(dstSigFracBits) +#define dstExpBits _FP_TRUNC_PAIR(dstExpBits) +#define srcToRep _FP_TRUNC_PAIR(srcToRep) +#define dstFromRep _FP_TRUNC_PAIR(dstFromRep) +#define extract_sign_from_src _FP_TRUNC_PAIR(extract_sign_from_src) +#define extract_exp_from_src _FP_TRUNC_PAIR(extract_exp_from_src) +#define extract_sig_frac_from_src _FP_TRUNC_PAIR(extract_sig_frac_from_src) +#define construct_dst_rep _FP_TRUNC_PAIR(construct_dst_rep) + +#if defined SRC_SINGLE +#define SRC_REP_C UINT32_C +#elif defined SRC_DOUBLE +#define SRC_REP_C UINT64_C +#elif defined SRC_QUAD +#define SRC_REP_C (__uint128_t) +#endif + +#if defined DST_SINGLE +#define DST_REP_C UINT32_C +#elif defined DST_DOUBLE +#define DST_REP_C UINT64_C +#elif defined DST_80 +#define DST_REP_C (__uint128_t) +#elif defined DST_HALF +#define DST_REP_C UINT16_C +#elif defined DST_BFLOAT +#define DST_REP_C UINT16_C +#endif + +// ---- One-time emission per (TU, src+dst pair). -------------------------- +// Pairs cfree uses: df→sf, tf→df, tf→sf. 
+ +#if defined SRC_DOUBLE && defined DST_SINGLE && !defined FP_TRUNC_DFSF_EMITTED +#define FP_TRUNC_DFSF_EMITTED +#define _FP_TRUNC_EMIT 1 +#elif defined SRC_QUAD && defined DST_DOUBLE && !defined FP_TRUNC_TFDF_EMITTED +#define FP_TRUNC_TFDF_EMITTED +#define _FP_TRUNC_EMIT 1 +#elif defined SRC_QUAD && defined DST_SINGLE && !defined FP_TRUNC_TFSF_EMITTED +#define FP_TRUNC_TFSF_EMITTED +#define _FP_TRUNC_EMIT 1 +#endif + +#ifdef _FP_TRUNC_EMIT +#undef _FP_TRUNC_EMIT + #if defined SRC_SINGLE typedef float src_t; typedef uint32_t src_rep_t; -#define SRC_REP_C UINT32_C static const int srcBits = sizeof(src_t) * CHAR_BIT; static const int srcSigFracBits = 23; // -1 accounts for the sign bit. @@ -28,7 +140,6 @@ static const int srcExpBits = 8; #elif defined SRC_DOUBLE typedef double src_t; typedef uint64_t src_rep_t; -#define SRC_REP_C UINT64_C static const int srcBits = sizeof(src_t) * CHAR_BIT; static const int srcSigFracBits = 52; // -1 accounts for the sign bit. @@ -38,21 +149,17 @@ static const int srcExpBits = 11; #elif defined SRC_QUAD typedef tf_float src_t; typedef __uint128_t src_rep_t; -#define SRC_REP_C (__uint128_t) static const int srcBits = sizeof(src_t) * CHAR_BIT; static const int srcSigFracBits = 112; // -1 accounts for the sign bit. // srcBits - srcSigFracBits - 1 static const int srcExpBits = 15; -#else -#error Source should be double precision or quad precision! #endif // end source precision #if defined DST_DOUBLE typedef double dst_t; typedef uint64_t dst_rep_t; -#define DST_REP_C UINT64_C static const int dstBits = sizeof(dst_t) * CHAR_BIT; static const int dstSigFracBits = 52; // -1 accounts for the sign bit. @@ -62,7 +169,6 @@ static const int dstExpBits = 11; #elif defined DST_80 typedef xf_float dst_t; typedef __uint128_t dst_rep_t; -#define DST_REP_C (__uint128_t) static const int dstBits = 80; static const int dstSigFracBits = 63; // -1 accounts for the sign bit. 
@@ -73,7 +179,6 @@ static const int dstExpBits = 15; #elif defined DST_SINGLE typedef float dst_t; typedef uint32_t dst_rep_t; -#define DST_REP_C UINT32_C static const int dstBits = sizeof(dst_t) * CHAR_BIT; static const int dstSigFracBits = 23; // -1 accounts for the sign bit. @@ -87,7 +192,6 @@ typedef _Float16 dst_t; typedef uint16_t dst_t; #endif typedef uint16_t dst_rep_t; -#define DST_REP_C UINT16_C static const int dstBits = sizeof(dst_t) * CHAR_BIT; static const int dstSigFracBits = 10; // -1 accounts for the sign bit. @@ -97,15 +201,12 @@ static const int dstExpBits = 5; #elif defined DST_BFLOAT typedef __bf16 dst_t; typedef uint16_t dst_rep_t; -#define DST_REP_C UINT16_C static const int dstBits = sizeof(dst_t) * CHAR_BIT; static const int dstSigFracBits = 7; // -1 accounts for the sign bit. // dstBits - dstSigFracBits - 1 static const int dstExpBits = 8; -#else -#error Destination should be single precision or double precision! #endif // end destination precision // TODO: These helper routines should be placed into fp_lib.h @@ -155,4 +256,4 @@ static inline dst_t dstFromRep(dst_rep_t x) { return rep.f; } -#endif // FP_TRUNC_HEADER +#endif // _FP_TRUNC_EMIT diff --git a/lib/impl/fp_trunc_impl.inc b/lib/impl/fp_trunc_impl.inc @@ -38,6 +38,22 @@ #include "fp_trunc.h" +#define __truncXfYf2__ _FP_TRUNC_PAIR(__truncXfYf2__) + +#if defined SRC_DOUBLE && defined DST_SINGLE && !defined FP_TRUNC_IMPL_DFSF_EMITTED +#define FP_TRUNC_IMPL_DFSF_EMITTED +#define _FP_TRUNC_IMPL_EMIT 1 +#elif defined SRC_QUAD && defined DST_DOUBLE && !defined FP_TRUNC_IMPL_TFDF_EMITTED +#define FP_TRUNC_IMPL_TFDF_EMITTED +#define _FP_TRUNC_IMPL_EMIT 1 +#elif defined SRC_QUAD && defined DST_SINGLE && !defined FP_TRUNC_IMPL_TFSF_EMITTED +#define FP_TRUNC_IMPL_TFSF_EMITTED +#define _FP_TRUNC_IMPL_EMIT 1 +#endif + +#ifdef _FP_TRUNC_IMPL_EMIT +#undef _FP_TRUNC_IMPL_EMIT + // The destination type may use a usual IEEE-754 interchange format or Intel // 80-bit format. 
In particular, for the destination type dstSigFracBits may be // not equal to dstSigBits. The source type is assumed to be one of IEEE-754 @@ -153,3 +169,5 @@ static __inline dst_t __truncXfYf2__(src_t a) { return dstFromRep(construct_dst_rep(dstSign, dstExp, dstSigFrac)); } + +#endif // _FP_TRUNC_IMPL_EMIT diff --git a/lib/impl/int_div_impl.inc b/lib/impl/int_div_impl.inc @@ -8,15 +8,46 @@ // // Helpers used by __udivsi3, __umodsi3, __udivdi3, and __umodsi3. // +// Re-includable; safe to use multiple times in one TU. Inputs (caller +// must #define before each #include): +// fixint_t, fixuint_t -- the signed/unsigned integer width +// INT_DIV_SUFFIX -- a unique token; helper names get suffixed +// with `_<INT_DIV_SUFFIX>` so concurrent +// inclusions don't collide +// Optional inputs (gate emission of the signed wrappers): +// COMPUTE_UDIV(a, b) -- expression yielding unsigned quotient +// ASSIGN_UMOD(res, a, b)-- statement assigning unsigned remainder to res +// +// Outputs (always emitted as `static __inline`): +// __udivXi3_<suffix>, __umodXi3_<suffix> +// Plus, conditionally: +// __divXi3_<suffix> iff COMPUTE_UDIV is defined +// __modXi3_<suffix> iff ASSIGN_UMOD is defined +// +// At exit the inc #undef's all of its inputs (including INT_DIV_SUFFIX, +// COMPUTE_UDIV, ASSIGN_UMOD, fixint_t, fixuint_t) so it's clean to +// re-include with new settings. +// //===----------------------------------------------------------------------===// -#define clz(a) (sizeof(a) == sizeof(unsigned long long) ? __builtin_clzll(a) : clzsi(a)) +#ifndef INT_DIV_IMPL_INC_GUARD +#define INT_DIV_IMPL_INC_GUARD +#define INT_DIV_IMPL_CAT_(a, b) a##b +#define INT_DIV_IMPL_CAT(a, b) INT_DIV_IMPL_CAT_(a, b) +#define INT_DIV_IMPL_CLZ(a) \ + (sizeof(a) == sizeof(unsigned long long) ? 
__builtin_clzll(a) : clzsi(a)) +#endif + +#ifndef INT_DIV_SUFFIX +#error "int_div_impl.inc: INT_DIV_SUFFIX must be defined before #include" +#endif // Adapted from Figure 3-40 of The PowerPC Compiler Writer's Guide -static __inline fixuint_t __udivXi3(fixuint_t n, fixuint_t d) { +static __inline fixuint_t +INT_DIV_IMPL_CAT(__udivXi3_, INT_DIV_SUFFIX)(fixuint_t n, fixuint_t d) { const unsigned N = sizeof(fixuint_t) * CHAR_BIT; // d == 0 cases are unspecified. - unsigned sr = (d ? clz(d) : N) - (n ? clz(n) : N); + unsigned sr = (d ? INT_DIV_IMPL_CLZ(d) : N) - (n ? INT_DIV_IMPL_CLZ(n) : N); // 0 <= sr <= N - 1 or sr is very large. if (sr > N - 1) // n < d return 0; @@ -42,10 +73,11 @@ static __inline fixuint_t __udivXi3(fixuint_t n, fixuint_t d) { } // Mostly identical to __udivXi3 but the return values are different. -static __inline fixuint_t __umodXi3(fixuint_t n, fixuint_t d) { +static __inline fixuint_t +INT_DIV_IMPL_CAT(__umodXi3_, INT_DIV_SUFFIX)(fixuint_t n, fixuint_t d) { const unsigned N = sizeof(fixuint_t) * CHAR_BIT; // d == 0 cases are unspecified. - unsigned sr = (d ? clz(d) : N) - (n ? clz(n) : N); + unsigned sr = (d ? INT_DIV_IMPL_CLZ(d) : N) - (n ? INT_DIV_IMPL_CLZ(n) : N); // 0 <= sr <= N - 1 or sr is very large. if (sr > N - 1) // n < d return n; @@ -70,7 +102,8 @@ static __inline fixuint_t __umodXi3(fixuint_t n, fixuint_t d) { } #ifdef COMPUTE_UDIV -static __inline fixint_t __divXi3(fixint_t a, fixint_t b) { +static __inline fixint_t +INT_DIV_IMPL_CAT(__divXi3_, INT_DIV_SUFFIX)(fixint_t a, fixint_t b) { const int N = (int)(sizeof(fixint_t) * CHAR_BIT) - 1; fixint_t s_a = a >> N; // s_a = a < 0 ? -1 : 0 fixint_t s_b = b >> N; // s_b = b < 0 ? 
-1 : 0 @@ -82,7 +115,8 @@ static __inline fixint_t __divXi3(fixint_t a, fixint_t b) { #endif // COMPUTE_UDIV #ifdef ASSIGN_UMOD -static __inline fixint_t __modXi3(fixint_t a, fixint_t b) { +static __inline fixint_t +INT_DIV_IMPL_CAT(__modXi3_, INT_DIV_SUFFIX)(fixint_t a, fixint_t b) { const int N = (int)(sizeof(fixint_t) * CHAR_BIT) - 1; fixint_t s = b >> N; // s = b < 0 ? -1 : 0 fixuint_t b_u = (fixuint_t)(b ^ s) + (-s); // negate if s == -1 @@ -93,3 +127,9 @@ static __inline fixint_t __modXi3(fixint_t a, fixint_t b) { return (res ^ s) + (-s); // negate if s == -1 } #endif // ASSIGN_UMOD + +#undef INT_DIV_SUFFIX +#undef COMPUTE_UDIV +#undef ASSIGN_UMOD +#undef fixint_t +#undef fixuint_t diff --git a/lib/impl/int_to_fp.h b/lib/impl/int_to_fp.h @@ -6,16 +6,115 @@ // //===----------------------------------------------------------------------===// // -// Set source and destination defines in order to use a correctly -// parameterised floatXiYf implementation. +// Source/destination type setup for int → fp conversions. Caller defines +// SRC_<I64/U64/I128/U128> and DST_<SINGLE/DOUBLE/QUAD> before each +// inclusion. // +// Re-includable. Names depending on the (src, dst) pair are emitted with +// a `_<src><dst>` suffix exactly once per (TU, pair); bare-name #define +// aliases are set every inclusion. Use the umbrella `fp_lib_undef.h` to +// clear the bare-name aliases between sections in one TU. //===----------------------------------------------------------------------===// -#ifndef INT_TO_FP_H -#define INT_TO_FP_H - #include "int_lib.h" +// Self-clean any prior inclusion's per-(src, dst) state. May coexist with +// fp_extend.h / fp_trunc.h which define overlapping bare-name aliases. 
+#undef _INT_TO_FP_SRC_SUF +#undef _INT_TO_FP_DST_SUF +#undef SRC_REP_C +#undef DST_REP_C +#undef src_t +#undef usrc_t +#undef dst_t +#undef dst_rep_t +#undef clzSrcT +#undef dstFromRep +#undef dstSigBits + +#if defined SRC_I64 +#define _INT_TO_FP_SRC_SUF i64 +#elif defined SRC_U64 +#define _INT_TO_FP_SRC_SUF u64 +#elif defined SRC_I128 +#define _INT_TO_FP_SRC_SUF i128 +#elif defined SRC_U128 +#define _INT_TO_FP_SRC_SUF u128 +#else +#error Source should be a handled integer type. +#endif + +#if defined DST_SINGLE +#define _INT_TO_FP_DST_SUF sf +#elif defined DST_DOUBLE +#define _INT_TO_FP_DST_SUF df +#elif defined DST_QUAD +#define _INT_TO_FP_DST_SUF tf +#else +#error Destination should be a handled floating point type +#endif + +#define _INT_TO_FP_PASTE4_(a, b, c, d) a##b##c##d +#define _INT_TO_FP_PASTE4(a, b, c, d) _INT_TO_FP_PASTE4_(a, b, c, d) +#define _INT_TO_FP_PAIR(stem) _INT_TO_FP_PASTE4(stem, _, _INT_TO_FP_SRC_SUF, _INT_TO_FP_DST_SUF) + +// ---- Bare-name aliases (re-set every inclusion). ------------------------ + +#define src_t _INT_TO_FP_PAIR(src_t) +#define usrc_t _INT_TO_FP_PAIR(usrc_t) +#define dst_t _INT_TO_FP_PAIR(dst_t) +#define dst_rep_t _INT_TO_FP_PAIR(dst_rep_t) +#define clzSrcT _INT_TO_FP_PAIR(clzSrcT) +#define dstFromRep _INT_TO_FP_PAIR(dstFromRep) +#define dstSigBits _INT_TO_FP_PAIR(dstSigBits) + +// DST_REP_C: simple textual macro per dst. +#if defined DST_SINGLE +#define DST_REP_C UINT32_C +#elif defined DST_DOUBLE +#define DST_REP_C UINT64_C +#elif defined DST_QUAD +#define DST_REP_C (__uint128_t) +#endif + +// ---- One-time emission per (TU, src+dst pair). -------------------------- +// Pairs cfree uses: (i64,u64) × (sf,df) and (i128,u128) × (sf,df,tf). 
+ +#if defined SRC_I64 && defined DST_SINGLE && !defined INT_TO_FP_I64SF_EMITTED +#define INT_TO_FP_I64SF_EMITTED +#define _INT_TO_FP_EMIT 1 +#elif defined SRC_I64 && defined DST_DOUBLE && !defined INT_TO_FP_I64DF_EMITTED +#define INT_TO_FP_I64DF_EMITTED +#define _INT_TO_FP_EMIT 1 +#elif defined SRC_U64 && defined DST_SINGLE && !defined INT_TO_FP_U64SF_EMITTED +#define INT_TO_FP_U64SF_EMITTED +#define _INT_TO_FP_EMIT 1 +#elif defined SRC_U64 && defined DST_DOUBLE && !defined INT_TO_FP_U64DF_EMITTED +#define INT_TO_FP_U64DF_EMITTED +#define _INT_TO_FP_EMIT 1 +#elif defined SRC_I128 && defined DST_SINGLE && !defined INT_TO_FP_I128SF_EMITTED +#define INT_TO_FP_I128SF_EMITTED +#define _INT_TO_FP_EMIT 1 +#elif defined SRC_I128 && defined DST_DOUBLE && !defined INT_TO_FP_I128DF_EMITTED +#define INT_TO_FP_I128DF_EMITTED +#define _INT_TO_FP_EMIT 1 +#elif defined SRC_I128 && defined DST_QUAD && !defined INT_TO_FP_I128TF_EMITTED +#define INT_TO_FP_I128TF_EMITTED +#define _INT_TO_FP_EMIT 1 +#elif defined SRC_U128 && defined DST_SINGLE && !defined INT_TO_FP_U128SF_EMITTED +#define INT_TO_FP_U128SF_EMITTED +#define _INT_TO_FP_EMIT 1 +#elif defined SRC_U128 && defined DST_DOUBLE && !defined INT_TO_FP_U128DF_EMITTED +#define INT_TO_FP_U128DF_EMITTED +#define _INT_TO_FP_EMIT 1 +#elif defined SRC_U128 && defined DST_QUAD && !defined INT_TO_FP_U128TF_EMITTED +#define INT_TO_FP_U128TF_EMITTED +#define _INT_TO_FP_EMIT 1 +#endif + +#ifdef _INT_TO_FP_EMIT +#undef _INT_TO_FP_EMIT + #if defined SRC_I64 typedef int64_t src_t; typedef uint64_t usrc_t; @@ -36,14 +135,11 @@ typedef __uint128_t src_t; typedef __uint128_t usrc_t; static __inline int clzSrcT(usrc_t x) { return __clzti2(x); } -#else -#error Source should be a handled integer type. 
#endif #if defined DST_SINGLE typedef float dst_t; typedef uint32_t dst_rep_t; -#define DST_REP_C UINT32_C enum { dstSigBits = 23, @@ -52,7 +148,6 @@ enum { #elif defined DST_DOUBLE typedef double dst_t; typedef uint64_t dst_rep_t; -#define DST_REP_C UINT64_C enum { dstSigBits = 52, @@ -61,14 +156,11 @@ enum { #elif defined DST_QUAD typedef tf_float dst_t; typedef __uint128_t dst_rep_t; -#define DST_REP_C (__uint128_t) enum { dstSigBits = 112, }; -#else -#error Destination should be a handled floating point type #endif static __inline dst_t dstFromRep(dst_rep_t x) { @@ -79,4 +171,4 @@ static __inline dst_t dstFromRep(dst_rep_t x) { return rep.f; } -#endif // INT_TO_FP_H +#endif // _INT_TO_FP_EMIT diff --git a/lib/impl/int_to_fp_impl.inc b/lib/impl/int_to_fp_impl.inc @@ -14,6 +14,43 @@ #include "int_to_fp.h" +#define __floatXiYf__ _INT_TO_FP_PAIR(__floatXiYf__) + +#if defined SRC_I64 && defined DST_SINGLE && !defined INT_TO_FP_IMPL_I64SF_EMITTED +#define INT_TO_FP_IMPL_I64SF_EMITTED +#define _INT_TO_FP_IMPL_EMIT 1 +#elif defined SRC_I64 && defined DST_DOUBLE && !defined INT_TO_FP_IMPL_I64DF_EMITTED +#define INT_TO_FP_IMPL_I64DF_EMITTED +#define _INT_TO_FP_IMPL_EMIT 1 +#elif defined SRC_U64 && defined DST_SINGLE && !defined INT_TO_FP_IMPL_U64SF_EMITTED +#define INT_TO_FP_IMPL_U64SF_EMITTED +#define _INT_TO_FP_IMPL_EMIT 1 +#elif defined SRC_U64 && defined DST_DOUBLE && !defined INT_TO_FP_IMPL_U64DF_EMITTED +#define INT_TO_FP_IMPL_U64DF_EMITTED +#define _INT_TO_FP_IMPL_EMIT 1 +#elif defined SRC_I128 && defined DST_SINGLE && !defined INT_TO_FP_IMPL_I128SF_EMITTED +#define INT_TO_FP_IMPL_I128SF_EMITTED +#define _INT_TO_FP_IMPL_EMIT 1 +#elif defined SRC_I128 && defined DST_DOUBLE && !defined INT_TO_FP_IMPL_I128DF_EMITTED +#define INT_TO_FP_IMPL_I128DF_EMITTED +#define _INT_TO_FP_IMPL_EMIT 1 +#elif defined SRC_I128 && defined DST_QUAD && !defined INT_TO_FP_IMPL_I128TF_EMITTED +#define INT_TO_FP_IMPL_I128TF_EMITTED +#define _INT_TO_FP_IMPL_EMIT 1 +#elif defined SRC_U128 && 
defined DST_SINGLE && !defined INT_TO_FP_IMPL_U128SF_EMITTED +#define INT_TO_FP_IMPL_U128SF_EMITTED +#define _INT_TO_FP_IMPL_EMIT 1 +#elif defined SRC_U128 && defined DST_DOUBLE && !defined INT_TO_FP_IMPL_U128DF_EMITTED +#define INT_TO_FP_IMPL_U128DF_EMITTED +#define _INT_TO_FP_IMPL_EMIT 1 +#elif defined SRC_U128 && defined DST_QUAD && !defined INT_TO_FP_IMPL_U128TF_EMITTED +#define INT_TO_FP_IMPL_U128TF_EMITTED +#define _INT_TO_FP_IMPL_EMIT 1 +#endif + +#ifdef _INT_TO_FP_IMPL_EMIT +#undef _INT_TO_FP_IMPL_EMIT + static __inline dst_t __floatXiYf__(src_t a) { if (a == 0) return 0.0; @@ -70,3 +107,5 @@ static __inline dst_t __floatXiYf__(src_t a) { ((dst_rep_t)(a) & dstSignificandMask); return dstFromRep(result); } + +#endif // _INT_TO_FP_IMPL_EMIT diff --git a/lib/include/common/fp_lib.h b/lib/include/common/fp_lib.h @@ -11,11 +11,19 @@ // // Selected by the includer via #define SINGLE_PRECISION | DOUBLE_PRECISION // | QUAD_PRECISION before #include "fp_lib.h". +// +// Re-includable. On each inclusion, fp_lib.h: +// 1. emits per-precision typedefs and static inlines exactly once per +// (TU, precision), with names suffix-renamed (e.g. rep_t_sf), +// 2. sets bare-name #define aliases (rep_t → rep_t_sf, ...) so caller +// code using bare names resolves to the right suffixed entity. +// +// To switch precision in the same TU, #include "fp_lib_undef.h" between +// the two #include "fp_lib.h" calls; that clears the bare aliases and +// the SINGLE/DOUBLE/QUAD_PRECISION marker so the next inclusion can +// install a fresh set. 
//===----------------------------------------------------------------------===// -#ifndef FP_LIB_HEADER -#define FP_LIB_HEADER - #include "int_lib.h" #include "int_math.h" #include "int_types.h" @@ -24,15 +32,93 @@ #include <stdint.h> #if defined SINGLE_PRECISION +#define FP_LIB_SUFFIX sf +#elif defined DOUBLE_PRECISION +#define FP_LIB_SUFFIX df +#elif defined QUAD_PRECISION +#define FP_LIB_SUFFIX tf +#else +#error SINGLE_PRECISION, DOUBLE_PRECISION or QUAD_PRECISION must be defined. +#endif + +#define _FP_PASTE_(a, b) a##_##b +#define _FP_PASTE(a, b) _FP_PASTE_(a, b) +#define _FP_NAME(stem) _FP_PASTE(stem, FP_LIB_SUFFIX) + +// ---- Bare-name aliases (re-set every inclusion). ------------------------ +// Map the bare names callers use onto the suffix-renamed implementations +// emitted in the gated section below. + +#define half_rep_t _FP_NAME(half_rep_t) +#define rep_t _FP_NAME(rep_t) +#define srep_t _FP_NAME(srep_t) +#define fp_t _FP_NAME(fp_t) +#if defined SINGLE_PRECISION +#define twice_rep_t _FP_NAME(twice_rep_t) +#endif + +#define rep_clz _FP_NAME(rep_clz) +#define wideMultiply _FP_NAME(wideMultiply) +#define toRep _FP_NAME(toRep) +#define fromRep _FP_NAME(fromRep) +#define normalize _FP_NAME(normalize) +#define wideLeftShift _FP_NAME(wideLeftShift) +#define wideRightShiftWithSticky _FP_NAME(wideRightShiftWithSticky) +#define __compiler_rt_logbX _FP_NAME(__compiler_rt_logbX) +#define __compiler_rt_scalbnX _FP_NAME(__compiler_rt_scalbnX) +#define __compiler_rt_fmaxX _FP_NAME(__compiler_rt_fmaxX) + +// ---- Per-precision values (bare macros; re-#define'd every inclusion). 
-- + +#if defined SINGLE_PRECISION + +#define HALF_REP_C UINT16_C +#define REP_C UINT32_C +#define significandBits 23 + +#elif defined DOUBLE_PRECISION + +#define HALF_REP_C UINT32_C +#define REP_C UINT64_C +#define significandBits 52 + +#elif defined QUAD_PRECISION + +#define HALF_REP_C UINT64_C +#define REP_C (__uint128_t) +#define significandBits 112 +#define TF_MANT_DIG (significandBits + 1) + +#endif + +#define typeWidth (sizeof(rep_t) * CHAR_BIT) + +#define exponentBits (typeWidth - significandBits - 1) +#define maxExponent ((1 << exponentBits) - 1) +#define exponentBias (maxExponent >> 1) + +#define implicitBit (REP_C(1) << significandBits) +#define significandMask (implicitBit - 1U) +#define signBit (REP_C(1) << (significandBits + exponentBits)) +#define absMask (signBit - 1U) +#define exponentMask (absMask ^ significandMask) +#define oneRep ((rep_t)exponentBias << significandBits) +#define infRep exponentMask +#define quietBit (implicitBit >> 1) +#define qnanRep (exponentMask | quietBit) + +// ---- One-time emission per (TU, precision). ----------------------------- +// Typedefs and static inlines, written in bare-name form so the aliases +// above suffix-rename them to a unique identifier per precision. 
+ +#if defined SINGLE_PRECISION && !defined FP_LIB_SF_EMITTED +#define FP_LIB_SF_EMITTED typedef uint16_t half_rep_t; typedef uint32_t rep_t; typedef uint64_t twice_rep_t; typedef int32_t srep_t; typedef float fp_t; -#define HALF_REP_C UINT16_C -#define REP_C UINT32_C -#define significandBits 23 static __inline int rep_clz(rep_t a) { return clzsi(a); } @@ -43,15 +129,13 @@ static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) { } COMPILER_RT_ABI fp_t __addsf3(fp_t a, fp_t b); -#elif defined DOUBLE_PRECISION +#elif defined DOUBLE_PRECISION && !defined FP_LIB_DF_EMITTED +#define FP_LIB_DF_EMITTED typedef uint32_t half_rep_t; typedef uint64_t rep_t; typedef int64_t srep_t; typedef double fp_t; -#define HALF_REP_C UINT32_C -#define REP_C UINT64_C -#define significandBits 52 static __inline int rep_clz(rep_t a) { return __builtin_clzll(a); } @@ -73,17 +157,14 @@ static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) { COMPILER_RT_ABI fp_t __adddf3(fp_t a, fp_t b); -#elif defined QUAD_PRECISION +#elif defined QUAD_PRECISION && !defined FP_LIB_TF_EMITTED +#define FP_LIB_TF_EMITTED // Requires tf_supplement.h to be pre-included so CRT_HAS_TF_MODE and // CRT_HAS_IEEE_TF are defined and tf_float is typedef'd. typedef uint64_t half_rep_t; typedef __uint128_t rep_t; typedef __int128_t srep_t; typedef tf_float fp_t; -#define HALF_REP_C UINT64_C -#define REP_C (__uint128_t) -#define significandBits 112 -#define TF_MANT_DIG (significandBits + 1) static __inline int rep_clz(rep_t a) { const union { @@ -147,11 +228,26 @@ static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) { #undef Word_HiMask #undef Word_LoMask #undef Word_FullMask -#else -#error SINGLE_PRECISION, DOUBLE_PRECISION or QUAD_PRECISION must be defined. + #endif -#define typeWidth (sizeof(rep_t) * CHAR_BIT) +// ---- One-time emission per (TU, precision): shared static inlines. 
------ +// These bodies use the bare-name value macros above; the aliases at the +// top of the file ensure the entity names are suffix-renamed. + +#if defined SINGLE_PRECISION && !defined FP_LIB_SF_COMMON_EMITTED +#define FP_LIB_SF_COMMON_EMITTED +#define _FP_LIB_EMIT_COMMON 1 +#elif defined DOUBLE_PRECISION && !defined FP_LIB_DF_COMMON_EMITTED +#define FP_LIB_DF_COMMON_EMITTED +#define _FP_LIB_EMIT_COMMON 1 +#elif defined QUAD_PRECISION && !defined FP_LIB_TF_COMMON_EMITTED +#define FP_LIB_TF_COMMON_EMITTED +#define _FP_LIB_EMIT_COMMON 1 +#endif + +#ifdef _FP_LIB_EMIT_COMMON +#undef _FP_LIB_EMIT_COMMON static __inline rep_t toRep(fp_t x) { const union { fp_t f; rep_t i; } rep = {.f = x}; @@ -163,20 +259,6 @@ static __inline fp_t fromRep(rep_t x) { return rep.f; } -#define exponentBits (typeWidth - significandBits - 1) -#define maxExponent ((1 << exponentBits) - 1) -#define exponentBias (maxExponent >> 1) - -#define implicitBit (REP_C(1) << significandBits) -#define significandMask (implicitBit - 1U) -#define signBit (REP_C(1) << (significandBits + exponentBits)) -#define absMask (signBit - 1U) -#define exponentMask (absMask ^ significandMask) -#define oneRep ((rep_t)exponentBias << significandBits) -#define infRep exponentMask -#define quietBit (implicitBit >> 1) -#define qnanRep (exponentMask | quietBit) - static __inline int normalize(rep_t *significand) { const int shift = rep_clz(*significand) - rep_clz(implicitBit); *significand <<= shift; @@ -271,11 +353,16 @@ static __inline fp_t __compiler_rt_fmax(fp_t x, fp_t y) { return __compiler_ static __inline tf_float __compiler_rt_logbtf(tf_float x) { return __compiler_rt_logbX(x); } static __inline tf_float __compiler_rt_scalbntf(tf_float x, int y){ return __compiler_rt_scalbnX(x, y); } static __inline tf_float __compiler_rt_fmaxtf(tf_float x, tf_float y){ return __compiler_rt_fmaxX(x, y); } +#endif + +#endif // _FP_LIB_EMIT_COMMON + +// Long-double aliases for QUAD targets. 
Idempotent (same text every +// inclusion), so set outside the one-time emission gate. +#if defined QUAD_PRECISION #define __compiler_rt_logbl __compiler_rt_logbtf #define __compiler_rt_scalbnl __compiler_rt_scalbntf #define __compiler_rt_fmaxl __compiler_rt_fmaxtf #define crt_fabstf crt_fabsf128 #define crt_copysigntf crt_copysignf128 #endif - -#endif // FP_LIB_HEADER diff --git a/lib/include/common/fp_lib_undef.h b/lib/include/common/fp_lib_undef.h @@ -0,0 +1,68 @@ +//===-- fp_lib_undef.h - Reset bare-name aliases set by fp_lib.h ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Use between two #include "fp_lib.h" calls in one TU when switching +// precision (e.g. SINGLE → DOUBLE in a consolidated soft-float source). +// Clears the bare-name #define aliases (rep_t, fp_t, signBit, ...) and +// the SINGLE/DOUBLE/QUAD_PRECISION marker so the next fp_lib.h +// inclusion can set up a fresh set. Leaves the suffix-renamed +// definitions (rep_t_sf, etc.) intact for any code that still +// references them post-undef. +//===----------------------------------------------------------------------===// + +// Bare-name aliases for typedefs and static inlines. +#undef half_rep_t +#undef rep_t +#undef srep_t +#undef fp_t +#undef twice_rep_t +#undef rep_clz +#undef wideMultiply +#undef toRep +#undef fromRep +#undef normalize +#undef wideLeftShift +#undef wideRightShiftWithSticky +#undef __compiler_rt_logbX +#undef __compiler_rt_scalbnX +#undef __compiler_rt_fmaxX + +// Per-precision value macros. +#undef HALF_REP_C +#undef REP_C +#undef significandBits +#undef TF_MANT_DIG + +// Width-derived value macros. 
+#undef typeWidth +#undef exponentBits +#undef maxExponent +#undef exponentBias +#undef implicitBit +#undef significandMask +#undef signBit +#undef absMask +#undef exponentMask +#undef oneRep +#undef infRep +#undef quietBit +#undef qnanRep + +// QUAD-only long-double aliases. +#undef __compiler_rt_logbl +#undef __compiler_rt_scalbnl +#undef __compiler_rt_fmaxl +#undef crt_fabstf +#undef crt_copysigntf + +// Precision selector and internal helpers. +#undef SINGLE_PRECISION +#undef DOUBLE_PRECISION +#undef QUAD_PRECISION +#undef FP_LIB_SUFFIX +#undef _FP_PASTE_ +#undef _FP_PASTE +#undef _FP_NAME diff --git a/lib/int/divdi3.c b/lib/int/divdi3.c @@ -16,7 +16,10 @@ #define fixint_t di_int #define fixuint_t du_int +#define INT_DIV_SUFFIX divdi3 #define COMPUTE_UDIV(a, b) __udivmoddi4((a), (b), (du_int *)0) #include "int_div_impl.inc" -COMPILER_RT_ABI di_int __divdi3(di_int a, di_int b) { return __divXi3(a, b); } +COMPILER_RT_ABI di_int __divdi3(di_int a, di_int b) { + return __divXi3_divdi3(a, b); +} diff --git a/lib/int/moddi3.c b/lib/int/moddi3.c @@ -16,7 +16,10 @@ #define fixint_t di_int #define fixuint_t du_int +#define INT_DIV_SUFFIX moddi3 #define ASSIGN_UMOD(res, a, b) __udivmoddi4((a), (b), &(res)) #include "int_div_impl.inc" -COMPILER_RT_ABI di_int __moddi3(di_int a, di_int b) { return __modXi3(a, b); } +COMPILER_RT_ABI di_int __moddi3(di_int a, di_int b) { + return __modXi3_moddi3(a, b); +} diff --git a/lib/int/udivdi3.c b/lib/int/udivdi3.c @@ -12,12 +12,13 @@ #include "int_lib.h" -typedef du_int fixuint_t; -typedef di_int fixint_t; -#include "int_div_impl.inc" - // Returns: a / b +#define fixint_t di_int +#define fixuint_t du_int +#define INT_DIV_SUFFIX udivdi3 +#include "int_div_impl.inc" + COMPILER_RT_ABI du_int __udivdi3(du_int a, du_int b) { - return __udivXi3(a, b); + return __udivXi3_udivdi3(a, b); } diff --git a/lib/int/umoddi3.c b/lib/int/umoddi3.c @@ -12,12 +12,13 @@ #include "int_lib.h" -typedef du_int fixuint_t; -typedef di_int fixint_t; -#include 
"int_div_impl.inc" - // Returns: a % b +#define fixint_t di_int +#define fixuint_t du_int +#define INT_DIV_SUFFIX umoddi3 +#include "int_div_impl.inc" + COMPILER_RT_ABI du_int __umoddi3(du_int a, du_int b) { - return __umodXi3(a, b); + return __umodXi3_umoddi3(a, b); } diff --git a/lib/int64/divti3.c b/lib/int64/divti3.c @@ -17,8 +17,11 @@ #define fixint_t ti_int #define fixuint_t tu_int +#define INT_DIV_SUFFIX divti3 #define COMPUTE_UDIV(a, b) __udivmodti4((a), (b), (tu_int *)0) #include "int_div_impl.inc" -COMPILER_RT_ABI ti_int __divti3(ti_int a, ti_int b) { return __divXi3(a, b); } +COMPILER_RT_ABI ti_int __divti3(ti_int a, ti_int b) { + return __divXi3_divti3(a, b); +} diff --git a/lib/int64/modti3.c b/lib/int64/modti3.c @@ -17,8 +17,11 @@ #define fixint_t ti_int #define fixuint_t tu_int +#define INT_DIV_SUFFIX modti3 #define ASSIGN_UMOD(res, a, b) __udivmodti4((a), (b), &(res)) #include "int_div_impl.inc" -COMPILER_RT_ABI ti_int __modti3(ti_int a, ti_int b) { return __modXi3(a, b); } +COMPILER_RT_ABI ti_int __modti3(ti_int a, ti_int b) { + return __modXi3_modti3(a, b); +}