kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

fp_lib.h (12885B)


      1 //===-- fp_lib.h - Floating-point utilities ------------------------------===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 // kit-tailored: little-endian, GCC/clang-compatible builtins, IEEE 754
      8 // binary32/binary64. binary128 (QUAD_PRECISION) requires tf_supplement.h to
      9 // have been pre-included so that CRT_HAS_TF_MODE / CRT_HAS_IEEE_TF and
     10 // tf_float are defined.
     11 //
     12 // Selected by the includer via #define SINGLE_PRECISION | DOUBLE_PRECISION
     13 // | QUAD_PRECISION before #include "fp_lib.h".
     14 //
     15 // Re-includable. On each inclusion, fp_lib.h:
     16 //   1. emits per-precision typedefs and static inlines exactly once per
     17 //      (TU, precision), with names suffix-renamed (e.g. rep_t_sf),
     18 //   2. sets bare-name #define aliases (rep_t → rep_t_sf, ...) so caller
     19 //      code using bare names resolves to the right suffixed entity.
     20 //
     21 // To switch precision in the same TU, #include "fp_lib_undef.h" between
     22 // the two #include "fp_lib.h" calls; that clears the bare aliases and
     23 // the SINGLE/DOUBLE/QUAD_PRECISION marker so the next inclusion can
     24 // install a fresh set.
     25 //===----------------------------------------------------------------------===//
     26 
     27 #include <limits.h>
     28 #include <stdbool.h>
     29 #include <stdint.h>
     30 
     31 #include "int_lib.h"
     32 #include "int_math.h"
     33 
     34 #if defined SINGLE_PRECISION
     35 #define FP_LIB_SUFFIX sf
     36 #elif defined DOUBLE_PRECISION
     37 #define FP_LIB_SUFFIX df
     38 #elif defined QUAD_PRECISION
     39 #define FP_LIB_SUFFIX tf
     40 #else
     41 #error SINGLE_PRECISION, DOUBLE_PRECISION or QUAD_PRECISION must be defined.
     42 #endif
     43 
     44 #define _FP_PASTE_(a, b) a##_##b
     45 #define _FP_PASTE(a, b) _FP_PASTE_(a, b)
     46 #define _FP_NAME(stem) _FP_PASTE(stem, FP_LIB_SUFFIX)
     47 
     48 // ---- Bare-name aliases (re-set every inclusion). ------------------------
     49 // Map the bare names callers use onto the suffix-renamed implementations
     50 // emitted in the gated section below.
     51 
     52 #define half_rep_t _FP_NAME(half_rep_t)
     53 #define rep_t _FP_NAME(rep_t)
     54 #define srep_t _FP_NAME(srep_t)
     55 #define fp_t _FP_NAME(fp_t)
     56 #if defined SINGLE_PRECISION
     57 #define twice_rep_t _FP_NAME(twice_rep_t)
     58 #endif
     59 
     60 #define rep_clz _FP_NAME(rep_clz)
     61 #define wideMultiply _FP_NAME(wideMultiply)
     62 #define toRep _FP_NAME(toRep)
     63 #define fromRep _FP_NAME(fromRep)
     64 #define normalize _FP_NAME(normalize)
     65 #define wideLeftShift _FP_NAME(wideLeftShift)
     66 #define wideRightShiftWithSticky _FP_NAME(wideRightShiftWithSticky)
     67 #define __compiler_rt_logbX _FP_NAME(__compiler_rt_logbX)
     68 #define __compiler_rt_scalbnX _FP_NAME(__compiler_rt_scalbnX)
     69 #define __compiler_rt_fmaxX _FP_NAME(__compiler_rt_fmaxX)
     70 
     71 // ---- Per-precision values (bare macros; re-#define'd every inclusion). --
     72 
     73 #if defined SINGLE_PRECISION
     74 
     75 #define HALF_REP_C UINT16_C
     76 #define REP_C UINT32_C
     77 #define significandBits 23
     78 
     79 #elif defined DOUBLE_PRECISION
     80 
     81 #define HALF_REP_C UINT32_C
     82 #define REP_C UINT64_C
     83 #define significandBits 52
     84 
     85 #elif defined QUAD_PRECISION
     86 
     87 #define HALF_REP_C UINT64_C
     88 #define REP_C (__uint128_t)
     89 #define significandBits 112
     90 #define TF_MANT_DIG (significandBits + 1)
     91 
     92 #endif
     93 
     94 #define typeWidth (sizeof(rep_t) * CHAR_BIT)
     95 
     96 #define exponentBits (typeWidth - significandBits - 1)
     97 #define maxExponent ((1 << exponentBits) - 1)
     98 #define exponentBias (maxExponent >> 1)
     99 
    100 #define implicitBit (REP_C(1) << significandBits)
    101 #define significandMask (implicitBit - 1U)
    102 #define signBit (REP_C(1) << (significandBits + exponentBits))
    103 #define absMask (signBit - 1U)
    104 #define exponentMask (absMask ^ significandMask)
    105 #define oneRep ((rep_t)exponentBias << significandBits)
    106 #define infRep exponentMask
    107 #define quietBit (implicitBit >> 1)
    108 #define qnanRep (exponentMask | quietBit)
    109 
    110 // ---- One-time emission per (TU, precision). -----------------------------
    111 // Typedefs and static inlines, written in bare-name form so the aliases
    112 // above suffix-rename them to a unique identifier per precision.
    113 
    114 #if defined SINGLE_PRECISION && !defined FP_LIB_SF_EMITTED
    115 #define FP_LIB_SF_EMITTED
    116 
    117 typedef uint16_t half_rep_t;
    118 typedef uint32_t rep_t;
    119 typedef uint64_t twice_rep_t;
    120 typedef int32_t srep_t;
    121 typedef float fp_t;
    122 
    123 static inline int rep_clz(rep_t a) { return clzsi(a); }
    124 
    125 static inline void wideMultiply(rep_t a, rep_t b, rep_t* hi, rep_t* lo) {
    126   const uint64_t product = (uint64_t)a * b;
    127   *hi = product >> 32;
    128   *lo = product;
    129 }
    130 COMPILER_RT_ABI fp_t __addsf3(fp_t a, fp_t b);
    131 
    132 #elif defined DOUBLE_PRECISION && !defined FP_LIB_DF_EMITTED
    133 #define FP_LIB_DF_EMITTED
    134 
    135 typedef uint32_t half_rep_t;
    136 typedef uint64_t rep_t;
    137 typedef int64_t srep_t;
    138 typedef double fp_t;
    139 
    140 static inline int rep_clz(rep_t a) { return __builtin_clzll(a); }
    141 
    142 #define loWord(a) (a & 0xffffffffU)
    143 #define hiWord(a) (a >> 32)
    144 
    145 static inline void wideMultiply(rep_t a, rep_t b, rep_t* hi, rep_t* lo) {
    146   const uint64_t plolo = loWord(a) * loWord(b);
    147   const uint64_t plohi = loWord(a) * hiWord(b);
    148   const uint64_t philo = hiWord(a) * loWord(b);
    149   const uint64_t phihi = hiWord(a) * hiWord(b);
    150   const uint64_t r0 = loWord(plolo);
    151   const uint64_t r1 = hiWord(plolo) + loWord(plohi) + loWord(philo);
    152   *lo = r0 + (r1 << 32);
    153   *hi = hiWord(plohi) + hiWord(philo) + hiWord(r1) + phihi;
    154 }
    155 #undef loWord
    156 #undef hiWord
    157 
    158 COMPILER_RT_ABI fp_t __adddf3(fp_t a, fp_t b);
    159 
    160 #elif defined QUAD_PRECISION && !defined FP_LIB_TF_EMITTED
    161 #define FP_LIB_TF_EMITTED
    162 // Requires tf_supplement.h to be pre-included so CRT_HAS_TF_MODE and
    163 // CRT_HAS_IEEE_TF are defined and tf_float is typedef'd.
    164 typedef uint64_t half_rep_t;
    165 typedef __uint128_t rep_t;
    166 typedef __int128_t srep_t;
    167 typedef tf_float fp_t;
    168 
    169 static inline int rep_clz(rep_t a) {
    170   const union {
    171     __uint128_t ll;
    172     struct {
    173       uint64_t low, high;
    174     } s;
    175   } uu = {.ll = a};
    176   uint64_t word, add;
    177   if (uu.s.high) {
    178     word = uu.s.high;
    179     add = 0;
    180   } else {
    181     word = uu.s.low;
    182     add = 64;
    183   }
    184   return __builtin_clzll(word) + add;
    185 }
    186 
    187 #define Word_LoMask UINT64_C(0x00000000ffffffff)
    188 #define Word_HiMask UINT64_C(0xffffffff00000000)
    189 #define Word_FullMask UINT64_C(0xffffffffffffffff)
    190 #define Word_1(a) (uint64_t)((a >> 96) & Word_LoMask)
    191 #define Word_2(a) (uint64_t)((a >> 64) & Word_LoMask)
    192 #define Word_3(a) (uint64_t)((a >> 32) & Word_LoMask)
    193 #define Word_4(a) (uint64_t)(a & Word_LoMask)
    194 
    195 static inline void wideMultiply(rep_t a, rep_t b, rep_t* hi, rep_t* lo) {
    196   const uint64_t product11 = Word_1(a) * Word_1(b);
    197   const uint64_t product12 = Word_1(a) * Word_2(b);
    198   const uint64_t product13 = Word_1(a) * Word_3(b);
    199   const uint64_t product14 = Word_1(a) * Word_4(b);
    200   const uint64_t product21 = Word_2(a) * Word_1(b);
    201   const uint64_t product22 = Word_2(a) * Word_2(b);
    202   const uint64_t product23 = Word_2(a) * Word_3(b);
    203   const uint64_t product24 = Word_2(a) * Word_4(b);
    204   const uint64_t product31 = Word_3(a) * Word_1(b);
    205   const uint64_t product32 = Word_3(a) * Word_2(b);
    206   const uint64_t product33 = Word_3(a) * Word_3(b);
    207   const uint64_t product34 = Word_3(a) * Word_4(b);
    208   const uint64_t product41 = Word_4(a) * Word_1(b);
    209   const uint64_t product42 = Word_4(a) * Word_2(b);
    210   const uint64_t product43 = Word_4(a) * Word_3(b);
    211   const uint64_t product44 = Word_4(a) * Word_4(b);
    212 
    213   const __uint128_t sum0 = (__uint128_t)product44;
    214   const __uint128_t sum1 = (__uint128_t)product34 + (__uint128_t)product43;
    215   const __uint128_t sum2 =
    216       (__uint128_t)product24 + (__uint128_t)product33 + (__uint128_t)product42;
    217   const __uint128_t sum3 = (__uint128_t)product14 + (__uint128_t)product23 +
    218                            (__uint128_t)product32 + (__uint128_t)product41;
    219   const __uint128_t sum4 =
    220       (__uint128_t)product13 + (__uint128_t)product22 + (__uint128_t)product31;
    221   const __uint128_t sum5 = (__uint128_t)product12 + (__uint128_t)product21;
    222   const __uint128_t sum6 = (__uint128_t)product11;
    223 
    224   const __uint128_t r0 = (sum0 & Word_FullMask) + ((sum1 & Word_LoMask) << 32);
    225   const __uint128_t r1 = (sum0 >> 64) + ((sum1 >> 32) & Word_FullMask) +
    226                          (sum2 & Word_FullMask) + ((sum3 << 32) & Word_HiMask);
    227   *lo = r0 + (r1 << 64);
    228   *hi = (r1 >> 64) + (sum1 >> 96) + (sum2 >> 64) + (sum3 >> 32) + sum4 +
    229         (sum5 << 32) + (sum6 << 64);
    230 }
    231 #undef Word_1
    232 #undef Word_2
    233 #undef Word_3
    234 #undef Word_4
    235 #undef Word_HiMask
    236 #undef Word_LoMask
    237 #undef Word_FullMask
    238 
    239 #endif
    240 
    241 // ---- One-time emission per (TU, precision): shared static inlines. ------
    242 // These bodies use the bare-name value macros above; the aliases at the
    243 // top of the file ensure the entity names are suffix-renamed.
    244 
    245 #if defined SINGLE_PRECISION && !defined FP_LIB_SF_COMMON_EMITTED
    246 #define FP_LIB_SF_COMMON_EMITTED
    247 #define _FP_LIB_EMIT_COMMON 1
    248 #elif defined DOUBLE_PRECISION && !defined FP_LIB_DF_COMMON_EMITTED
    249 #define FP_LIB_DF_COMMON_EMITTED
    250 #define _FP_LIB_EMIT_COMMON 1
    251 #elif defined QUAD_PRECISION && !defined FP_LIB_TF_COMMON_EMITTED
    252 #define FP_LIB_TF_COMMON_EMITTED
    253 #define _FP_LIB_EMIT_COMMON 1
    254 #endif
    255 
    256 #ifdef _FP_LIB_EMIT_COMMON
    257 #undef _FP_LIB_EMIT_COMMON
    258 
    259 static inline rep_t toRep(fp_t x) {
    260   const union {
    261     fp_t f;
    262     rep_t i;
    263   } rep = {.f = x};
    264   return rep.i;
    265 }
    266 
    267 static inline fp_t fromRep(rep_t x) {
    268   const union {
    269     fp_t f;
    270     rep_t i;
    271   } rep = {.i = x};
    272   return rep.f;
    273 }
    274 
    275 static inline int normalize(rep_t* significand) {
    276   const int shift = rep_clz(*significand) - rep_clz(implicitBit);
    277   *significand <<= shift;
    278   return 1 - shift;
    279 }
    280 
    281 static inline void wideLeftShift(rep_t* hi, rep_t* lo, int count) {
    282   *hi = *hi << count | *lo >> (typeWidth - count);
    283   *lo = *lo << count;
    284 }
    285 
    286 static inline void wideRightShiftWithSticky(rep_t* hi, rep_t* lo,
    287                                             unsigned int count) {
    288   if (count < typeWidth) {
    289     const bool sticky = (*lo << (typeWidth - count)) != 0;
    290     *lo = *hi << (typeWidth - count) | *lo >> count | sticky;
    291     *hi = *hi >> count;
    292   } else if (count < 2 * typeWidth) {
    293     const bool sticky = *hi << (2 * typeWidth - count) | *lo;
    294     *lo = *hi >> (count - typeWidth) | sticky;
    295     *hi = 0;
    296   } else {
    297     const bool sticky = *hi | *lo;
    298     *lo = sticky;
    299     *hi = 0;
    300   }
    301 }
    302 
    303 static inline fp_t __compiler_rt_logbX(fp_t x) {
    304   rep_t rep = toRep(x);
    305   int exp = (rep & exponentMask) >> significandBits;
    306 
    307   if (exp == maxExponent) {
    308     if (((rep & signBit) == 0) || (x != x))
    309       return x;
    310     else
    311       return -x;
    312   } else if (x == 0.0) {
    313     return fromRep(infRep | signBit);
    314   }
    315 
    316   if (exp != 0) {
    317     return exp - exponentBias;
    318   } else {
    319     rep &= absMask;
    320     const int shift = 1 - normalize(&rep);
    321     exp = (rep & exponentMask) >> significandBits;
    322     return exp - exponentBias - shift;
    323   }
    324 }
    325 
    326 static inline fp_t __compiler_rt_scalbnX(fp_t x, int y) {
    327   const rep_t rep = toRep(x);
    328   int exp = (rep & exponentMask) >> significandBits;
    329 
    330   if (x == 0.0 || exp == maxExponent) return x;
    331 
    332   rep_t sig = rep & significandMask;
    333   if (exp == 0) {
    334     exp += normalize(&sig);
    335     sig &= ~implicitBit;
    336   }
    337 
    338   if (__builtin_sadd_overflow(exp, y, &exp)) {
    339     exp = (y >= 0) ? INT_MAX : INT_MIN;
    340   }
    341 
    342   const rep_t sign = rep & signBit;
    343   if (exp >= maxExponent) {
    344     return fromRep(sign | ((rep_t)(maxExponent - 1) << significandBits)) * 2.0f;
    345   } else if (exp <= 0) {
    346     fp_t tmp = fromRep(sign | (REP_C(1) << significandBits) | sig);
    347     exp += exponentBias - 1;
    348     if (exp < 1) exp = 1;
    349     tmp *= fromRep((rep_t)exp << significandBits);
    350     return tmp;
    351   } else
    352     return fromRep(sign | ((rep_t)exp << significandBits) | sig);
    353 }
    354 
    355 static inline fp_t __compiler_rt_fmaxX(fp_t x, fp_t y) {
    356   return (crt_isnan(x) || x < y) ? y : x;
    357 }
    358 
    359 #if defined(SINGLE_PRECISION)
    360 static inline fp_t __compiler_rt_logbf(fp_t x) {
    361   return __compiler_rt_logbX(x);
    362 }
    363 static inline fp_t __compiler_rt_scalbnf(fp_t x, int y) {
    364   return __compiler_rt_scalbnX(x, y);
    365 }
    366 static inline fp_t __compiler_rt_fmaxf(fp_t x, fp_t y) {
    367   return __compiler_rt_fmaxX(x, y);
    368 }
    369 #elif defined(DOUBLE_PRECISION)
    370 static inline fp_t __compiler_rt_logb(fp_t x) { return __compiler_rt_logbX(x); }
    371 static inline fp_t __compiler_rt_scalbn(fp_t x, int y) {
    372   return __compiler_rt_scalbnX(x, y);
    373 }
    374 static inline fp_t __compiler_rt_fmax(fp_t x, fp_t y) {
    375   return __compiler_rt_fmaxX(x, y);
    376 }
    377 #elif defined(QUAD_PRECISION)
    378 static inline tf_float __compiler_rt_logbtf(tf_float x) {
    379   return __compiler_rt_logbX(x);
    380 }
    381 static inline tf_float __compiler_rt_scalbntf(tf_float x, int y) {
    382   return __compiler_rt_scalbnX(x, y);
    383 }
    384 static inline tf_float __compiler_rt_fmaxtf(tf_float x, tf_float y) {
    385   return __compiler_rt_fmaxX(x, y);
    386 }
    387 #endif
    388 
    389 #endif  // _FP_LIB_EMIT_COMMON
    390 
    391 // Long-double aliases for QUAD targets. Idempotent (same text every
    392 // inclusion), so set outside the one-time emission gate.
    393 #if defined QUAD_PRECISION
    394 #define __compiler_rt_logbl __compiler_rt_logbtf
    395 #define __compiler_rt_scalbnl __compiler_rt_scalbntf
    396 #define __compiler_rt_fmaxl __compiler_rt_fmaxtf
    397 #define crt_fabstf crt_fabsf128
    398 #define crt_copysigntf crt_copysignf128
    399 #endif