kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

fp_extend_impl.inc (12659B)


      1 //=-lib/fp_extend_impl.inc - low precision -> high precision conversion -*-- -//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 //
      9 // This file implements a fairly generic conversion from a narrower to a wider
     10 // IEEE-754 floating-point type.  The constants and types defined following the
     11 // includes below parameterize the conversion.
     12 //
     13 // It does not support types that don't use the usual IEEE-754 interchange
     14 // formats; specifically, some work would be needed to adapt it to
     15 // (for example) the Intel 80-bit format or PowerPC double-double format.
     16 //
     17 // Note, however, that this implementation is only intended to support
     18 // *widening* operations; if you need to convert to a *narrower* floating-point
     19 // type (e.g. double -> float), then this routine will not do what you want it
     20 // to.
     21 //
     22 // It also requires that integer types at least as large as both formats
     23 // are available on the target platform; this may pose a problem when trying
     24 // to add support for quad on some 32-bit systems, for example.  You also may
     25 // run into trouble finding an appropriate CLZ function for wide source types;
     26 // you will likely need to roll your own on some platforms.
     27 //
     28 // Finally, the following assumptions are made:
     29 //
     30 // 1. Floating-point types and integer types have the same endianness on the
     31 //    target platform.
     32 //
     33 // 2. Quiet NaNs, if supported, are indicated by the leading bit of the
     34 //    significand field being set.
     35 //
     36 //===----------------------------------------------------------------------===//
     37 
     38 // ---- fp_extend.h (was a separate header; merged) ----
     39 #include "int_lib.h"
     40 
     41 // Self-clean any prior inclusion's per-(src, dst) state. May coexist with
     42 // fp_trunc.h / int_to_fp.h which define overlapping bare-name aliases.
     // The suffix macros, the SRC_REP_C / DST_REP_C literal macros and the bare
     // names listed here are dropped on every inclusion so the definitions
     // further down start from a clean slate.
     // Note: src_rep_t_clz is dropped here each time, but it is only defined
     // again inside the one-time emission section below.
     43 #undef _FP_EXT_SRC_SUF
     44 #undef _FP_EXT_DST_SUF
     45 #undef SRC_REP_C
     46 #undef DST_REP_C
     47 #undef src_t
     48 #undef src_rep_t
     49 #undef dst_t
     50 #undef dst_rep_t
     51 #undef srcBits
     52 #undef srcSigFracBits
     53 #undef srcExpBits
     54 #undef dstBits
     55 #undef dstSigFracBits
     56 #undef dstExpBits
     57 #undef src_rep_t_clz
     58 #undef src_rep_t_clz_impl
     59 #undef srcToRep
     60 #undef dstFromRep
     61 #undef extract_sign_from_src
     62 #undef extract_exp_from_src
     63 #undef extract_sig_frac_from_src
     64 #undef clz_in_sig_frac
     65 #undef construct_dst_rep
     // Translate the includer's SRC_* selector into the suffix that is pasted
     // into per-pair identifiers. The first selector found defined wins.
     67 #if defined SRC_SINGLE
     68 #define _FP_EXT_SRC_SUF sf
     69 #elif defined SRC_DOUBLE
     70 #define _FP_EXT_SRC_SUF df
     71 #elif defined SRC_80
     72 #define _FP_EXT_SRC_SUF xf
     73 #elif defined SRC_HALF
     74 #define _FP_EXT_SRC_SUF hf
     75 #else
     // NOTE(review): SRC_80 is accepted above although the diagnostic below
     // does not mention it.
     76 #error Source should be half, single, or double precision!
     77 #endif
     78 
     // Translate the includer's DST_* selector into the destination suffix used
     // in per-pair identifiers. The first selector found defined wins; having
     // none defined is a hard error.
     79 #if defined DST_SINGLE
     80 #define _FP_EXT_DST_SUF sf
     81 #elif defined DST_DOUBLE
     82 #define _FP_EXT_DST_SUF df
     83 #elif defined DST_QUAD
     84 #define _FP_EXT_DST_SUF tf
     85 #else
     86 #error Destination should be single, double, or quad precision!
     87 #endif
     88 
     // Token-pasting helpers. The extra _FP_EXT_PASTE4 level lets the suffix
     // macros expand before `##` is applied, so _FP_EXT_PAIR(stem) yields
     // stem_<src><dst>, e.g. stem_sfdf for single -> double.
     89 #define _FP_EXT_PASTE4_(a, b, c, d) a##b##c##d
     90 #define _FP_EXT_PASTE4(a, b, c, d)  _FP_EXT_PASTE4_(a, b, c, d)
     91 #define _FP_EXT_PAIR(stem)          _FP_EXT_PASTE4(stem, _, _FP_EXT_SRC_SUF, _FP_EXT_DST_SUF)
     92 
     93 // ---- Bare-name aliases (re-set every inclusion). ------------------------
     94 
     // Each bare name used by the code below is a macro that expands to a name
     // unique to the current (src, dst) pair, so several pairs can be emitted
     // into one translation unit without clashing. src_rep_t_clz itself is not
     // aliased here; it is defined per source format in the emission section.
     95 #define src_t                       _FP_EXT_PAIR(src_t)
     96 #define src_rep_t                   _FP_EXT_PAIR(src_rep_t)
     97 #define dst_t                       _FP_EXT_PAIR(dst_t)
     98 #define dst_rep_t                   _FP_EXT_PAIR(dst_rep_t)
     99 #define srcBits                     _FP_EXT_PAIR(srcBits)
    100 #define srcSigFracBits              _FP_EXT_PAIR(srcSigFracBits)
    101 #define srcExpBits                  _FP_EXT_PAIR(srcExpBits)
    102 #define dstBits                     _FP_EXT_PAIR(dstBits)
    103 #define dstSigFracBits              _FP_EXT_PAIR(dstSigFracBits)
    104 #define dstExpBits                  _FP_EXT_PAIR(dstExpBits)
    105 #define src_rep_t_clz_impl          _FP_EXT_PAIR(src_rep_t_clz_impl)
    106 #define srcToRep                    _FP_EXT_PAIR(srcToRep)
    107 #define dstFromRep                  _FP_EXT_PAIR(dstFromRep)
    108 #define extract_sign_from_src       _FP_EXT_PAIR(extract_sign_from_src)
    109 #define extract_exp_from_src        _FP_EXT_PAIR(extract_exp_from_src)
    110 #define extract_sig_frac_from_src   _FP_EXT_PAIR(extract_sig_frac_from_src)
    111 #define clz_in_sig_frac             _FP_EXT_PAIR(clz_in_sig_frac)
    112 #define construct_dst_rep           _FP_EXT_PAIR(construct_dst_rep)
    113 
    114 // SRC_REP_C / DST_REP_C: textual macros (UINT32_C etc.); same body each
    115 // inclusion within a precision.
    116 
    // SRC_REP_C(x) spells a constant of the source representation type. For
    // the 128-bit representation it is a cast rather than a literal-suffix
    // macro: SRC_REP_C(1) expands to (__uint128_t)(1).
    117 #if defined SRC_SINGLE
    118 #define SRC_REP_C UINT32_C
    119 #elif defined SRC_DOUBLE
    120 #define SRC_REP_C UINT64_C
    121 #elif defined SRC_80
    122 #define SRC_REP_C (__uint128_t)
    123 #elif defined SRC_HALF
    124 #define SRC_REP_C UINT16_C
    125 #endif
    126 
    // DST_REP_C(x) spells a constant of the destination representation type.
    // For quad precision it is a cast: DST_REP_C(1) expands to (__uint128_t)(1).
    127 #if defined DST_SINGLE
    128 #define DST_REP_C UINT32_C
    129 #elif defined DST_DOUBLE
    130 #define DST_REP_C UINT64_C
    131 #elif defined DST_QUAD
    132 #define DST_REP_C (__uint128_t)
    133 #endif
    134 
    135 // ---- One-time emission per (TU, src+dst pair). --------------------------
    136 // Enumerate the pairs kit actually uses (sf→df, sf→tf, df→tf).
    137 
    // Decide whether the types, constants and helpers for this (src, dst) pair
    // still have to be emitted. Only the three pairs tested below are handled.
    // For any other pair, or once the pair's *_EMITTED marker is set,
    // _FP_EXT_EMIT stays undefined and the declaration section is skipped.
    138 #if defined SRC_SINGLE && defined DST_DOUBLE && !defined FP_EXT_SFDF_EMITTED
    139 #define FP_EXT_SFDF_EMITTED
    140 #define _FP_EXT_EMIT 1
    141 #elif defined SRC_SINGLE && defined DST_QUAD && !defined FP_EXT_SFTF_EMITTED
    142 #define FP_EXT_SFTF_EMITTED
    143 #define _FP_EXT_EMIT 1
    144 #elif defined SRC_DOUBLE && defined DST_QUAD && !defined FP_EXT_DFTF_EMITTED
    145 #define FP_EXT_DFTF_EMITTED
    146 #define _FP_EXT_EMIT 1
    147 #endif
    148 
    149 #ifdef _FP_EXT_EMIT
    150 #undef _FP_EXT_EMIT
    151 
    152 #if defined SRC_SINGLE
    // Source-format parameters when the source is `float`.
    153 typedef float src_t;
    154 typedef uint32_t src_rep_t;
    155 static const int srcBits = sizeof(src_t) * CHAR_BIT;
    156 static const int srcSigFracBits = 23;
    157 // Exponent field width: srcBits - srcSigFracBits - 1, where the -1
    158 // accounts for the sign bit.
    159 static const int srcExpBits = 8;
    // clzsi is not defined in this file (presumably supplied by int_lib.h); it
    // is used directly as the leading-zero counter for the 32-bit representation.
    160 #define src_rep_t_clz clzsi
    161 
    162 #elif defined SRC_DOUBLE
    // Source-format parameters when the source is `double`.
    typedef double src_t;
    typedef uint64_t src_rep_t;
    // Storage width of the source type in bits.
    static const int srcBits = sizeof(src_t) * CHAR_BIT;
    // Width of the stored fraction field.
    static const int srcSigFracBits = 52;
    // Exponent field width: srcBits - srcSigFracBits - 1, where the -1
    // accounts for the sign bit.
    static const int srcExpBits = 11;

    // Counts the leading zero bits of a 64-bit source representation.
    // `a` must be non-zero: __builtin_clzl is undefined for a zero argument.
    static inline int src_rep_t_clz_impl(src_rep_t a) {
    #if defined __LP64__
      return __builtin_clzl(a);
    #else
      // `long` is not known to be 64 bits wide here, so count in two 32-bit
      // halves. The masks are built with SRC_REP_C, the constant macro this
      // file defines for the source representation; REP_C is not defined by
      // this file.
      if (a & SRC_REP_C(0xffffffff00000000))
        return clzsi(a >> 32);
      else
        return 32 + clzsi(a & SRC_REP_C(0xffffffff));
    #endif
    }
    #define src_rep_t_clz src_rep_t_clz_impl
    182 
    183 #elif defined SRC_80
    // Source-format parameters for the 80-bit extended format, carried in a
    // 128-bit integer representation.
    184 typedef xf_float src_t;
    185 typedef __uint128_t src_rep_t;
    186 // sign bit, exponent and significand occupy the lower 80 bits.
    187 static const int srcBits = 80;
    188 static const int srcSigFracBits = 63;
    189 // -1 accounts for the sign bit.
    190 // -1 accounts for the explicitly stored integer bit.
    191 // srcBits - srcSigFracBits - 1 - 1
    192 static const int srcExpBits = 15;
    // No src_rep_t_clz is defined for this format, so the clz-based helper and
    // the renormalizing denormal path further down are compiled out.
    193 
    194 #elif defined SRC_HALF
    // Source-format parameters for half precision.
    #ifdef COMPILER_RT_HAS_FLOAT16
    typedef _Float16 src_t;
    #else
    // Without COMPILER_RT_HAS_FLOAT16 the value travels as a raw 16-bit integer.
    typedef uint16_t src_t;
    #endif
    typedef uint16_t src_rep_t;
    static const int srcBits = CHAR_BIT * sizeof(src_t);
    static const int srcSigFracBits = 10;
    // Exponent field width: srcBits - srcSigFracBits - 1 (the -1 is the sign bit).
    static const int srcExpBits = 5;

    // Leading-zero count within the 16-bit representation. __builtin_clz works
    // on `unsigned int`, so the 16 extra high bits are subtracted (this assumes
    // a 32-bit `unsigned int`). `a` must be non-zero.
    static inline int src_rep_t_clz_impl(src_rep_t a) {
      const int countAsUnsignedInt = __builtin_clz(a);
      return countAsUnsignedInt - 16;
    }
    #define src_rep_t_clz src_rep_t_clz_impl
    211 
    212 #endif // end source precision
    213 
    214 #if defined DST_SINGLE
    // Destination-format parameters when the destination is `float`.
    215 typedef float dst_t;
    216 typedef uint32_t dst_rep_t;
    217 static const int dstBits = sizeof(dst_t) * CHAR_BIT;
    218 static const int dstSigFracBits = 23;
    219 // Exponent field width: dstBits - dstSigFracBits - 1, where the -1
    220 // accounts for the sign bit.
    221 static const int dstExpBits = 8;
    222 
    223 #elif defined DST_DOUBLE
    // Destination-format parameters when the destination is `double`.
    224 typedef double dst_t;
    225 typedef uint64_t dst_rep_t;
    226 static const int dstBits = sizeof(dst_t) * CHAR_BIT;
    227 static const int dstSigFracBits = 52;
    228 // Exponent field width: dstBits - dstSigFracBits - 1, where the -1
    229 // accounts for the sign bit.
    230 static const int dstExpBits = 11;
    231 
    232 #elif defined DST_QUAD
    // Destination-format parameters for quad precision. tf_float is declared
    // outside this file; its size determines dstBits.
    233 typedef tf_float dst_t;
    234 typedef __uint128_t dst_rep_t;
    235 static const int dstBits = sizeof(dst_t) * CHAR_BIT;
    236 static const int dstSigFracBits = 112;
    237 // Exponent field width: dstBits - dstSigFracBits - 1, where the -1
    238 // accounts for the sign bit.
    239 static const int dstExpBits = 15;
    240 
    241 #endif // end destination precision
    242 
    243 // End of specialization parameters.
    244 
    245 // TODO: These helper routines should be placed into fp_lib.h
    246 // Currently they depend on macros/constants defined above.
    247 
    248 static inline src_rep_t extract_sign_from_src(src_rep_t x) {
    249   const src_rep_t srcSignMask = SRC_REP_C(1) << (srcBits - 1);
    250   return (x & srcSignMask) >> (srcBits - 1);
    251 }
    252 
    253 static inline src_rep_t extract_exp_from_src(src_rep_t x) {
    254   const int srcSigBits = srcBits - 1 - srcExpBits;
    255   const src_rep_t srcExpMask = ((SRC_REP_C(1) << srcExpBits) - 1) << srcSigBits;
    256   return (x & srcExpMask) >> srcSigBits;
    257 }
    258 
    259 static inline src_rep_t extract_sig_frac_from_src(src_rep_t x) {
    260   const src_rep_t srcSigFracMask = (SRC_REP_C(1) << srcSigFracBits) - 1;
    261   return x & srcSigFracMask;
    262 }
    263 
    #ifdef src_rep_t_clz
    // Number of leading zero bits inside the fraction field of `sigFrac`. The
    // sign bit and the exponent bits sit above the fraction in the
    // representation, so they are removed from the raw count.
    static inline int clz_in_sig_frac(src_rep_t sigFrac) {
      return src_rep_t_clz(sigFrac) - (1 + srcExpBits);
    }
    #endif
    270 
    271 static inline dst_rep_t construct_dst_rep(dst_rep_t sign, dst_rep_t exp, dst_rep_t sigFrac) {
    272   return (sign << (dstBits - 1)) | (exp << (dstBits - 1 - dstExpBits)) | sigFrac;
    273 }
    274 
    275 // Two helper routines for conversion to and from the representation of
    276 // floating-point data as integer values follow.
    277 
    278 static inline src_rep_t srcToRep(src_t x) {
    279   const union {
    280     src_t f;
    281     src_rep_t i;
    282   } rep = {.f = x};
    283   return rep.i;
    284 }
    285 
    286 static inline dst_t dstFromRep(dst_rep_t x) {
    287   const union {
    288     dst_t f;
    289     dst_rep_t i;
    290   } rep = {.i = x};
    291   return rep.f;
    292 }
    293 // End helper routines.  Conversion implementation follows.
    294 
    295 #endif // _FP_EXT_EMIT
    296 
    // Bare-name alias for the conversion routine; it expands to the per-pair
    // name, e.g. __extendXfYf2___sfdf for single -> double.
    297 #define __extendXfYf2__ _FP_EXT_PAIR(__extendXfYf2__)
    298 
    // Decide whether the conversion routine for this (src, dst) pair still has
    // to be emitted. This uses its own FP_EXT_IMPL_*_EMITTED markers, separate
    // from the markers that guard the declaration section, and handles the
    // same three pairs.
    299 #if defined SRC_SINGLE && defined DST_DOUBLE && !defined FP_EXT_IMPL_SFDF_EMITTED
    300 #define FP_EXT_IMPL_SFDF_EMITTED
    301 #define _FP_EXT_IMPL_EMIT 1
    302 #elif defined SRC_SINGLE && defined DST_QUAD && !defined FP_EXT_IMPL_SFTF_EMITTED
    303 #define FP_EXT_IMPL_SFTF_EMITTED
    304 #define _FP_EXT_IMPL_EMIT 1
    305 #elif defined SRC_DOUBLE && defined DST_QUAD && !defined FP_EXT_IMPL_DFTF_EMITTED
    306 #define FP_EXT_IMPL_DFTF_EMITTED
    307 #define _FP_EXT_IMPL_EMIT 1
    308 #endif
    309 
    310 #ifdef _FP_EXT_IMPL_EMIT
    311 #undef _FP_EXT_IMPL_EMIT
    312 
    313 // The source type may use a usual IEEE-754 interchange format or Intel 80-bit
    314 // format. In particular, for the source type srcSigFracBits may be not equal to
    315 // srcSigBits. The destination type is assumed to be one of IEEE-754 standard
    316 // types.
    317 static inline dst_t __extendXfYf2__(src_t a) {
    318   // Various constants whose values follow from the type parameters.
    319   // Any reasonable optimizer will fold and propagate all of these.
    320   const int srcInfExp = (1 << srcExpBits) - 1;
    321   const int srcExpBias = srcInfExp >> 1;
    322 
    323   const int dstInfExp = (1 << dstExpBits) - 1;
    324   const int dstExpBias = dstInfExp >> 1;
    325 
    326   // Break a into a sign and representation of the absolute value.
    327   const src_rep_t aRep = srcToRep(a);
    328   const src_rep_t srcSign = extract_sign_from_src(aRep);
    329   const src_rep_t srcExp = extract_exp_from_src(aRep);
    330   const src_rep_t srcSigFrac = extract_sig_frac_from_src(aRep);
    331 
    332   dst_rep_t dstSign = srcSign;
    333   dst_rep_t dstExp;
    334   dst_rep_t dstSigFrac;
    335 
    336   if (srcExp >= 1 && srcExp < (src_rep_t)srcInfExp) {
    337     // a is a normal number.
    338     dstExp = (dst_rep_t)srcExp + (dst_rep_t)(dstExpBias - srcExpBias);
    339     dstSigFrac = (dst_rep_t)srcSigFrac << (dstSigFracBits - srcSigFracBits);
    340   }
    341 
    342   else if (srcExp == srcInfExp) {
    343     // a is NaN or infinity.
    344     dstExp = dstInfExp;
    345     dstSigFrac = (dst_rep_t)srcSigFrac << (dstSigFracBits - srcSigFracBits);
    346   }
    347 
    348   else if (srcSigFrac) {
    349     // a is denormal.
    350     if (srcExpBits == dstExpBits) {
    351       // The exponent fields are identical and this is a denormal number, so all
    352       // the non-significand bits are zero. In particular, this branch is always
    353       // taken when we extend a denormal F80 to F128.
    354       dstExp = 0;
    355       dstSigFrac = ((dst_rep_t)srcSigFrac) << (dstSigFracBits - srcSigFracBits);
    356     } else {
    357 #ifndef src_rep_t_clz
    358       // If src_rep_t_clz is not defined this branch must be unreachable.
    359       __builtin_unreachable();
    360 #else
    361       // Renormalize the significand and clear the leading bit.
    362       // For F80 -> F128 this codepath is unused.
    363       const int scale = clz_in_sig_frac(srcSigFrac) + 1;
    364       dstExp = dstExpBias - srcExpBias - scale + 1;
    365       dstSigFrac = (dst_rep_t)srcSigFrac
    366                    << (dstSigFracBits - srcSigFracBits + scale);
    367       const dst_rep_t dstMinNormal = DST_REP_C(1) << (dstBits - 1 - dstExpBits);
    368       dstSigFrac ^= dstMinNormal;
    369 #endif
    370     }
    371   }
    372 
    373   else {
    374     // a is zero.
    375     dstExp = 0;
    376     dstSigFrac = 0;
    377   }
    378 
    379   const dst_rep_t result = construct_dst_rep(dstSign, dstExp, dstSigFrac);
    380   return dstFromRep(result);
    381 }
    382 
    383 #endif // _FP_EXT_IMPL_EMIT