fp_extend_impl.inc (12659B)
1 //=-lib/fp_extend_impl.inc - low precision -> high precision conversion -*-- -// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a fairly generic conversion from a narrower to a wider 10 // IEEE-754 floating-point type. The constants and types defined following the 11 // includes below parameterize the conversion. 12 // 13 // It does not support types that don't use the usual IEEE-754 interchange 14 // formats; specifically, some work would be needed to adapt it to 15 // (for example) the Intel 80-bit format or PowerPC double-double format. 16 // 17 // Note please, however, that this implementation is only intended to support 18 // *widening* operations; if you need to convert to a *narrower* floating-point 19 // type (e.g. double -> float), then this routine will not do what you want it 20 // to. 21 // 22 // It also requires that integer types at least as large as both formats 23 // are available on the target platform; this may pose a problem when trying 24 // to add support for quad on some 32-bit systems, for example. You also may 25 // run into trouble finding an appropriate CLZ function for wide source types; 26 // you will likely need to roll your own on some platforms. 27 // 28 // Finally, the following assumptions are made: 29 // 30 // 1. Floating-point types and integer types have the same endianness on the 31 // target platform. 32 // 33 // 2. Quiet NaNs, if supported, are indicated by the leading bit of the 34 // significand field being set. 35 // 36 //===----------------------------------------------------------------------===// 37 38 // ---- fp_extend.h (was a separate header; merged) ---- 39 #include "int_lib.h" 40 41 // Self-clean any prior inclusion's per-(src, dst) state. May coexist with 42 // fp_trunc.h / int_to_fp.h which define overlapping bare-name aliases. 43 #undef _FP_EXT_SRC_SUF 44 #undef _FP_EXT_DST_SUF 45 #undef SRC_REP_C 46 #undef DST_REP_C 47 #undef src_t 48 #undef src_rep_t 49 #undef dst_t 50 #undef dst_rep_t 51 #undef srcBits 52 #undef srcSigFracBits 53 #undef srcExpBits 54 #undef dstBits 55 #undef dstSigFracBits 56 #undef dstExpBits 57 #undef src_rep_t_clz 58 #undef src_rep_t_clz_impl 59 #undef srcToRep 60 #undef dstFromRep 61 #undef extract_sign_from_src 62 #undef extract_exp_from_src 63 #undef extract_sig_frac_from_src 64 #undef clz_in_sig_frac 65 #undef construct_dst_rep 66 67 #if defined SRC_SINGLE 68 #define _FP_EXT_SRC_SUF sf 69 #elif defined SRC_DOUBLE 70 #define _FP_EXT_SRC_SUF df 71 #elif defined SRC_80 72 #define _FP_EXT_SRC_SUF xf 73 #elif defined SRC_HALF 74 #define _FP_EXT_SRC_SUF hf 75 #else 76 #error Source should be half, single, or double precision! 77 #endif 78 79 #if defined DST_SINGLE 80 #define _FP_EXT_DST_SUF sf 81 #elif defined DST_DOUBLE 82 #define _FP_EXT_DST_SUF df 83 #elif defined DST_QUAD 84 #define _FP_EXT_DST_SUF tf 85 #else 86 #error Destination should be single, double, or quad precision! 87 #endif 88 89 #define _FP_EXT_PASTE4_(a, b, c, d) a##b##c##d 90 #define _FP_EXT_PASTE4(a, b, c, d) _FP_EXT_PASTE4_(a, b, c, d) 91 #define _FP_EXT_PAIR(stem) _FP_EXT_PASTE4(stem, _, _FP_EXT_SRC_SUF, _FP_EXT_DST_SUF) 92 93 // ---- Bare-name aliases (re-set every inclusion). ------------------------ 94 95 #define src_t _FP_EXT_PAIR(src_t) 96 #define src_rep_t _FP_EXT_PAIR(src_rep_t) 97 #define dst_t _FP_EXT_PAIR(dst_t) 98 #define dst_rep_t _FP_EXT_PAIR(dst_rep_t) 99 #define srcBits _FP_EXT_PAIR(srcBits) 100 #define srcSigFracBits _FP_EXT_PAIR(srcSigFracBits) 101 #define srcExpBits _FP_EXT_PAIR(srcExpBits) 102 #define dstBits _FP_EXT_PAIR(dstBits) 103 #define dstSigFracBits _FP_EXT_PAIR(dstSigFracBits) 104 #define dstExpBits _FP_EXT_PAIR(dstExpBits) 105 #define src_rep_t_clz_impl _FP_EXT_PAIR(src_rep_t_clz_impl) 106 #define srcToRep _FP_EXT_PAIR(srcToRep) 107 #define dstFromRep _FP_EXT_PAIR(dstFromRep) 108 #define extract_sign_from_src _FP_EXT_PAIR(extract_sign_from_src) 109 #define extract_exp_from_src _FP_EXT_PAIR(extract_exp_from_src) 110 #define extract_sig_frac_from_src _FP_EXT_PAIR(extract_sig_frac_from_src) 111 #define clz_in_sig_frac _FP_EXT_PAIR(clz_in_sig_frac) 112 #define construct_dst_rep _FP_EXT_PAIR(construct_dst_rep) 113 114 // SRC_REP_C / DST_REP_C: textual macros (UINT32_C etc.); same body each 115 // inclusion within a precision. 116 117 #if defined SRC_SINGLE 118 #define SRC_REP_C UINT32_C 119 #elif defined SRC_DOUBLE 120 #define SRC_REP_C UINT64_C 121 #elif defined SRC_80 122 #define SRC_REP_C (__uint128_t) 123 #elif defined SRC_HALF 124 #define SRC_REP_C UINT16_C 125 #endif 126 127 #if defined DST_SINGLE 128 #define DST_REP_C UINT32_C 129 #elif defined DST_DOUBLE 130 #define DST_REP_C UINT64_C 131 #elif defined DST_QUAD 132 #define DST_REP_C (__uint128_t) 133 #endif 134 135 // ---- One-time emission per (TU, src+dst pair). -------------------------- 136 // Enumerate the pairs kit actually uses (sf→df, sf→tf, df→tf). 137 138 #if defined SRC_SINGLE && defined DST_DOUBLE && !defined FP_EXT_SFDF_EMITTED 139 #define FP_EXT_SFDF_EMITTED 140 #define _FP_EXT_EMIT 1 141 #elif defined SRC_SINGLE && defined DST_QUAD && !defined FP_EXT_SFTF_EMITTED 142 #define FP_EXT_SFTF_EMITTED 143 #define _FP_EXT_EMIT 1 144 #elif defined SRC_DOUBLE && defined DST_QUAD && !defined FP_EXT_DFTF_EMITTED 145 #define FP_EXT_DFTF_EMITTED 146 #define _FP_EXT_EMIT 1 147 #endif 148 149 #ifdef _FP_EXT_EMIT 150 #undef _FP_EXT_EMIT 151 152 #if defined SRC_SINGLE 153 typedef float src_t; 154 typedef uint32_t src_rep_t; 155 static const int srcBits = sizeof(src_t) * CHAR_BIT; 156 static const int srcSigFracBits = 23; 157 // -1 accounts for the sign bit. 158 // srcBits - srcSigFracBits - 1 159 static const int srcExpBits = 8; 160 #define src_rep_t_clz clzsi 161 162 #elif defined SRC_DOUBLE 163 typedef double src_t; 164 typedef uint64_t src_rep_t; 165 static const int srcBits = sizeof(src_t) * CHAR_BIT; 166 static const int srcSigFracBits = 52; 167 // -1 accounts for the sign bit. 168 // srcBits - srcSigFracBits - 1 169 static const int srcExpBits = 11; 170 171 static inline int src_rep_t_clz_impl(src_rep_t a) { 172 #if defined __LP64__ 173 return __builtin_clzl(a); 174 #else 175 if (a & REP_C(0xffffffff00000000)) 176 return clzsi(a >> 32); 177 else 178 return 32 + clzsi(a & REP_C(0xffffffff)); 179 #endif 180 } 181 #define src_rep_t_clz src_rep_t_clz_impl 182 183 #elif defined SRC_80 184 typedef xf_float src_t; 185 typedef __uint128_t src_rep_t; 186 // sign bit, exponent and significand occupy the lower 80 bits. 187 static const int srcBits = 80; 188 static const int srcSigFracBits = 63; 189 // -1 accounts for the sign bit. 190 // -1 accounts for the explicitly stored integer bit. 191 // srcBits - srcSigFracBits - 1 - 1 192 static const int srcExpBits = 15; 193 194 #elif defined SRC_HALF 195 #ifdef COMPILER_RT_HAS_FLOAT16 196 typedef _Float16 src_t; 197 #else 198 typedef uint16_t src_t; 199 #endif 200 typedef uint16_t src_rep_t; 201 static const int srcBits = sizeof(src_t) * CHAR_BIT; 202 static const int srcSigFracBits = 10; 203 // -1 accounts for the sign bit. 204 // srcBits - srcSigFracBits - 1 205 static const int srcExpBits = 5; 206 207 static inline int src_rep_t_clz_impl(src_rep_t a) { 208 return __builtin_clz(a) - 16; 209 } 210 #define src_rep_t_clz src_rep_t_clz_impl 211 212 #endif // end source precision 213 214 #if defined DST_SINGLE 215 typedef float dst_t; 216 typedef uint32_t dst_rep_t; 217 static const int dstBits = sizeof(dst_t) * CHAR_BIT; 218 static const int dstSigFracBits = 23; 219 // -1 accounts for the sign bit. 220 // dstBits - dstSigFracBits - 1 221 static const int dstExpBits = 8; 222 223 #elif defined DST_DOUBLE 224 typedef double dst_t; 225 typedef uint64_t dst_rep_t; 226 static const int dstBits = sizeof(dst_t) * CHAR_BIT; 227 static const int dstSigFracBits = 52; 228 // -1 accounts for the sign bit. 229 // dstBits - dstSigFracBits - 1 230 static const int dstExpBits = 11; 231 232 #elif defined DST_QUAD 233 typedef tf_float dst_t; 234 typedef __uint128_t dst_rep_t; 235 static const int dstBits = sizeof(dst_t) * CHAR_BIT; 236 static const int dstSigFracBits = 112; 237 // -1 accounts for the sign bit. 238 // dstBits - dstSigFracBits - 1 239 static const int dstExpBits = 15; 240 241 #endif // end destination precision 242 243 // End of specialization parameters. 244 245 // TODO: These helper routines should be placed into fp_lib.h 246 // Currently they depend on macros/constants defined above. 247 248 static inline src_rep_t extract_sign_from_src(src_rep_t x) { 249 const src_rep_t srcSignMask = SRC_REP_C(1) << (srcBits - 1); 250 return (x & srcSignMask) >> (srcBits - 1); 251 } 252 253 static inline src_rep_t extract_exp_from_src(src_rep_t x) { 254 const int srcSigBits = srcBits - 1 - srcExpBits; 255 const src_rep_t srcExpMask = ((SRC_REP_C(1) << srcExpBits) - 1) << srcSigBits; 256 return (x & srcExpMask) >> srcSigBits; 257 } 258 259 static inline src_rep_t extract_sig_frac_from_src(src_rep_t x) { 260 const src_rep_t srcSigFracMask = (SRC_REP_C(1) << srcSigFracBits) - 1; 261 return x & srcSigFracMask; 262 } 263 264 #ifdef src_rep_t_clz 265 static inline int clz_in_sig_frac(src_rep_t sigFrac) { 266 const int skip = 1 + srcExpBits; 267 return src_rep_t_clz(sigFrac) - skip; 268 } 269 #endif 270 271 static inline dst_rep_t construct_dst_rep(dst_rep_t sign, dst_rep_t exp, dst_rep_t sigFrac) { 272 return (sign << (dstBits - 1)) | (exp << (dstBits - 1 - dstExpBits)) | sigFrac; 273 } 274 275 // Two helper routines for conversion to and from the representation of 276 // floating-point data as integer values follow. 277 278 static inline src_rep_t srcToRep(src_t x) { 279 const union { 280 src_t f; 281 src_rep_t i; 282 } rep = {.f = x}; 283 return rep.i; 284 } 285 286 static inline dst_t dstFromRep(dst_rep_t x) { 287 const union { 288 dst_t f; 289 dst_rep_t i; 290 } rep = {.i = x}; 291 return rep.f; 292 } 293 // End helper routines. Conversion implementation follows. 294 295 #endif // _FP_EXT_EMIT 296 297 #define __extendXfYf2__ _FP_EXT_PAIR(__extendXfYf2__) 298 299 #if defined SRC_SINGLE && defined DST_DOUBLE && !defined FP_EXT_IMPL_SFDF_EMITTED 300 #define FP_EXT_IMPL_SFDF_EMITTED 301 #define _FP_EXT_IMPL_EMIT 1 302 #elif defined SRC_SINGLE && defined DST_QUAD && !defined FP_EXT_IMPL_SFTF_EMITTED 303 #define FP_EXT_IMPL_SFTF_EMITTED 304 #define _FP_EXT_IMPL_EMIT 1 305 #elif defined SRC_DOUBLE && defined DST_QUAD && !defined FP_EXT_IMPL_DFTF_EMITTED 306 #define FP_EXT_IMPL_DFTF_EMITTED 307 #define _FP_EXT_IMPL_EMIT 1 308 #endif 309 310 #ifdef _FP_EXT_IMPL_EMIT 311 #undef _FP_EXT_IMPL_EMIT 312 313 // The source type may use a usual IEEE-754 interchange format or Intel 80-bit 314 // format. In particular, for the source type srcSigFracBits may be not equal to 315 // srcSigBits. The destination type is assumed to be one of IEEE-754 standard 316 // types. 317 static inline dst_t __extendXfYf2__(src_t a) { 318 // Various constants whose values follow from the type parameters. 319 // Any reasonable optimizer will fold and propagate all of these. 320 const int srcInfExp = (1 << srcExpBits) - 1; 321 const int srcExpBias = srcInfExp >> 1; 322 323 const int dstInfExp = (1 << dstExpBits) - 1; 324 const int dstExpBias = dstInfExp >> 1; 325 326 // Break a into a sign and representation of the absolute value. 327 const src_rep_t aRep = srcToRep(a); 328 const src_rep_t srcSign = extract_sign_from_src(aRep); 329 const src_rep_t srcExp = extract_exp_from_src(aRep); 330 const src_rep_t srcSigFrac = extract_sig_frac_from_src(aRep); 331 332 dst_rep_t dstSign = srcSign; 333 dst_rep_t dstExp; 334 dst_rep_t dstSigFrac; 335 336 if (srcExp >= 1 && srcExp < (src_rep_t)srcInfExp) { 337 // a is a normal number. 338 dstExp = (dst_rep_t)srcExp + (dst_rep_t)(dstExpBias - srcExpBias); 339 dstSigFrac = (dst_rep_t)srcSigFrac << (dstSigFracBits - srcSigFracBits); 340 } 341 342 else if (srcExp == srcInfExp) { 343 // a is NaN or infinity. 344 dstExp = dstInfExp; 345 dstSigFrac = (dst_rep_t)srcSigFrac << (dstSigFracBits - srcSigFracBits); 346 } 347 348 else if (srcSigFrac) { 349 // a is denormal. 350 if (srcExpBits == dstExpBits) { 351 // The exponent fields are identical and this is a denormal number, so all 352 // the non-significand bits are zero. In particular, this branch is always 353 // taken when we extend a denormal F80 to F128. 354 dstExp = 0; 355 dstSigFrac = ((dst_rep_t)srcSigFrac) << (dstSigFracBits - srcSigFracBits); 356 } else { 357 #ifndef src_rep_t_clz 358 // If src_rep_t_clz is not defined this branch must be unreachable. 359 __builtin_unreachable(); 360 #else 361 // Renormalize the significand and clear the leading bit. 362 // For F80 -> F128 this codepath is unused. 363 const int scale = clz_in_sig_frac(srcSigFrac) + 1; 364 dstExp = dstExpBias - srcExpBias - scale + 1; 365 dstSigFrac = (dst_rep_t)srcSigFrac 366 << (dstSigFracBits - srcSigFracBits + scale); 367 const dst_rep_t dstMinNormal = DST_REP_C(1) << (dstBits - 1 - dstExpBits); 368 dstSigFrac ^= dstMinNormal; 369 #endif 370 } 371 } 372 373 else { 374 // a is zero. 375 dstExp = 0; 376 dstSigFrac = 0; 377 } 378 379 const dst_rep_t result = construct_dst_rep(dstSign, dstExp, dstSigFrac); 380 return dstFromRep(result); 381 } 382 383 #endif // _FP_EXT_IMPL_EMIT