fp_tf.c - kit

fp_tf.c (16024B)
      1 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      2 //
      3 // Consolidated soft-float runtime helpers for kit's libkit_rt.a.
      4 // The build compiles only this one file; the per-op .c files are #included
      5 // as snippets and not directly compiled. The fp_lib_undef.h reset header is
      6 // included between sections that switch precision or (src,dst) pair.
      7 // License: Apache-2.0 WITH LLVM-exception (see lib/LICENSE-compiler-rt.txt).
      8 
      9 // ============================================================
     10 // Section 1: QUAD precision arith / compare / conv / fix
     11 // ============================================================
     12 #ifdef KITRT_LDBL128
     13 #include "tf_supplement.h"
     14 #endif
     15 
     16 // ---- addtf3.c ----
     17 #define QUAD_PRECISION
     18 #include "fp_add_impl.inc"
     19 #include "fp_lib.h"
     20 
     21 COMPILER_RT_ABI fp_t __addtf3(fp_t a, fp_t b) { return __addXf3__(a, b); }
     22 
     23 // ---- subtf3.c ----
     24 #define QUAD_PRECISION
     25 #include "fp_lib.h"
     26 
     27 COMPILER_RT_ABI fp_t __addtf3(fp_t a, fp_t b);
     28 
     29 // Subtraction; flip the sign bit of b and add.
     30 COMPILER_RT_ABI fp_t __subtf3(fp_t a, fp_t b) {
     31   return __addtf3(a, fromRep(toRep(b) ^ signBit));
     32 }
     33 
     34 COMPILER_RT_ABI fp_t __negtf2(fp_t a) { return fromRep(toRep(a) ^ signBit); }
     35 
     36 // ---- multf3.c ----
     37 #define QUAD_PRECISION
     38 #include "fp_lib.h"
     39 
     40 typedef struct {
     41   du_int limb[4];
     42 } kit_tf_u256;
     43 
     44 static int kit_tf_rep_bit(rep_t value, int bit) {
     45   return ((value >> (unsigned)bit) & 1) != 0;
     46 }
     47 
     48 static int kit_tf_u256_bit(const kit_tf_u256* value, int bit) {
     49   if (bit < 0 || bit >= 256) return 0;
     50   return ((value->limb[bit / 64] >> (unsigned)(bit % 64)) & 1u) != 0;
     51 }
     52 
     53 static int kit_tf_u256_any_below(const kit_tf_u256* value, int bit) {
     54   int full;
     55   int rem;
     56   if (bit <= 0) return 0;
     57   if (bit > 256) bit = 256;
     58   full = bit / 64;
     59   rem = bit % 64;
     60   for (int i = 0; i < full; ++i) {
     61     if (value->limb[i]) return 1;
     62   }
     63   if (rem) {
     64     const du_int mask = ((du_int)1 << (unsigned)rem) - 1u;
     65     if (value->limb[full] & mask) return 1;
     66   }
     67   return 0;
     68 }
     69 
     70 static void kit_tf_u256_add_limb(kit_tf_u256* value, int index, du_int addend) {
     71   du_int old;
     72   if (!addend || index >= 4) return;
     73   old = value->limb[index];
     74   value->limb[index] = old + addend;
     75   if (value->limb[index] >= old) return;
     76   for (++index; index < 4; ++index) {
     77     old = value->limb[index];
     78     value->limb[index] = old + 1u;
     79     if (value->limb[index] != 0) return;
     80   }
     81 }
     82 
     83 static void kit_tf_u256_add_shifted_sig(kit_tf_u256* product, rep_t sig,
     84                                         int shift) {
     85   const du_int lo = (du_int)sig;
     86   const du_int hi = (du_int)(sig >> 64);
     87   const int index = shift / 64;
     88   const int bits = shift % 64;
     89   if (bits == 0) {
     90     kit_tf_u256_add_limb(product, index, lo);
     91     kit_tf_u256_add_limb(product, index + 1, hi);
     92   } else {
     93     kit_tf_u256_add_limb(product, index, lo << (unsigned)bits);
     94     kit_tf_u256_add_limb(
     95         product, index + 1,
     96         (lo >> (unsigned)(64 - bits)) | (hi << (unsigned)bits));
     97     kit_tf_u256_add_limb(product, index + 2, hi >> (unsigned)(64 - bits));
     98   }
     99 }
    100 
    101 static kit_tf_u256 kit_tf_sig_product(rep_t a, rep_t b) {
    102   kit_tf_u256 product = {{0, 0, 0, 0}};
    103   for (int bit = 0; bit <= significandBits; ++bit) {
    104     if (kit_tf_rep_bit(b, bit)) kit_tf_u256_add_shifted_sig(&product, a, bit);
    105   }
    106   return product;
    107 }
    108 
    109 static rep_t kit_tf_u256_extract_rounded(const kit_tf_u256* product,
    110                                          int shift) {
    111   rep_t result = 0;
    112   for (int bit = 0; bit <= significandBits; ++bit) {
    113     if (kit_tf_u256_bit(product, shift + bit))
    114       result |= (rep_t)1 << (unsigned)bit;
    115   }
    116   if (kit_tf_u256_bit(product, shift - 1) &&
    117       (kit_tf_u256_any_below(product, shift - 1) || (result & 1)))
    118     ++result;
    119   return result;
    120 }
    121 
    122 COMPILER_RT_ABI fp_t __multf3(fp_t a, fp_t b) {
    123   const rep_t aRep = toRep(a);
    124   const rep_t bRep = toRep(b);
    125   const rep_t aAbs = aRep & absMask;
    126   const rep_t bAbs = bRep & absMask;
    127   const rep_t productSign = (aRep ^ bRep) & signBit;
    128   int aExponent = (int)((aAbs >> significandBits) & maxExponent);
    129   int bExponent = (int)((bAbs >> significandBits) & maxExponent);
    130   int productExponent;
    131   int productTop;
    132   int shift;
    133   rep_t aSignificand = aAbs & significandMask;
    134   rep_t bSignificand = bAbs & significandMask;
    135   kit_tf_u256 product;
    136   rep_t resultSignificand;
    137 
    138   if (aAbs > infRep) return fromRep(aRep | quietBit);
    139   if (bAbs > infRep) return fromRep(bRep | quietBit);
    140   if (aAbs == infRep) {
    141     if (bAbs) return fromRep(infRep | productSign);
    142     return fromRep(qnanRep);
    143   }
    144   if (bAbs == infRep) {
    145     if (aAbs) return fromRep(infRep | productSign);
    146     return fromRep(qnanRep);
    147   }
    148   if (!aAbs || !bAbs) return fromRep(productSign);
    149 
    150   if (aExponent == 0)
    151     aExponent = normalize(&aSignificand);
    152   else
    153     aSignificand |= implicitBit;
    154   if (bExponent == 0)
    155     bExponent = normalize(&bSignificand);
    156   else
    157     bSignificand |= implicitBit;
    158 
    159   product = kit_tf_sig_product(aSignificand, bSignificand);
    160   productTop = kit_tf_u256_bit(&product, 225) ? 225 : 224;
    161   productExponent = aExponent + bExponent - exponentBias;
    162   if (productTop == 225) ++productExponent;
    163 
    164   if (productExponent >= maxExponent) return fromRep(infRep | productSign);
    165 
    166   shift = productTop - significandBits;
    167   if (productExponent <= 0) {
    168     shift += 1 - productExponent;
    169     productExponent = 0;
    170   }
    171 
    172   resultSignificand = kit_tf_u256_extract_rounded(&product, shift);
    173   if (resultSignificand & ((rep_t)1 << (significandBits + 1))) {
    174     resultSignificand >>= 1;
    175     ++productExponent;
    176   }
    177   if (productExponent == 0 && (resultSignificand & implicitBit))
    178     productExponent = 1;
    179   if (productExponent >= maxExponent) return fromRep(infRep | productSign);
    180 
    181   return fromRep(productSign | ((rep_t)productExponent << significandBits) |
    182                  (resultSignificand & significandMask));
    183 }
    184 
    185 // ---- divtf3.c ----
    186 #define QUAD_PRECISION
    187 #include "fp_lib.h"
    188 #include "fp_mode.h"
    189 
    190 COMPILER_RT_ABI fp_t __divtf3(fp_t a, fp_t b) {
    191   const rep_t aRep = toRep(a);
    192   const rep_t bRep = toRep(b);
    193   const rep_t aAbs = aRep & absMask;
    194   const rep_t bAbs = bRep & absMask;
    195   const rep_t quotientSign = (aRep ^ bRep) & signBit;
    196   int aExponent = (int)((aAbs >> significandBits) & maxExponent);
    197   int bExponent = (int)((bAbs >> significandBits) & maxExponent);
    198   rep_t aSignificand = aAbs & significandMask;
    199   rep_t bSignificand = bAbs & significandMask;
    200   rep_t quotient = 0;
    201   rep_t remainder;
    202   int writtenExponent;
    203 
    204   if (aAbs > infRep) return fromRep(aRep | quietBit);
    205   if (bAbs > infRep) return fromRep(bRep | quietBit);
    206   if (aAbs == infRep) {
    207     if (bAbs == infRep) return fromRep(qnanRep);
    208     return fromRep(infRep | quotientSign);
    209   }
    210   if (bAbs == infRep) return fromRep(quotientSign);
    211   if (!aAbs) {
    212     if (!bAbs) return fromRep(qnanRep);
    213     return fromRep(quotientSign);
    214   }
    215   if (!bAbs) return fromRep(infRep | quotientSign);
    216 
    217   if (aExponent == 0)
    218     aExponent = normalize(&aSignificand);
    219   else
    220     aSignificand |= implicitBit;
    221   if (bExponent == 0)
    222     bExponent = normalize(&bSignificand);
    223   else
    224     bSignificand |= implicitBit;
    225 
    226   writtenExponent = aExponent - bExponent + exponentBias;
    227   if (aSignificand < bSignificand) {
    228     aSignificand <<= 1;
    229     writtenExponent -= 1;
    230   }
    231 
    232   remainder = aSignificand;
    233   for (int i = 0; i < significandBits + 4; ++i) {
    234     quotient <<= 1;
    235     if (remainder >= bSignificand) {
    236       quotient |= 1;
    237       remainder -= bSignificand;
    238     }
    239     if (i != significandBits + 3) remainder <<= 1;
    240   }
    241   if (remainder) quotient |= 1;
    242 
    243   if (writtenExponent >= maxExponent) return fromRep(infRep | quotientSign);
    244   if (writtenExponent <= 0) {
    245     const int shift = 1 - writtenExponent;
    246     if (shift >= typeWidth) return fromRep(quotientSign);
    247     if (shift > 0) {
    248       const bool sticky = (quotient << (typeWidth - shift)) != 0;
    249       quotient = (quotient >> shift) | sticky;
    250     }
    251     writtenExponent = 0;
    252   }
    253 
    254   const int roundGuardSticky = quotient & 0x7;
    255   rep_t absResult = (quotient >> 3) & significandMask;
    256   absResult |= (rep_t)writtenExponent << significandBits;
    257 
    258   switch (__fe_getround()) {
    259     case CRT_FE_TONEAREST:
    260       if (roundGuardSticky > 0x4) absResult++;
    261       if (roundGuardSticky == 0x4) absResult += absResult & 1;
    262       break;
    263     case CRT_FE_DOWNWARD:
    264       if (quotientSign && roundGuardSticky) absResult++;
    265       break;
    266     case CRT_FE_UPWARD:
    267       if (!quotientSign && roundGuardSticky) absResult++;
    268       break;
    269     case CRT_FE_TOWARDZERO:
    270       break;
    271   }
    272   if (roundGuardSticky) __fe_raise_inexact();
    273   return fromRep(absResult | quotientSign);
    274 }
    275 
    276 // ---- comparetf2.c ----
    277 #define QUAD_PRECISION
    278 #include "fp_compare_impl.inc"
    279 #include "fp_lib.h"
    280 
    281 COMPILER_RT_ABI CMP_RESULT __letf2(fp_t a, fp_t b) { return __leXf2__(a, b); }
    282 COMPILER_RT_ABI CMP_RESULT __eqtf2(fp_t a, fp_t b) { return __leXf2__(a, b); }
    283 COMPILER_RT_ABI CMP_RESULT __lttf2(fp_t a, fp_t b) { return __leXf2__(a, b); }
    284 COMPILER_RT_ABI CMP_RESULT __netf2(fp_t a, fp_t b) { return __leXf2__(a, b); }
    285 COMPILER_RT_ABI CMP_RESULT __getf2(fp_t a, fp_t b) { return __geXf2__(a, b); }
    286 COMPILER_RT_ABI CMP_RESULT __gttf2(fp_t a, fp_t b) { return __geXf2__(a, b); }
    287 COMPILER_RT_ABI CMP_RESULT __unordtf2(fp_t a, fp_t b) {
    288   return __unordXf2__(a, b);
    289 }
    290 
    291 // ---- floatsitf.c ----
    292 #define QUAD_PRECISION
    293 #include "fp_lib.h"
    294 
    295 static int kit_clz_u32(su_int x) {
    296   int n = 0;
    297   for (int bit = 31; bit >= 0; --bit) {
    298     if ((x >> (unsigned)bit) & 1u) break;
    299     ++n;
    300   }
    301   return n;
    302 }
    303 
    304 static int kit_clz_u64(du_int x) {
    305   int n = 0;
    306   for (int bit = 63; bit >= 0; --bit) {
    307     if ((x >> (unsigned)bit) & 1u) break;
    308     ++n;
    309   }
    310   return n;
    311 }
    312 
    313 static fp_t kit_tf_from_u64(du_int mag, rep_t sign, int width) {
    314   if (!mag) return fromRep(0);
    315   int exponent =
    316       (width - 1) - (width == 32 ? kit_clz_u32((su_int)mag) : kit_clz_u64(mag));
    317   int shift = significandBits - exponent;
    318   rep_t result = ((rep_t)mag << shift) ^ implicitBit;
    319   result |= (rep_t)(exponent + exponentBias) << significandBits;
    320   return fromRep(result | sign);
    321 }
    322 
    323 COMPILER_RT_ABI fp_t __floatsitf(si_int a) {
    324   rep_t sign = 0;
    325   su_int mag = (su_int)a;
    326   if (a < 0) {
    327     sign = signBit;
    328     mag = (su_int)(0u - mag);
    329   }
    330   return kit_tf_from_u64((du_int)mag, sign, 32);
    331 }
    332 
    333 // ---- floatunsitf.c ----
    334 #define QUAD_PRECISION
    335 #include "fp_lib.h"
    336 
    337 COMPILER_RT_ABI fp_t __floatunsitf(su_int a) {
    338   return kit_tf_from_u64((du_int)a, 0, 32);
    339 }
    340 
    341 // ---- floatditf.c ----
    342 #define QUAD_PRECISION
    343 #include "fp_lib.h"
    344 
    345 COMPILER_RT_ABI fp_t __floatditf(di_int a) {
    346   rep_t sign = 0;
    347   du_int mag = (du_int)a;
    348   if (a < 0) {
    349     sign = signBit;
    350     mag = (du_int)0 - mag;
    351   }
    352   return kit_tf_from_u64(mag, sign, 64);
    353 }
    354 
    355 // ---- floatunditf.c ----
    356 #define QUAD_PRECISION
    357 #include "fp_lib.h"
    358 
    359 COMPILER_RT_ABI fp_t __floatunditf(du_int a) {
    360   return kit_tf_from_u64(a, 0, 64);
    361 }
    362 
    363 // ---- floattitf.c ----
    364 #define QUAD_PRECISION
    365 #include "fp_lib.h"
    366 #include "int_lib.h"
    367 
    368 #define SRC_I128
    369 #define DST_QUAD
    370 #include "int_to_fp_impl.inc"
    371 
    372 // Returns: convert a ti_int to a fp_t, rounding toward even.
    373 
    374 // Assumption: fp_t is a IEEE 128 bit floating point type
    375 //             ti_int is a 128 bit integral type
    376 
    377 // seee eeee eeee eeee mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm
    378 // mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm
    379 // mmmm mmmm mmmm
    380 
    381 COMPILER_RT_ABI fp_t __floattitf(ti_int a) { return __floatXiYf__(a); }
    382 
    383 #undef SRC_I128
    384 #undef DST_QUAD
    385 // ---- floatuntitf.c ----
    386 #define QUAD_PRECISION
    387 #include "fp_lib.h"
    388 #include "int_lib.h"
    389 
    390 #define SRC_U128
    391 #define DST_QUAD
    392 #include "int_to_fp_impl.inc"
    393 
    394 // Returns: convert a tu_int to a fp_t, rounding toward even.
    395 
    396 // Assumption: fp_t is a IEEE 128 bit floating point type
    397 //             tu_int is a 128 bit integral type
    398 
    399 // seee eeee eeee eeee mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm
    400 // mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm
    401 // mmmm mmmm mmmm
    402 
    403 COMPILER_RT_ABI fp_t __floatuntitf(tu_int a) { return __floatXiYf__(a); }
    404 
    405 #undef SRC_U128
    406 #undef DST_QUAD
    407 // ---- fixtfsi.c ----
    408 #define QUAD_PRECISION
    409 #include "fp_lib.h"
    410 
    411 #define fixint_t si_int
    412 #define fixuint_t su_int
    413 #define FP_FIX_SUFFIX fixtfsi
    414 #include "fp_fixint_impl.inc"
    415 
    416 COMPILER_RT_ABI si_int __fixtfsi(fp_t a) {
    417   union {
    418     fp_t f;
    419     du_int u[2];
    420   } bits;
    421   du_int lo;
    422   du_int hi;
    423   du_int sig_hi;
    424   du_int mag;
    425   int sign;
    426   int exp;
    427   int e;
    428 
    429   bits.f = a;
    430   lo = bits.u[0];
    431   hi = bits.u[1];
    432   sign = (int)(hi >> 63);
    433   exp = (int)((hi >> 48) & 0x7fffu);
    434   if (!exp) return 0;
    435   e = exp - 16383;
    436   if (e < 0) return 0;
    437   if (e >= 31) return sign ? (si_int)0x80000000u : (si_int)0x7fffffffu;
    438 
    439   sig_hi = (hi & 0x0000ffffffffffffull) | 0x0001000000000000ull;
    440   if (e <= 48) {
    441     mag = sig_hi >> (unsigned)(48 - e);
    442   } else {
    443     mag = (sig_hi << (unsigned)(e - 48)) | (lo >> (unsigned)(112 - e));
    444   }
    445   return sign ? -(si_int)mag : (si_int)mag;
    446 }
    447 
    448 #undef fixint_t
    449 #undef fixuint_t
    450 #undef FP_FIX_SUFFIX
    451 // ---- fixtfdi.c ----
    452 #define QUAD_PRECISION
    453 #include "fp_lib.h"
    454 
    455 #define fixint_t di_int
    456 #define fixuint_t du_int
    457 #define FP_FIX_SUFFIX fixtfdi
    458 #include "fp_fixint_impl.inc"
    459 
    460 COMPILER_RT_ABI di_int __fixtfdi(fp_t a) { return __fixint(a); }
    461 
    462 #undef fixint_t
    463 #undef fixuint_t
    464 #undef FP_FIX_SUFFIX
    465 // ---- fixtfti.c ----
    466 #define QUAD_PRECISION
    467 #include "fp_lib.h"
    468 
    469 #define fixint_t ti_int
    470 #define fixuint_t tu_int
    471 #define FP_FIX_SUFFIX fixtfti
    472 #include "fp_fixint_impl.inc"
    473 
    474 COMPILER_RT_ABI ti_int __fixtfti(fp_t a) { return __fixint(a); }
    475 
    476 #undef fixint_t
    477 #undef fixuint_t
    478 #undef FP_FIX_SUFFIX
    479 // ---- fixunstfsi.c ----
    480 #define QUAD_PRECISION
    481 #include "fp_lib.h"
    482 
    483 #define fixuint_t su_int
    484 #define FP_FIX_SUFFIX fixunstfsi
    485 #include "fp_fixuint_impl.inc"
    486 
    487 COMPILER_RT_ABI su_int __fixunstfsi(fp_t a) { return __fixuint(a); }
    488 
    489 #undef fixuint_t
    490 #undef FP_FIX_SUFFIX
    491 // ---- fixunstfdi.c ----
    492 #define QUAD_PRECISION
    493 #include "fp_lib.h"
    494 
    495 #define fixuint_t du_int
    496 #define FP_FIX_SUFFIX fixunstfdi
    497 #include "fp_fixuint_impl.inc"
    498 
    499 COMPILER_RT_ABI du_int __fixunstfdi(fp_t a) { return __fixuint(a); }
    500 
    501 #undef fixuint_t
    502 #undef FP_FIX_SUFFIX
    503 // ---- fixunstfti.c ----
    504 #define QUAD_PRECISION
    505 #include "fp_lib.h"
    506 
    507 #define fixuint_t tu_int
    508 #define FP_FIX_SUFFIX fixunstfti
    509 #include "fp_fixuint_impl.inc"
    510 
    511 COMPILER_RT_ABI tu_int __fixunstfti(fp_t a) { return __fixuint(a); }
    512 
    513 #undef fixuint_t
    514 #undef FP_FIX_SUFFIX
    515 
    516 #include "fp_lib_undef.h"
    517 
    518 // ============================================================
    519 // Section 2: sf -> tf extend
    520 // ============================================================
    521 // ---- extendsftf2.c ----
    522 #define QUAD_PRECISION
    523 #include "fp_lib.h"
    524 
    525 #define SRC_SINGLE
    526 #define DST_QUAD
    527 #include "fp_extend_impl.inc"
    528 
    529 COMPILER_RT_ABI dst_t __extendsftf2(src_t a) { return __extendXfYf2__(a); }
    530 
    531 #undef SRC_SINGLE
    532 #undef DST_QUAD
    533 
    534 #include "fp_lib_undef.h"
    535 
    536 // ============================================================
    537 // Section 3: df -> tf extend
    538 // ============================================================
    539 // ---- extenddftf2.c ----
    540 #define QUAD_PRECISION
    541 #include "fp_lib.h"
    542 
    543 #define SRC_DOUBLE
    544 #define DST_QUAD
    545 #include "fp_extend_impl.inc"
    546 
    547 COMPILER_RT_ABI dst_t __extenddftf2(src_t a) { return __extendXfYf2__(a); }
    548 
    549 #undef SRC_DOUBLE
    550 #undef DST_QUAD
    551 
    552 #include "fp_lib_undef.h"
    553 
    554 // ============================================================
    555 // Section 4: tf -> sf truncate
    556 // ============================================================
    557 // ---- trunctfsf2.c ----
    558 #define QUAD_PRECISION
    559 #include "fp_lib.h"
    560 
    561 #define SRC_QUAD
    562 #define DST_SINGLE
    563 #include "fp_trunc_impl.inc"
    564 
    565 COMPILER_RT_ABI dst_t __trunctfsf2(src_t a) { return __truncXfYf2__(a); }
    566 
    567 #undef SRC_QUAD
    568 #undef DST_SINGLE
    569 
    570 #include "fp_lib_undef.h"
    571 
    572 // ============================================================
    573 // Section 5: tf -> df truncate
    574 // ============================================================
    575 // ---- trunctfdf2.c ----
    576 #define QUAD_PRECISION
    577 #include "fp_lib.h"
    578 
    579 #define SRC_QUAD
    580 #define DST_DOUBLE
    581 #include "fp_trunc_impl.inc"
    582 
    583 COMPILER_RT_ABI dst_t __trunctfdf2(src_t a) { return __truncXfYf2__(a); }
    584 
    585 #undef SRC_QUAD
    586 #undef DST_DOUBLE
	kit kit
	git clone https://git.ryansepassi.com/git/kit.git
	Log \| Files \| Refs \| README