kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

int.c (14847B)


      1 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      2 //
      3 // Consolidated per-op runtime helpers for kit's libkit_rt.a.
      4 // The build compiles only this one file per directory; the per-op .c files
      5 // are #included as snippets and not directly compiled.
      6 // License: Apache-2.0 WITH LLVM-exception (see lib/LICENSE-compiler-rt.txt).
      7 
      8 // int_util.c first: defines __compilerrt_abort_impl used by other helpers.
      9 // ---- int_util.c ----
     10 #include "int_lib.h"
     11 
     12 __attribute__((weak)) __attribute__((visibility("hidden"))) void
     13 __compilerrt_abort_impl(const char* file, int line, const char* function) {
     14   (void)file;
     15   (void)line;
     16   (void)function;
     17   __builtin_trap();
     18 }
     19 
     20 // udivmoddi4.c next: __udivmoddi4 is called by divdi3, moddi3, divmoddi4,
     21 // udivdi3, umoddi3 — must be defined before those callers.
     22 // ---- udivmoddi4.c ----
     23 #include "int_lib.h"
     24 
     25 // Effects: if rem != 0, *rem = a % b
     26 // Returns: a / b
     27 
     28 // Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide
     29 
     30 COMPILER_RT_ABI du_int __udivmoddi4(du_int a, du_int b, du_int* rem) {
     31   const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT;
     32   const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
     33   udwords n;
     34   n.all = a;
     35   udwords d;
     36   d.all = b;
     37   udwords q;
     38   udwords r;
     39   unsigned sr;
     40   // special cases, X is unknown, K != 0
     41   if (n.s.high == 0) {
     42     if (d.s.high == 0) {
     43       // 0 X
     44       // ---
     45       // 0 X
     46       if (rem) *rem = n.s.low % d.s.low;
     47       return n.s.low / d.s.low;
     48     }
     49     // 0 X
     50     // ---
     51     // K X
     52     if (rem) *rem = n.s.low;
     53     return 0;
     54   }
     55   // n.s.high != 0
     56   if (d.s.low == 0) {
     57     if (d.s.high == 0) {
     58       // K X
     59       // ---
     60       // 0 0
     61       if (rem) *rem = n.s.high % d.s.low;
     62       return n.s.high / d.s.low;
     63     }
     64     // d.s.high != 0
     65     if (n.s.low == 0) {
     66       // K 0
     67       // ---
     68       // K 0
     69       if (rem) {
     70         r.s.high = n.s.high % d.s.high;
     71         r.s.low = 0;
     72         *rem = r.all;
     73       }
     74       return n.s.high / d.s.high;
     75     }
     76     // K K
     77     // ---
     78     // K 0
     79     if ((d.s.high & (d.s.high - 1)) == 0) /* if d is a power of 2 */ {
     80       if (rem) {
     81         r.s.low = n.s.low;
     82         r.s.high = n.s.high & (d.s.high - 1);
     83         *rem = r.all;
     84       }
     85       return n.s.high >> ctzsi(d.s.high);
     86     }
     87     // K K
     88     // ---
     89     // K 0
     90     sr = clzsi(d.s.high) - clzsi(n.s.high);
     91     // 0 <= sr <= n_uword_bits - 2 or sr large
     92     if (sr > n_uword_bits - 2) {
     93       if (rem) *rem = n.all;
     94       return 0;
     95     }
     96     ++sr;
     97     // 1 <= sr <= n_uword_bits - 1
     98     // q.all = n.all << (n_udword_bits - sr);
     99     q.s.low = 0;
    100     q.s.high = n.s.low << (n_uword_bits - sr);
    101     // r.all = n.all >> sr;
    102     r.s.high = n.s.high >> sr;
    103     r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
    104   } else /* d.s.low != 0 */ {
    105     if (d.s.high == 0) {
    106       // K X
    107       // ---
    108       // 0 K
    109       if ((d.s.low & (d.s.low - 1)) == 0) /* if d is a power of 2 */ {
    110         if (rem) *rem = n.s.low & (d.s.low - 1);
    111         if (d.s.low == 1) return n.all;
    112         sr = ctzsi(d.s.low);
    113         q.s.high = n.s.high >> sr;
    114         q.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
    115         return q.all;
    116       }
    117       // K X
    118       // ---
    119       // 0 K
    120       sr = 1 + n_uword_bits + clzsi(d.s.low) - clzsi(n.s.high);
    121       // 2 <= sr <= n_udword_bits - 1
    122       // q.all = n.all << (n_udword_bits - sr);
    123       // r.all = n.all >> sr;
    124       if (sr == n_uword_bits) {
    125         q.s.low = 0;
    126         q.s.high = n.s.low;
    127         r.s.high = 0;
    128         r.s.low = n.s.high;
    129       } else if (sr < n_uword_bits) /* 2 <= sr <= n_uword_bits - 1 */ {
    130         q.s.low = 0;
    131         q.s.high = n.s.low << (n_uword_bits - sr);
    132         r.s.high = n.s.high >> sr;
    133         r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
    134       } else /* n_uword_bits + 1 <= sr <= n_udword_bits - 1 */ {
    135         q.s.low = n.s.low << (n_udword_bits - sr);
    136         q.s.high = (n.s.high << (n_udword_bits - sr)) |
    137                    (n.s.low >> (sr - n_uword_bits));
    138         r.s.high = 0;
    139         r.s.low = n.s.high >> (sr - n_uword_bits);
    140       }
    141     } else {
    142       // K X
    143       // ---
    144       // K K
    145       sr = clzsi(d.s.high) - clzsi(n.s.high);
    146       // 0 <= sr <= n_uword_bits - 1 or sr large
    147       if (sr > n_uword_bits - 1) {
    148         if (rem) *rem = n.all;
    149         return 0;
    150       }
    151       ++sr;
    152       // 1 <= sr <= n_uword_bits
    153       // q.all = n.all << (n_udword_bits - sr);
    154       q.s.low = 0;
    155       if (sr == n_uword_bits) {
    156         q.s.high = n.s.low;
    157         r.s.high = 0;
    158         r.s.low = n.s.high;
    159       } else {
    160         q.s.high = n.s.low << (n_uword_bits - sr);
    161         r.s.high = n.s.high >> sr;
    162         r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
    163       }
    164     }
    165   }
    166   // Not a special case
    167   // q and r are initialized with:
    168   // q.all = n.all << (n_udword_bits - sr);
    169   // r.all = n.all >> sr;
    170   // 1 <= sr <= n_udword_bits - 1
    171   su_int carry = 0;
    172   for (; sr > 0; --sr) {
    173     // r:q = ((r:q)  << 1) | carry
    174     r.s.high = (r.s.high << 1) | (r.s.low >> (n_uword_bits - 1));
    175     r.s.low = (r.s.low << 1) | (q.s.high >> (n_uword_bits - 1));
    176     q.s.high = (q.s.high << 1) | (q.s.low >> (n_uword_bits - 1));
    177     q.s.low = (q.s.low << 1) | carry;
    178     // carry = 0;
    179     // if (r.all >= d.all)
    180     // {
    181     //      r.all -= d.all;
    182     //      carry = 1;
    183     // }
    184     const di_int s = (di_int)(d.all - r.all - 1) >> (n_udword_bits - 1);
    185     carry = s & 1;
    186     r.all -= d.all & s;
    187   }
    188   q.all = (q.all << 1) | carry;
    189   if (rem) *rem = r.all;
    190   return q.all;
    191 }
    192 
    193 // Leaf ops (no intra-directory dependencies):
    194 // ---- absvdi2.c ----
    195 #include "int_lib.h"
    196 
    197 // Returns: absolute value
    198 
    199 // Effects: aborts if abs(x) < 0
    200 
    201 COMPILER_RT_ABI di_int __absvdi2(di_int a) {
    202   const int N = (int)(sizeof(di_int) * CHAR_BIT);
    203   if (a == ((di_int)((du_int)1 << (N - 1)))) compilerrt_abort();
    204   const di_int t = a >> (N - 1);
    205   return (a ^ t) - t;
    206 }
    207 // ---- bswapdi2.c ----
    208 #include "int_lib.h"
    209 
    210 COMPILER_RT_ABI uint64_t __bswapdi2(uint64_t u) {
    211   return ((((u) & 0xff00000000000000ULL) >> 56) |
    212           (((u) & 0x00ff000000000000ULL) >> 40) |
    213           (((u) & 0x0000ff0000000000ULL) >> 24) |
    214           (((u) & 0x000000ff00000000ULL) >> 8) |
    215           (((u) & 0x00000000ff000000ULL) << 8) |
    216           (((u) & 0x0000000000ff0000ULL) << 24) |
    217           (((u) & 0x000000000000ff00ULL) << 40) |
    218           (((u) & 0x00000000000000ffULL) << 56));
    219 }
    220 // ---- bswapsi2.c ----
    221 #include "int_lib.h"
    222 
    223 COMPILER_RT_ABI uint32_t __bswapsi2(uint32_t u) {
    224   return ((((u) & 0xff000000) >> 24) | (((u) & 0x00ff0000) >> 8) |
    225           (((u) & 0x0000ff00) << 8) | (((u) & 0x000000ff) << 24));
    226 }
    227 // ---- clzdi2.c ----
    228 #include "int_lib.h"
    229 
    230 // Returns: the number of leading 0-bits
    231 
    232 // Precondition: a != 0
    233 
    234 COMPILER_RT_ABI int __clzdi2(di_int a) {
    235   dwords x;
    236   x.all = a;
    237   const si_int f = -(x.s.high == 0);
    238   return clzsi((x.s.high & ~f) | (x.s.low & f)) +
    239          (f & ((si_int)(sizeof(si_int) * CHAR_BIT)));
    240 }
    241 // ---- clzsi2.c ----
    242 #include "int_lib.h"
    243 
    244 // Returns: the number of leading 0-bits
    245 
    246 // Precondition: a != 0
    247 
    248 COMPILER_RT_ABI int __clzsi2(si_int a) {
    249   su_int x = (su_int)a;
    250   si_int t = ((x & 0xFFFF0000) == 0) << 4;  // if (x is small) t = 16 else 0
    251   x >>= 16 - t;                             // x = [0 - 0xFFFF]
    252   su_int r = t;                             // r = [0, 16]
    253   // return r + clz(x)
    254   t = ((x & 0xFF00) == 0) << 3;
    255   x >>= 8 - t;  // x = [0 - 0xFF]
    256   r += t;       // r = [0, 8, 16, 24]
    257   // return r + clz(x)
    258   t = ((x & 0xF0) == 0) << 2;
    259   x >>= 4 - t;  // x = [0 - 0xF]
    260   r += t;       // r = [0, 4, 8, 12, 16, 20, 24, 28]
    261   // return r + clz(x)
    262   t = ((x & 0xC) == 0) << 1;
    263   x >>= 2 - t;  // x = [0 - 3]
    264   r += t;       // r = [0 - 30] and is even
    265   // return r + clz(x)
    266   //     switch (x)
    267   //     {
    268   //     case 0:
    269   //         return r + 2;
    270   //     case 1:
    271   //         return r + 1;
    272   //     case 2:
    273   //     case 3:
    274   //         return r;
    275   //     }
    276   return r + ((2 - x) & -((x & 2) == 0));
    277 }
    278 // ---- cmpdi2.c ----
    279 #include "int_lib.h"
    280 
    281 // Returns: if (a <  b) returns 0
    282 //           if (a == b) returns 1
    283 //           if (a >  b) returns 2
    284 
    285 COMPILER_RT_ABI si_int __cmpdi2(di_int a, di_int b) {
    286   dwords x;
    287   x.all = a;
    288   dwords y;
    289   y.all = b;
    290   if (x.s.high < y.s.high) return 0;
    291   if (x.s.high > y.s.high) return 2;
    292   if (x.s.low < y.s.low) return 0;
    293   if (x.s.low > y.s.low) return 2;
    294   return 1;
    295 }
    296 
    297 // ---- ctzdi2.c ----
    298 #include "int_lib.h"
    299 
    300 // Returns: the number of trailing 0-bits
    301 
    302 // Precondition: a != 0
    303 
    304 COMPILER_RT_ABI int __ctzdi2(di_int a) {
    305   dwords x;
    306   x.all = a;
    307   const si_int f = -(x.s.low == 0);
    308   return ctzsi((x.s.high & f) | (x.s.low & ~f)) +
    309          (f & ((si_int)(sizeof(si_int) * CHAR_BIT)));
    310 }
    311 // ---- ctzsi2.c ----
    312 #include "int_lib.h"
    313 
    314 // Returns: the number of trailing 0-bits
    315 
    316 // Precondition: a != 0
    317 
    318 COMPILER_RT_ABI int __ctzsi2(si_int a) {
    319   su_int x = (su_int)a;
    320   si_int t = ((x & 0x0000FFFF) == 0)
    321              << 4;  // if (x has no small bits) t = 16 else 0
    322   x >>= t;          // x = [0 - 0xFFFF] + higher garbage bits
    323   su_int r = t;     // r = [0, 16]
    324   // return r + ctz(x)
    325   t = ((x & 0x00FF) == 0) << 3;
    326   x >>= t;  // x = [0 - 0xFF] + higher garbage bits
    327   r += t;   // r = [0, 8, 16, 24]
    328   // return r + ctz(x)
    329   t = ((x & 0x0F) == 0) << 2;
    330   x >>= t;  // x = [0 - 0xF] + higher garbage bits
    331   r += t;   // r = [0, 4, 8, 12, 16, 20, 24, 28]
    332   // return r + ctz(x)
    333   t = ((x & 0x3) == 0) << 1;
    334   x >>= t;
    335   x &= 3;  // x = [0 - 3]
    336   r += t;  // r = [0 - 30] and is even
    337   // return r + ctz(x)
    338 
    339   //  The branch-less return statement below is equivalent
    340   //  to the following switch statement:
    341   //     switch (x)
    342   //    {
    343   //     case 0:
    344   //         return r + 2;
    345   //     case 2:
    346   //         return r + 1;
    347   //     case 1:
    348   //     case 3:
    349   //         return r;
    350   //     }
    351   return r + ((2 - (x >> 1)) & -((x & 1) == 0));
    352 }
    353 // ---- ffsdi2.c ----
    354 #include "int_lib.h"
    355 
    356 // Returns: the index of the least significant 1-bit in a, or
    357 // the value zero if a is zero. The least significant bit is index one.
    358 
    359 COMPILER_RT_ABI int __ffsdi2(di_int a) {
    360   dwords x;
    361   x.all = a;
    362   if (x.s.low == 0) {
    363     if (x.s.high == 0) return 0;
    364     return ctzsi(x.s.high) + (1 + sizeof(si_int) * CHAR_BIT);
    365   }
    366   return ctzsi(x.s.low) + 1;
    367 }
    368 // ---- negdi2.c ----
    369 #include "int_lib.h"
    370 
    371 // Returns: -a
    372 
    373 COMPILER_RT_ABI di_int __negdi2(di_int a) {
    374   // Note: this routine is here for API compatibility; any sane compiler
    375   // should expand it inline.
    376   return -(du_int)a;
    377 }
    378 // ---- paritydi2.c ----
    379 #include "int_lib.h"
    380 
    381 // Returns: 1 if number of bits is odd else returns 0
    382 
    383 COMPILER_RT_ABI int __paritydi2(di_int a) {
    384   dwords x;
    385   x.all = a;
    386   su_int x2 = x.s.high ^ x.s.low;
    387   x2 ^= x2 >> 16;
    388   x2 ^= x2 >> 8;
    389   x2 ^= x2 >> 4;
    390   return (0x6996 >> (x2 & 0xF)) & 1;
    391 }
    392 // ---- paritysi2.c ----
    393 #include "int_lib.h"
    394 
    395 // Returns: 1 if number of bits is odd else returns 0
    396 
    397 COMPILER_RT_ABI int __paritysi2(si_int a) {
    398   su_int x = (su_int)a;
    399   x ^= x >> 16;
    400   x ^= x >> 8;
    401   x ^= x >> 4;
    402   return (0x6996 >> (x & 0xF)) & 1;
    403 }
    404 // ---- popcountdi2.c ----
    405 #include "int_lib.h"
    406 
    407 // Returns: count of 1 bits
    408 
    409 COMPILER_RT_ABI int __popcountdi2(di_int a) {
    410   du_int x2 = (du_int)a;
    411   x2 = x2 - ((x2 >> 1) & 0x5555555555555555uLL);
    412   // Every 2 bits holds the sum of every pair of bits (32)
    413   x2 = ((x2 >> 2) & 0x3333333333333333uLL) + (x2 & 0x3333333333333333uLL);
    414   // Every 4 bits holds the sum of every 4-set of bits (3 significant bits) (16)
    415   x2 = (x2 + (x2 >> 4)) & 0x0F0F0F0F0F0F0F0FuLL;
    416   // Every 8 bits holds the sum of every 8-set of bits (4 significant bits) (8)
    417   su_int x = (su_int)(x2 + (x2 >> 32));
    418   // The lower 32 bits hold four 16 bit sums (5 significant bits).
    419   //   Upper 32 bits are garbage
    420   x = x + (x >> 16);
    421   // The lower 16 bits hold two 32 bit sums (6 significant bits).
    422   //   Upper 16 bits are garbage
    423   return (x + (x >> 8)) & 0x0000007F;  // (7 significant bits)
    424 }
    425 // ---- popcountsi2.c ----
    426 #include "int_lib.h"
    427 
    428 // Returns: count of 1 bits
    429 
    430 COMPILER_RT_ABI int __popcountsi2(si_int a) {
    431   su_int x = (su_int)a;
    432   x = x - ((x >> 1) & 0x55555555);
    433   // Every 2 bits holds the sum of every pair of bits
    434   x = ((x >> 2) & 0x33333333) + (x & 0x33333333);
    435   // Every 4 bits holds the sum of every 4-set of bits (3 significant bits)
    436   x = (x + (x >> 4)) & 0x0F0F0F0F;
    437   // Every 8 bits holds the sum of every 8-set of bits (4 significant bits)
    438   x = (x + (x >> 16));
    439   // The lower 16 bits hold two 8 bit sums (5 significant bits).
    440   //    Upper 16 bits are garbage
    441   return (x + (x >> 8)) & 0x0000003F;  // (6 significant bits)
    442 }
    443 // ---- ucmpdi2.c ----
    444 #include "int_lib.h"
    445 
    446 // Returns:  if (a <  b) returns 0
    447 //           if (a == b) returns 1
    448 //           if (a >  b) returns 2
    449 
    450 COMPILER_RT_ABI si_int __ucmpdi2(du_int a, du_int b) {
    451   udwords x;
    452   x.all = a;
    453   udwords y;
    454   y.all = b;
    455   if (x.s.high < y.s.high) return 0;
    456   if (x.s.high > y.s.high) return 2;
    457   if (x.s.low < y.s.low) return 0;
    458   if (x.s.low > y.s.low) return 2;
    459   return 1;
    460 }
    461 
    462 // Callers of __udivmoddi4:
    463 // ---- udivdi3.c ----
    464 #include "int_lib.h"
    465 
    466 // Returns: a / b
    467 
    468 #define fixint_t di_int
    469 #define fixuint_t du_int
    470 #define INT_DIV_SUFFIX udivdi3
    471 #include "int_div_impl.inc"
    472 
    473 COMPILER_RT_ABI du_int __udivdi3(du_int a, du_int b) {
    474   return __udivXi3_udivdi3(a, b);
    475 }
    476 // ---- umoddi3.c ----
    477 #include "int_lib.h"
    478 
    479 // Returns: a % b
    480 
    481 #define fixint_t di_int
    482 #define fixuint_t du_int
    483 #define INT_DIV_SUFFIX umoddi3
    484 #include "int_div_impl.inc"
    485 
    486 COMPILER_RT_ABI du_int __umoddi3(du_int a, du_int b) {
    487   return __umodXi3_umoddi3(a, b);
    488 }
    489 // ---- divdi3.c ----
    490 #include "int_lib.h"
    491 
    492 // Returns: a / b
    493 
    494 #define fixint_t di_int
    495 #define fixuint_t du_int
    496 #define INT_DIV_SUFFIX divdi3
    497 #define COMPUTE_UDIV(a, b) __udivmoddi4((a), (b), (du_int*)0)
    498 #include "int_div_impl.inc"
    499 
    500 COMPILER_RT_ABI di_int __divdi3(di_int a, di_int b) {
    501   return __divXi3_divdi3(a, b);
    502 }
    503 // ---- moddi3.c ----
    504 #include "int_lib.h"
    505 
    506 // Returns: a % b
    507 
    508 #define fixint_t di_int
    509 #define fixuint_t du_int
    510 #define INT_DIV_SUFFIX moddi3
    511 #define ASSIGN_UMOD(res, a, b) __udivmoddi4((a), (b), &(res))
    512 #include "int_div_impl.inc"
    513 
    514 COMPILER_RT_ABI di_int __moddi3(di_int a, di_int b) {
    515   return __modXi3_moddi3(a, b);
    516 }
    517 // ---- divmoddi4.c ----
    518 #include "int_lib.h"
    519 
    520 // Returns: a / b, *rem = a % b
    521 
    522 COMPILER_RT_ABI di_int __divmoddi4(di_int a, di_int b, di_int* rem) {
    523   const int bits_in_dword_m1 = (int)(sizeof(di_int) * CHAR_BIT) - 1;
    524   di_int s_a = a >> bits_in_dword_m1;  // s_a = a < 0 ? -1 : 0
    525   di_int s_b = b >> bits_in_dword_m1;  // s_b = b < 0 ? -1 : 0
    526   a = (du_int)(a ^ s_a) - s_a;         // negate if s_a == -1
    527   b = (du_int)(b ^ s_b) - s_b;         // negate if s_b == -1
    528   s_b ^= s_a;                          // sign of quotient
    529   du_int r;
    530   di_int q = (__udivmoddi4(a, b, &r) ^ s_b) - s_b;  // negate if s_b == -1
    531   *rem = (r ^ s_a) - s_a;                           // negate if s_a == -1
    532   return q;
    533 }