kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

int64.c (12936B)


      1 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      2 //
      3 // Consolidated per-op runtime helpers for kit's libkit_rt.a.
      4 // The build compiles only this one file per directory; the per-op .c files
      5 // are #included as snippets and not directly compiled.
      6 // License: Apache-2.0 WITH LLVM-exception (see lib/LICENSE-compiler-rt.txt).
      7 
      8 // udivmodti4.c first: __udivmodti4 is called by divmodti4, divti3, modti3,
      9 // udivti3, and umodti3 — must be defined before those callers.
     10 // ---- udivmodti4.c ----
     11 #include "int_lib.h"
     12 
     13 // Returns the 128 bit division result by 64 bit. Result must fit in 64 bits.
     14 // Remainder stored in r.
     15 // Taken and adjusted from libdivide libdivide_128_div_64_to_64 division
     16 // fallback. For a correctness proof see the reference for this algorithm
     17 // in Knuth, Volume 2, section 4.3.1, Algorithm D.
     18 UNUSED
     19 static inline du_int udiv128by64to64default(du_int u1, du_int u0, du_int v,
     20                                             du_int* r) {
     21   const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
     22   const du_int b = (1ULL << (n_udword_bits / 2));  // Number base (32 bits)
     23   du_int un1, un0;                                 // Norm. dividend LSD's
     24   du_int vn1, vn0;                                 // Norm. divisor digits
     25   du_int q1, q0;                                   // Quotient digits
     26   du_int un64, un21, un10;                         // Dividend digit pairs
     27   du_int rhat;                                     // A remainder
     28   si_int s;  // Shift amount for normalization
     29 
     30   s = __builtin_clzll(v);
     31   if (s > 0) {
     32     // Normalize the divisor.
     33     v = v << s;
     34     un64 = (u1 << s) | (u0 >> (n_udword_bits - s));
     35     un10 = u0 << s;  // Shift dividend left
     36   } else {
     37     // Avoid undefined behavior of (u0 >> 64).
     38     un64 = u1;
     39     un10 = u0;
     40   }
     41 
     42   // Break divisor up into two 32-bit digits.
     43   vn1 = v >> (n_udword_bits / 2);
     44   vn0 = v & 0xFFFFFFFF;
     45 
     46   // Break right half of dividend into two digits.
     47   un1 = un10 >> (n_udword_bits / 2);
     48   un0 = un10 & 0xFFFFFFFF;
     49 
     50   // Compute the first quotient digit, q1.
     51   q1 = un64 / vn1;
     52   rhat = un64 - q1 * vn1;
     53 
     54   // q1 has at most error 2. No more than 2 iterations.
     55   while (q1 >= b || q1 * vn0 > b * rhat + un1) {
     56     q1 = q1 - 1;
     57     rhat = rhat + vn1;
     58     if (rhat >= b) break;
     59   }
     60 
     61   un21 = un64 * b + un1 - q1 * v;
     62 
     63   // Compute the second quotient digit.
     64   q0 = un21 / vn1;
     65   rhat = un21 - q0 * vn1;
     66 
     67   // q0 has at most error 2. No more than 2 iterations.
     68   while (q0 >= b || q0 * vn0 > b * rhat + un0) {
     69     q0 = q0 - 1;
     70     rhat = rhat + vn1;
     71     if (rhat >= b) break;
     72   }
     73 
     74   *r = (un21 * b + un0 - q0 * v) >> s;
     75   return q1 * b + q0;
     76 }
     77 
     78 static inline du_int udiv128by64to64(du_int u1, du_int u0, du_int v,
     79                                      du_int* r) {
     80   return udiv128by64to64default(u1, u0, v, r);
     81 }
     82 
     83 static inline int ut_is_zero(utwords a) {
     84   return a.s.low == 0 && a.s.high == 0;
     85 }
     86 
     87 static inline int ut_cmp(utwords a, utwords b) {
     88   if (a.s.high != b.s.high) return a.s.high < b.s.high ? -1 : 1;
     89   if (a.s.low != b.s.low) return a.s.low < b.s.low ? -1 : 1;
     90   return 0;
     91 }
     92 
     93 static inline utwords ut_add(utwords a, utwords b) {
     94   utwords r;
     95   r.s.low = a.s.low + b.s.low;
     96   r.s.high = a.s.high + b.s.high + (r.s.low < a.s.low);
     97   return r;
     98 }
     99 
    100 static inline utwords ut_sub(utwords a, utwords b) {
    101   utwords r;
    102   r.s.low = a.s.low - b.s.low;
    103   r.s.high = a.s.high - b.s.high - (a.s.low < b.s.low);
    104   return r;
    105 }
    106 
    107 static inline utwords ut_neg(utwords a) {
    108   utwords z;
    109   z.s.low = 0;
    110   z.s.high = 0;
    111   return ut_sub(z, a);
    112 }
    113 
    114 static inline utwords ut_shl1(utwords a) {
    115   utwords r;
    116   r.s.low = a.s.low << 1;
    117   r.s.high = (a.s.high << 1) | (a.s.low >> 63);
    118   return r;
    119 }
    120 
    121 static inline utwords ut_shr1(utwords a) {
    122   utwords r;
    123   r.s.low = (a.s.low >> 1) | (a.s.high << 63);
    124   r.s.high = a.s.high >> 1;
    125   return r;
    126 }
    127 
    128 static inline utwords ut_shl(utwords a, unsigned sh) {
    129   utwords r;
    130   if (sh >= 128u) {
    131     r.s.low = 0;
    132     r.s.high = 0;
    133   } else if (sh == 0) {
    134     r = a;
    135   } else if (sh >= 64u) {
    136     r.s.low = 0;
    137     r.s.high = a.s.low << (sh - 64u);
    138   } else {
    139     r.s.low = a.s.low << sh;
    140     r.s.high = (a.s.high << sh) | (a.s.low >> (64u - sh));
    141   }
    142   return r;
    143 }
    144 
    145 static inline utwords ut_lshr(utwords a, unsigned sh) {
    146   utwords r;
    147   if (sh >= 128u) {
    148     r.s.low = 0;
    149     r.s.high = 0;
    150   } else if (sh == 0) {
    151     r = a;
    152   } else if (sh >= 64u) {
    153     r.s.low = a.s.high >> (sh - 64u);
    154     r.s.high = 0;
    155   } else {
    156     r.s.low = (a.s.low >> sh) | (a.s.high << (64u - sh));
    157     r.s.high = a.s.high >> sh;
    158   }
    159   return r;
    160 }
    161 
    162 static inline twords t_ashr(twords a, unsigned sh) {
    163   twords r;
    164   if (sh >= 128u) {
    165     r.s.low = a.s.high < 0 ? ~(du_int)0 : 0;
    166     r.s.high = a.s.high < 0 ? (di_int)-1 : 0;
    167   } else if (sh == 0) {
    168     r = a;
    169   } else if (sh >= 64u) {
    170     r.s.low = (du_int)(a.s.high >> (sh - 64u));
    171     r.s.high = a.s.high < 0 ? (di_int)-1 : 0;
    172   } else {
    173     r.s.low = ((du_int)a.s.high << (64u - sh)) | (a.s.low >> sh);
    174     r.s.high = a.s.high >> sh;
    175   }
    176   return r;
    177 }
    178 
    179 static inline utwords ut_mul(utwords a, utwords b) {
    180   utwords r;
    181   const int half_bits = (int)(sizeof(du_int) * CHAR_BIT) / 2;
    182   const du_int mask = (du_int)~0 >> half_bits;
    183   du_int t;
    184   r.s.low = (a.s.low & mask) * (b.s.low & mask);
    185   t = r.s.low >> half_bits;
    186   r.s.low &= mask;
    187   t += (a.s.low >> half_bits) * (b.s.low & mask);
    188   r.s.low += (t & mask) << half_bits;
    189   r.s.high = t >> half_bits;
    190   t = r.s.low >> half_bits;
    191   r.s.low &= mask;
    192   t += (b.s.low >> half_bits) * (a.s.low & mask);
    193   r.s.low += (t & mask) << half_bits;
    194   r.s.high += t >> half_bits;
    195   r.s.high += (a.s.low >> half_bits) * (b.s.low >> half_bits);
    196   r.s.high += a.s.high * b.s.low + a.s.low * b.s.high;
    197   return r;
    198 }
    199 
    200 static inline void ut_udivmod(utwords n, utwords d, utwords* q, utwords* rem) {
    201   utwords quotient;
    202   utwords remainder;
    203   quotient.s.low = 0;
    204   quotient.s.high = 0;
    205   remainder.s.low = 0;
    206   remainder.s.high = 0;
    207   if (ut_is_zero(d)) {
    208     if (q) *q = quotient;
    209     if (rem) *rem = n;
    210     return;
    211   }
    212   for (int i = 127; i >= 0; --i) {
    213     du_int bit = i < 64 ? ((n.s.low >> (unsigned)i) & 1u)
    214                         : ((n.s.high >> (unsigned)(i - 64)) & 1u);
    215     remainder = ut_shl1(remainder);
    216     remainder.s.low |= bit;
    217     if (ut_cmp(remainder, d) >= 0) {
    218       remainder = ut_sub(remainder, d);
    219       if (i < 64)
    220         quotient.s.low |= (du_int)1 << (unsigned)i;
    221       else
    222         quotient.s.high |= (du_int)1 << (unsigned)(i - 64);
    223     }
    224   }
    225   if (q) *q = quotient;
    226   if (rem) *rem = remainder;
    227 }
    228 
    229 // Effects: if rem != 0, *rem = a % b
    230 // Returns: a / b
    231 
    232 COMPILER_RT_ABI tu_int __udivmodti4(tu_int a, tu_int b, tu_int* rem) {
    233   utwords dividend;
    234   dividend.all = a;
    235   utwords divisor;
    236   divisor.all = b;
    237   utwords quotient;
    238   utwords remainder;
    239   ut_udivmod(dividend, divisor, &quotient, &remainder);
    240   if (rem) *rem = remainder.all;
    241   return quotient.all;
    242 }
    243 
    244 // Leaf ops (no intra-directory dependencies):
    245 // ---- ashlti3.c ----
    246 #include "int_lib.h"
    247 
    248 // Returns: a << b
    249 
    250 // Precondition:  0 <= b < bits_in_tword
    251 
    252 COMPILER_RT_ABI ti_int __ashlti3(ti_int a, int b) {
    253   utwords input;
    254   utwords result;
    255   input.all = a;
    256   result = ut_shl(input, (unsigned)b);
    257   return (ti_int)result.all;
    258 }
    259 
    260 // ---- ashrti3.c ----
    261 #include "int_lib.h"
    262 
    263 // Returns: arithmetic a >> b
    264 
    265 // Precondition:  0 <= b < bits_in_tword
    266 
    267 COMPILER_RT_ABI ti_int __ashrti3(ti_int a, int b) {
    268   twords input;
    269   twords result;
    270   input.all = a;
    271   result = t_ashr(input, (unsigned)b);
    272   return result.all;
    273 }
    274 
    275 // ---- clzti2.c ----
    276 #include "int_lib.h"
    277 
    278 // Returns: the number of leading 0-bits
    279 
    280 // Precondition: a != 0
    281 
    282 COMPILER_RT_ABI int __clzti2(ti_int a) {
    283   twords x;
    284   x.all = a;
    285   const di_int f = -(x.s.high == 0);
    286   return __builtin_clzll((x.s.high & ~f) | (x.s.low & f)) +
    287          ((si_int)f & ((si_int)(sizeof(di_int) * CHAR_BIT)));
    288 }
    289 
    290 // ---- ctzti2.c ----
    291 #include "int_lib.h"
    292 
    293 // Returns: the number of trailing 0-bits
    294 
    295 // Precondition: a != 0
    296 
    297 COMPILER_RT_ABI int __ctzti2(ti_int a) {
    298   twords x;
    299   x.all = a;
    300   const di_int f = -(x.s.low == 0);
    301   return __builtin_ctzll((x.s.high & f) | (x.s.low & ~f)) +
    302          ((si_int)f & ((si_int)(sizeof(di_int) * CHAR_BIT)));
    303 }
    304 
    305 // ---- lshrti3.c ----
    306 #include "int_lib.h"
    307 
    308 // Returns: logical a >> b
    309 
    310 // Precondition:  0 <= b < bits_in_tword
    311 
    312 COMPILER_RT_ABI ti_int __lshrti3(ti_int a, int b) {
    313   utwords input;
    314   utwords result;
    315   input.all = a;
    316   result = ut_lshr(input, (unsigned)b);
    317   return (ti_int)result.all;
    318 }
    319 
    320 // ---- multi3.c ----
    321 #include "int_lib.h"
    322 
    323 // Returns: a * b
    324 
    325 static ti_int __mulddi3(du_int a, du_int b) {
    326   twords r;
    327   const int bits_in_dword_2 = (int)(sizeof(di_int) * CHAR_BIT) / 2;
    328   const du_int lower_mask = (du_int)~0 >> bits_in_dword_2;
    329   r.s.low = (a & lower_mask) * (b & lower_mask);
    330   du_int t = r.s.low >> bits_in_dword_2;
    331   r.s.low &= lower_mask;
    332   t += (a >> bits_in_dword_2) * (b & lower_mask);
    333   r.s.low += (t & lower_mask) << bits_in_dword_2;
    334   r.s.high = t >> bits_in_dword_2;
    335   t = r.s.low >> bits_in_dword_2;
    336   r.s.low &= lower_mask;
    337   t += (b >> bits_in_dword_2) * (a & lower_mask);
    338   r.s.low += (t & lower_mask) << bits_in_dword_2;
    339   r.s.high += t >> bits_in_dword_2;
    340   r.s.high += (a >> bits_in_dword_2) * (b >> bits_in_dword_2);
    341   return r.all;
    342 }
    343 
    344 // Returns: a * b
    345 
    346 COMPILER_RT_ABI ti_int __multi3(ti_int a, ti_int b) {
    347   utwords x;
    348   utwords y;
    349   utwords r;
    350   x.all = (tu_int)a;
    351   y.all = (tu_int)b;
    352   r = ut_mul(x, y);
    353   return (ti_int)r.all;
    354 }
    355 
    356 // ---- negti2.c ----
    357 #include "int_lib.h"
    358 
    359 // Returns: -a
    360 
    361 COMPILER_RT_ABI ti_int __negti2(ti_int a) {
    362   utwords x;
    363   utwords r;
    364   x.all = (tu_int)a;
    365   r = ut_neg(x);
    366   return (ti_int)r.all;
    367 }
    368 
    369 COMPILER_RT_ABI ti_int __kit_addti3(ti_int a, ti_int b) {
    370   utwords x;
    371   utwords y;
    372   utwords r;
    373   x.all = (tu_int)a;
    374   y.all = (tu_int)b;
    375   r = ut_add(x, y);
    376   return (ti_int)r.all;
    377 }
    378 
    379 COMPILER_RT_ABI ti_int __kit_subti3(ti_int a, ti_int b) {
    380   utwords x;
    381   utwords y;
    382   utwords r;
    383   x.all = (tu_int)a;
    384   y.all = (tu_int)b;
    385   r = ut_sub(x, y);
    386   return (ti_int)r.all;
    387 }
    388 
    389 COMPILER_RT_ABI ti_int __kit_andti3(ti_int a, ti_int b) {
    390   utwords x;
    391   utwords y;
    392   utwords r;
    393   x.all = (tu_int)a;
    394   y.all = (tu_int)b;
    395   r.s.low = x.s.low & y.s.low;
    396   r.s.high = x.s.high & y.s.high;
    397   return (ti_int)r.all;
    398 }
    399 
    400 COMPILER_RT_ABI ti_int __kit_orti3(ti_int a, ti_int b) {
    401   utwords x;
    402   utwords y;
    403   utwords r;
    404   x.all = (tu_int)a;
    405   y.all = (tu_int)b;
    406   r.s.low = x.s.low | y.s.low;
    407   r.s.high = x.s.high | y.s.high;
    408   return (ti_int)r.all;
    409 }
    410 
    411 COMPILER_RT_ABI ti_int __kit_xorti3(ti_int a, ti_int b) {
    412   utwords x;
    413   utwords y;
    414   utwords r;
    415   x.all = (tu_int)a;
    416   y.all = (tu_int)b;
    417   r.s.low = x.s.low ^ y.s.low;
    418   r.s.high = x.s.high ^ y.s.high;
    419   return (ti_int)r.all;
    420 }
    421 
    422 COMPILER_RT_ABI ti_int __kit_notti3(ti_int a) {
    423   utwords x;
    424   utwords r;
    425   x.all = (tu_int)a;
    426   r.s.low = ~x.s.low;
    427   r.s.high = ~x.s.high;
    428   return (ti_int)r.all;
    429 }
    430 
    431 COMPILER_RT_ABI ti_int __kit_sext64ti(di_int a) {
    432   twords r;
    433   r.s.low = (du_int)a;
    434   r.s.high = a < 0 ? -1 : 0;
    435   return r.all;
    436 }
    437 
    438 COMPILER_RT_ABI ti_int __kit_zext64ti(du_int a) {
    439   utwords r;
    440   r.s.low = a;
    441   r.s.high = 0;
    442   return (ti_int)r.all;
    443 }
    444 
    445 COMPILER_RT_ABI si_int __kit_cmpti2(ti_int a, ti_int b) {
    446   twords x;
    447   twords y;
    448   x.all = a;
    449   y.all = b;
    450   if (x.s.high < y.s.high) return -1;
    451   if (x.s.high > y.s.high) return 1;
    452   if (x.s.low < y.s.low) return -1;
    453   if (x.s.low > y.s.low) return 1;
    454   return 0;
    455 }
    456 
    457 COMPILER_RT_ABI si_int __kit_ucmpti2(tu_int a, tu_int b) {
    458   utwords x;
    459   utwords y;
    460   x.all = a;
    461   y.all = b;
    462   if (x.s.high < y.s.high) return -1;
    463   if (x.s.high > y.s.high) return 1;
    464   if (x.s.low < y.s.low) return -1;
    465   if (x.s.low > y.s.low) return 1;
    466   return 0;
    467 }
    468 
    469 // Callers of __udivmodti4:
    470 // ---- udivti3.c ----
    471 #include "int_lib.h"
    472 
    473 // Returns: a / b
    474 
    475 COMPILER_RT_ABI tu_int __udivti3(tu_int a, tu_int b) {
    476   return __udivmodti4(a, b, 0);
    477 }
    478 
    479 // ---- umodti3.c ----
    480 #include "int_lib.h"
    481 
    482 // Returns: a % b
    483 
    484 COMPILER_RT_ABI tu_int __umodti3(tu_int a, tu_int b) {
    485   tu_int r;
    486   __udivmodti4(a, b, &r);
    487   return r;
    488 }
    489 
    490 // ---- divmodti4.c ----
    491 #include "int_lib.h"
    492 
    493 // Returns: a / b, *rem = a % b
    494 
    495 COMPILER_RT_ABI ti_int __divmodti4(ti_int a, ti_int b, ti_int* rem) {
    496   twords sa;
    497   twords sb;
    498   utwords ua;
    499   utwords ub;
    500   utwords uq;
    501   utwords ur;
    502   int neg_a;
    503   int neg_b;
    504   sa.all = a;
    505   sb.all = b;
    506   neg_a = sa.s.high < 0;
    507   neg_b = sb.s.high < 0;
    508   ua.all = (tu_int)a;
    509   ub.all = (tu_int)b;
    510   if (neg_a) ua = ut_neg(ua);
    511   if (neg_b) ub = ut_neg(ub);
    512   ut_udivmod(ua, ub, &uq, &ur);
    513   if (neg_a != neg_b) uq = ut_neg(uq);
    514   if (neg_a) ur = ut_neg(ur);
    515   if (rem) *rem = (ti_int)ur.all;
    516   return (ti_int)uq.all;
    517 }
    518 
    519 // ---- divti3.c ----
    520 #include "int_lib.h"
    521 
    522 // Returns: a / b
    523 
    524 COMPILER_RT_ABI ti_int __divti3(ti_int a, ti_int b) {
    525   ti_int r;
    526   return __divmodti4(a, b, &r);
    527 }
    528 
    529 // ---- modti3.c ----
    530 #include "int_lib.h"
    531 
    532 // Returns: a % b
    533 
    534 COMPILER_RT_ABI ti_int __modti3(ti_int a, ti_int b) {
    535   ti_int r;
    536   (void)__divmodti4(a, b, &r);
    537   return r;
    538 }