int64.c (12936B)
1 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 2 // 3 // Consolidated per-op runtime helpers for kit's libkit_rt.a. 4 // The build compiles only this one file per directory; the per-op .c files 5 // are #included as snippets and not directly compiled. 6 // License: Apache-2.0 WITH LLVM-exception (see lib/LICENSE-compiler-rt.txt). 7 8 // udivmodti4.c first: __udivmodti4 is called by divmodti4, divti3, modti3, 9 // udivti3, and umodti3 — must be defined before those callers. 10 // ---- udivmodti4.c ---- 11 #include "int_lib.h" 12 13 // Returns the 128 bit division result by 64 bit. Result must fit in 64 bits. 14 // Remainder stored in r. 15 // Taken and adjusted from libdivide libdivide_128_div_64_to_64 division 16 // fallback. For a correctness proof see the reference for this algorithm 17 // in Knuth, Volume 2, section 4.3.1, Algorithm D. 18 UNUSED 19 static inline du_int udiv128by64to64default(du_int u1, du_int u0, du_int v, 20 du_int* r) { 21 const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT; 22 const du_int b = (1ULL << (n_udword_bits / 2)); // Number base (32 bits) 23 du_int un1, un0; // Norm. dividend LSD's 24 du_int vn1, vn0; // Norm. divisor digits 25 du_int q1, q0; // Quotient digits 26 du_int un64, un21, un10; // Dividend digit pairs 27 du_int rhat; // A remainder 28 si_int s; // Shift amount for normalization 29 30 s = __builtin_clzll(v); 31 if (s > 0) { 32 // Normalize the divisor. 33 v = v << s; 34 un64 = (u1 << s) | (u0 >> (n_udword_bits - s)); 35 un10 = u0 << s; // Shift dividend left 36 } else { 37 // Avoid undefined behavior of (u0 >> 64). 38 un64 = u1; 39 un10 = u0; 40 } 41 42 // Break divisor up into two 32-bit digits. 43 vn1 = v >> (n_udword_bits / 2); 44 vn0 = v & 0xFFFFFFFF; 45 46 // Break right half of dividend into two digits. 47 un1 = un10 >> (n_udword_bits / 2); 48 un0 = un10 & 0xFFFFFFFF; 49 50 // Compute the first quotient digit, q1. 51 q1 = un64 / vn1; 52 rhat = un64 - q1 * vn1; 53 54 // q1 has at most error 2. No more than 2 iterations. 55 while (q1 >= b || q1 * vn0 > b * rhat + un1) { 56 q1 = q1 - 1; 57 rhat = rhat + vn1; 58 if (rhat >= b) break; 59 } 60 61 un21 = un64 * b + un1 - q1 * v; 62 63 // Compute the second quotient digit. 64 q0 = un21 / vn1; 65 rhat = un21 - q0 * vn1; 66 67 // q0 has at most error 2. No more than 2 iterations. 68 while (q0 >= b || q0 * vn0 > b * rhat + un0) { 69 q0 = q0 - 1; 70 rhat = rhat + vn1; 71 if (rhat >= b) break; 72 } 73 74 *r = (un21 * b + un0 - q0 * v) >> s; 75 return q1 * b + q0; 76 } 77 78 static inline du_int udiv128by64to64(du_int u1, du_int u0, du_int v, 79 du_int* r) { 80 return udiv128by64to64default(u1, u0, v, r); 81 } 82 83 static inline int ut_is_zero(utwords a) { 84 return a.s.low == 0 && a.s.high == 0; 85 } 86 87 static inline int ut_cmp(utwords a, utwords b) { 88 if (a.s.high != b.s.high) return a.s.high < b.s.high ? -1 : 1; 89 if (a.s.low != b.s.low) return a.s.low < b.s.low ? -1 : 1; 90 return 0; 91 } 92 93 static inline utwords ut_add(utwords a, utwords b) { 94 utwords r; 95 r.s.low = a.s.low + b.s.low; 96 r.s.high = a.s.high + b.s.high + (r.s.low < a.s.low); 97 return r; 98 } 99 100 static inline utwords ut_sub(utwords a, utwords b) { 101 utwords r; 102 r.s.low = a.s.low - b.s.low; 103 r.s.high = a.s.high - b.s.high - (a.s.low < b.s.low); 104 return r; 105 } 106 107 static inline utwords ut_neg(utwords a) { 108 utwords z; 109 z.s.low = 0; 110 z.s.high = 0; 111 return ut_sub(z, a); 112 } 113 114 static inline utwords ut_shl1(utwords a) { 115 utwords r; 116 r.s.low = a.s.low << 1; 117 r.s.high = (a.s.high << 1) | (a.s.low >> 63); 118 return r; 119 } 120 121 static inline utwords ut_shr1(utwords a) { 122 utwords r; 123 r.s.low = (a.s.low >> 1) | (a.s.high << 63); 124 r.s.high = a.s.high >> 1; 125 return r; 126 } 127 128 static inline utwords ut_shl(utwords a, unsigned sh) { 129 utwords r; 130 if (sh >= 128u) { 131 r.s.low = 0; 132 r.s.high = 0; 133 } else if (sh == 0) { 134 r = a; 135 } else if (sh >= 64u) { 136 r.s.low = 0; 137 r.s.high = a.s.low << (sh - 64u); 138 } else { 139 r.s.low = a.s.low << sh; 140 r.s.high = (a.s.high << sh) | (a.s.low >> (64u - sh)); 141 } 142 return r; 143 } 144 145 static inline utwords ut_lshr(utwords a, unsigned sh) { 146 utwords r; 147 if (sh >= 128u) { 148 r.s.low = 0; 149 r.s.high = 0; 150 } else if (sh == 0) { 151 r = a; 152 } else if (sh >= 64u) { 153 r.s.low = a.s.high >> (sh - 64u); 154 r.s.high = 0; 155 } else { 156 r.s.low = (a.s.low >> sh) | (a.s.high << (64u - sh)); 157 r.s.high = a.s.high >> sh; 158 } 159 return r; 160 } 161 162 static inline twords t_ashr(twords a, unsigned sh) { 163 twords r; 164 if (sh >= 128u) { 165 r.s.low = a.s.high < 0 ? ~(du_int)0 : 0; 166 r.s.high = a.s.high < 0 ? (di_int)-1 : 0; 167 } else if (sh == 0) { 168 r = a; 169 } else if (sh >= 64u) { 170 r.s.low = (du_int)(a.s.high >> (sh - 64u)); 171 r.s.high = a.s.high < 0 ? (di_int)-1 : 0; 172 } else { 173 r.s.low = ((du_int)a.s.high << (64u - sh)) | (a.s.low >> sh); 174 r.s.high = a.s.high >> sh; 175 } 176 return r; 177 } 178 179 static inline utwords ut_mul(utwords a, utwords b) { 180 utwords r; 181 const int half_bits = (int)(sizeof(du_int) * CHAR_BIT) / 2; 182 const du_int mask = (du_int)~0 >> half_bits; 183 du_int t; 184 r.s.low = (a.s.low & mask) * (b.s.low & mask); 185 t = r.s.low >> half_bits; 186 r.s.low &= mask; 187 t += (a.s.low >> half_bits) * (b.s.low & mask); 188 r.s.low += (t & mask) << half_bits; 189 r.s.high = t >> half_bits; 190 t = r.s.low >> half_bits; 191 r.s.low &= mask; 192 t += (b.s.low >> half_bits) * (a.s.low & mask); 193 r.s.low += (t & mask) << half_bits; 194 r.s.high += t >> half_bits; 195 r.s.high += (a.s.low >> half_bits) * (b.s.low >> half_bits); 196 r.s.high += a.s.high * b.s.low + a.s.low * b.s.high; 197 return r; 198 } 199 200 static inline void ut_udivmod(utwords n, utwords d, utwords* q, utwords* rem) { 201 utwords quotient; 202 utwords remainder; 203 quotient.s.low = 0; 204 quotient.s.high = 0; 205 remainder.s.low = 0; 206 remainder.s.high = 0; 207 if (ut_is_zero(d)) { 208 if (q) *q = quotient; 209 if (rem) *rem = n; 210 return; 211 } 212 for (int i = 127; i >= 0; --i) { 213 du_int bit = i < 64 ? ((n.s.low >> (unsigned)i) & 1u) 214 : ((n.s.high >> (unsigned)(i - 64)) & 1u); 215 remainder = ut_shl1(remainder); 216 remainder.s.low |= bit; 217 if (ut_cmp(remainder, d) >= 0) { 218 remainder = ut_sub(remainder, d); 219 if (i < 64) 220 quotient.s.low |= (du_int)1 << (unsigned)i; 221 else 222 quotient.s.high |= (du_int)1 << (unsigned)(i - 64); 223 } 224 } 225 if (q) *q = quotient; 226 if (rem) *rem = remainder; 227 } 228 229 // Effects: if rem != 0, *rem = a % b 230 // Returns: a / b 231 232 COMPILER_RT_ABI tu_int __udivmodti4(tu_int a, tu_int b, tu_int* rem) { 233 utwords dividend; 234 dividend.all = a; 235 utwords divisor; 236 divisor.all = b; 237 utwords quotient; 238 utwords remainder; 239 ut_udivmod(dividend, divisor, "ient, &remainder); 240 if (rem) *rem = remainder.all; 241 return quotient.all; 242 } 243 244 // Leaf ops (no intra-directory dependencies): 245 // ---- ashlti3.c ---- 246 #include "int_lib.h" 247 248 // Returns: a << b 249 250 // Precondition: 0 <= b < bits_in_tword 251 252 COMPILER_RT_ABI ti_int __ashlti3(ti_int a, int b) { 253 utwords input; 254 utwords result; 255 input.all = a; 256 result = ut_shl(input, (unsigned)b); 257 return (ti_int)result.all; 258 } 259 260 // ---- ashrti3.c ---- 261 #include "int_lib.h" 262 263 // Returns: arithmetic a >> b 264 265 // Precondition: 0 <= b < bits_in_tword 266 267 COMPILER_RT_ABI ti_int __ashrti3(ti_int a, int b) { 268 twords input; 269 twords result; 270 input.all = a; 271 result = t_ashr(input, (unsigned)b); 272 return result.all; 273 } 274 275 // ---- clzti2.c ---- 276 #include "int_lib.h" 277 278 // Returns: the number of leading 0-bits 279 280 // Precondition: a != 0 281 282 COMPILER_RT_ABI int __clzti2(ti_int a) { 283 twords x; 284 x.all = a; 285 const di_int f = -(x.s.high == 0); 286 return __builtin_clzll((x.s.high & ~f) | (x.s.low & f)) + 287 ((si_int)f & ((si_int)(sizeof(di_int) * CHAR_BIT))); 288 } 289 290 // ---- ctzti2.c ---- 291 #include "int_lib.h" 292 293 // Returns: the number of trailing 0-bits 294 295 // Precondition: a != 0 296 297 COMPILER_RT_ABI int __ctzti2(ti_int a) { 298 twords x; 299 x.all = a; 300 const di_int f = -(x.s.low == 0); 301 return __builtin_ctzll((x.s.high & f) | (x.s.low & ~f)) + 302 ((si_int)f & ((si_int)(sizeof(di_int) * CHAR_BIT))); 303 } 304 305 // ---- lshrti3.c ---- 306 #include "int_lib.h" 307 308 // Returns: logical a >> b 309 310 // Precondition: 0 <= b < bits_in_tword 311 312 COMPILER_RT_ABI ti_int __lshrti3(ti_int a, int b) { 313 utwords input; 314 utwords result; 315 input.all = a; 316 result = ut_lshr(input, (unsigned)b); 317 return (ti_int)result.all; 318 } 319 320 // ---- multi3.c ---- 321 #include "int_lib.h" 322 323 // Returns: a * b 324 325 static ti_int __mulddi3(du_int a, du_int b) { 326 twords r; 327 const int bits_in_dword_2 = (int)(sizeof(di_int) * CHAR_BIT) / 2; 328 const du_int lower_mask = (du_int)~0 >> bits_in_dword_2; 329 r.s.low = (a & lower_mask) * (b & lower_mask); 330 du_int t = r.s.low >> bits_in_dword_2; 331 r.s.low &= lower_mask; 332 t += (a >> bits_in_dword_2) * (b & lower_mask); 333 r.s.low += (t & lower_mask) << bits_in_dword_2; 334 r.s.high = t >> bits_in_dword_2; 335 t = r.s.low >> bits_in_dword_2; 336 r.s.low &= lower_mask; 337 t += (b >> bits_in_dword_2) * (a & lower_mask); 338 r.s.low += (t & lower_mask) << bits_in_dword_2; 339 r.s.high += t >> bits_in_dword_2; 340 r.s.high += (a >> bits_in_dword_2) * (b >> bits_in_dword_2); 341 return r.all; 342 } 343 344 // Returns: a * b 345 346 COMPILER_RT_ABI ti_int __multi3(ti_int a, ti_int b) { 347 utwords x; 348 utwords y; 349 utwords r; 350 x.all = (tu_int)a; 351 y.all = (tu_int)b; 352 r = ut_mul(x, y); 353 return (ti_int)r.all; 354 } 355 356 // ---- negti2.c ---- 357 #include "int_lib.h" 358 359 // Returns: -a 360 361 COMPILER_RT_ABI ti_int __negti2(ti_int a) { 362 utwords x; 363 utwords r; 364 x.all = (tu_int)a; 365 r = ut_neg(x); 366 return (ti_int)r.all; 367 } 368 369 COMPILER_RT_ABI ti_int __kit_addti3(ti_int a, ti_int b) { 370 utwords x; 371 utwords y; 372 utwords r; 373 x.all = (tu_int)a; 374 y.all = (tu_int)b; 375 r = ut_add(x, y); 376 return (ti_int)r.all; 377 } 378 379 COMPILER_RT_ABI ti_int __kit_subti3(ti_int a, ti_int b) { 380 utwords x; 381 utwords y; 382 utwords r; 383 x.all = (tu_int)a; 384 y.all = (tu_int)b; 385 r = ut_sub(x, y); 386 return (ti_int)r.all; 387 } 388 389 COMPILER_RT_ABI ti_int __kit_andti3(ti_int a, ti_int b) { 390 utwords x; 391 utwords y; 392 utwords r; 393 x.all = (tu_int)a; 394 y.all = (tu_int)b; 395 r.s.low = x.s.low & y.s.low; 396 r.s.high = x.s.high & y.s.high; 397 return (ti_int)r.all; 398 } 399 400 COMPILER_RT_ABI ti_int __kit_orti3(ti_int a, ti_int b) { 401 utwords x; 402 utwords y; 403 utwords r; 404 x.all = (tu_int)a; 405 y.all = (tu_int)b; 406 r.s.low = x.s.low | y.s.low; 407 r.s.high = x.s.high | y.s.high; 408 return (ti_int)r.all; 409 } 410 411 COMPILER_RT_ABI ti_int __kit_xorti3(ti_int a, ti_int b) { 412 utwords x; 413 utwords y; 414 utwords r; 415 x.all = (tu_int)a; 416 y.all = (tu_int)b; 417 r.s.low = x.s.low ^ y.s.low; 418 r.s.high = x.s.high ^ y.s.high; 419 return (ti_int)r.all; 420 } 421 422 COMPILER_RT_ABI ti_int __kit_notti3(ti_int a) { 423 utwords x; 424 utwords r; 425 x.all = (tu_int)a; 426 r.s.low = ~x.s.low; 427 r.s.high = ~x.s.high; 428 return (ti_int)r.all; 429 } 430 431 COMPILER_RT_ABI ti_int __kit_sext64ti(di_int a) { 432 twords r; 433 r.s.low = (du_int)a; 434 r.s.high = a < 0 ? -1 : 0; 435 return r.all; 436 } 437 438 COMPILER_RT_ABI ti_int __kit_zext64ti(du_int a) { 439 utwords r; 440 r.s.low = a; 441 r.s.high = 0; 442 return (ti_int)r.all; 443 } 444 445 COMPILER_RT_ABI si_int __kit_cmpti2(ti_int a, ti_int b) { 446 twords x; 447 twords y; 448 x.all = a; 449 y.all = b; 450 if (x.s.high < y.s.high) return -1; 451 if (x.s.high > y.s.high) return 1; 452 if (x.s.low < y.s.low) return -1; 453 if (x.s.low > y.s.low) return 1; 454 return 0; 455 } 456 457 COMPILER_RT_ABI si_int __kit_ucmpti2(tu_int a, tu_int b) { 458 utwords x; 459 utwords y; 460 x.all = a; 461 y.all = b; 462 if (x.s.high < y.s.high) return -1; 463 if (x.s.high > y.s.high) return 1; 464 if (x.s.low < y.s.low) return -1; 465 if (x.s.low > y.s.low) return 1; 466 return 0; 467 } 468 469 // Callers of __udivmodti4: 470 // ---- udivti3.c ---- 471 #include "int_lib.h" 472 473 // Returns: a / b 474 475 COMPILER_RT_ABI tu_int __udivti3(tu_int a, tu_int b) { 476 return __udivmodti4(a, b, 0); 477 } 478 479 // ---- umodti3.c ---- 480 #include "int_lib.h" 481 482 // Returns: a % b 483 484 COMPILER_RT_ABI tu_int __umodti3(tu_int a, tu_int b) { 485 tu_int r; 486 __udivmodti4(a, b, &r); 487 return r; 488 } 489 490 // ---- divmodti4.c ---- 491 #include "int_lib.h" 492 493 // Returns: a / b, *rem = a % b 494 495 COMPILER_RT_ABI ti_int __divmodti4(ti_int a, ti_int b, ti_int* rem) { 496 twords sa; 497 twords sb; 498 utwords ua; 499 utwords ub; 500 utwords uq; 501 utwords ur; 502 int neg_a; 503 int neg_b; 504 sa.all = a; 505 sb.all = b; 506 neg_a = sa.s.high < 0; 507 neg_b = sb.s.high < 0; 508 ua.all = (tu_int)a; 509 ub.all = (tu_int)b; 510 if (neg_a) ua = ut_neg(ua); 511 if (neg_b) ub = ut_neg(ub); 512 ut_udivmod(ua, ub, &uq, &ur); 513 if (neg_a != neg_b) uq = ut_neg(uq); 514 if (neg_a) ur = ut_neg(ur); 515 if (rem) *rem = (ti_int)ur.all; 516 return (ti_int)uq.all; 517 } 518 519 // ---- divti3.c ---- 520 #include "int_lib.h" 521 522 // Returns: a / b 523 524 COMPILER_RT_ABI ti_int __divti3(ti_int a, ti_int b) { 525 ti_int r; 526 return __divmodti4(a, b, &r); 527 } 528 529 // ---- modti3.c ---- 530 #include "int_lib.h" 531 532 // Returns: a % b 533 534 COMPILER_RT_ABI ti_int __modti3(ti_int a, ti_int b) { 535 ti_int r; 536 (void)__divmodti4(a, b, &r); 537 return r; 538 }