int.c (14847B)
1 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 2 // 3 // Consolidated per-op runtime helpers for kit's libkit_rt.a. 4 // The build compiles only this one file per directory; the per-op .c files 5 // are #included as snippets and not directly compiled. 6 // License: Apache-2.0 WITH LLVM-exception (see lib/LICENSE-compiler-rt.txt). 7 8 // int_util.c first: defines __compilerrt_abort_impl used by other helpers. 9 // ---- int_util.c ---- 10 #include "int_lib.h" 11 12 __attribute__((weak)) __attribute__((visibility("hidden"))) void 13 __compilerrt_abort_impl(const char* file, int line, const char* function) { 14 (void)file; 15 (void)line; 16 (void)function; 17 __builtin_trap(); 18 } 19 20 // udivmoddi4.c next: __udivmoddi4 is called by divdi3, moddi3, divmoddi4, 21 // udivdi3, umoddi3 — must be defined before those callers. 22 // ---- udivmoddi4.c ---- 23 #include "int_lib.h" 24 25 // Effects: if rem != 0, *rem = a % b 26 // Returns: a / b 27 28 // Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide 29 30 COMPILER_RT_ABI du_int __udivmoddi4(du_int a, du_int b, du_int* rem) { 31 const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT; 32 const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT; 33 udwords n; 34 n.all = a; 35 udwords d; 36 d.all = b; 37 udwords q; 38 udwords r; 39 unsigned sr; 40 // special cases, X is unknown, K != 0 41 if (n.s.high == 0) { 42 if (d.s.high == 0) { 43 // 0 X 44 // --- 45 // 0 X 46 if (rem) *rem = n.s.low % d.s.low; 47 return n.s.low / d.s.low; 48 } 49 // 0 X 50 // --- 51 // K X 52 if (rem) *rem = n.s.low; 53 return 0; 54 } 55 // n.s.high != 0 56 if (d.s.low == 0) { 57 if (d.s.high == 0) { 58 // K X 59 // --- 60 // 0 0 61 if (rem) *rem = n.s.high % d.s.low; 62 return n.s.high / d.s.low; 63 } 64 // d.s.high != 0 65 if (n.s.low == 0) { 66 // K 0 67 // --- 68 // K 0 69 if (rem) { 70 r.s.high = n.s.high % d.s.high; 71 r.s.low = 0; 72 *rem = r.all; 73 } 74 return n.s.high / d.s.high; 75 } 76 // K K 77 // --- 78 // K 0 79 if ((d.s.high & (d.s.high - 1)) == 0) /* if d is a power of 2 */ { 80 if (rem) { 81 r.s.low = n.s.low; 82 r.s.high = n.s.high & (d.s.high - 1); 83 *rem = r.all; 84 } 85 return n.s.high >> ctzsi(d.s.high); 86 } 87 // K K 88 // --- 89 // K 0 90 sr = clzsi(d.s.high) - clzsi(n.s.high); 91 // 0 <= sr <= n_uword_bits - 2 or sr large 92 if (sr > n_uword_bits - 2) { 93 if (rem) *rem = n.all; 94 return 0; 95 } 96 ++sr; 97 // 1 <= sr <= n_uword_bits - 1 98 // q.all = n.all << (n_udword_bits - sr); 99 q.s.low = 0; 100 q.s.high = n.s.low << (n_uword_bits - sr); 101 // r.all = n.all >> sr; 102 r.s.high = n.s.high >> sr; 103 r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); 104 } else /* d.s.low != 0 */ { 105 if (d.s.high == 0) { 106 // K X 107 // --- 108 // 0 K 109 if ((d.s.low & (d.s.low - 1)) == 0) /* if d is a power of 2 */ { 110 if (rem) *rem = n.s.low & (d.s.low - 1); 111 if (d.s.low == 1) return n.all; 112 sr = ctzsi(d.s.low); 113 q.s.high = n.s.high >> sr; 114 q.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); 115 return q.all; 116 } 117 // K X 118 // --- 119 // 0 K 120 sr = 1 + n_uword_bits + clzsi(d.s.low) - clzsi(n.s.high); 121 // 2 <= sr <= n_udword_bits - 1 122 // q.all = n.all << (n_udword_bits - sr); 123 // r.all = n.all >> sr; 124 if (sr == n_uword_bits) { 125 q.s.low = 0; 126 q.s.high = n.s.low; 127 r.s.high = 0; 128 r.s.low = n.s.high; 129 } else if (sr < n_uword_bits) /* 2 <= sr <= n_uword_bits - 1 */ { 130 q.s.low = 0; 131 q.s.high = n.s.low << (n_uword_bits - sr); 132 r.s.high = n.s.high >> sr; 133 r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); 134 } else /* n_uword_bits + 1 <= sr <= n_udword_bits - 1 */ { 135 q.s.low = n.s.low << (n_udword_bits - sr); 136 q.s.high = (n.s.high << (n_udword_bits - sr)) | 137 (n.s.low >> (sr - n_uword_bits)); 138 r.s.high = 0; 139 r.s.low = n.s.high >> (sr - n_uword_bits); 140 } 141 } else { 142 // K X 143 // --- 144 // K K 145 sr = clzsi(d.s.high) - clzsi(n.s.high); 146 // 0 <= sr <= n_uword_bits - 1 or sr large 147 if (sr > n_uword_bits - 1) { 148 if (rem) *rem = n.all; 149 return 0; 150 } 151 ++sr; 152 // 1 <= sr <= n_uword_bits 153 // q.all = n.all << (n_udword_bits - sr); 154 q.s.low = 0; 155 if (sr == n_uword_bits) { 156 q.s.high = n.s.low; 157 r.s.high = 0; 158 r.s.low = n.s.high; 159 } else { 160 q.s.high = n.s.low << (n_uword_bits - sr); 161 r.s.high = n.s.high >> sr; 162 r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); 163 } 164 } 165 } 166 // Not a special case 167 // q and r are initialized with: 168 // q.all = n.all << (n_udword_bits - sr); 169 // r.all = n.all >> sr; 170 // 1 <= sr <= n_udword_bits - 1 171 su_int carry = 0; 172 for (; sr > 0; --sr) { 173 // r:q = ((r:q) << 1) | carry 174 r.s.high = (r.s.high << 1) | (r.s.low >> (n_uword_bits - 1)); 175 r.s.low = (r.s.low << 1) | (q.s.high >> (n_uword_bits - 1)); 176 q.s.high = (q.s.high << 1) | (q.s.low >> (n_uword_bits - 1)); 177 q.s.low = (q.s.low << 1) | carry; 178 // carry = 0; 179 // if (r.all >= d.all) 180 // { 181 // r.all -= d.all; 182 // carry = 1; 183 // } 184 const di_int s = (di_int)(d.all - r.all - 1) >> (n_udword_bits - 1); 185 carry = s & 1; 186 r.all -= d.all & s; 187 } 188 q.all = (q.all << 1) | carry; 189 if (rem) *rem = r.all; 190 return q.all; 191 } 192 193 // Leaf ops (no intra-directory dependencies): 194 // ---- absvdi2.c ---- 195 #include "int_lib.h" 196 197 // Returns: absolute value 198 199 // Effects: aborts if abs(x) < 0 200 201 COMPILER_RT_ABI di_int __absvdi2(di_int a) { 202 const int N = (int)(sizeof(di_int) * CHAR_BIT); 203 if (a == ((di_int)((du_int)1 << (N - 1)))) compilerrt_abort(); 204 const di_int t = a >> (N - 1); 205 return (a ^ t) - t; 206 } 207 // ---- bswapdi2.c ---- 208 #include "int_lib.h" 209 210 COMPILER_RT_ABI uint64_t __bswapdi2(uint64_t u) { 211 return ((((u) & 0xff00000000000000ULL) >> 56) | 212 (((u) & 0x00ff000000000000ULL) >> 40) | 213 (((u) & 0x0000ff0000000000ULL) >> 24) | 214 (((u) & 0x000000ff00000000ULL) >> 8) | 215 (((u) & 0x00000000ff000000ULL) << 8) | 216 (((u) & 0x0000000000ff0000ULL) << 24) | 217 (((u) & 0x000000000000ff00ULL) << 40) | 218 (((u) & 0x00000000000000ffULL) << 56)); 219 } 220 // ---- bswapsi2.c ---- 221 #include "int_lib.h" 222 223 COMPILER_RT_ABI uint32_t __bswapsi2(uint32_t u) { 224 return ((((u) & 0xff000000) >> 24) | (((u) & 0x00ff0000) >> 8) | 225 (((u) & 0x0000ff00) << 8) | (((u) & 0x000000ff) << 24)); 226 } 227 // ---- clzdi2.c ---- 228 #include "int_lib.h" 229 230 // Returns: the number of leading 0-bits 231 232 // Precondition: a != 0 233 234 COMPILER_RT_ABI int __clzdi2(di_int a) { 235 dwords x; 236 x.all = a; 237 const si_int f = -(x.s.high == 0); 238 return clzsi((x.s.high & ~f) | (x.s.low & f)) + 239 (f & ((si_int)(sizeof(si_int) * CHAR_BIT))); 240 } 241 // ---- clzsi2.c ---- 242 #include "int_lib.h" 243 244 // Returns: the number of leading 0-bits 245 246 // Precondition: a != 0 247 248 COMPILER_RT_ABI int __clzsi2(si_int a) { 249 su_int x = (su_int)a; 250 si_int t = ((x & 0xFFFF0000) == 0) << 4; // if (x is small) t = 16 else 0 251 x >>= 16 - t; // x = [0 - 0xFFFF] 252 su_int r = t; // r = [0, 16] 253 // return r + clz(x) 254 t = ((x & 0xFF00) == 0) << 3; 255 x >>= 8 - t; // x = [0 - 0xFF] 256 r += t; // r = [0, 8, 16, 24] 257 // return r + clz(x) 258 t = ((x & 0xF0) == 0) << 2; 259 x >>= 4 - t; // x = [0 - 0xF] 260 r += t; // r = [0, 4, 8, 12, 16, 20, 24, 28] 261 // return r + clz(x) 262 t = ((x & 0xC) == 0) << 1; 263 x >>= 2 - t; // x = [0 - 3] 264 r += t; // r = [0 - 30] and is even 265 // return r + clz(x) 266 // switch (x) 267 // { 268 // case 0: 269 // return r + 2; 270 // case 1: 271 // return r + 1; 272 // case 2: 273 // case 3: 274 // return r; 275 // } 276 return r + ((2 - x) & -((x & 2) == 0)); 277 } 278 // ---- cmpdi2.c ---- 279 #include "int_lib.h" 280 281 // Returns: if (a < b) returns 0 282 // if (a == b) returns 1 283 // if (a > b) returns 2 284 285 COMPILER_RT_ABI si_int __cmpdi2(di_int a, di_int b) { 286 dwords x; 287 x.all = a; 288 dwords y; 289 y.all = b; 290 if (x.s.high < y.s.high) return 0; 291 if (x.s.high > y.s.high) return 2; 292 if (x.s.low < y.s.low) return 0; 293 if (x.s.low > y.s.low) return 2; 294 return 1; 295 } 296 297 // ---- ctzdi2.c ---- 298 #include "int_lib.h" 299 300 // Returns: the number of trailing 0-bits 301 302 // Precondition: a != 0 303 304 COMPILER_RT_ABI int __ctzdi2(di_int a) { 305 dwords x; 306 x.all = a; 307 const si_int f = -(x.s.low == 0); 308 return ctzsi((x.s.high & f) | (x.s.low & ~f)) + 309 (f & ((si_int)(sizeof(si_int) * CHAR_BIT))); 310 } 311 // ---- ctzsi2.c ---- 312 #include "int_lib.h" 313 314 // Returns: the number of trailing 0-bits 315 316 // Precondition: a != 0 317 318 COMPILER_RT_ABI int __ctzsi2(si_int a) { 319 su_int x = (su_int)a; 320 si_int t = ((x & 0x0000FFFF) == 0) 321 << 4; // if (x has no small bits) t = 16 else 0 322 x >>= t; // x = [0 - 0xFFFF] + higher garbage bits 323 su_int r = t; // r = [0, 16] 324 // return r + ctz(x) 325 t = ((x & 0x00FF) == 0) << 3; 326 x >>= t; // x = [0 - 0xFF] + higher garbage bits 327 r += t; // r = [0, 8, 16, 24] 328 // return r + ctz(x) 329 t = ((x & 0x0F) == 0) << 2; 330 x >>= t; // x = [0 - 0xF] + higher garbage bits 331 r += t; // r = [0, 4, 8, 12, 16, 20, 24, 28] 332 // return r + ctz(x) 333 t = ((x & 0x3) == 0) << 1; 334 x >>= t; 335 x &= 3; // x = [0 - 3] 336 r += t; // r = [0 - 30] and is even 337 // return r + ctz(x) 338 339 // The branch-less return statement below is equivalent 340 // to the following switch statement: 341 // switch (x) 342 // { 343 // case 0: 344 // return r + 2; 345 // case 2: 346 // return r + 1; 347 // case 1: 348 // case 3: 349 // return r; 350 // } 351 return r + ((2 - (x >> 1)) & -((x & 1) == 0)); 352 } 353 // ---- ffsdi2.c ---- 354 #include "int_lib.h" 355 356 // Returns: the index of the least significant 1-bit in a, or 357 // the value zero if a is zero. The least significant bit is index one. 358 359 COMPILER_RT_ABI int __ffsdi2(di_int a) { 360 dwords x; 361 x.all = a; 362 if (x.s.low == 0) { 363 if (x.s.high == 0) return 0; 364 return ctzsi(x.s.high) + (1 + sizeof(si_int) * CHAR_BIT); 365 } 366 return ctzsi(x.s.low) + 1; 367 } 368 // ---- negdi2.c ---- 369 #include "int_lib.h" 370 371 // Returns: -a 372 373 COMPILER_RT_ABI di_int __negdi2(di_int a) { 374 // Note: this routine is here for API compatibility; any sane compiler 375 // should expand it inline. 376 return -(du_int)a; 377 } 378 // ---- paritydi2.c ---- 379 #include "int_lib.h" 380 381 // Returns: 1 if number of bits is odd else returns 0 382 383 COMPILER_RT_ABI int __paritydi2(di_int a) { 384 dwords x; 385 x.all = a; 386 su_int x2 = x.s.high ^ x.s.low; 387 x2 ^= x2 >> 16; 388 x2 ^= x2 >> 8; 389 x2 ^= x2 >> 4; 390 return (0x6996 >> (x2 & 0xF)) & 1; 391 } 392 // ---- paritysi2.c ---- 393 #include "int_lib.h" 394 395 // Returns: 1 if number of bits is odd else returns 0 396 397 COMPILER_RT_ABI int __paritysi2(si_int a) { 398 su_int x = (su_int)a; 399 x ^= x >> 16; 400 x ^= x >> 8; 401 x ^= x >> 4; 402 return (0x6996 >> (x & 0xF)) & 1; 403 } 404 // ---- popcountdi2.c ---- 405 #include "int_lib.h" 406 407 // Returns: count of 1 bits 408 409 COMPILER_RT_ABI int __popcountdi2(di_int a) { 410 du_int x2 = (du_int)a; 411 x2 = x2 - ((x2 >> 1) & 0x5555555555555555uLL); 412 // Every 2 bits holds the sum of every pair of bits (32) 413 x2 = ((x2 >> 2) & 0x3333333333333333uLL) + (x2 & 0x3333333333333333uLL); 414 // Every 4 bits holds the sum of every 4-set of bits (3 significant bits) (16) 415 x2 = (x2 + (x2 >> 4)) & 0x0F0F0F0F0F0F0F0FuLL; 416 // Every 8 bits holds the sum of every 8-set of bits (4 significant bits) (8) 417 su_int x = (su_int)(x2 + (x2 >> 32)); 418 // The lower 32 bits hold four 16 bit sums (5 significant bits). 419 // Upper 32 bits are garbage 420 x = x + (x >> 16); 421 // The lower 16 bits hold two 32 bit sums (6 significant bits). 422 // Upper 16 bits are garbage 423 return (x + (x >> 8)) & 0x0000007F; // (7 significant bits) 424 } 425 // ---- popcountsi2.c ---- 426 #include "int_lib.h" 427 428 // Returns: count of 1 bits 429 430 COMPILER_RT_ABI int __popcountsi2(si_int a) { 431 su_int x = (su_int)a; 432 x = x - ((x >> 1) & 0x55555555); 433 // Every 2 bits holds the sum of every pair of bits 434 x = ((x >> 2) & 0x33333333) + (x & 0x33333333); 435 // Every 4 bits holds the sum of every 4-set of bits (3 significant bits) 436 x = (x + (x >> 4)) & 0x0F0F0F0F; 437 // Every 8 bits holds the sum of every 8-set of bits (4 significant bits) 438 x = (x + (x >> 16)); 439 // The lower 16 bits hold two 8 bit sums (5 significant bits). 440 // Upper 16 bits are garbage 441 return (x + (x >> 8)) & 0x0000003F; // (6 significant bits) 442 } 443 // ---- ucmpdi2.c ---- 444 #include "int_lib.h" 445 446 // Returns: if (a < b) returns 0 447 // if (a == b) returns 1 448 // if (a > b) returns 2 449 450 COMPILER_RT_ABI si_int __ucmpdi2(du_int a, du_int b) { 451 udwords x; 452 x.all = a; 453 udwords y; 454 y.all = b; 455 if (x.s.high < y.s.high) return 0; 456 if (x.s.high > y.s.high) return 2; 457 if (x.s.low < y.s.low) return 0; 458 if (x.s.low > y.s.low) return 2; 459 return 1; 460 } 461 462 // Callers of __udivmoddi4: 463 // ---- udivdi3.c ---- 464 #include "int_lib.h" 465 466 // Returns: a / b 467 468 #define fixint_t di_int 469 #define fixuint_t du_int 470 #define INT_DIV_SUFFIX udivdi3 471 #include "int_div_impl.inc" 472 473 COMPILER_RT_ABI du_int __udivdi3(du_int a, du_int b) { 474 return __udivXi3_udivdi3(a, b); 475 } 476 // ---- umoddi3.c ---- 477 #include "int_lib.h" 478 479 // Returns: a % b 480 481 #define fixint_t di_int 482 #define fixuint_t du_int 483 #define INT_DIV_SUFFIX umoddi3 484 #include "int_div_impl.inc" 485 486 COMPILER_RT_ABI du_int __umoddi3(du_int a, du_int b) { 487 return __umodXi3_umoddi3(a, b); 488 } 489 // ---- divdi3.c ---- 490 #include "int_lib.h" 491 492 // Returns: a / b 493 494 #define fixint_t di_int 495 #define fixuint_t du_int 496 #define INT_DIV_SUFFIX divdi3 497 #define COMPUTE_UDIV(a, b) __udivmoddi4((a), (b), (du_int*)0) 498 #include "int_div_impl.inc" 499 500 COMPILER_RT_ABI di_int __divdi3(di_int a, di_int b) { 501 return __divXi3_divdi3(a, b); 502 } 503 // ---- moddi3.c ---- 504 #include "int_lib.h" 505 506 // Returns: a % b 507 508 #define fixint_t di_int 509 #define fixuint_t du_int 510 #define INT_DIV_SUFFIX moddi3 511 #define ASSIGN_UMOD(res, a, b) __udivmoddi4((a), (b), &(res)) 512 #include "int_div_impl.inc" 513 514 COMPILER_RT_ABI di_int __moddi3(di_int a, di_int b) { 515 return __modXi3_moddi3(a, b); 516 } 517 // ---- divmoddi4.c ---- 518 #include "int_lib.h" 519 520 // Returns: a / b, *rem = a % b 521 522 COMPILER_RT_ABI di_int __divmoddi4(di_int a, di_int b, di_int* rem) { 523 const int bits_in_dword_m1 = (int)(sizeof(di_int) * CHAR_BIT) - 1; 524 di_int s_a = a >> bits_in_dword_m1; // s_a = a < 0 ? -1 : 0 525 di_int s_b = b >> bits_in_dword_m1; // s_b = b < 0 ? -1 : 0 526 a = (du_int)(a ^ s_a) - s_a; // negate if s_a == -1 527 b = (du_int)(b ^ s_b) - s_b; // negate if s_b == -1 528 s_b ^= s_a; // sign of quotient 529 du_int r; 530 di_int q = (__udivmoddi4(a, b, &r) ^ s_b) - s_b; // negate if s_b == -1 531 *rem = (r ^ s_a) - s_a; // negate if s_a == -1 532 return q; 533 }