qsort.c (5891B)
1 /* Copyright (C) 2011 by Lynn Ochs 2 * 3 * Permission is hereby granted, free of charge, to any person obtaining a copy 4 * of this software and associated documentation files (the "Software"), to 5 * deal in the Software without restriction, including without limitation the 6 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 7 * sell copies of the Software, and to permit persons to whom the Software is 8 * furnished to do so, subject to the following conditions: 9 * 10 * The above copyright notice and this permission notice shall be included in 11 * all copies or substantial portions of the Software. 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 19 * IN THE SOFTWARE. 20 */ 21 22 /* Minor changes by Rich Felker for integration in musl, 2011-04-27. */ 23 24 /* Smoothsort, an adaptive variant of Heapsort. Memory usage: O(1). 25 Run time: Worst case O(n log n), close to O(n) in the mostly-sorted case. */ 26 27 #include <stddef.h> 28 #include <stdint.h> 29 30 void* memcpy(void* dest, const void* src, size_t n); 31 32 typedef int (*cmpfun)(const void*, const void*); 33 34 static int ntz(size_t x) { 35 int n = 0; 36 while ((x & 1) == 0) { 37 x >>= 1; 38 n++; 39 } 40 return n; 41 } 42 43 /* returns index of first bit set, excluding the low bit assumed to always 44 * be set, starting from low bit of p[0] up through high bit of p[1] */ 45 static inline int pntz(size_t p[2]) { 46 if (p[0] != 1) return ntz(p[0] - 1); 47 if (p[1]) return 8 * sizeof(size_t) + ntz(p[1]); 48 return 0; 49 } 50 51 static void cycle(size_t width, unsigned char* ar[], int n) { 52 unsigned char tmp[256]; 53 size_t l; 54 int i; 55 56 if (n < 2) { 57 return; 58 } 59 60 ar[n] = tmp; 61 while (width) { 62 l = sizeof(tmp) < width ? sizeof(tmp) : width; 63 memcpy(ar[n], ar[0], l); 64 for (i = 0; i < n; i++) { 65 memcpy(ar[i], ar[i + 1], l); 66 ar[i] += l; 67 } 68 width -= l; 69 } 70 } 71 72 /* shl() and shr() need n > 0 */ 73 static inline void shl(size_t p[2], int n) { 74 if (n >= 8 * sizeof(size_t)) { 75 n -= 8 * sizeof(size_t); 76 p[1] = p[0]; 77 p[0] = 0; 78 if (!n) return; 79 } 80 p[1] <<= n; 81 p[1] |= p[0] >> (sizeof(size_t) * 8 - n); 82 p[0] <<= n; 83 } 84 85 static inline void shr(size_t p[2], int n) { 86 if (n >= 8 * sizeof(size_t)) { 87 n -= 8 * sizeof(size_t); 88 p[0] = p[1]; 89 p[1] = 0; 90 if (!n) return; 91 } 92 p[0] >>= n; 93 p[0] |= p[1] << (sizeof(size_t) * 8 - n); 94 p[1] >>= n; 95 } 96 97 /* power-of-two length for working array so that we can mask indices and 98 * not depend on any invariant of the algorithm for spatial memory safety. 99 * the original size was just 14*sizeof(size_t)+1 */ 100 #define AR_LEN (16 * sizeof(size_t)) 101 #define AR_MASK (AR_LEN - 1) 102 103 static void sift(unsigned char* head, size_t width, cmpfun cmp, int pshift, 104 size_t lp[]) { 105 unsigned char *rt, *lf; 106 unsigned char* ar[AR_LEN]; 107 int i = 1; 108 109 ar[0] = head; 110 while (pshift > 1) { 111 rt = head - width; 112 lf = head - width - lp[pshift - 2]; 113 114 if (cmp(ar[0], lf) >= 0 && cmp(ar[0], rt) >= 0) { 115 break; 116 } 117 if (cmp(lf, rt) >= 0) { 118 ar[i++ & AR_MASK] = lf; 119 head = lf; 120 pshift -= 1; 121 } else { 122 ar[i++ & AR_MASK] = rt; 123 head = rt; 124 pshift -= 2; 125 } 126 } 127 cycle(width, ar, i & AR_MASK); 128 } 129 130 static void trinkle(unsigned char* head, size_t width, cmpfun cmp, size_t pp[2], 131 int pshift, int trusty, size_t lp[]) { 132 unsigned char *stepson, *rt, *lf; 133 size_t p[2]; 134 unsigned char* ar[AR_LEN]; 135 int i = 1; 136 int trail; 137 138 p[0] = pp[0]; 139 p[1] = pp[1]; 140 141 ar[0] = head; 142 while (p[0] != 1 || p[1] != 0) { 143 stepson = head - lp[pshift]; 144 if (cmp(stepson, ar[0]) <= 0) { 145 break; 146 } 147 if (!trusty && pshift > 1) { 148 rt = head - width; 149 lf = head - width - lp[pshift - 2]; 150 if (cmp(rt, stepson) >= 0 || cmp(lf, stepson) >= 0) { 151 break; 152 } 153 } 154 155 ar[i++ & AR_MASK] = stepson; 156 head = stepson; 157 trail = pntz(p); 158 shr(p, trail); 159 pshift += trail; 160 trusty = 0; 161 } 162 if (!trusty) { 163 cycle(width, ar, i & AR_MASK); 164 sift(head, width, cmp, pshift, lp); 165 } 166 } 167 168 __attribute__((weak)) void qsort(void* base, size_t nel, size_t width, 169 cmpfun cmp) { 170 size_t lp[12 * sizeof(size_t)]; 171 size_t i, size = width * nel; 172 unsigned char *head, *high; 173 size_t p[2] = {1, 0}; 174 int pshift = 1; 175 int trail; 176 177 if (!size) return; 178 179 head = base; 180 high = head + size - width; 181 182 /* Precompute Leonardo numbers, scaled by element width */ 183 for (lp[0] = lp[1] = width, i = 2; 184 (lp[i] = lp[i - 2] + lp[i - 1] + width) < size; i++); 185 186 while (head < high) { 187 if ((p[0] & 3) == 3) { 188 sift(head, width, cmp, pshift, lp); 189 shr(p, 2); 190 pshift += 2; 191 } else { 192 if (lp[pshift - 1] >= high - head) { 193 trinkle(head, width, cmp, p, pshift, 0, lp); 194 } else { 195 sift(head, width, cmp, pshift, lp); 196 } 197 198 if (pshift == 1) { 199 shl(p, 1); 200 pshift = 0; 201 } else { 202 shl(p, pshift - 1); 203 pshift = 1; 204 } 205 } 206 207 p[0] |= 1; 208 head += width; 209 } 210 211 trinkle(head, width, cmp, p, pshift, 0, lp); 212 213 while (pshift != 1 || p[0] != 1 || p[1] != 0) { 214 if (pshift <= 1) { 215 trail = pntz(p); 216 shr(p, trail); 217 pshift += trail; 218 } else { 219 shl(p, 2); 220 pshift -= 2; 221 p[0] ^= 7; 222 shr(p, 1); 223 trinkle(head - lp[pshift] - width, width, cmp, p, pshift + 1, 1, lp); 224 shl(p, 1); 225 p[0] |= 1; 226 trinkle(head - width, width, cmp, p, pshift, 1, lp); 227 } 228 head -= width; 229 } 230 }