1*cf204528SSimon Glass 2*cf204528SSimon Glass /*-------------------------------------------------------------*/ 3*cf204528SSimon Glass /*--- Block sorting machinery ---*/ 4*cf204528SSimon Glass /*--- blocksort.c ---*/ 5*cf204528SSimon Glass /*-------------------------------------------------------------*/ 6*cf204528SSimon Glass 7*cf204528SSimon Glass /*-- 8*cf204528SSimon Glass This file is a part of bzip2 and/or libbzip2, a program and 9*cf204528SSimon Glass library for lossless, block-sorting data compression. 10*cf204528SSimon Glass 11*cf204528SSimon Glass Copyright (C) 1996-2002 Julian R Seward. All rights reserved. 12*cf204528SSimon Glass 13*cf204528SSimon Glass Redistribution and use in source and binary forms, with or without 14*cf204528SSimon Glass modification, are permitted provided that the following conditions 15*cf204528SSimon Glass are met: 16*cf204528SSimon Glass 17*cf204528SSimon Glass 1. Redistributions of source code must retain the above copyright 18*cf204528SSimon Glass notice, this list of conditions and the following disclaimer. 19*cf204528SSimon Glass 20*cf204528SSimon Glass 2. The origin of this software must not be misrepresented; you must 21*cf204528SSimon Glass not claim that you wrote the original software. If you use this 22*cf204528SSimon Glass software in a product, an acknowledgment in the product 23*cf204528SSimon Glass documentation would be appreciated but is not required. 24*cf204528SSimon Glass 25*cf204528SSimon Glass 3. Altered source versions must be plainly marked as such, and must 26*cf204528SSimon Glass not be misrepresented as being the original software. 27*cf204528SSimon Glass 28*cf204528SSimon Glass 4. The name of the author may not be used to endorse or promote 29*cf204528SSimon Glass products derived from this software without specific prior written 30*cf204528SSimon Glass permission. 31*cf204528SSimon Glass 32*cf204528SSimon Glass THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 33*cf204528SSimon Glass OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 34*cf204528SSimon Glass WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 35*cf204528SSimon Glass ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY 36*cf204528SSimon Glass DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 37*cf204528SSimon Glass DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 38*cf204528SSimon Glass GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 39*cf204528SSimon Glass INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 40*cf204528SSimon Glass WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 41*cf204528SSimon Glass NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 42*cf204528SSimon Glass SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 43*cf204528SSimon Glass 44*cf204528SSimon Glass Julian Seward, Cambridge, UK. 45*cf204528SSimon Glass jseward@acm.org 46*cf204528SSimon Glass bzip2/libbzip2 version 1.0.6 of 6 September 2010 47*cf204528SSimon Glass Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org> 48*cf204528SSimon Glass 49*cf204528SSimon Glass This program is based on (at least) the work of: 50*cf204528SSimon Glass Mike Burrows 51*cf204528SSimon Glass David Wheeler 52*cf204528SSimon Glass Peter Fenwick 53*cf204528SSimon Glass Alistair Moffat 54*cf204528SSimon Glass Radford Neal 55*cf204528SSimon Glass Ian H. Witten 56*cf204528SSimon Glass Robert Sedgewick 57*cf204528SSimon Glass Jon L. Bentley 58*cf204528SSimon Glass 59*cf204528SSimon Glass For more information on these sources, see the manual. 60*cf204528SSimon Glass --*/ 61*cf204528SSimon Glass 62*cf204528SSimon Glass #include "bzlib_private.h" 63*cf204528SSimon Glass 64*cf204528SSimon Glass /*---------------------------------------------*/ 65*cf204528SSimon Glass /*--- Fallback O(N log(N)^2) sorting ---*/ 66*cf204528SSimon Glass /*--- algorithm, for repetitive blocks ---*/ 67*cf204528SSimon Glass /*---------------------------------------------*/ 68*cf204528SSimon Glass 69*cf204528SSimon Glass /*---------------------------------------------*/ 70*cf204528SSimon Glass static 71*cf204528SSimon Glass __inline__ 72*cf204528SSimon Glass void fallbackSimpleSort ( UInt32* fmap, 73*cf204528SSimon Glass UInt32* eclass, 74*cf204528SSimon Glass Int32 lo, 75*cf204528SSimon Glass Int32 hi ) 76*cf204528SSimon Glass { 77*cf204528SSimon Glass Int32 i, j, tmp; 78*cf204528SSimon Glass UInt32 ec_tmp; 79*cf204528SSimon Glass 80*cf204528SSimon Glass if (lo == hi) return; 81*cf204528SSimon Glass 82*cf204528SSimon Glass if (hi - lo > 3) { 83*cf204528SSimon Glass for ( i = hi-4; i >= lo; i-- ) { 84*cf204528SSimon Glass tmp = fmap[i]; 85*cf204528SSimon Glass ec_tmp = eclass[tmp]; 86*cf204528SSimon Glass for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 ) 87*cf204528SSimon Glass fmap[j-4] = fmap[j]; 88*cf204528SSimon Glass fmap[j-4] = tmp; 89*cf204528SSimon Glass } 90*cf204528SSimon Glass } 91*cf204528SSimon Glass 92*cf204528SSimon Glass for ( i = hi-1; i >= lo; i-- ) { 93*cf204528SSimon Glass tmp = fmap[i]; 94*cf204528SSimon Glass ec_tmp = eclass[tmp]; 95*cf204528SSimon Glass for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ ) 96*cf204528SSimon Glass fmap[j-1] = fmap[j]; 97*cf204528SSimon Glass fmap[j-1] = tmp; 98*cf204528SSimon Glass } 99*cf204528SSimon Glass } 100*cf204528SSimon Glass 101*cf204528SSimon Glass 102*cf204528SSimon Glass /*---------------------------------------------*/ 103*cf204528SSimon Glass #define fswap(zz1, zz2) \ 104*cf204528SSimon Glass { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; } 105*cf204528SSimon Glass 106*cf204528SSimon Glass #define fvswap(zzp1, zzp2, zzn) \ 107*cf204528SSimon Glass { \ 108*cf204528SSimon Glass Int32 yyp1 = (zzp1); \ 109*cf204528SSimon Glass Int32 yyp2 = (zzp2); \ 110*cf204528SSimon Glass Int32 yyn = (zzn); \ 111*cf204528SSimon Glass while (yyn > 0) { \ 112*cf204528SSimon Glass fswap(fmap[yyp1], fmap[yyp2]); \ 113*cf204528SSimon Glass yyp1++; yyp2++; yyn--; \ 114*cf204528SSimon Glass } \ 115*cf204528SSimon Glass } 116*cf204528SSimon Glass 117*cf204528SSimon Glass 118*cf204528SSimon Glass #define fmin(a,b) ((a) < (b)) ? (a) : (b) 119*cf204528SSimon Glass 120*cf204528SSimon Glass #define fpush(lz,hz) { stackLo[sp] = lz; \ 121*cf204528SSimon Glass stackHi[sp] = hz; \ 122*cf204528SSimon Glass sp++; } 123*cf204528SSimon Glass 124*cf204528SSimon Glass #define fpop(lz,hz) { sp--; \ 125*cf204528SSimon Glass lz = stackLo[sp]; \ 126*cf204528SSimon Glass hz = stackHi[sp]; } 127*cf204528SSimon Glass 128*cf204528SSimon Glass #define FALLBACK_QSORT_SMALL_THRESH 10 129*cf204528SSimon Glass #define FALLBACK_QSORT_STACK_SIZE 100 130*cf204528SSimon Glass 131*cf204528SSimon Glass 132*cf204528SSimon Glass static 133*cf204528SSimon Glass void fallbackQSort3 ( UInt32* fmap, 134*cf204528SSimon Glass UInt32* eclass, 135*cf204528SSimon Glass Int32 loSt, 136*cf204528SSimon Glass Int32 hiSt ) 137*cf204528SSimon Glass { 138*cf204528SSimon Glass Int32 unLo, unHi, ltLo, gtHi, n, m; 139*cf204528SSimon Glass Int32 sp, lo, hi; 140*cf204528SSimon Glass UInt32 med, r, r3; 141*cf204528SSimon Glass Int32 stackLo[FALLBACK_QSORT_STACK_SIZE]; 142*cf204528SSimon Glass Int32 stackHi[FALLBACK_QSORT_STACK_SIZE]; 143*cf204528SSimon Glass 144*cf204528SSimon Glass r = 0; 145*cf204528SSimon Glass 146*cf204528SSimon Glass sp = 0; 147*cf204528SSimon Glass fpush ( loSt, hiSt ); 148*cf204528SSimon Glass 149*cf204528SSimon Glass while (sp > 0) { 150*cf204528SSimon Glass 151*cf204528SSimon Glass AssertH ( sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004 ); 152*cf204528SSimon Glass 153*cf204528SSimon Glass fpop ( lo, hi ); 154*cf204528SSimon Glass if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) { 155*cf204528SSimon Glass fallbackSimpleSort ( fmap, eclass, lo, hi ); 156*cf204528SSimon Glass continue; 157*cf204528SSimon Glass } 158*cf204528SSimon Glass 159*cf204528SSimon Glass /* Random partitioning. Median of 3 sometimes fails to 160*cf204528SSimon Glass avoid bad cases. Median of 9 seems to help but 161*cf204528SSimon Glass looks rather expensive. This too seems to work but 162*cf204528SSimon Glass is cheaper. Guidance for the magic constants 163*cf204528SSimon Glass 7621 and 32768 is taken from Sedgewick's algorithms 164*cf204528SSimon Glass book, chapter 35. 165*cf204528SSimon Glass */ 166*cf204528SSimon Glass r = ((r * 7621) + 1) % 32768; 167*cf204528SSimon Glass r3 = r % 3; 168*cf204528SSimon Glass if (r3 == 0) med = eclass[fmap[lo]]; else 169*cf204528SSimon Glass if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else 170*cf204528SSimon Glass med = eclass[fmap[hi]]; 171*cf204528SSimon Glass 172*cf204528SSimon Glass unLo = ltLo = lo; 173*cf204528SSimon Glass unHi = gtHi = hi; 174*cf204528SSimon Glass 175*cf204528SSimon Glass while (1) { 176*cf204528SSimon Glass while (1) { 177*cf204528SSimon Glass if (unLo > unHi) break; 178*cf204528SSimon Glass n = (Int32)eclass[fmap[unLo]] - (Int32)med; 179*cf204528SSimon Glass if (n == 0) { 180*cf204528SSimon Glass fswap(fmap[unLo], fmap[ltLo]); 181*cf204528SSimon Glass ltLo++; unLo++; 182*cf204528SSimon Glass continue; 183*cf204528SSimon Glass }; 184*cf204528SSimon Glass if (n > 0) break; 185*cf204528SSimon Glass unLo++; 186*cf204528SSimon Glass } 187*cf204528SSimon Glass while (1) { 188*cf204528SSimon Glass if (unLo > unHi) break; 189*cf204528SSimon Glass n = (Int32)eclass[fmap[unHi]] - (Int32)med; 190*cf204528SSimon Glass if (n == 0) { 191*cf204528SSimon Glass fswap(fmap[unHi], fmap[gtHi]); 192*cf204528SSimon Glass gtHi--; unHi--; 193*cf204528SSimon Glass continue; 194*cf204528SSimon Glass }; 195*cf204528SSimon Glass if (n < 0) break; 196*cf204528SSimon Glass unHi--; 197*cf204528SSimon Glass } 198*cf204528SSimon Glass if (unLo > unHi) break; 199*cf204528SSimon Glass fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--; 200*cf204528SSimon Glass } 201*cf204528SSimon Glass 202*cf204528SSimon Glass AssertD ( unHi == unLo-1, "fallbackQSort3(2)" ); 203*cf204528SSimon Glass 204*cf204528SSimon Glass if (gtHi < ltLo) continue; 205*cf204528SSimon Glass 206*cf204528SSimon Glass n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n); 207*cf204528SSimon Glass m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m); 208*cf204528SSimon Glass 209*cf204528SSimon Glass n = lo + unLo - ltLo - 1; 210*cf204528SSimon Glass m = hi - (gtHi - unHi) + 1; 211*cf204528SSimon Glass 212*cf204528SSimon Glass if (n - lo > hi - m) { 213*cf204528SSimon Glass fpush ( lo, n ); 214*cf204528SSimon Glass fpush ( m, hi ); 215*cf204528SSimon Glass } else { 216*cf204528SSimon Glass fpush ( m, hi ); 217*cf204528SSimon Glass fpush ( lo, n ); 218*cf204528SSimon Glass } 219*cf204528SSimon Glass } 220*cf204528SSimon Glass } 221*cf204528SSimon Glass 222*cf204528SSimon Glass #undef fmin 223*cf204528SSimon Glass #undef fpush 224*cf204528SSimon Glass #undef fpop 225*cf204528SSimon Glass #undef fswap 226*cf204528SSimon Glass #undef fvswap 227*cf204528SSimon Glass #undef FALLBACK_QSORT_SMALL_THRESH 228*cf204528SSimon Glass #undef FALLBACK_QSORT_STACK_SIZE 229*cf204528SSimon Glass 230*cf204528SSimon Glass 231*cf204528SSimon Glass /*---------------------------------------------*/ 232*cf204528SSimon Glass /* Pre: 233*cf204528SSimon Glass nblock > 0 234*cf204528SSimon Glass eclass exists for [0 .. nblock-1] 235*cf204528SSimon Glass ((UChar*)eclass) [0 .. nblock-1] holds block 236*cf204528SSimon Glass ptr exists for [0 .. nblock-1] 237*cf204528SSimon Glass 238*cf204528SSimon Glass Post: 239*cf204528SSimon Glass ((UChar*)eclass) [0 .. nblock-1] holds block 240*cf204528SSimon Glass All other areas of eclass destroyed 241*cf204528SSimon Glass fmap [0 .. nblock-1] holds sorted order 242*cf204528SSimon Glass bhtab [ 0 .. 2+(nblock/32) ] destroyed 243*cf204528SSimon Glass */ 244*cf204528SSimon Glass 245*cf204528SSimon Glass #define SET_BH(zz) bhtab[(zz) >> 5] |= (1 << ((zz) & 31)) 246*cf204528SSimon Glass #define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31)) 247*cf204528SSimon Glass #define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz) & 31))) 248*cf204528SSimon Glass #define WORD_BH(zz) bhtab[(zz) >> 5] 249*cf204528SSimon Glass #define UNALIGNED_BH(zz) ((zz) & 0x01f) 250*cf204528SSimon Glass 251*cf204528SSimon Glass static 252*cf204528SSimon Glass void fallbackSort ( UInt32* fmap, 253*cf204528SSimon Glass UInt32* eclass, 254*cf204528SSimon Glass UInt32* bhtab, 255*cf204528SSimon Glass Int32 nblock, 256*cf204528SSimon Glass Int32 verb ) 257*cf204528SSimon Glass { 258*cf204528SSimon Glass Int32 ftab[257]; 259*cf204528SSimon Glass Int32 ftabCopy[256]; 260*cf204528SSimon Glass Int32 H, i, j, k, l, r, cc, cc1; 261*cf204528SSimon Glass Int32 nNotDone; 262*cf204528SSimon Glass Int32 nBhtab; 263*cf204528SSimon Glass UChar* eclass8 = (UChar*)eclass; 264*cf204528SSimon Glass 265*cf204528SSimon Glass /*-- 266*cf204528SSimon Glass Initial 1-char radix sort to generate 267*cf204528SSimon Glass initial fmap and initial BH bits. 268*cf204528SSimon Glass --*/ 269*cf204528SSimon Glass if (verb >= 4) 270*cf204528SSimon Glass VPrintf0 ( " bucket sorting ...\n" ); 271*cf204528SSimon Glass for (i = 0; i < 257; i++) ftab[i] = 0; 272*cf204528SSimon Glass for (i = 0; i < nblock; i++) ftab[eclass8[i]]++; 273*cf204528SSimon Glass for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i]; 274*cf204528SSimon Glass for (i = 1; i < 257; i++) ftab[i] += ftab[i-1]; 275*cf204528SSimon Glass 276*cf204528SSimon Glass for (i = 0; i < nblock; i++) { 277*cf204528SSimon Glass j = eclass8[i]; 278*cf204528SSimon Glass k = ftab[j] - 1; 279*cf204528SSimon Glass ftab[j] = k; 280*cf204528SSimon Glass fmap[k] = i; 281*cf204528SSimon Glass } 282*cf204528SSimon Glass 283*cf204528SSimon Glass nBhtab = 2 + (nblock / 32); 284*cf204528SSimon Glass for (i = 0; i < nBhtab; i++) bhtab[i] = 0; 285*cf204528SSimon Glass for (i = 0; i < 256; i++) SET_BH(ftab[i]); 286*cf204528SSimon Glass 287*cf204528SSimon Glass /*-- 288*cf204528SSimon Glass Inductively refine the buckets. Kind-of an 289*cf204528SSimon Glass "exponential radix sort" (!), inspired by the 290*cf204528SSimon Glass Manber-Myers suffix array construction algorithm. 291*cf204528SSimon Glass --*/ 292*cf204528SSimon Glass 293*cf204528SSimon Glass /*-- set sentinel bits for block-end detection --*/ 294*cf204528SSimon Glass for (i = 0; i < 32; i++) { 295*cf204528SSimon Glass SET_BH(nblock + 2*i); 296*cf204528SSimon Glass CLEAR_BH(nblock + 2*i + 1); 297*cf204528SSimon Glass } 298*cf204528SSimon Glass 299*cf204528SSimon Glass /*-- the log(N) loop --*/ 300*cf204528SSimon Glass H = 1; 301*cf204528SSimon Glass while (1) { 302*cf204528SSimon Glass 303*cf204528SSimon Glass if (verb >= 4) 304*cf204528SSimon Glass VPrintf1 ( " depth %6d has ", H ); 305*cf204528SSimon Glass 306*cf204528SSimon Glass j = 0; 307*cf204528SSimon Glass for (i = 0; i < nblock; i++) { 308*cf204528SSimon Glass if (ISSET_BH(i)) j = i; 309*cf204528SSimon Glass k = fmap[i] - H; if (k < 0) k += nblock; 310*cf204528SSimon Glass eclass[k] = j; 311*cf204528SSimon Glass } 312*cf204528SSimon Glass 313*cf204528SSimon Glass nNotDone = 0; 314*cf204528SSimon Glass r = -1; 315*cf204528SSimon Glass while (1) { 316*cf204528SSimon Glass 317*cf204528SSimon Glass /*-- find the next non-singleton bucket --*/ 318*cf204528SSimon Glass k = r + 1; 319*cf204528SSimon Glass while (ISSET_BH(k) && UNALIGNED_BH(k)) k++; 320*cf204528SSimon Glass if (ISSET_BH(k)) { 321*cf204528SSimon Glass while (WORD_BH(k) == 0xffffffff) k += 32; 322*cf204528SSimon Glass while (ISSET_BH(k)) k++; 323*cf204528SSimon Glass } 324*cf204528SSimon Glass l = k - 1; 325*cf204528SSimon Glass if (l >= nblock) break; 326*cf204528SSimon Glass while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++; 327*cf204528SSimon Glass if (!ISSET_BH(k)) { 328*cf204528SSimon Glass while (WORD_BH(k) == 0x00000000) k += 32; 329*cf204528SSimon Glass while (!ISSET_BH(k)) k++; 330*cf204528SSimon Glass } 331*cf204528SSimon Glass r = k - 1; 332*cf204528SSimon Glass if (r >= nblock) break; 333*cf204528SSimon Glass 334*cf204528SSimon Glass /*-- now [l, r] bracket current bucket --*/ 335*cf204528SSimon Glass if (r > l) { 336*cf204528SSimon Glass nNotDone += (r - l + 1); 337*cf204528SSimon Glass fallbackQSort3 ( fmap, eclass, l, r ); 338*cf204528SSimon Glass 339*cf204528SSimon Glass /*-- scan bucket and generate header bits-- */ 340*cf204528SSimon Glass cc = -1; 341*cf204528SSimon Glass for (i = l; i <= r; i++) { 342*cf204528SSimon Glass cc1 = eclass[fmap[i]]; 343*cf204528SSimon Glass if (cc != cc1) { SET_BH(i); cc = cc1; }; 344*cf204528SSimon Glass } 345*cf204528SSimon Glass } 346*cf204528SSimon Glass } 347*cf204528SSimon Glass 348*cf204528SSimon Glass if (verb >= 4) 349*cf204528SSimon Glass VPrintf1 ( "%6d unresolved strings\n", nNotDone ); 350*cf204528SSimon Glass 351*cf204528SSimon Glass H *= 2; 352*cf204528SSimon Glass if (H > nblock || nNotDone == 0) break; 353*cf204528SSimon Glass } 354*cf204528SSimon Glass 355*cf204528SSimon Glass /*-- 356*cf204528SSimon Glass Reconstruct the original block in 357*cf204528SSimon Glass eclass8 [0 .. nblock-1], since the 358*cf204528SSimon Glass previous phase destroyed it. 359*cf204528SSimon Glass --*/ 360*cf204528SSimon Glass if (verb >= 4) 361*cf204528SSimon Glass VPrintf0 ( " reconstructing block ...\n" ); 362*cf204528SSimon Glass j = 0; 363*cf204528SSimon Glass for (i = 0; i < nblock; i++) { 364*cf204528SSimon Glass while (ftabCopy[j] == 0) j++; 365*cf204528SSimon Glass ftabCopy[j]--; 366*cf204528SSimon Glass eclass8[fmap[i]] = (UChar)j; 367*cf204528SSimon Glass } 368*cf204528SSimon Glass AssertH ( j < 256, 1005 ); 369*cf204528SSimon Glass } 370*cf204528SSimon Glass 371*cf204528SSimon Glass #undef SET_BH 372*cf204528SSimon Glass #undef CLEAR_BH 373*cf204528SSimon Glass #undef ISSET_BH 374*cf204528SSimon Glass #undef WORD_BH 375*cf204528SSimon Glass #undef UNALIGNED_BH 376*cf204528SSimon Glass 377*cf204528SSimon Glass 378*cf204528SSimon Glass /*---------------------------------------------*/ 379*cf204528SSimon Glass /*--- The main, O(N^2 log(N)) sorting ---*/ 380*cf204528SSimon Glass /*--- algorithm. Faster for "normal" ---*/ 381*cf204528SSimon Glass /*--- non-repetitive blocks. ---*/ 382*cf204528SSimon Glass /*---------------------------------------------*/ 383*cf204528SSimon Glass 384*cf204528SSimon Glass /*---------------------------------------------*/ 385*cf204528SSimon Glass static 386*cf204528SSimon Glass __inline__ 387*cf204528SSimon Glass Bool mainGtU ( UInt32 i1, 388*cf204528SSimon Glass UInt32 i2, 389*cf204528SSimon Glass UChar* block, 390*cf204528SSimon Glass UInt16* quadrant, 391*cf204528SSimon Glass UInt32 nblock, 392*cf204528SSimon Glass Int32* budget ) 393*cf204528SSimon Glass { 394*cf204528SSimon Glass Int32 k; 395*cf204528SSimon Glass UChar c1, c2; 396*cf204528SSimon Glass UInt16 s1, s2; 397*cf204528SSimon Glass 398*cf204528SSimon Glass AssertD ( i1 != i2, "mainGtU" ); 399*cf204528SSimon Glass /* 1 */ 400*cf204528SSimon Glass c1 = block[i1]; c2 = block[i2]; 401*cf204528SSimon Glass if (c1 != c2) return (c1 > c2); 402*cf204528SSimon Glass i1++; i2++; 403*cf204528SSimon Glass /* 2 */ 404*cf204528SSimon Glass c1 = block[i1]; c2 = block[i2]; 405*cf204528SSimon Glass if (c1 != c2) return (c1 > c2); 406*cf204528SSimon Glass i1++; i2++; 407*cf204528SSimon Glass /* 3 */ 408*cf204528SSimon Glass c1 = block[i1]; c2 = block[i2]; 409*cf204528SSimon Glass if (c1 != c2) return (c1 > c2); 410*cf204528SSimon Glass i1++; i2++; 411*cf204528SSimon Glass /* 4 */ 412*cf204528SSimon Glass c1 = block[i1]; c2 = block[i2]; 413*cf204528SSimon Glass if (c1 != c2) return (c1 > c2); 414*cf204528SSimon Glass i1++; i2++; 415*cf204528SSimon Glass /* 5 */ 416*cf204528SSimon Glass c1 = block[i1]; c2 = block[i2]; 417*cf204528SSimon Glass if (c1 != c2) return (c1 > c2); 418*cf204528SSimon Glass i1++; i2++; 419*cf204528SSimon Glass /* 6 */ 420*cf204528SSimon Glass c1 = block[i1]; c2 = block[i2]; 421*cf204528SSimon Glass if (c1 != c2) return (c1 > c2); 422*cf204528SSimon Glass i1++; i2++; 423*cf204528SSimon Glass /* 7 */ 424*cf204528SSimon Glass c1 = block[i1]; c2 = block[i2]; 425*cf204528SSimon Glass if (c1 != c2) return (c1 > c2); 426*cf204528SSimon Glass i1++; i2++; 427*cf204528SSimon Glass /* 8 */ 428*cf204528SSimon Glass c1 = block[i1]; c2 = block[i2]; 429*cf204528SSimon Glass if (c1 != c2) return (c1 > c2); 430*cf204528SSimon Glass i1++; i2++; 431*cf204528SSimon Glass /* 9 */ 432*cf204528SSimon Glass c1 = block[i1]; c2 = block[i2]; 433*cf204528SSimon Glass if (c1 != c2) return (c1 > c2); 434*cf204528SSimon Glass i1++; i2++; 435*cf204528SSimon Glass /* 10 */ 436*cf204528SSimon Glass c1 = block[i1]; c2 = block[i2]; 437*cf204528SSimon Glass if (c1 != c2) return (c1 > c2); 438*cf204528SSimon Glass i1++; i2++; 439*cf204528SSimon Glass /* 11 */ 440*cf204528SSimon Glass c1 = block[i1]; c2 = block[i2]; 441*cf204528SSimon Glass if (c1 != c2) return (c1 > c2); 442*cf204528SSimon Glass i1++; i2++; 443*cf204528SSimon Glass /* 12 */ 444*cf204528SSimon Glass c1 = block[i1]; c2 = block[i2]; 445*cf204528SSimon Glass if (c1 != c2) return (c1 > c2); 446*cf204528SSimon Glass i1++; i2++; 447*cf204528SSimon Glass 448*cf204528SSimon Glass k = nblock + 8; 449*cf204528SSimon Glass 450*cf204528SSimon Glass do { 451*cf204528SSimon Glass /* 1 */ 452*cf204528SSimon Glass c1 = block[i1]; c2 = block[i2]; 453*cf204528SSimon Glass if (c1 != c2) return (c1 > c2); 454*cf204528SSimon Glass s1 = quadrant[i1]; s2 = quadrant[i2]; 455*cf204528SSimon Glass if (s1 != s2) return (s1 > s2); 456*cf204528SSimon Glass i1++; i2++; 457*cf204528SSimon Glass /* 2 */ 458*cf204528SSimon Glass c1 = block[i1]; c2 = block[i2]; 459*cf204528SSimon Glass if (c1 != c2) return (c1 > c2); 460*cf204528SSimon Glass s1 = quadrant[i1]; s2 = quadrant[i2]; 461*cf204528SSimon Glass if (s1 != s2) return (s1 > s2); 462*cf204528SSimon Glass i1++; i2++; 463*cf204528SSimon Glass /* 3 */ 464*cf204528SSimon Glass c1 = block[i1]; c2 = block[i2]; 465*cf204528SSimon Glass if (c1 != c2) return (c1 > c2); 466*cf204528SSimon Glass s1 = quadrant[i1]; s2 = quadrant[i2]; 467*cf204528SSimon Glass if (s1 != s2) return (s1 > s2); 468*cf204528SSimon Glass i1++; i2++; 469*cf204528SSimon Glass /* 4 */ 470*cf204528SSimon Glass c1 = block[i1]; c2 = block[i2]; 471*cf204528SSimon Glass if (c1 != c2) return (c1 > c2); 472*cf204528SSimon Glass s1 = quadrant[i1]; s2 = quadrant[i2]; 473*cf204528SSimon Glass if (s1 != s2) return (s1 > s2); 474*cf204528SSimon Glass i1++; i2++; 475*cf204528SSimon Glass /* 5 */ 476*cf204528SSimon Glass c1 = block[i1]; c2 = block[i2]; 477*cf204528SSimon Glass if (c1 != c2) return (c1 > c2); 478*cf204528SSimon Glass s1 = quadrant[i1]; s2 = quadrant[i2]; 479*cf204528SSimon Glass if (s1 != s2) return (s1 > s2); 480*cf204528SSimon Glass i1++; i2++; 481*cf204528SSimon Glass /* 6 */ 482*cf204528SSimon Glass c1 = block[i1]; c2 = block[i2]; 483*cf204528SSimon Glass if (c1 != c2) return (c1 > c2); 484*cf204528SSimon Glass s1 = quadrant[i1]; s2 = quadrant[i2]; 485*cf204528SSimon Glass if (s1 != s2) return (s1 > s2); 486*cf204528SSimon Glass i1++; i2++; 487*cf204528SSimon Glass /* 7 */ 488*cf204528SSimon Glass c1 = block[i1]; c2 = block[i2]; 489*cf204528SSimon Glass if (c1 != c2) return (c1 > c2); 490*cf204528SSimon Glass s1 = quadrant[i1]; s2 = quadrant[i2]; 491*cf204528SSimon Glass if (s1 != s2) return (s1 > s2); 492*cf204528SSimon Glass i1++; i2++; 493*cf204528SSimon Glass /* 8 */ 494*cf204528SSimon Glass c1 = block[i1]; c2 = block[i2]; 495*cf204528SSimon Glass if (c1 != c2) return (c1 > c2); 496*cf204528SSimon Glass s1 = quadrant[i1]; s2 = quadrant[i2]; 497*cf204528SSimon Glass if (s1 != s2) return (s1 > s2); 498*cf204528SSimon Glass i1++; i2++; 499*cf204528SSimon Glass 500*cf204528SSimon Glass if (i1 >= nblock) i1 -= nblock; 501*cf204528SSimon Glass if (i2 >= nblock) i2 -= nblock; 502*cf204528SSimon Glass 503*cf204528SSimon Glass k -= 8; 504*cf204528SSimon Glass (*budget)--; 505*cf204528SSimon Glass } 506*cf204528SSimon Glass while (k >= 0); 507*cf204528SSimon Glass 508*cf204528SSimon Glass return False; 509*cf204528SSimon Glass } 510*cf204528SSimon Glass 511*cf204528SSimon Glass 512*cf204528SSimon Glass /*---------------------------------------------*/ 513*cf204528SSimon Glass /*-- 514*cf204528SSimon Glass Knuth's increments seem to work better 515*cf204528SSimon Glass than Incerpi-Sedgewick here. Possibly 516*cf204528SSimon Glass because the number of elems to sort is 517*cf204528SSimon Glass usually small, typically <= 20. 518*cf204528SSimon Glass --*/ 519*cf204528SSimon Glass static 520*cf204528SSimon Glass Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280, 521*cf204528SSimon Glass 9841, 29524, 88573, 265720, 522*cf204528SSimon Glass 797161, 2391484 }; 523*cf204528SSimon Glass 524*cf204528SSimon Glass static 525*cf204528SSimon Glass void mainSimpleSort ( UInt32* ptr, 526*cf204528SSimon Glass UChar* block, 527*cf204528SSimon Glass UInt16* quadrant, 528*cf204528SSimon Glass Int32 nblock, 529*cf204528SSimon Glass Int32 lo, 530*cf204528SSimon Glass Int32 hi, 531*cf204528SSimon Glass Int32 d, 532*cf204528SSimon Glass Int32* budget ) 533*cf204528SSimon Glass { 534*cf204528SSimon Glass Int32 i, j, h, bigN, hp; 535*cf204528SSimon Glass UInt32 v; 536*cf204528SSimon Glass 537*cf204528SSimon Glass bigN = hi - lo + 1; 538*cf204528SSimon Glass if (bigN < 2) return; 539*cf204528SSimon Glass 540*cf204528SSimon Glass hp = 0; 541*cf204528SSimon Glass while (incs[hp] < bigN) hp++; 542*cf204528SSimon Glass hp--; 543*cf204528SSimon Glass 544*cf204528SSimon Glass for (; hp >= 0; hp--) { 545*cf204528SSimon Glass h = incs[hp]; 546*cf204528SSimon Glass 547*cf204528SSimon Glass i = lo + h; 548*cf204528SSimon Glass while (True) { 549*cf204528SSimon Glass 550*cf204528SSimon Glass /*-- copy 1 --*/ 551*cf204528SSimon Glass if (i > hi) break; 552*cf204528SSimon Glass v = ptr[i]; 553*cf204528SSimon Glass j = i; 554*cf204528SSimon Glass while ( mainGtU ( 555*cf204528SSimon Glass ptr[j-h]+d, v+d, block, quadrant, nblock, budget 556*cf204528SSimon Glass ) ) { 557*cf204528SSimon Glass ptr[j] = ptr[j-h]; 558*cf204528SSimon Glass j = j - h; 559*cf204528SSimon Glass if (j <= (lo + h - 1)) break; 560*cf204528SSimon Glass } 561*cf204528SSimon Glass ptr[j] = v; 562*cf204528SSimon Glass i++; 563*cf204528SSimon Glass 564*cf204528SSimon Glass /*-- copy 2 --*/ 565*cf204528SSimon Glass if (i > hi) break; 566*cf204528SSimon Glass v = ptr[i]; 567*cf204528SSimon Glass j = i; 568*cf204528SSimon Glass while ( mainGtU ( 569*cf204528SSimon Glass ptr[j-h]+d, v+d, block, quadrant, nblock, budget 570*cf204528SSimon Glass ) ) { 571*cf204528SSimon Glass ptr[j] = ptr[j-h]; 572*cf204528SSimon Glass j = j - h; 573*cf204528SSimon Glass if (j <= (lo + h - 1)) break; 574*cf204528SSimon Glass } 575*cf204528SSimon Glass ptr[j] = v; 576*cf204528SSimon Glass i++; 577*cf204528SSimon Glass 578*cf204528SSimon Glass /*-- copy 3 --*/ 579*cf204528SSimon Glass if (i > hi) break; 580*cf204528SSimon Glass v = ptr[i]; 581*cf204528SSimon Glass j = i; 582*cf204528SSimon Glass while ( mainGtU ( 583*cf204528SSimon Glass ptr[j-h]+d, v+d, block, quadrant, nblock, budget 584*cf204528SSimon Glass ) ) { 585*cf204528SSimon Glass ptr[j] = ptr[j-h]; 586*cf204528SSimon Glass j = j - h; 587*cf204528SSimon Glass if (j <= (lo + h - 1)) break; 588*cf204528SSimon Glass } 589*cf204528SSimon Glass ptr[j] = v; 590*cf204528SSimon Glass i++; 591*cf204528SSimon Glass 592*cf204528SSimon Glass if (*budget < 0) return; 593*cf204528SSimon Glass } 594*cf204528SSimon Glass } 595*cf204528SSimon Glass } 596*cf204528SSimon Glass 597*cf204528SSimon Glass 598*cf204528SSimon Glass /*---------------------------------------------*/ 599*cf204528SSimon Glass /*-- 600*cf204528SSimon Glass The following is an implementation of 601*cf204528SSimon Glass an elegant 3-way quicksort for strings, 602*cf204528SSimon Glass described in a paper "Fast Algorithms for 603*cf204528SSimon Glass Sorting and Searching Strings", by Robert 604*cf204528SSimon Glass Sedgewick and Jon L. Bentley. 605*cf204528SSimon Glass --*/ 606*cf204528SSimon Glass 607*cf204528SSimon Glass #define mswap(zz1, zz2) \ 608*cf204528SSimon Glass { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; } 609*cf204528SSimon Glass 610*cf204528SSimon Glass #define mvswap(zzp1, zzp2, zzn) \ 611*cf204528SSimon Glass { \ 612*cf204528SSimon Glass Int32 yyp1 = (zzp1); \ 613*cf204528SSimon Glass Int32 yyp2 = (zzp2); \ 614*cf204528SSimon Glass Int32 yyn = (zzn); \ 615*cf204528SSimon Glass while (yyn > 0) { \ 616*cf204528SSimon Glass mswap(ptr[yyp1], ptr[yyp2]); \ 617*cf204528SSimon Glass yyp1++; yyp2++; yyn--; \ 618*cf204528SSimon Glass } \ 619*cf204528SSimon Glass } 620*cf204528SSimon Glass 621*cf204528SSimon Glass static 622*cf204528SSimon Glass __inline__ 623*cf204528SSimon Glass UChar mmed3 ( UChar a, UChar b, UChar c ) 624*cf204528SSimon Glass { 625*cf204528SSimon Glass UChar t; 626*cf204528SSimon Glass if (a > b) { t = a; a = b; b = t; }; 627*cf204528SSimon Glass if (b > c) { 628*cf204528SSimon Glass b = c; 629*cf204528SSimon Glass if (a > b) b = a; 630*cf204528SSimon Glass } 631*cf204528SSimon Glass return b; 632*cf204528SSimon Glass } 633*cf204528SSimon Glass 634*cf204528SSimon Glass #define mmin(a,b) ((a) < (b)) ? (a) : (b) 635*cf204528SSimon Glass 636*cf204528SSimon Glass #define mpush(lz,hz,dz) { stackLo[sp] = lz; \ 637*cf204528SSimon Glass stackHi[sp] = hz; \ 638*cf204528SSimon Glass stackD [sp] = dz; \ 639*cf204528SSimon Glass sp++; } 640*cf204528SSimon Glass 641*cf204528SSimon Glass #define mpop(lz,hz,dz) { sp--; \ 642*cf204528SSimon Glass lz = stackLo[sp]; \ 643*cf204528SSimon Glass hz = stackHi[sp]; \ 644*cf204528SSimon Glass dz = stackD [sp]; } 645*cf204528SSimon Glass 646*cf204528SSimon Glass 647*cf204528SSimon Glass #define mnextsize(az) (nextHi[az]-nextLo[az]) 648*cf204528SSimon Glass 649*cf204528SSimon Glass #define mnextswap(az,bz) \ 650*cf204528SSimon Glass { Int32 tz; \ 651*cf204528SSimon Glass tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \ 652*cf204528SSimon Glass tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \ 653*cf204528SSimon Glass tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; } 654*cf204528SSimon Glass 655*cf204528SSimon Glass 656*cf204528SSimon Glass #define MAIN_QSORT_SMALL_THRESH 20 657*cf204528SSimon Glass #define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT) 658*cf204528SSimon Glass #define MAIN_QSORT_STACK_SIZE 100 659*cf204528SSimon Glass 660*cf204528SSimon Glass static 661*cf204528SSimon Glass void mainQSort3 ( UInt32* ptr, 662*cf204528SSimon Glass UChar* block, 663*cf204528SSimon Glass UInt16* quadrant, 664*cf204528SSimon Glass Int32 nblock, 665*cf204528SSimon Glass Int32 loSt, 666*cf204528SSimon Glass Int32 hiSt, 667*cf204528SSimon Glass Int32 dSt, 668*cf204528SSimon Glass Int32* budget ) 669*cf204528SSimon Glass { 670*cf204528SSimon Glass Int32 unLo, unHi, ltLo, gtHi, n, m, med; 671*cf204528SSimon Glass Int32 sp, lo, hi, d; 672*cf204528SSimon Glass 673*cf204528SSimon Glass Int32 stackLo[MAIN_QSORT_STACK_SIZE]; 674*cf204528SSimon Glass Int32 stackHi[MAIN_QSORT_STACK_SIZE]; 675*cf204528SSimon Glass Int32 stackD [MAIN_QSORT_STACK_SIZE]; 676*cf204528SSimon Glass 677*cf204528SSimon Glass Int32 nextLo[3]; 678*cf204528SSimon Glass Int32 nextHi[3]; 679*cf204528SSimon Glass Int32 nextD [3]; 680*cf204528SSimon Glass 681*cf204528SSimon Glass sp = 0; 682*cf204528SSimon Glass mpush ( loSt, hiSt, dSt ); 683*cf204528SSimon Glass 684*cf204528SSimon Glass while (sp > 0) { 685*cf204528SSimon Glass 686*cf204528SSimon Glass AssertH ( sp < MAIN_QSORT_STACK_SIZE - 2, 1001 ); 687*cf204528SSimon Glass 688*cf204528SSimon Glass mpop ( lo, hi, d ); 689*cf204528SSimon Glass if (hi - lo < MAIN_QSORT_SMALL_THRESH || 690*cf204528SSimon Glass d > MAIN_QSORT_DEPTH_THRESH) { 691*cf204528SSimon Glass mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget ); 692*cf204528SSimon Glass if (*budget < 0) return; 693*cf204528SSimon Glass continue; 694*cf204528SSimon Glass } 695*cf204528SSimon Glass 696*cf204528SSimon Glass med = (Int32) 697*cf204528SSimon Glass mmed3 ( block[ptr[ lo ]+d], 698*cf204528SSimon Glass block[ptr[ hi ]+d], 699*cf204528SSimon Glass block[ptr[ (lo+hi)>>1 ]+d] ); 700*cf204528SSimon Glass 701*cf204528SSimon Glass unLo = ltLo = lo; 702*cf204528SSimon Glass unHi = gtHi = hi; 703*cf204528SSimon Glass 704*cf204528SSimon Glass while (True) { 705*cf204528SSimon Glass while (True) { 706*cf204528SSimon Glass if (unLo > unHi) break; 707*cf204528SSimon Glass n = ((Int32)block[ptr[unLo]+d]) - med; 708*cf204528SSimon Glass if (n == 0) { 709*cf204528SSimon Glass mswap(ptr[unLo], ptr[ltLo]); 710*cf204528SSimon Glass ltLo++; unLo++; continue; 711*cf204528SSimon Glass }; 712*cf204528SSimon Glass if (n > 0) break; 713*cf204528SSimon Glass unLo++; 714*cf204528SSimon Glass } 715*cf204528SSimon Glass while (True) { 716*cf204528SSimon Glass if (unLo > unHi) break; 717*cf204528SSimon Glass n = ((Int32)block[ptr[unHi]+d]) - med; 718*cf204528SSimon Glass if (n == 0) { 719*cf204528SSimon Glass mswap(ptr[unHi], ptr[gtHi]); 720*cf204528SSimon Glass gtHi--; unHi--; continue; 721*cf204528SSimon Glass }; 722*cf204528SSimon Glass if (n < 0) break; 723*cf204528SSimon Glass unHi--; 724*cf204528SSimon Glass } 725*cf204528SSimon Glass if (unLo > unHi) break; 726*cf204528SSimon Glass mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--; 727*cf204528SSimon Glass } 728*cf204528SSimon Glass 729*cf204528SSimon Glass AssertD ( unHi == unLo-1, "mainQSort3(2)" ); 730*cf204528SSimon Glass 731*cf204528SSimon Glass if (gtHi < ltLo) { 732*cf204528SSimon Glass mpush(lo, hi, d+1 ); 733*cf204528SSimon Glass continue; 734*cf204528SSimon Glass } 735*cf204528SSimon Glass 736*cf204528SSimon Glass n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n); 737*cf204528SSimon Glass m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m); 738*cf204528SSimon Glass 739*cf204528SSimon Glass n = lo + unLo - ltLo - 1; 740*cf204528SSimon Glass m = hi - (gtHi - unHi) + 1; 741*cf204528SSimon Glass 742*cf204528SSimon Glass nextLo[0] = lo; nextHi[0] = n; nextD[0] = d; 743*cf204528SSimon Glass nextLo[1] = m; nextHi[1] = hi; nextD[1] = d; 744*cf204528SSimon Glass nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1; 745*cf204528SSimon Glass 746*cf204528SSimon Glass if (mnextsize(0) < mnextsize(1)) mnextswap(0,1); 747*cf204528SSimon Glass if (mnextsize(1) < mnextsize(2)) mnextswap(1,2); 748*cf204528SSimon Glass if (mnextsize(0) < mnextsize(1)) mnextswap(0,1); 749*cf204528SSimon Glass 750*cf204528SSimon Glass AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" ); 751*cf204528SSimon Glass AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" ); 752*cf204528SSimon Glass 753*cf204528SSimon Glass mpush (nextLo[0], nextHi[0], nextD[0]); 754*cf204528SSimon Glass mpush (nextLo[1], nextHi[1], nextD[1]); 755*cf204528SSimon Glass mpush (nextLo[2], nextHi[2], nextD[2]); 756*cf204528SSimon Glass } 757*cf204528SSimon Glass } 758*cf204528SSimon Glass 759*cf204528SSimon Glass #undef mswap 760*cf204528SSimon Glass #undef mvswap 761*cf204528SSimon Glass #undef mpush 762*cf204528SSimon Glass #undef mpop 763*cf204528SSimon Glass #undef mmin 764*cf204528SSimon Glass #undef mnextsize 765*cf204528SSimon Glass #undef mnextswap 766*cf204528SSimon Glass #undef MAIN_QSORT_SMALL_THRESH 767*cf204528SSimon Glass #undef MAIN_QSORT_DEPTH_THRESH 768*cf204528SSimon Glass #undef MAIN_QSORT_STACK_SIZE 769*cf204528SSimon Glass 770*cf204528SSimon Glass 771*cf204528SSimon Glass /*---------------------------------------------*/ 772*cf204528SSimon Glass /* Pre: 773*cf204528SSimon Glass nblock > N_OVERSHOOT 774*cf204528SSimon Glass block32 exists for [0 .. nblock-1 +N_OVERSHOOT] 775*cf204528SSimon Glass ((UChar*)block32) [0 .. nblock-1] holds block 776*cf204528SSimon Glass ptr exists for [0 .. nblock-1] 777*cf204528SSimon Glass 778*cf204528SSimon Glass Post: 779*cf204528SSimon Glass ((UChar*)block32) [0 .. nblock-1] holds block 780*cf204528SSimon Glass All other areas of block32 destroyed 781*cf204528SSimon Glass ftab [0 .. 65536 ] destroyed 782*cf204528SSimon Glass ptr [0 .. nblock-1] holds sorted order 783*cf204528SSimon Glass if (*budget < 0), sorting was abandoned 784*cf204528SSimon Glass */ 785*cf204528SSimon Glass 786*cf204528SSimon Glass #define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8]) 787*cf204528SSimon Glass #define SETMASK (1 << 21) 788*cf204528SSimon Glass #define CLEARMASK (~(SETMASK)) 789*cf204528SSimon Glass 790*cf204528SSimon Glass static 791*cf204528SSimon Glass void mainSort ( UInt32* ptr, 792*cf204528SSimon Glass UChar* block, 793*cf204528SSimon Glass UInt16* quadrant, 794*cf204528SSimon Glass UInt32* ftab, 795*cf204528SSimon Glass Int32 nblock, 796*cf204528SSimon Glass Int32 verb, 797*cf204528SSimon Glass Int32* budget ) 798*cf204528SSimon Glass { 799*cf204528SSimon Glass Int32 i, j, k, ss, sb; 800*cf204528SSimon Glass Int32 runningOrder[256]; 801*cf204528SSimon Glass Bool bigDone[256]; 802*cf204528SSimon Glass Int32 copyStart[256]; 803*cf204528SSimon Glass Int32 copyEnd [256]; 804*cf204528SSimon Glass UChar c1; 805*cf204528SSimon Glass Int32 numQSorted; 806*cf204528SSimon Glass UInt16 s; 807*cf204528SSimon Glass if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" ); 808*cf204528SSimon Glass 809*cf204528SSimon Glass /*-- set up the 2-byte frequency table --*/ 810*cf204528SSimon Glass for (i = 65536; i >= 0; i--) ftab[i] = 0; 811*cf204528SSimon Glass 812*cf204528SSimon Glass j = block[0] << 8; 813*cf204528SSimon Glass i = nblock-1; 814*cf204528SSimon Glass for (; i >= 3; i -= 4) { 815*cf204528SSimon Glass quadrant[i] = 0; 816*cf204528SSimon Glass j = (j >> 8) | ( ((UInt16)block[i]) << 8); 817*cf204528SSimon Glass ftab[j]++; 818*cf204528SSimon Glass quadrant[i-1] = 0; 819*cf204528SSimon Glass j = (j >> 8) | ( ((UInt16)block[i-1]) << 8); 820*cf204528SSimon Glass ftab[j]++; 821*cf204528SSimon Glass quadrant[i-2] = 0; 822*cf204528SSimon Glass j = (j >> 8) | ( ((UInt16)block[i-2]) << 8); 823*cf204528SSimon Glass ftab[j]++; 824*cf204528SSimon Glass quadrant[i-3] = 0; 825*cf204528SSimon Glass j = (j >> 8) | ( ((UInt16)block[i-3]) << 8); 826*cf204528SSimon Glass ftab[j]++; 827*cf204528SSimon Glass } 828*cf204528SSimon Glass for (; i >= 0; i--) { 829*cf204528SSimon Glass quadrant[i] = 0; 830*cf204528SSimon Glass j = (j >> 8) | ( ((UInt16)block[i]) << 8); 831*cf204528SSimon Glass ftab[j]++; 832*cf204528SSimon Glass } 833*cf204528SSimon Glass 834*cf204528SSimon Glass /*-- (emphasises close relationship of block & quadrant) --*/ 835*cf204528SSimon Glass for (i = 0; i < BZ_N_OVERSHOOT; i++) { 836*cf204528SSimon Glass block [nblock+i] = block[i]; 837*cf204528SSimon Glass quadrant[nblock+i] = 0; 838*cf204528SSimon Glass } 839*cf204528SSimon Glass 840*cf204528SSimon Glass if (verb >= 4) VPrintf0 ( " bucket sorting ...\n" ); 841*cf204528SSimon Glass 842*cf204528SSimon Glass /*-- Complete the initial radix sort --*/ 843*cf204528SSimon Glass for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1]; 844*cf204528SSimon Glass 845*cf204528SSimon Glass s = block[0] << 8; 846*cf204528SSimon Glass i = nblock-1; 847*cf204528SSimon Glass for (; i >= 3; i -= 4) { 848*cf204528SSimon Glass s = (s >> 8) | (block[i] << 8); 849*cf204528SSimon Glass j = ftab[s] -1; 850*cf204528SSimon Glass ftab[s] = j; 851*cf204528SSimon Glass ptr[j] = i; 852*cf204528SSimon Glass s = (s >> 8) | (block[i-1] << 8); 853*cf204528SSimon Glass j = ftab[s] -1; 854*cf204528SSimon Glass ftab[s] = j; 855*cf204528SSimon Glass ptr[j] = i-1; 856*cf204528SSimon Glass s = (s >> 8) | (block[i-2] << 8); 857*cf204528SSimon Glass j = ftab[s] -1; 858*cf204528SSimon Glass ftab[s] = j; 859*cf204528SSimon Glass ptr[j] = i-2; 860*cf204528SSimon Glass s = (s >> 8) | (block[i-3] << 8); 861*cf204528SSimon Glass j = ftab[s] -1; 862*cf204528SSimon Glass ftab[s] = j; 863*cf204528SSimon Glass ptr[j] = i-3; 864*cf204528SSimon Glass } 865*cf204528SSimon Glass for (; i >= 0; i--) { 866*cf204528SSimon Glass s = (s >> 8) | (block[i] << 8); 867*cf204528SSimon Glass j = ftab[s] -1; 868*cf204528SSimon Glass ftab[s] = j; 869*cf204528SSimon Glass ptr[j] = i; 870*cf204528SSimon Glass } 871*cf204528SSimon Glass 872*cf204528SSimon Glass /*-- 873*cf204528SSimon Glass Now ftab contains the first loc of every small bucket. 874*cf204528SSimon Glass Calculate the running order, from smallest to largest 875*cf204528SSimon Glass big bucket. 876*cf204528SSimon Glass --*/ 877*cf204528SSimon Glass for (i = 0; i <= 255; i++) { 878*cf204528SSimon Glass bigDone [i] = False; 879*cf204528SSimon Glass runningOrder[i] = i; 880*cf204528SSimon Glass } 881*cf204528SSimon Glass 882*cf204528SSimon Glass { 883*cf204528SSimon Glass Int32 vv; 884*cf204528SSimon Glass Int32 h = 1; 885*cf204528SSimon Glass do h = 3 * h + 1; while (h <= 256); 886*cf204528SSimon Glass do { 887*cf204528SSimon Glass h = h / 3; 888*cf204528SSimon Glass for (i = h; i <= 255; i++) { 889*cf204528SSimon Glass vv = runningOrder[i]; 890*cf204528SSimon Glass j = i; 891*cf204528SSimon Glass while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) { 892*cf204528SSimon Glass runningOrder[j] = runningOrder[j-h]; 893*cf204528SSimon Glass j = j - h; 894*cf204528SSimon Glass if (j <= (h - 1)) goto zero; 895*cf204528SSimon Glass } 896*cf204528SSimon Glass zero: 897*cf204528SSimon Glass runningOrder[j] = vv; 898*cf204528SSimon Glass } 899*cf204528SSimon Glass } while (h != 1); 900*cf204528SSimon Glass } 901*cf204528SSimon Glass 902*cf204528SSimon Glass /*-- 903*cf204528SSimon Glass The main sorting loop. 904*cf204528SSimon Glass --*/ 905*cf204528SSimon Glass 906*cf204528SSimon Glass numQSorted = 0; 907*cf204528SSimon Glass 908*cf204528SSimon Glass for (i = 0; i <= 255; i++) { 909*cf204528SSimon Glass 910*cf204528SSimon Glass /*-- 911*cf204528SSimon Glass Process big buckets, starting with the least full. 912*cf204528SSimon Glass Basically this is a 3-step process in which we call 913*cf204528SSimon Glass mainQSort3 to sort the small buckets [ss, j], but 914*cf204528SSimon Glass also make a big effort to avoid the calls if we can. 915*cf204528SSimon Glass --*/ 916*cf204528SSimon Glass ss = runningOrder[i]; 917*cf204528SSimon Glass 918*cf204528SSimon Glass /*-- 919*cf204528SSimon Glass Step 1: 920*cf204528SSimon Glass Complete the big bucket [ss] by quicksorting 921*cf204528SSimon Glass any unsorted small buckets [ss, j], for j != ss. 922*cf204528SSimon Glass Hopefully previous pointer-scanning phases have already 923*cf204528SSimon Glass completed many of the small buckets [ss, j], so 924*cf204528SSimon Glass we don't have to sort them at all. 925*cf204528SSimon Glass --*/ 926*cf204528SSimon Glass for (j = 0; j <= 255; j++) { 927*cf204528SSimon Glass if (j != ss) { 928*cf204528SSimon Glass sb = (ss << 8) + j; 929*cf204528SSimon Glass if ( ! (ftab[sb] & SETMASK) ) { 930*cf204528SSimon Glass Int32 lo = ftab[sb] & CLEARMASK; 931*cf204528SSimon Glass Int32 hi = (ftab[sb+1] & CLEARMASK) - 1; 932*cf204528SSimon Glass if (hi > lo) { 933*cf204528SSimon Glass if (verb >= 4) 934*cf204528SSimon Glass VPrintf4 ( " qsort [0x%x, 0x%x] " 935*cf204528SSimon Glass "done %d this %d\n", 936*cf204528SSimon Glass ss, j, numQSorted, hi - lo + 1 ); 937*cf204528SSimon Glass mainQSort3 ( 938*cf204528SSimon Glass ptr, block, quadrant, nblock, 939*cf204528SSimon Glass lo, hi, BZ_N_RADIX, budget 940*cf204528SSimon Glass ); 941*cf204528SSimon Glass numQSorted += (hi - lo + 1); 942*cf204528SSimon Glass if (*budget < 0) return; 943*cf204528SSimon Glass } 944*cf204528SSimon Glass } 945*cf204528SSimon Glass ftab[sb] |= SETMASK; 946*cf204528SSimon Glass } 947*cf204528SSimon Glass } 948*cf204528SSimon Glass 949*cf204528SSimon Glass AssertH ( !bigDone[ss], 1006 ); 950*cf204528SSimon Glass 951*cf204528SSimon Glass /*-- 952*cf204528SSimon Glass Step 2: 953*cf204528SSimon Glass Now scan this big bucket [ss] so as to synthesise the 954*cf204528SSimon Glass sorted order for small buckets [t, ss] for all t, 955*cf204528SSimon Glass including, magically, the bucket [ss,ss] too. 956*cf204528SSimon Glass This will avoid doing Real Work in subsequent Step 1's. 957*cf204528SSimon Glass --*/ 958*cf204528SSimon Glass { 959*cf204528SSimon Glass for (j = 0; j <= 255; j++) { 960*cf204528SSimon Glass copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK; 961*cf204528SSimon Glass copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1; 962*cf204528SSimon Glass } 963*cf204528SSimon Glass for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) { 964*cf204528SSimon Glass k = ptr[j]-1; if (k < 0) k += nblock; 965*cf204528SSimon Glass c1 = block[k]; 966*cf204528SSimon Glass if (!bigDone[c1]) 967*cf204528SSimon Glass ptr[ copyStart[c1]++ ] = k; 968*cf204528SSimon Glass } 969*cf204528SSimon Glass for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) { 970*cf204528SSimon Glass k = ptr[j]-1; if (k < 0) k += nblock; 971*cf204528SSimon Glass c1 = block[k]; 972*cf204528SSimon Glass if (!bigDone[c1]) 973*cf204528SSimon Glass ptr[ copyEnd[c1]-- ] = k; 974*cf204528SSimon Glass } 975*cf204528SSimon Glass } 976*cf204528SSimon Glass 977*cf204528SSimon Glass AssertH ( (copyStart[ss]-1 == copyEnd[ss]) 978*cf204528SSimon Glass || 979*cf204528SSimon Glass /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1. 980*cf204528SSimon Glass Necessity for this case is demonstrated by compressing 981*cf204528SSimon Glass a sequence of approximately 48.5 million of character 982*cf204528SSimon Glass 251; 1.0.0/1.0.1 will then die here. */ 983*cf204528SSimon Glass (copyStart[ss] == 0 && copyEnd[ss] == nblock-1), 984*cf204528SSimon Glass 1007 ) 985*cf204528SSimon Glass 986*cf204528SSimon Glass for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK; 987*cf204528SSimon Glass 988*cf204528SSimon Glass /*-- 989*cf204528SSimon Glass Step 3: 990*cf204528SSimon Glass The [ss] big bucket is now done. Record this fact, 991*cf204528SSimon Glass and update the quadrant descriptors. Remember to 992*cf204528SSimon Glass update quadrants in the overshoot area too, if 993*cf204528SSimon Glass necessary. The "if (i < 255)" test merely skips 994*cf204528SSimon Glass this updating for the last bucket processed, since 995*cf204528SSimon Glass updating for the last bucket is pointless. 996*cf204528SSimon Glass 997*cf204528SSimon Glass The quadrant array provides a way to incrementally 998*cf204528SSimon Glass cache sort orderings, as they appear, so as to 999*cf204528SSimon Glass make subsequent comparisons in fullGtU() complete 1000*cf204528SSimon Glass faster. For repetitive blocks this makes a big 1001*cf204528SSimon Glass difference (but not big enough to be able to avoid 1002*cf204528SSimon Glass the fallback sorting mechanism, exponential radix sort). 1003*cf204528SSimon Glass 1004*cf204528SSimon Glass The precise meaning is: at all times: 1005*cf204528SSimon Glass 1006*cf204528SSimon Glass for 0 <= i < nblock and 0 <= j <= nblock 1007*cf204528SSimon Glass 1008*cf204528SSimon Glass if block[i] != block[j], 1009*cf204528SSimon Glass 1010*cf204528SSimon Glass then the relative values of quadrant[i] and 1011*cf204528SSimon Glass quadrant[j] are meaningless. 1012*cf204528SSimon Glass 1013*cf204528SSimon Glass else { 1014*cf204528SSimon Glass if quadrant[i] < quadrant[j] 1015*cf204528SSimon Glass then the string starting at i lexicographically 1016*cf204528SSimon Glass precedes the string starting at j 1017*cf204528SSimon Glass 1018*cf204528SSimon Glass else if quadrant[i] > quadrant[j] 1019*cf204528SSimon Glass then the string starting at j lexicographically 1020*cf204528SSimon Glass precedes the string starting at i 1021*cf204528SSimon Glass 1022*cf204528SSimon Glass else 1023*cf204528SSimon Glass the relative ordering of the strings starting 1024*cf204528SSimon Glass at i and j has not yet been determined. 1025*cf204528SSimon Glass } 1026*cf204528SSimon Glass --*/ 1027*cf204528SSimon Glass bigDone[ss] = True; 1028*cf204528SSimon Glass 1029*cf204528SSimon Glass if (i < 255) { 1030*cf204528SSimon Glass Int32 bbStart = ftab[ss << 8] & CLEARMASK; 1031*cf204528SSimon Glass Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart; 1032*cf204528SSimon Glass Int32 shifts = 0; 1033*cf204528SSimon Glass 1034*cf204528SSimon Glass while ((bbSize >> shifts) > 65534) shifts++; 1035*cf204528SSimon Glass 1036*cf204528SSimon Glass for (j = bbSize-1; j >= 0; j--) { 1037*cf204528SSimon Glass Int32 a2update = ptr[bbStart + j]; 1038*cf204528SSimon Glass UInt16 qVal = (UInt16)(j >> shifts); 1039*cf204528SSimon Glass quadrant[a2update] = qVal; 1040*cf204528SSimon Glass if (a2update < BZ_N_OVERSHOOT) 1041*cf204528SSimon Glass quadrant[a2update + nblock] = qVal; 1042*cf204528SSimon Glass } 1043*cf204528SSimon Glass AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 ); 1044*cf204528SSimon Glass } 1045*cf204528SSimon Glass 1046*cf204528SSimon Glass } 1047*cf204528SSimon Glass 1048*cf204528SSimon Glass if (verb >= 4) 1049*cf204528SSimon Glass VPrintf3 ( " %d pointers, %d sorted, %d scanned\n", 1050*cf204528SSimon Glass nblock, numQSorted, nblock - numQSorted ); 1051*cf204528SSimon Glass } 1052*cf204528SSimon Glass 1053*cf204528SSimon Glass #undef BIGFREQ 1054*cf204528SSimon Glass #undef SETMASK 1055*cf204528SSimon Glass #undef CLEARMASK 1056*cf204528SSimon Glass 1057*cf204528SSimon Glass 1058*cf204528SSimon Glass /*---------------------------------------------*/ 1059*cf204528SSimon Glass /* Pre: 1060*cf204528SSimon Glass nblock > 0 1061*cf204528SSimon Glass arr2 exists for [0 .. nblock-1 +N_OVERSHOOT] 1062*cf204528SSimon Glass ((UChar*)arr2) [0 .. nblock-1] holds block 1063*cf204528SSimon Glass arr1 exists for [0 .. nblock-1] 1064*cf204528SSimon Glass 1065*cf204528SSimon Glass Post: 1066*cf204528SSimon Glass ((UChar*)arr2) [0 .. nblock-1] holds block 1067*cf204528SSimon Glass All other areas of block destroyed 1068*cf204528SSimon Glass ftab [ 0 .. 65536 ] destroyed 1069*cf204528SSimon Glass arr1 [0 .. nblock-1] holds sorted order 1070*cf204528SSimon Glass */ 1071*cf204528SSimon Glass void BZ2_blockSort ( EState* s ) 1072*cf204528SSimon Glass { 1073*cf204528SSimon Glass UInt32* ptr = s->ptr; 1074*cf204528SSimon Glass UChar* block = s->block; 1075*cf204528SSimon Glass UInt32* ftab = s->ftab; 1076*cf204528SSimon Glass Int32 nblock = s->nblock; 1077*cf204528SSimon Glass Int32 verb = s->verbosity; 1078*cf204528SSimon Glass Int32 wfact = s->workFactor; 1079*cf204528SSimon Glass UInt16* quadrant; 1080*cf204528SSimon Glass Int32 budget; 1081*cf204528SSimon Glass Int32 budgetInit; 1082*cf204528SSimon Glass Int32 i; 1083*cf204528SSimon Glass 1084*cf204528SSimon Glass if (nblock < 10000) { 1085*cf204528SSimon Glass fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb ); 1086*cf204528SSimon Glass } else { 1087*cf204528SSimon Glass /* Calculate the location for quadrant, remembering to get 1088*cf204528SSimon Glass the alignment right. Assumes that &(block[0]) is at least 1089*cf204528SSimon Glass 2-byte aligned -- this should be ok since block is really 1090*cf204528SSimon Glass the first section of arr2. 1091*cf204528SSimon Glass */ 1092*cf204528SSimon Glass i = nblock+BZ_N_OVERSHOOT; 1093*cf204528SSimon Glass if (i & 1) i++; 1094*cf204528SSimon Glass quadrant = (UInt16*)(&(block[i])); 1095*cf204528SSimon Glass 1096*cf204528SSimon Glass /* (wfact-1) / 3 puts the default-factor-30 1097*cf204528SSimon Glass transition point at very roughly the same place as 1098*cf204528SSimon Glass with v0.1 and v0.9.0. 1099*cf204528SSimon Glass Not that it particularly matters any more, since the 1100*cf204528SSimon Glass resulting compressed stream is now the same regardless 1101*cf204528SSimon Glass of whether or not we use the main sort or fallback sort. 1102*cf204528SSimon Glass */ 1103*cf204528SSimon Glass if (wfact < 1 ) wfact = 1; 1104*cf204528SSimon Glass if (wfact > 100) wfact = 100; 1105*cf204528SSimon Glass budgetInit = nblock * ((wfact-1) / 3); 1106*cf204528SSimon Glass budget = budgetInit; 1107*cf204528SSimon Glass 1108*cf204528SSimon Glass mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget ); 1109*cf204528SSimon Glass if (verb >= 3) 1110*cf204528SSimon Glass VPrintf3 ( " %d work, %d block, ratio %5.2f\n", 1111*cf204528SSimon Glass budgetInit - budget, 1112*cf204528SSimon Glass nblock, 1113*cf204528SSimon Glass (float)(budgetInit - budget) / 1114*cf204528SSimon Glass (float)(nblock==0 ? 1 : nblock) ); 1115*cf204528SSimon Glass if (budget < 0) { 1116*cf204528SSimon Glass if (verb >= 2) 1117*cf204528SSimon Glass VPrintf0 ( " too repetitive; using fallback" 1118*cf204528SSimon Glass " sorting algorithm\n" ); 1119*cf204528SSimon Glass fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb ); 1120*cf204528SSimon Glass } 1121*cf204528SSimon Glass } 1122*cf204528SSimon Glass 1123*cf204528SSimon Glass s->origPtr = -1; 1124*cf204528SSimon Glass for (i = 0; i < s->nblock; i++) 1125*cf204528SSimon Glass if (ptr[i] == 0) 1126*cf204528SSimon Glass { s->origPtr = i; break; }; 1127*cf204528SSimon Glass 1128*cf204528SSimon Glass AssertH( s->origPtr != -1, 1003 ); 1129*cf204528SSimon Glass } 1130*cf204528SSimon Glass 1131*cf204528SSimon Glass 1132*cf204528SSimon Glass /*-------------------------------------------------------------*/ 1133*cf204528SSimon Glass /*--- end blocksort.c ---*/ 1134*cf204528SSimon Glass /*-------------------------------------------------------------*/ 1135