xref: /OK3568_Linux_fs/u-boot/lib/bzip2/bzlib_blocksort.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun 
2*4882a593Smuzhiyun /*-------------------------------------------------------------*/
3*4882a593Smuzhiyun /*--- Block sorting machinery                               ---*/
4*4882a593Smuzhiyun /*---                                           blocksort.c ---*/
5*4882a593Smuzhiyun /*-------------------------------------------------------------*/
6*4882a593Smuzhiyun 
7*4882a593Smuzhiyun /*--
8*4882a593Smuzhiyun   This file is a part of bzip2 and/or libbzip2, a program and
9*4882a593Smuzhiyun   library for lossless, block-sorting data compression.
10*4882a593Smuzhiyun 
11*4882a593Smuzhiyun   Copyright (C) 1996-2002 Julian R Seward.  All rights reserved.
12*4882a593Smuzhiyun 
13*4882a593Smuzhiyun   Redistribution and use in source and binary forms, with or without
14*4882a593Smuzhiyun   modification, are permitted provided that the following conditions
15*4882a593Smuzhiyun   are met:
16*4882a593Smuzhiyun 
17*4882a593Smuzhiyun   1. Redistributions of source code must retain the above copyright
18*4882a593Smuzhiyun      notice, this list of conditions and the following disclaimer.
19*4882a593Smuzhiyun 
20*4882a593Smuzhiyun   2. The origin of this software must not be misrepresented; you must
21*4882a593Smuzhiyun      not claim that you wrote the original software.  If you use this
22*4882a593Smuzhiyun      software in a product, an acknowledgment in the product
23*4882a593Smuzhiyun      documentation would be appreciated but is not required.
24*4882a593Smuzhiyun 
25*4882a593Smuzhiyun   3. Altered source versions must be plainly marked as such, and must
26*4882a593Smuzhiyun      not be misrepresented as being the original software.
27*4882a593Smuzhiyun 
28*4882a593Smuzhiyun   4. The name of the author may not be used to endorse or promote
29*4882a593Smuzhiyun      products derived from this software without specific prior written
30*4882a593Smuzhiyun      permission.
31*4882a593Smuzhiyun 
32*4882a593Smuzhiyun   THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
33*4882a593Smuzhiyun   OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
34*4882a593Smuzhiyun   WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
35*4882a593Smuzhiyun   ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
36*4882a593Smuzhiyun   DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
37*4882a593Smuzhiyun   DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
38*4882a593Smuzhiyun   GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
39*4882a593Smuzhiyun   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
40*4882a593Smuzhiyun   WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
41*4882a593Smuzhiyun   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
42*4882a593Smuzhiyun   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43*4882a593Smuzhiyun 
44*4882a593Smuzhiyun   Julian Seward, Cambridge, UK.
45*4882a593Smuzhiyun   jseward@acm.org
46*4882a593Smuzhiyun   bzip2/libbzip2 version 1.0.6 of 6 September 2010
47*4882a593Smuzhiyun   Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org>
48*4882a593Smuzhiyun 
49*4882a593Smuzhiyun   This program is based on (at least) the work of:
50*4882a593Smuzhiyun      Mike Burrows
51*4882a593Smuzhiyun      David Wheeler
52*4882a593Smuzhiyun      Peter Fenwick
53*4882a593Smuzhiyun      Alistair Moffat
54*4882a593Smuzhiyun      Radford Neal
55*4882a593Smuzhiyun      Ian H. Witten
56*4882a593Smuzhiyun      Robert Sedgewick
57*4882a593Smuzhiyun      Jon L. Bentley
58*4882a593Smuzhiyun 
59*4882a593Smuzhiyun   For more information on these sources, see the manual.
60*4882a593Smuzhiyun --*/
61*4882a593Smuzhiyun 
62*4882a593Smuzhiyun #include "bzlib_private.h"
63*4882a593Smuzhiyun 
64*4882a593Smuzhiyun /*---------------------------------------------*/
65*4882a593Smuzhiyun /*--- Fallback O(N log(N)^2) sorting        ---*/
66*4882a593Smuzhiyun /*--- algorithm, for repetitive blocks      ---*/
67*4882a593Smuzhiyun /*---------------------------------------------*/
68*4882a593Smuzhiyun 
69*4882a593Smuzhiyun /*---------------------------------------------*/
70*4882a593Smuzhiyun static
71*4882a593Smuzhiyun __inline__
fallbackSimpleSort(UInt32 * fmap,UInt32 * eclass,Int32 lo,Int32 hi)72*4882a593Smuzhiyun void fallbackSimpleSort ( UInt32* fmap,
73*4882a593Smuzhiyun                           UInt32* eclass,
74*4882a593Smuzhiyun                           Int32   lo,
75*4882a593Smuzhiyun                           Int32   hi )
76*4882a593Smuzhiyun {
77*4882a593Smuzhiyun    Int32 i, j, tmp;
78*4882a593Smuzhiyun    UInt32 ec_tmp;
79*4882a593Smuzhiyun 
80*4882a593Smuzhiyun    if (lo == hi) return;
81*4882a593Smuzhiyun 
82*4882a593Smuzhiyun    if (hi - lo > 3) {
83*4882a593Smuzhiyun       for ( i = hi-4; i >= lo; i-- ) {
84*4882a593Smuzhiyun          tmp = fmap[i];
85*4882a593Smuzhiyun          ec_tmp = eclass[tmp];
86*4882a593Smuzhiyun          for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 )
87*4882a593Smuzhiyun             fmap[j-4] = fmap[j];
88*4882a593Smuzhiyun          fmap[j-4] = tmp;
89*4882a593Smuzhiyun       }
90*4882a593Smuzhiyun    }
91*4882a593Smuzhiyun 
92*4882a593Smuzhiyun    for ( i = hi-1; i >= lo; i-- ) {
93*4882a593Smuzhiyun       tmp = fmap[i];
94*4882a593Smuzhiyun       ec_tmp = eclass[tmp];
95*4882a593Smuzhiyun       for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ )
96*4882a593Smuzhiyun          fmap[j-1] = fmap[j];
97*4882a593Smuzhiyun       fmap[j-1] = tmp;
98*4882a593Smuzhiyun    }
99*4882a593Smuzhiyun }
100*4882a593Smuzhiyun 
101*4882a593Smuzhiyun 
102*4882a593Smuzhiyun /*---------------------------------------------*/
103*4882a593Smuzhiyun #define fswap(zz1, zz2) \
104*4882a593Smuzhiyun    { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
105*4882a593Smuzhiyun 
106*4882a593Smuzhiyun #define fvswap(zzp1, zzp2, zzn)       \
107*4882a593Smuzhiyun {                                     \
108*4882a593Smuzhiyun    Int32 yyp1 = (zzp1);               \
109*4882a593Smuzhiyun    Int32 yyp2 = (zzp2);               \
110*4882a593Smuzhiyun    Int32 yyn  = (zzn);                \
111*4882a593Smuzhiyun    while (yyn > 0) {                  \
112*4882a593Smuzhiyun       fswap(fmap[yyp1], fmap[yyp2]);  \
113*4882a593Smuzhiyun       yyp1++; yyp2++; yyn--;          \
114*4882a593Smuzhiyun    }                                  \
115*4882a593Smuzhiyun }
116*4882a593Smuzhiyun 
117*4882a593Smuzhiyun 
118*4882a593Smuzhiyun #define fmin(a,b) ((a) < (b)) ? (a) : (b)
119*4882a593Smuzhiyun 
120*4882a593Smuzhiyun #define fpush(lz,hz) { stackLo[sp] = lz; \
121*4882a593Smuzhiyun                        stackHi[sp] = hz; \
122*4882a593Smuzhiyun                        sp++; }
123*4882a593Smuzhiyun 
124*4882a593Smuzhiyun #define fpop(lz,hz) { sp--;              \
125*4882a593Smuzhiyun                       lz = stackLo[sp];  \
126*4882a593Smuzhiyun                       hz = stackHi[sp]; }
127*4882a593Smuzhiyun 
128*4882a593Smuzhiyun #define FALLBACK_QSORT_SMALL_THRESH 10
129*4882a593Smuzhiyun #define FALLBACK_QSORT_STACK_SIZE   100
130*4882a593Smuzhiyun 
131*4882a593Smuzhiyun 
132*4882a593Smuzhiyun static
fallbackQSort3(UInt32 * fmap,UInt32 * eclass,Int32 loSt,Int32 hiSt)133*4882a593Smuzhiyun void fallbackQSort3 ( UInt32* fmap,
134*4882a593Smuzhiyun                       UInt32* eclass,
135*4882a593Smuzhiyun                       Int32   loSt,
136*4882a593Smuzhiyun                       Int32   hiSt )
137*4882a593Smuzhiyun {
138*4882a593Smuzhiyun    Int32 unLo, unHi, ltLo, gtHi, n, m;
139*4882a593Smuzhiyun    Int32 sp, lo, hi;
140*4882a593Smuzhiyun    UInt32 med, r, r3;
141*4882a593Smuzhiyun    Int32 stackLo[FALLBACK_QSORT_STACK_SIZE];
142*4882a593Smuzhiyun    Int32 stackHi[FALLBACK_QSORT_STACK_SIZE];
143*4882a593Smuzhiyun 
144*4882a593Smuzhiyun    r = 0;
145*4882a593Smuzhiyun 
146*4882a593Smuzhiyun    sp = 0;
147*4882a593Smuzhiyun    fpush ( loSt, hiSt );
148*4882a593Smuzhiyun 
149*4882a593Smuzhiyun    while (sp > 0) {
150*4882a593Smuzhiyun 
151*4882a593Smuzhiyun       AssertH ( sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004 );
152*4882a593Smuzhiyun 
153*4882a593Smuzhiyun       fpop ( lo, hi );
154*4882a593Smuzhiyun       if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
155*4882a593Smuzhiyun          fallbackSimpleSort ( fmap, eclass, lo, hi );
156*4882a593Smuzhiyun          continue;
157*4882a593Smuzhiyun       }
158*4882a593Smuzhiyun 
159*4882a593Smuzhiyun       /* Random partitioning.  Median of 3 sometimes fails to
160*4882a593Smuzhiyun          avoid bad cases.  Median of 9 seems to help but
161*4882a593Smuzhiyun          looks rather expensive.  This too seems to work but
162*4882a593Smuzhiyun          is cheaper.  Guidance for the magic constants
163*4882a593Smuzhiyun          7621 and 32768 is taken from Sedgewick's algorithms
164*4882a593Smuzhiyun          book, chapter 35.
165*4882a593Smuzhiyun       */
166*4882a593Smuzhiyun       r = ((r * 7621) + 1) % 32768;
167*4882a593Smuzhiyun       r3 = r % 3;
168*4882a593Smuzhiyun       if (r3 == 0) med = eclass[fmap[lo]]; else
169*4882a593Smuzhiyun       if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else
170*4882a593Smuzhiyun                    med = eclass[fmap[hi]];
171*4882a593Smuzhiyun 
172*4882a593Smuzhiyun       unLo = ltLo = lo;
173*4882a593Smuzhiyun       unHi = gtHi = hi;
174*4882a593Smuzhiyun 
175*4882a593Smuzhiyun       while (1) {
176*4882a593Smuzhiyun          while (1) {
177*4882a593Smuzhiyun             if (unLo > unHi) break;
178*4882a593Smuzhiyun             n = (Int32)eclass[fmap[unLo]] - (Int32)med;
179*4882a593Smuzhiyun             if (n == 0) {
180*4882a593Smuzhiyun                fswap(fmap[unLo], fmap[ltLo]);
181*4882a593Smuzhiyun                ltLo++; unLo++;
182*4882a593Smuzhiyun                continue;
183*4882a593Smuzhiyun             };
184*4882a593Smuzhiyun             if (n > 0) break;
185*4882a593Smuzhiyun             unLo++;
186*4882a593Smuzhiyun          }
187*4882a593Smuzhiyun          while (1) {
188*4882a593Smuzhiyun             if (unLo > unHi) break;
189*4882a593Smuzhiyun             n = (Int32)eclass[fmap[unHi]] - (Int32)med;
190*4882a593Smuzhiyun             if (n == 0) {
191*4882a593Smuzhiyun                fswap(fmap[unHi], fmap[gtHi]);
192*4882a593Smuzhiyun                gtHi--; unHi--;
193*4882a593Smuzhiyun                continue;
194*4882a593Smuzhiyun             };
195*4882a593Smuzhiyun             if (n < 0) break;
196*4882a593Smuzhiyun             unHi--;
197*4882a593Smuzhiyun          }
198*4882a593Smuzhiyun          if (unLo > unHi) break;
199*4882a593Smuzhiyun          fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
200*4882a593Smuzhiyun       }
201*4882a593Smuzhiyun 
202*4882a593Smuzhiyun       AssertD ( unHi == unLo-1, "fallbackQSort3(2)" );
203*4882a593Smuzhiyun 
204*4882a593Smuzhiyun       if (gtHi < ltLo) continue;
205*4882a593Smuzhiyun 
206*4882a593Smuzhiyun       n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n);
207*4882a593Smuzhiyun       m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m);
208*4882a593Smuzhiyun 
209*4882a593Smuzhiyun       n = lo + unLo - ltLo - 1;
210*4882a593Smuzhiyun       m = hi - (gtHi - unHi) + 1;
211*4882a593Smuzhiyun 
212*4882a593Smuzhiyun       if (n - lo > hi - m) {
213*4882a593Smuzhiyun          fpush ( lo, n );
214*4882a593Smuzhiyun          fpush ( m, hi );
215*4882a593Smuzhiyun       } else {
216*4882a593Smuzhiyun          fpush ( m, hi );
217*4882a593Smuzhiyun          fpush ( lo, n );
218*4882a593Smuzhiyun       }
219*4882a593Smuzhiyun    }
220*4882a593Smuzhiyun }
221*4882a593Smuzhiyun 
222*4882a593Smuzhiyun #undef fmin
223*4882a593Smuzhiyun #undef fpush
224*4882a593Smuzhiyun #undef fpop
225*4882a593Smuzhiyun #undef fswap
226*4882a593Smuzhiyun #undef fvswap
227*4882a593Smuzhiyun #undef FALLBACK_QSORT_SMALL_THRESH
228*4882a593Smuzhiyun #undef FALLBACK_QSORT_STACK_SIZE
229*4882a593Smuzhiyun 
230*4882a593Smuzhiyun 
231*4882a593Smuzhiyun /*---------------------------------------------*/
232*4882a593Smuzhiyun /* Pre:
233*4882a593Smuzhiyun       nblock > 0
234*4882a593Smuzhiyun       eclass exists for [0 .. nblock-1]
235*4882a593Smuzhiyun       ((UChar*)eclass) [0 .. nblock-1] holds block
236*4882a593Smuzhiyun       ptr exists for [0 .. nblock-1]
237*4882a593Smuzhiyun 
238*4882a593Smuzhiyun    Post:
239*4882a593Smuzhiyun       ((UChar*)eclass) [0 .. nblock-1] holds block
240*4882a593Smuzhiyun       All other areas of eclass destroyed
241*4882a593Smuzhiyun       fmap [0 .. nblock-1] holds sorted order
242*4882a593Smuzhiyun       bhtab [ 0 .. 2+(nblock/32) ] destroyed
243*4882a593Smuzhiyun */
244*4882a593Smuzhiyun 
245*4882a593Smuzhiyun #define       SET_BH(zz)  bhtab[(zz) >> 5] |= (1 << ((zz) & 31))
246*4882a593Smuzhiyun #define     CLEAR_BH(zz)  bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31))
247*4882a593Smuzhiyun #define     ISSET_BH(zz)  (bhtab[(zz) >> 5] & (1 << ((zz) & 31)))
248*4882a593Smuzhiyun #define      WORD_BH(zz)  bhtab[(zz) >> 5]
249*4882a593Smuzhiyun #define UNALIGNED_BH(zz)  ((zz) & 0x01f)
250*4882a593Smuzhiyun 
251*4882a593Smuzhiyun static
fallbackSort(UInt32 * fmap,UInt32 * eclass,UInt32 * bhtab,Int32 nblock,Int32 verb)252*4882a593Smuzhiyun void fallbackSort ( UInt32* fmap,
253*4882a593Smuzhiyun                     UInt32* eclass,
254*4882a593Smuzhiyun                     UInt32* bhtab,
255*4882a593Smuzhiyun                     Int32   nblock,
256*4882a593Smuzhiyun                     Int32   verb )
257*4882a593Smuzhiyun {
258*4882a593Smuzhiyun    Int32 ftab[257];
259*4882a593Smuzhiyun    Int32 ftabCopy[256];
260*4882a593Smuzhiyun    Int32 H, i, j, k, l, r, cc, cc1;
261*4882a593Smuzhiyun    Int32 nNotDone;
262*4882a593Smuzhiyun    Int32 nBhtab;
263*4882a593Smuzhiyun    UChar* eclass8 = (UChar*)eclass;
264*4882a593Smuzhiyun 
265*4882a593Smuzhiyun    /*--
266*4882a593Smuzhiyun       Initial 1-char radix sort to generate
267*4882a593Smuzhiyun       initial fmap and initial BH bits.
268*4882a593Smuzhiyun    --*/
269*4882a593Smuzhiyun    if (verb >= 4)
270*4882a593Smuzhiyun       VPrintf0 ( "        bucket sorting ...\n" );
271*4882a593Smuzhiyun    for (i = 0; i < 257;    i++) ftab[i] = 0;
272*4882a593Smuzhiyun    for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
273*4882a593Smuzhiyun    for (i = 0; i < 256;    i++) ftabCopy[i] = ftab[i];
274*4882a593Smuzhiyun    for (i = 1; i < 257;    i++) ftab[i] += ftab[i-1];
275*4882a593Smuzhiyun 
276*4882a593Smuzhiyun    for (i = 0; i < nblock; i++) {
277*4882a593Smuzhiyun       j = eclass8[i];
278*4882a593Smuzhiyun       k = ftab[j] - 1;
279*4882a593Smuzhiyun       ftab[j] = k;
280*4882a593Smuzhiyun       fmap[k] = i;
281*4882a593Smuzhiyun    }
282*4882a593Smuzhiyun 
283*4882a593Smuzhiyun    nBhtab = 2 + (nblock / 32);
284*4882a593Smuzhiyun    for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
285*4882a593Smuzhiyun    for (i = 0; i < 256; i++) SET_BH(ftab[i]);
286*4882a593Smuzhiyun 
287*4882a593Smuzhiyun    /*--
288*4882a593Smuzhiyun       Inductively refine the buckets.  Kind-of an
289*4882a593Smuzhiyun       "exponential radix sort" (!), inspired by the
290*4882a593Smuzhiyun       Manber-Myers suffix array construction algorithm.
291*4882a593Smuzhiyun    --*/
292*4882a593Smuzhiyun 
293*4882a593Smuzhiyun    /*-- set sentinel bits for block-end detection --*/
294*4882a593Smuzhiyun    for (i = 0; i < 32; i++) {
295*4882a593Smuzhiyun       SET_BH(nblock + 2*i);
296*4882a593Smuzhiyun       CLEAR_BH(nblock + 2*i + 1);
297*4882a593Smuzhiyun    }
298*4882a593Smuzhiyun 
299*4882a593Smuzhiyun    /*-- the log(N) loop --*/
300*4882a593Smuzhiyun    H = 1;
301*4882a593Smuzhiyun    while (1) {
302*4882a593Smuzhiyun 
303*4882a593Smuzhiyun       if (verb >= 4)
304*4882a593Smuzhiyun          VPrintf1 ( "        depth %6d has ", H );
305*4882a593Smuzhiyun 
306*4882a593Smuzhiyun       j = 0;
307*4882a593Smuzhiyun       for (i = 0; i < nblock; i++) {
308*4882a593Smuzhiyun          if (ISSET_BH(i)) j = i;
309*4882a593Smuzhiyun          k = fmap[i] - H; if (k < 0) k += nblock;
310*4882a593Smuzhiyun          eclass[k] = j;
311*4882a593Smuzhiyun       }
312*4882a593Smuzhiyun 
313*4882a593Smuzhiyun       nNotDone = 0;
314*4882a593Smuzhiyun       r = -1;
315*4882a593Smuzhiyun       while (1) {
316*4882a593Smuzhiyun 
317*4882a593Smuzhiyun 	 /*-- find the next non-singleton bucket --*/
318*4882a593Smuzhiyun          k = r + 1;
319*4882a593Smuzhiyun          while (ISSET_BH(k) && UNALIGNED_BH(k)) k++;
320*4882a593Smuzhiyun          if (ISSET_BH(k)) {
321*4882a593Smuzhiyun             while (WORD_BH(k) == 0xffffffff) k += 32;
322*4882a593Smuzhiyun             while (ISSET_BH(k)) k++;
323*4882a593Smuzhiyun          }
324*4882a593Smuzhiyun          l = k - 1;
325*4882a593Smuzhiyun          if (l >= nblock) break;
326*4882a593Smuzhiyun          while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++;
327*4882a593Smuzhiyun          if (!ISSET_BH(k)) {
328*4882a593Smuzhiyun             while (WORD_BH(k) == 0x00000000) k += 32;
329*4882a593Smuzhiyun             while (!ISSET_BH(k)) k++;
330*4882a593Smuzhiyun          }
331*4882a593Smuzhiyun          r = k - 1;
332*4882a593Smuzhiyun          if (r >= nblock) break;
333*4882a593Smuzhiyun 
334*4882a593Smuzhiyun          /*-- now [l, r] bracket current bucket --*/
335*4882a593Smuzhiyun          if (r > l) {
336*4882a593Smuzhiyun             nNotDone += (r - l + 1);
337*4882a593Smuzhiyun             fallbackQSort3 ( fmap, eclass, l, r );
338*4882a593Smuzhiyun 
339*4882a593Smuzhiyun             /*-- scan bucket and generate header bits-- */
340*4882a593Smuzhiyun             cc = -1;
341*4882a593Smuzhiyun             for (i = l; i <= r; i++) {
342*4882a593Smuzhiyun                cc1 = eclass[fmap[i]];
343*4882a593Smuzhiyun                if (cc != cc1) { SET_BH(i); cc = cc1; };
344*4882a593Smuzhiyun             }
345*4882a593Smuzhiyun          }
346*4882a593Smuzhiyun       }
347*4882a593Smuzhiyun 
348*4882a593Smuzhiyun       if (verb >= 4)
349*4882a593Smuzhiyun          VPrintf1 ( "%6d unresolved strings\n", nNotDone );
350*4882a593Smuzhiyun 
351*4882a593Smuzhiyun       H *= 2;
352*4882a593Smuzhiyun       if (H > nblock || nNotDone == 0) break;
353*4882a593Smuzhiyun    }
354*4882a593Smuzhiyun 
355*4882a593Smuzhiyun    /*--
356*4882a593Smuzhiyun       Reconstruct the original block in
357*4882a593Smuzhiyun       eclass8 [0 .. nblock-1], since the
358*4882a593Smuzhiyun       previous phase destroyed it.
359*4882a593Smuzhiyun    --*/
360*4882a593Smuzhiyun    if (verb >= 4)
361*4882a593Smuzhiyun       VPrintf0 ( "        reconstructing block ...\n" );
362*4882a593Smuzhiyun    j = 0;
363*4882a593Smuzhiyun    for (i = 0; i < nblock; i++) {
364*4882a593Smuzhiyun       while (ftabCopy[j] == 0) j++;
365*4882a593Smuzhiyun       ftabCopy[j]--;
366*4882a593Smuzhiyun       eclass8[fmap[i]] = (UChar)j;
367*4882a593Smuzhiyun    }
368*4882a593Smuzhiyun    AssertH ( j < 256, 1005 );
369*4882a593Smuzhiyun }
370*4882a593Smuzhiyun 
371*4882a593Smuzhiyun #undef       SET_BH
372*4882a593Smuzhiyun #undef     CLEAR_BH
373*4882a593Smuzhiyun #undef     ISSET_BH
374*4882a593Smuzhiyun #undef      WORD_BH
375*4882a593Smuzhiyun #undef UNALIGNED_BH
376*4882a593Smuzhiyun 
377*4882a593Smuzhiyun 
378*4882a593Smuzhiyun /*---------------------------------------------*/
379*4882a593Smuzhiyun /*--- The main, O(N^2 log(N)) sorting       ---*/
380*4882a593Smuzhiyun /*--- algorithm.  Faster for "normal"       ---*/
381*4882a593Smuzhiyun /*--- non-repetitive blocks.                ---*/
382*4882a593Smuzhiyun /*---------------------------------------------*/
383*4882a593Smuzhiyun 
384*4882a593Smuzhiyun /*---------------------------------------------*/
385*4882a593Smuzhiyun static
386*4882a593Smuzhiyun __inline__
mainGtU(UInt32 i1,UInt32 i2,UChar * block,UInt16 * quadrant,UInt32 nblock,Int32 * budget)387*4882a593Smuzhiyun Bool mainGtU ( UInt32  i1,
388*4882a593Smuzhiyun                UInt32  i2,
389*4882a593Smuzhiyun                UChar*  block,
390*4882a593Smuzhiyun                UInt16* quadrant,
391*4882a593Smuzhiyun                UInt32  nblock,
392*4882a593Smuzhiyun                Int32*  budget )
393*4882a593Smuzhiyun {
394*4882a593Smuzhiyun    Int32  k;
395*4882a593Smuzhiyun    UChar  c1, c2;
396*4882a593Smuzhiyun    UInt16 s1, s2;
397*4882a593Smuzhiyun 
398*4882a593Smuzhiyun    AssertD ( i1 != i2, "mainGtU" );
399*4882a593Smuzhiyun    /* 1 */
400*4882a593Smuzhiyun    c1 = block[i1]; c2 = block[i2];
401*4882a593Smuzhiyun    if (c1 != c2) return (c1 > c2);
402*4882a593Smuzhiyun    i1++; i2++;
403*4882a593Smuzhiyun    /* 2 */
404*4882a593Smuzhiyun    c1 = block[i1]; c2 = block[i2];
405*4882a593Smuzhiyun    if (c1 != c2) return (c1 > c2);
406*4882a593Smuzhiyun    i1++; i2++;
407*4882a593Smuzhiyun    /* 3 */
408*4882a593Smuzhiyun    c1 = block[i1]; c2 = block[i2];
409*4882a593Smuzhiyun    if (c1 != c2) return (c1 > c2);
410*4882a593Smuzhiyun    i1++; i2++;
411*4882a593Smuzhiyun    /* 4 */
412*4882a593Smuzhiyun    c1 = block[i1]; c2 = block[i2];
413*4882a593Smuzhiyun    if (c1 != c2) return (c1 > c2);
414*4882a593Smuzhiyun    i1++; i2++;
415*4882a593Smuzhiyun    /* 5 */
416*4882a593Smuzhiyun    c1 = block[i1]; c2 = block[i2];
417*4882a593Smuzhiyun    if (c1 != c2) return (c1 > c2);
418*4882a593Smuzhiyun    i1++; i2++;
419*4882a593Smuzhiyun    /* 6 */
420*4882a593Smuzhiyun    c1 = block[i1]; c2 = block[i2];
421*4882a593Smuzhiyun    if (c1 != c2) return (c1 > c2);
422*4882a593Smuzhiyun    i1++; i2++;
423*4882a593Smuzhiyun    /* 7 */
424*4882a593Smuzhiyun    c1 = block[i1]; c2 = block[i2];
425*4882a593Smuzhiyun    if (c1 != c2) return (c1 > c2);
426*4882a593Smuzhiyun    i1++; i2++;
427*4882a593Smuzhiyun    /* 8 */
428*4882a593Smuzhiyun    c1 = block[i1]; c2 = block[i2];
429*4882a593Smuzhiyun    if (c1 != c2) return (c1 > c2);
430*4882a593Smuzhiyun    i1++; i2++;
431*4882a593Smuzhiyun    /* 9 */
432*4882a593Smuzhiyun    c1 = block[i1]; c2 = block[i2];
433*4882a593Smuzhiyun    if (c1 != c2) return (c1 > c2);
434*4882a593Smuzhiyun    i1++; i2++;
435*4882a593Smuzhiyun    /* 10 */
436*4882a593Smuzhiyun    c1 = block[i1]; c2 = block[i2];
437*4882a593Smuzhiyun    if (c1 != c2) return (c1 > c2);
438*4882a593Smuzhiyun    i1++; i2++;
439*4882a593Smuzhiyun    /* 11 */
440*4882a593Smuzhiyun    c1 = block[i1]; c2 = block[i2];
441*4882a593Smuzhiyun    if (c1 != c2) return (c1 > c2);
442*4882a593Smuzhiyun    i1++; i2++;
443*4882a593Smuzhiyun    /* 12 */
444*4882a593Smuzhiyun    c1 = block[i1]; c2 = block[i2];
445*4882a593Smuzhiyun    if (c1 != c2) return (c1 > c2);
446*4882a593Smuzhiyun    i1++; i2++;
447*4882a593Smuzhiyun 
448*4882a593Smuzhiyun    k = nblock + 8;
449*4882a593Smuzhiyun 
450*4882a593Smuzhiyun    do {
451*4882a593Smuzhiyun       /* 1 */
452*4882a593Smuzhiyun       c1 = block[i1]; c2 = block[i2];
453*4882a593Smuzhiyun       if (c1 != c2) return (c1 > c2);
454*4882a593Smuzhiyun       s1 = quadrant[i1]; s2 = quadrant[i2];
455*4882a593Smuzhiyun       if (s1 != s2) return (s1 > s2);
456*4882a593Smuzhiyun       i1++; i2++;
457*4882a593Smuzhiyun       /* 2 */
458*4882a593Smuzhiyun       c1 = block[i1]; c2 = block[i2];
459*4882a593Smuzhiyun       if (c1 != c2) return (c1 > c2);
460*4882a593Smuzhiyun       s1 = quadrant[i1]; s2 = quadrant[i2];
461*4882a593Smuzhiyun       if (s1 != s2) return (s1 > s2);
462*4882a593Smuzhiyun       i1++; i2++;
463*4882a593Smuzhiyun       /* 3 */
464*4882a593Smuzhiyun       c1 = block[i1]; c2 = block[i2];
465*4882a593Smuzhiyun       if (c1 != c2) return (c1 > c2);
466*4882a593Smuzhiyun       s1 = quadrant[i1]; s2 = quadrant[i2];
467*4882a593Smuzhiyun       if (s1 != s2) return (s1 > s2);
468*4882a593Smuzhiyun       i1++; i2++;
469*4882a593Smuzhiyun       /* 4 */
470*4882a593Smuzhiyun       c1 = block[i1]; c2 = block[i2];
471*4882a593Smuzhiyun       if (c1 != c2) return (c1 > c2);
472*4882a593Smuzhiyun       s1 = quadrant[i1]; s2 = quadrant[i2];
473*4882a593Smuzhiyun       if (s1 != s2) return (s1 > s2);
474*4882a593Smuzhiyun       i1++; i2++;
475*4882a593Smuzhiyun       /* 5 */
476*4882a593Smuzhiyun       c1 = block[i1]; c2 = block[i2];
477*4882a593Smuzhiyun       if (c1 != c2) return (c1 > c2);
478*4882a593Smuzhiyun       s1 = quadrant[i1]; s2 = quadrant[i2];
479*4882a593Smuzhiyun       if (s1 != s2) return (s1 > s2);
480*4882a593Smuzhiyun       i1++; i2++;
481*4882a593Smuzhiyun       /* 6 */
482*4882a593Smuzhiyun       c1 = block[i1]; c2 = block[i2];
483*4882a593Smuzhiyun       if (c1 != c2) return (c1 > c2);
484*4882a593Smuzhiyun       s1 = quadrant[i1]; s2 = quadrant[i2];
485*4882a593Smuzhiyun       if (s1 != s2) return (s1 > s2);
486*4882a593Smuzhiyun       i1++; i2++;
487*4882a593Smuzhiyun       /* 7 */
488*4882a593Smuzhiyun       c1 = block[i1]; c2 = block[i2];
489*4882a593Smuzhiyun       if (c1 != c2) return (c1 > c2);
490*4882a593Smuzhiyun       s1 = quadrant[i1]; s2 = quadrant[i2];
491*4882a593Smuzhiyun       if (s1 != s2) return (s1 > s2);
492*4882a593Smuzhiyun       i1++; i2++;
493*4882a593Smuzhiyun       /* 8 */
494*4882a593Smuzhiyun       c1 = block[i1]; c2 = block[i2];
495*4882a593Smuzhiyun       if (c1 != c2) return (c1 > c2);
496*4882a593Smuzhiyun       s1 = quadrant[i1]; s2 = quadrant[i2];
497*4882a593Smuzhiyun       if (s1 != s2) return (s1 > s2);
498*4882a593Smuzhiyun       i1++; i2++;
499*4882a593Smuzhiyun 
500*4882a593Smuzhiyun       if (i1 >= nblock) i1 -= nblock;
501*4882a593Smuzhiyun       if (i2 >= nblock) i2 -= nblock;
502*4882a593Smuzhiyun 
503*4882a593Smuzhiyun       k -= 8;
504*4882a593Smuzhiyun       (*budget)--;
505*4882a593Smuzhiyun    }
506*4882a593Smuzhiyun       while (k >= 0);
507*4882a593Smuzhiyun 
508*4882a593Smuzhiyun    return False;
509*4882a593Smuzhiyun }
510*4882a593Smuzhiyun 
511*4882a593Smuzhiyun 
512*4882a593Smuzhiyun /*---------------------------------------------*/
513*4882a593Smuzhiyun /*--
514*4882a593Smuzhiyun    Knuth's increments seem to work better
515*4882a593Smuzhiyun    than Incerpi-Sedgewick here.  Possibly
516*4882a593Smuzhiyun    because the number of elems to sort is
517*4882a593Smuzhiyun    usually small, typically <= 20.
518*4882a593Smuzhiyun --*/
519*4882a593Smuzhiyun static
520*4882a593Smuzhiyun Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
521*4882a593Smuzhiyun                    9841, 29524, 88573, 265720,
522*4882a593Smuzhiyun                    797161, 2391484 };
523*4882a593Smuzhiyun 
524*4882a593Smuzhiyun static
mainSimpleSort(UInt32 * ptr,UChar * block,UInt16 * quadrant,Int32 nblock,Int32 lo,Int32 hi,Int32 d,Int32 * budget)525*4882a593Smuzhiyun void mainSimpleSort ( UInt32* ptr,
526*4882a593Smuzhiyun                       UChar*  block,
527*4882a593Smuzhiyun                       UInt16* quadrant,
528*4882a593Smuzhiyun                       Int32   nblock,
529*4882a593Smuzhiyun                       Int32   lo,
530*4882a593Smuzhiyun                       Int32   hi,
531*4882a593Smuzhiyun                       Int32   d,
532*4882a593Smuzhiyun                       Int32*  budget )
533*4882a593Smuzhiyun {
534*4882a593Smuzhiyun    Int32 i, j, h, bigN, hp;
535*4882a593Smuzhiyun    UInt32 v;
536*4882a593Smuzhiyun 
537*4882a593Smuzhiyun    bigN = hi - lo + 1;
538*4882a593Smuzhiyun    if (bigN < 2) return;
539*4882a593Smuzhiyun 
540*4882a593Smuzhiyun    hp = 0;
541*4882a593Smuzhiyun    while (incs[hp] < bigN) hp++;
542*4882a593Smuzhiyun    hp--;
543*4882a593Smuzhiyun 
544*4882a593Smuzhiyun    for (; hp >= 0; hp--) {
545*4882a593Smuzhiyun       h = incs[hp];
546*4882a593Smuzhiyun 
547*4882a593Smuzhiyun       i = lo + h;
548*4882a593Smuzhiyun       while (True) {
549*4882a593Smuzhiyun 
550*4882a593Smuzhiyun          /*-- copy 1 --*/
551*4882a593Smuzhiyun          if (i > hi) break;
552*4882a593Smuzhiyun          v = ptr[i];
553*4882a593Smuzhiyun          j = i;
554*4882a593Smuzhiyun          while ( mainGtU (
555*4882a593Smuzhiyun                     ptr[j-h]+d, v+d, block, quadrant, nblock, budget
556*4882a593Smuzhiyun                  ) ) {
557*4882a593Smuzhiyun             ptr[j] = ptr[j-h];
558*4882a593Smuzhiyun             j = j - h;
559*4882a593Smuzhiyun             if (j <= (lo + h - 1)) break;
560*4882a593Smuzhiyun          }
561*4882a593Smuzhiyun          ptr[j] = v;
562*4882a593Smuzhiyun          i++;
563*4882a593Smuzhiyun 
564*4882a593Smuzhiyun          /*-- copy 2 --*/
565*4882a593Smuzhiyun          if (i > hi) break;
566*4882a593Smuzhiyun          v = ptr[i];
567*4882a593Smuzhiyun          j = i;
568*4882a593Smuzhiyun          while ( mainGtU (
569*4882a593Smuzhiyun                     ptr[j-h]+d, v+d, block, quadrant, nblock, budget
570*4882a593Smuzhiyun                  ) ) {
571*4882a593Smuzhiyun             ptr[j] = ptr[j-h];
572*4882a593Smuzhiyun             j = j - h;
573*4882a593Smuzhiyun             if (j <= (lo + h - 1)) break;
574*4882a593Smuzhiyun          }
575*4882a593Smuzhiyun          ptr[j] = v;
576*4882a593Smuzhiyun          i++;
577*4882a593Smuzhiyun 
578*4882a593Smuzhiyun          /*-- copy 3 --*/
579*4882a593Smuzhiyun          if (i > hi) break;
580*4882a593Smuzhiyun          v = ptr[i];
581*4882a593Smuzhiyun          j = i;
582*4882a593Smuzhiyun          while ( mainGtU (
583*4882a593Smuzhiyun                     ptr[j-h]+d, v+d, block, quadrant, nblock, budget
584*4882a593Smuzhiyun                  ) ) {
585*4882a593Smuzhiyun             ptr[j] = ptr[j-h];
586*4882a593Smuzhiyun             j = j - h;
587*4882a593Smuzhiyun             if (j <= (lo + h - 1)) break;
588*4882a593Smuzhiyun          }
589*4882a593Smuzhiyun          ptr[j] = v;
590*4882a593Smuzhiyun          i++;
591*4882a593Smuzhiyun 
592*4882a593Smuzhiyun          if (*budget < 0) return;
593*4882a593Smuzhiyun       }
594*4882a593Smuzhiyun    }
595*4882a593Smuzhiyun }
596*4882a593Smuzhiyun 
597*4882a593Smuzhiyun 
598*4882a593Smuzhiyun /*---------------------------------------------*/
599*4882a593Smuzhiyun /*--
600*4882a593Smuzhiyun    The following is an implementation of
601*4882a593Smuzhiyun    an elegant 3-way quicksort for strings,
602*4882a593Smuzhiyun    described in a paper "Fast Algorithms for
603*4882a593Smuzhiyun    Sorting and Searching Strings", by Robert
604*4882a593Smuzhiyun    Sedgewick and Jon L. Bentley.
605*4882a593Smuzhiyun --*/
606*4882a593Smuzhiyun 
607*4882a593Smuzhiyun #define mswap(zz1, zz2) \
608*4882a593Smuzhiyun    { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
609*4882a593Smuzhiyun 
610*4882a593Smuzhiyun #define mvswap(zzp1, zzp2, zzn)       \
611*4882a593Smuzhiyun {                                     \
612*4882a593Smuzhiyun    Int32 yyp1 = (zzp1);               \
613*4882a593Smuzhiyun    Int32 yyp2 = (zzp2);               \
614*4882a593Smuzhiyun    Int32 yyn  = (zzn);                \
615*4882a593Smuzhiyun    while (yyn > 0) {                  \
616*4882a593Smuzhiyun       mswap(ptr[yyp1], ptr[yyp2]);    \
617*4882a593Smuzhiyun       yyp1++; yyp2++; yyn--;          \
618*4882a593Smuzhiyun    }                                  \
619*4882a593Smuzhiyun }
620*4882a593Smuzhiyun 
621*4882a593Smuzhiyun static
622*4882a593Smuzhiyun __inline__
mmed3(UChar a,UChar b,UChar c)623*4882a593Smuzhiyun UChar mmed3 ( UChar a, UChar b, UChar c )
624*4882a593Smuzhiyun {
625*4882a593Smuzhiyun    UChar t;
626*4882a593Smuzhiyun    if (a > b) { t = a; a = b; b = t; };
627*4882a593Smuzhiyun    if (b > c) {
628*4882a593Smuzhiyun       b = c;
629*4882a593Smuzhiyun       if (a > b) b = a;
630*4882a593Smuzhiyun    }
631*4882a593Smuzhiyun    return b;
632*4882a593Smuzhiyun }
633*4882a593Smuzhiyun 
634*4882a593Smuzhiyun #define mmin(a,b) ((a) < (b)) ? (a) : (b)
635*4882a593Smuzhiyun 
636*4882a593Smuzhiyun #define mpush(lz,hz,dz) { stackLo[sp] = lz; \
637*4882a593Smuzhiyun                           stackHi[sp] = hz; \
638*4882a593Smuzhiyun                           stackD [sp] = dz; \
639*4882a593Smuzhiyun                           sp++; }
640*4882a593Smuzhiyun 
641*4882a593Smuzhiyun #define mpop(lz,hz,dz) { sp--;             \
642*4882a593Smuzhiyun                          lz = stackLo[sp]; \
643*4882a593Smuzhiyun                          hz = stackHi[sp]; \
644*4882a593Smuzhiyun                          dz = stackD [sp]; }
645*4882a593Smuzhiyun 
646*4882a593Smuzhiyun 
647*4882a593Smuzhiyun #define mnextsize(az) (nextHi[az]-nextLo[az])
648*4882a593Smuzhiyun 
649*4882a593Smuzhiyun #define mnextswap(az,bz)                                        \
650*4882a593Smuzhiyun    { Int32 tz;                                                  \
651*4882a593Smuzhiyun      tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
652*4882a593Smuzhiyun      tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
653*4882a593Smuzhiyun      tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; }
654*4882a593Smuzhiyun 
655*4882a593Smuzhiyun 
656*4882a593Smuzhiyun #define MAIN_QSORT_SMALL_THRESH 20
657*4882a593Smuzhiyun #define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
658*4882a593Smuzhiyun #define MAIN_QSORT_STACK_SIZE 100
659*4882a593Smuzhiyun 
660*4882a593Smuzhiyun static
mainQSort3(UInt32 * ptr,UChar * block,UInt16 * quadrant,Int32 nblock,Int32 loSt,Int32 hiSt,Int32 dSt,Int32 * budget)661*4882a593Smuzhiyun void mainQSort3 ( UInt32* ptr,
662*4882a593Smuzhiyun                   UChar*  block,
663*4882a593Smuzhiyun                   UInt16* quadrant,
664*4882a593Smuzhiyun                   Int32   nblock,
665*4882a593Smuzhiyun                   Int32   loSt,
666*4882a593Smuzhiyun                   Int32   hiSt,
667*4882a593Smuzhiyun                   Int32   dSt,
668*4882a593Smuzhiyun                   Int32*  budget )
669*4882a593Smuzhiyun {
670*4882a593Smuzhiyun    Int32 unLo, unHi, ltLo, gtHi, n, m, med;
671*4882a593Smuzhiyun    Int32 sp, lo, hi, d;
672*4882a593Smuzhiyun 
673*4882a593Smuzhiyun    Int32 stackLo[MAIN_QSORT_STACK_SIZE];
674*4882a593Smuzhiyun    Int32 stackHi[MAIN_QSORT_STACK_SIZE];
675*4882a593Smuzhiyun    Int32 stackD [MAIN_QSORT_STACK_SIZE];
676*4882a593Smuzhiyun 
677*4882a593Smuzhiyun    Int32 nextLo[3];
678*4882a593Smuzhiyun    Int32 nextHi[3];
679*4882a593Smuzhiyun    Int32 nextD [3];
680*4882a593Smuzhiyun 
681*4882a593Smuzhiyun    sp = 0;
682*4882a593Smuzhiyun    mpush ( loSt, hiSt, dSt );
683*4882a593Smuzhiyun 
684*4882a593Smuzhiyun    while (sp > 0) {
685*4882a593Smuzhiyun 
686*4882a593Smuzhiyun       AssertH ( sp < MAIN_QSORT_STACK_SIZE - 2, 1001 );
687*4882a593Smuzhiyun 
688*4882a593Smuzhiyun       mpop ( lo, hi, d );
689*4882a593Smuzhiyun       if (hi - lo < MAIN_QSORT_SMALL_THRESH ||
690*4882a593Smuzhiyun           d > MAIN_QSORT_DEPTH_THRESH) {
691*4882a593Smuzhiyun          mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget );
692*4882a593Smuzhiyun          if (*budget < 0) return;
693*4882a593Smuzhiyun          continue;
694*4882a593Smuzhiyun       }
695*4882a593Smuzhiyun 
696*4882a593Smuzhiyun       med = (Int32)
697*4882a593Smuzhiyun             mmed3 ( block[ptr[ lo         ]+d],
698*4882a593Smuzhiyun                     block[ptr[ hi         ]+d],
699*4882a593Smuzhiyun                     block[ptr[ (lo+hi)>>1 ]+d] );
700*4882a593Smuzhiyun 
701*4882a593Smuzhiyun       unLo = ltLo = lo;
702*4882a593Smuzhiyun       unHi = gtHi = hi;
703*4882a593Smuzhiyun 
704*4882a593Smuzhiyun       while (True) {
705*4882a593Smuzhiyun          while (True) {
706*4882a593Smuzhiyun             if (unLo > unHi) break;
707*4882a593Smuzhiyun             n = ((Int32)block[ptr[unLo]+d]) - med;
708*4882a593Smuzhiyun             if (n == 0) {
709*4882a593Smuzhiyun                mswap(ptr[unLo], ptr[ltLo]);
710*4882a593Smuzhiyun                ltLo++; unLo++; continue;
711*4882a593Smuzhiyun             };
712*4882a593Smuzhiyun             if (n >  0) break;
713*4882a593Smuzhiyun             unLo++;
714*4882a593Smuzhiyun          }
715*4882a593Smuzhiyun          while (True) {
716*4882a593Smuzhiyun             if (unLo > unHi) break;
717*4882a593Smuzhiyun             n = ((Int32)block[ptr[unHi]+d]) - med;
718*4882a593Smuzhiyun             if (n == 0) {
719*4882a593Smuzhiyun                mswap(ptr[unHi], ptr[gtHi]);
720*4882a593Smuzhiyun                gtHi--; unHi--; continue;
721*4882a593Smuzhiyun             };
722*4882a593Smuzhiyun             if (n <  0) break;
723*4882a593Smuzhiyun             unHi--;
724*4882a593Smuzhiyun          }
725*4882a593Smuzhiyun          if (unLo > unHi) break;
726*4882a593Smuzhiyun          mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--;
727*4882a593Smuzhiyun       }
728*4882a593Smuzhiyun 
729*4882a593Smuzhiyun       AssertD ( unHi == unLo-1, "mainQSort3(2)" );
730*4882a593Smuzhiyun 
731*4882a593Smuzhiyun       if (gtHi < ltLo) {
732*4882a593Smuzhiyun          mpush(lo, hi, d+1 );
733*4882a593Smuzhiyun          continue;
734*4882a593Smuzhiyun       }
735*4882a593Smuzhiyun 
736*4882a593Smuzhiyun       n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n);
737*4882a593Smuzhiyun       m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m);
738*4882a593Smuzhiyun 
739*4882a593Smuzhiyun       n = lo + unLo - ltLo - 1;
740*4882a593Smuzhiyun       m = hi - (gtHi - unHi) + 1;
741*4882a593Smuzhiyun 
742*4882a593Smuzhiyun       nextLo[0] = lo;  nextHi[0] = n;   nextD[0] = d;
743*4882a593Smuzhiyun       nextLo[1] = m;   nextHi[1] = hi;  nextD[1] = d;
744*4882a593Smuzhiyun       nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
745*4882a593Smuzhiyun 
746*4882a593Smuzhiyun       if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
747*4882a593Smuzhiyun       if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
748*4882a593Smuzhiyun       if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
749*4882a593Smuzhiyun 
750*4882a593Smuzhiyun       AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" );
751*4882a593Smuzhiyun       AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" );
752*4882a593Smuzhiyun 
753*4882a593Smuzhiyun       mpush (nextLo[0], nextHi[0], nextD[0]);
754*4882a593Smuzhiyun       mpush (nextLo[1], nextHi[1], nextD[1]);
755*4882a593Smuzhiyun       mpush (nextLo[2], nextHi[2], nextD[2]);
756*4882a593Smuzhiyun    }
757*4882a593Smuzhiyun }
758*4882a593Smuzhiyun 
759*4882a593Smuzhiyun #undef mswap
760*4882a593Smuzhiyun #undef mvswap
761*4882a593Smuzhiyun #undef mpush
762*4882a593Smuzhiyun #undef mpop
763*4882a593Smuzhiyun #undef mmin
764*4882a593Smuzhiyun #undef mnextsize
765*4882a593Smuzhiyun #undef mnextswap
766*4882a593Smuzhiyun #undef MAIN_QSORT_SMALL_THRESH
767*4882a593Smuzhiyun #undef MAIN_QSORT_DEPTH_THRESH
768*4882a593Smuzhiyun #undef MAIN_QSORT_STACK_SIZE
769*4882a593Smuzhiyun 
770*4882a593Smuzhiyun 
771*4882a593Smuzhiyun /*---------------------------------------------*/
772*4882a593Smuzhiyun /* Pre:
773*4882a593Smuzhiyun       nblock > N_OVERSHOOT
774*4882a593Smuzhiyun       block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
775*4882a593Smuzhiyun       ((UChar*)block32) [0 .. nblock-1] holds block
776*4882a593Smuzhiyun       ptr exists for [0 .. nblock-1]
777*4882a593Smuzhiyun 
778*4882a593Smuzhiyun    Post:
779*4882a593Smuzhiyun       ((UChar*)block32) [0 .. nblock-1] holds block
780*4882a593Smuzhiyun       All other areas of block32 destroyed
781*4882a593Smuzhiyun       ftab [0 .. 65536 ] destroyed
782*4882a593Smuzhiyun       ptr [0 .. nblock-1] holds sorted order
783*4882a593Smuzhiyun       if (*budget < 0), sorting was abandoned
784*4882a593Smuzhiyun */
785*4882a593Smuzhiyun 
786*4882a593Smuzhiyun #define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
787*4882a593Smuzhiyun #define SETMASK (1 << 21)
788*4882a593Smuzhiyun #define CLEARMASK (~(SETMASK))
789*4882a593Smuzhiyun 
790*4882a593Smuzhiyun static
mainSort(UInt32 * ptr,UChar * block,UInt16 * quadrant,UInt32 * ftab,Int32 nblock,Int32 verb,Int32 * budget)791*4882a593Smuzhiyun void mainSort ( UInt32* ptr,
792*4882a593Smuzhiyun                 UChar*  block,
793*4882a593Smuzhiyun                 UInt16* quadrant,
794*4882a593Smuzhiyun                 UInt32* ftab,
795*4882a593Smuzhiyun                 Int32   nblock,
796*4882a593Smuzhiyun                 Int32   verb,
797*4882a593Smuzhiyun                 Int32*  budget )
798*4882a593Smuzhiyun {
799*4882a593Smuzhiyun    Int32  i, j, k, ss, sb;
800*4882a593Smuzhiyun    Int32  runningOrder[256];
801*4882a593Smuzhiyun    Bool   bigDone[256];
802*4882a593Smuzhiyun    Int32  copyStart[256];
803*4882a593Smuzhiyun    Int32  copyEnd  [256];
804*4882a593Smuzhiyun    UChar  c1;
805*4882a593Smuzhiyun    Int32  numQSorted;
806*4882a593Smuzhiyun    UInt16 s;
807*4882a593Smuzhiyun    if (verb >= 4) VPrintf0 ( "        main sort initialise ...\n" );
808*4882a593Smuzhiyun 
809*4882a593Smuzhiyun    /*-- set up the 2-byte frequency table --*/
810*4882a593Smuzhiyun    for (i = 65536; i >= 0; i--) ftab[i] = 0;
811*4882a593Smuzhiyun 
812*4882a593Smuzhiyun    j = block[0] << 8;
813*4882a593Smuzhiyun    i = nblock-1;
814*4882a593Smuzhiyun    for (; i >= 3; i -= 4) {
815*4882a593Smuzhiyun       quadrant[i] = 0;
816*4882a593Smuzhiyun       j = (j >> 8) | ( ((UInt16)block[i]) << 8);
817*4882a593Smuzhiyun       ftab[j]++;
818*4882a593Smuzhiyun       quadrant[i-1] = 0;
819*4882a593Smuzhiyun       j = (j >> 8) | ( ((UInt16)block[i-1]) << 8);
820*4882a593Smuzhiyun       ftab[j]++;
821*4882a593Smuzhiyun       quadrant[i-2] = 0;
822*4882a593Smuzhiyun       j = (j >> 8) | ( ((UInt16)block[i-2]) << 8);
823*4882a593Smuzhiyun       ftab[j]++;
824*4882a593Smuzhiyun       quadrant[i-3] = 0;
825*4882a593Smuzhiyun       j = (j >> 8) | ( ((UInt16)block[i-3]) << 8);
826*4882a593Smuzhiyun       ftab[j]++;
827*4882a593Smuzhiyun    }
828*4882a593Smuzhiyun    for (; i >= 0; i--) {
829*4882a593Smuzhiyun       quadrant[i] = 0;
830*4882a593Smuzhiyun       j = (j >> 8) | ( ((UInt16)block[i]) << 8);
831*4882a593Smuzhiyun       ftab[j]++;
832*4882a593Smuzhiyun    }
833*4882a593Smuzhiyun 
834*4882a593Smuzhiyun    /*-- (emphasises close relationship of block & quadrant) --*/
835*4882a593Smuzhiyun    for (i = 0; i < BZ_N_OVERSHOOT; i++) {
836*4882a593Smuzhiyun       block   [nblock+i] = block[i];
837*4882a593Smuzhiyun       quadrant[nblock+i] = 0;
838*4882a593Smuzhiyun    }
839*4882a593Smuzhiyun 
840*4882a593Smuzhiyun    if (verb >= 4) VPrintf0 ( "        bucket sorting ...\n" );
841*4882a593Smuzhiyun 
842*4882a593Smuzhiyun    /*-- Complete the initial radix sort --*/
843*4882a593Smuzhiyun    for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
844*4882a593Smuzhiyun 
845*4882a593Smuzhiyun    s = block[0] << 8;
846*4882a593Smuzhiyun    i = nblock-1;
847*4882a593Smuzhiyun    for (; i >= 3; i -= 4) {
848*4882a593Smuzhiyun       s = (s >> 8) | (block[i] << 8);
849*4882a593Smuzhiyun       j = ftab[s] -1;
850*4882a593Smuzhiyun       ftab[s] = j;
851*4882a593Smuzhiyun       ptr[j] = i;
852*4882a593Smuzhiyun       s = (s >> 8) | (block[i-1] << 8);
853*4882a593Smuzhiyun       j = ftab[s] -1;
854*4882a593Smuzhiyun       ftab[s] = j;
855*4882a593Smuzhiyun       ptr[j] = i-1;
856*4882a593Smuzhiyun       s = (s >> 8) | (block[i-2] << 8);
857*4882a593Smuzhiyun       j = ftab[s] -1;
858*4882a593Smuzhiyun       ftab[s] = j;
859*4882a593Smuzhiyun       ptr[j] = i-2;
860*4882a593Smuzhiyun       s = (s >> 8) | (block[i-3] << 8);
861*4882a593Smuzhiyun       j = ftab[s] -1;
862*4882a593Smuzhiyun       ftab[s] = j;
863*4882a593Smuzhiyun       ptr[j] = i-3;
864*4882a593Smuzhiyun    }
865*4882a593Smuzhiyun    for (; i >= 0; i--) {
866*4882a593Smuzhiyun       s = (s >> 8) | (block[i] << 8);
867*4882a593Smuzhiyun       j = ftab[s] -1;
868*4882a593Smuzhiyun       ftab[s] = j;
869*4882a593Smuzhiyun       ptr[j] = i;
870*4882a593Smuzhiyun    }
871*4882a593Smuzhiyun 
872*4882a593Smuzhiyun    /*--
873*4882a593Smuzhiyun       Now ftab contains the first loc of every small bucket.
874*4882a593Smuzhiyun       Calculate the running order, from smallest to largest
875*4882a593Smuzhiyun       big bucket.
876*4882a593Smuzhiyun    --*/
877*4882a593Smuzhiyun    for (i = 0; i <= 255; i++) {
878*4882a593Smuzhiyun       bigDone     [i] = False;
879*4882a593Smuzhiyun       runningOrder[i] = i;
880*4882a593Smuzhiyun    }
881*4882a593Smuzhiyun 
882*4882a593Smuzhiyun    {
883*4882a593Smuzhiyun       Int32 vv;
884*4882a593Smuzhiyun       Int32 h = 1;
885*4882a593Smuzhiyun       do h = 3 * h + 1; while (h <= 256);
886*4882a593Smuzhiyun       do {
887*4882a593Smuzhiyun          h = h / 3;
888*4882a593Smuzhiyun          for (i = h; i <= 255; i++) {
889*4882a593Smuzhiyun             vv = runningOrder[i];
890*4882a593Smuzhiyun             j = i;
891*4882a593Smuzhiyun             while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) {
892*4882a593Smuzhiyun                runningOrder[j] = runningOrder[j-h];
893*4882a593Smuzhiyun                j = j - h;
894*4882a593Smuzhiyun                if (j <= (h - 1)) goto zero;
895*4882a593Smuzhiyun             }
896*4882a593Smuzhiyun             zero:
897*4882a593Smuzhiyun             runningOrder[j] = vv;
898*4882a593Smuzhiyun          }
899*4882a593Smuzhiyun       } while (h != 1);
900*4882a593Smuzhiyun    }
901*4882a593Smuzhiyun 
902*4882a593Smuzhiyun    /*--
903*4882a593Smuzhiyun       The main sorting loop.
904*4882a593Smuzhiyun    --*/
905*4882a593Smuzhiyun 
906*4882a593Smuzhiyun    numQSorted = 0;
907*4882a593Smuzhiyun 
908*4882a593Smuzhiyun    for (i = 0; i <= 255; i++) {
909*4882a593Smuzhiyun 
910*4882a593Smuzhiyun       /*--
911*4882a593Smuzhiyun          Process big buckets, starting with the least full.
912*4882a593Smuzhiyun          Basically this is a 3-step process in which we call
913*4882a593Smuzhiyun          mainQSort3 to sort the small buckets [ss, j], but
914*4882a593Smuzhiyun          also make a big effort to avoid the calls if we can.
915*4882a593Smuzhiyun       --*/
916*4882a593Smuzhiyun       ss = runningOrder[i];
917*4882a593Smuzhiyun 
918*4882a593Smuzhiyun       /*--
919*4882a593Smuzhiyun          Step 1:
920*4882a593Smuzhiyun          Complete the big bucket [ss] by quicksorting
921*4882a593Smuzhiyun          any unsorted small buckets [ss, j], for j != ss.
922*4882a593Smuzhiyun          Hopefully previous pointer-scanning phases have already
923*4882a593Smuzhiyun          completed many of the small buckets [ss, j], so
924*4882a593Smuzhiyun          we don't have to sort them at all.
925*4882a593Smuzhiyun       --*/
926*4882a593Smuzhiyun       for (j = 0; j <= 255; j++) {
927*4882a593Smuzhiyun          if (j != ss) {
928*4882a593Smuzhiyun             sb = (ss << 8) + j;
929*4882a593Smuzhiyun             if ( ! (ftab[sb] & SETMASK) ) {
930*4882a593Smuzhiyun                Int32 lo = ftab[sb]   & CLEARMASK;
931*4882a593Smuzhiyun                Int32 hi = (ftab[sb+1] & CLEARMASK) - 1;
932*4882a593Smuzhiyun                if (hi > lo) {
933*4882a593Smuzhiyun                   if (verb >= 4)
934*4882a593Smuzhiyun                      VPrintf4 ( "        qsort [0x%x, 0x%x]   "
935*4882a593Smuzhiyun                                 "done %d   this %d\n",
936*4882a593Smuzhiyun                                 ss, j, numQSorted, hi - lo + 1 );
937*4882a593Smuzhiyun                   mainQSort3 (
938*4882a593Smuzhiyun                      ptr, block, quadrant, nblock,
939*4882a593Smuzhiyun                      lo, hi, BZ_N_RADIX, budget
940*4882a593Smuzhiyun                   );
941*4882a593Smuzhiyun                   numQSorted += (hi - lo + 1);
942*4882a593Smuzhiyun                   if (*budget < 0) return;
943*4882a593Smuzhiyun                }
944*4882a593Smuzhiyun             }
945*4882a593Smuzhiyun             ftab[sb] |= SETMASK;
946*4882a593Smuzhiyun          }
947*4882a593Smuzhiyun       }
948*4882a593Smuzhiyun 
949*4882a593Smuzhiyun       AssertH ( !bigDone[ss], 1006 );
950*4882a593Smuzhiyun 
951*4882a593Smuzhiyun       /*--
952*4882a593Smuzhiyun          Step 2:
953*4882a593Smuzhiyun          Now scan this big bucket [ss] so as to synthesise the
954*4882a593Smuzhiyun          sorted order for small buckets [t, ss] for all t,
955*4882a593Smuzhiyun          including, magically, the bucket [ss,ss] too.
956*4882a593Smuzhiyun          This will avoid doing Real Work in subsequent Step 1's.
957*4882a593Smuzhiyun       --*/
958*4882a593Smuzhiyun       {
959*4882a593Smuzhiyun          for (j = 0; j <= 255; j++) {
960*4882a593Smuzhiyun             copyStart[j] =  ftab[(j << 8) + ss]     & CLEARMASK;
961*4882a593Smuzhiyun             copyEnd  [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
962*4882a593Smuzhiyun          }
963*4882a593Smuzhiyun          for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
964*4882a593Smuzhiyun             k = ptr[j]-1; if (k < 0) k += nblock;
965*4882a593Smuzhiyun             c1 = block[k];
966*4882a593Smuzhiyun             if (!bigDone[c1])
967*4882a593Smuzhiyun                ptr[ copyStart[c1]++ ] = k;
968*4882a593Smuzhiyun          }
969*4882a593Smuzhiyun          for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
970*4882a593Smuzhiyun             k = ptr[j]-1; if (k < 0) k += nblock;
971*4882a593Smuzhiyun             c1 = block[k];
972*4882a593Smuzhiyun             if (!bigDone[c1])
973*4882a593Smuzhiyun                ptr[ copyEnd[c1]-- ] = k;
974*4882a593Smuzhiyun          }
975*4882a593Smuzhiyun       }
976*4882a593Smuzhiyun 
977*4882a593Smuzhiyun       AssertH ( (copyStart[ss]-1 == copyEnd[ss])
978*4882a593Smuzhiyun                 ||
979*4882a593Smuzhiyun                 /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1.
980*4882a593Smuzhiyun                    Necessity for this case is demonstrated by compressing
981*4882a593Smuzhiyun                    a sequence of approximately 48.5 million of character
982*4882a593Smuzhiyun                    251; 1.0.0/1.0.1 will then die here. */
983*4882a593Smuzhiyun                 (copyStart[ss] == 0 && copyEnd[ss] == nblock-1),
984*4882a593Smuzhiyun                 1007 )
985*4882a593Smuzhiyun 
986*4882a593Smuzhiyun       for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
987*4882a593Smuzhiyun 
988*4882a593Smuzhiyun       /*--
989*4882a593Smuzhiyun          Step 3:
990*4882a593Smuzhiyun          The [ss] big bucket is now done.  Record this fact,
991*4882a593Smuzhiyun          and update the quadrant descriptors.  Remember to
992*4882a593Smuzhiyun          update quadrants in the overshoot area too, if
993*4882a593Smuzhiyun          necessary.  The "if (i < 255)" test merely skips
994*4882a593Smuzhiyun          this updating for the last bucket processed, since
995*4882a593Smuzhiyun          updating for the last bucket is pointless.
996*4882a593Smuzhiyun 
997*4882a593Smuzhiyun          The quadrant array provides a way to incrementally
998*4882a593Smuzhiyun          cache sort orderings, as they appear, so as to
999*4882a593Smuzhiyun          make subsequent comparisons in fullGtU() complete
1000*4882a593Smuzhiyun          faster.  For repetitive blocks this makes a big
1001*4882a593Smuzhiyun          difference (but not big enough to be able to avoid
1002*4882a593Smuzhiyun          the fallback sorting mechanism, exponential radix sort).
1003*4882a593Smuzhiyun 
1004*4882a593Smuzhiyun          The precise meaning is: at all times:
1005*4882a593Smuzhiyun 
1006*4882a593Smuzhiyun             for 0 <= i < nblock and 0 <= j <= nblock
1007*4882a593Smuzhiyun 
1008*4882a593Smuzhiyun             if block[i] != block[j],
1009*4882a593Smuzhiyun 
1010*4882a593Smuzhiyun                then the relative values of quadrant[i] and
1011*4882a593Smuzhiyun                     quadrant[j] are meaningless.
1012*4882a593Smuzhiyun 
1013*4882a593Smuzhiyun                else {
1014*4882a593Smuzhiyun                   if quadrant[i] < quadrant[j]
1015*4882a593Smuzhiyun                      then the string starting at i lexicographically
1016*4882a593Smuzhiyun                      precedes the string starting at j
1017*4882a593Smuzhiyun 
1018*4882a593Smuzhiyun                   else if quadrant[i] > quadrant[j]
1019*4882a593Smuzhiyun                      then the string starting at j lexicographically
1020*4882a593Smuzhiyun                      precedes the string starting at i
1021*4882a593Smuzhiyun 
1022*4882a593Smuzhiyun                   else
1023*4882a593Smuzhiyun                      the relative ordering of the strings starting
1024*4882a593Smuzhiyun                      at i and j has not yet been determined.
1025*4882a593Smuzhiyun                }
1026*4882a593Smuzhiyun       --*/
1027*4882a593Smuzhiyun       bigDone[ss] = True;
1028*4882a593Smuzhiyun 
1029*4882a593Smuzhiyun       if (i < 255) {
1030*4882a593Smuzhiyun          Int32 bbStart  = ftab[ss << 8] & CLEARMASK;
1031*4882a593Smuzhiyun          Int32 bbSize   = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
1032*4882a593Smuzhiyun          Int32 shifts   = 0;
1033*4882a593Smuzhiyun 
1034*4882a593Smuzhiyun          while ((bbSize >> shifts) > 65534) shifts++;
1035*4882a593Smuzhiyun 
1036*4882a593Smuzhiyun          for (j = bbSize-1; j >= 0; j--) {
1037*4882a593Smuzhiyun             Int32 a2update     = ptr[bbStart + j];
1038*4882a593Smuzhiyun             UInt16 qVal        = (UInt16)(j >> shifts);
1039*4882a593Smuzhiyun             quadrant[a2update] = qVal;
1040*4882a593Smuzhiyun             if (a2update < BZ_N_OVERSHOOT)
1041*4882a593Smuzhiyun                quadrant[a2update + nblock] = qVal;
1042*4882a593Smuzhiyun          }
1043*4882a593Smuzhiyun          AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 );
1044*4882a593Smuzhiyun       }
1045*4882a593Smuzhiyun 
1046*4882a593Smuzhiyun    }
1047*4882a593Smuzhiyun 
1048*4882a593Smuzhiyun    if (verb >= 4)
1049*4882a593Smuzhiyun       VPrintf3 ( "        %d pointers, %d sorted, %d scanned\n",
1050*4882a593Smuzhiyun                  nblock, numQSorted, nblock - numQSorted );
1051*4882a593Smuzhiyun }
1052*4882a593Smuzhiyun 
1053*4882a593Smuzhiyun #undef BIGFREQ
1054*4882a593Smuzhiyun #undef SETMASK
1055*4882a593Smuzhiyun #undef CLEARMASK
1056*4882a593Smuzhiyun 
1057*4882a593Smuzhiyun 
1058*4882a593Smuzhiyun /*---------------------------------------------*/
1059*4882a593Smuzhiyun /* Pre:
1060*4882a593Smuzhiyun       nblock > 0
1061*4882a593Smuzhiyun       arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
1062*4882a593Smuzhiyun       ((UChar*)arr2)  [0 .. nblock-1] holds block
1063*4882a593Smuzhiyun       arr1 exists for [0 .. nblock-1]
1064*4882a593Smuzhiyun 
1065*4882a593Smuzhiyun    Post:
1066*4882a593Smuzhiyun       ((UChar*)arr2) [0 .. nblock-1] holds block
1067*4882a593Smuzhiyun       All other areas of block destroyed
1068*4882a593Smuzhiyun       ftab [ 0 .. 65536 ] destroyed
1069*4882a593Smuzhiyun       arr1 [0 .. nblock-1] holds sorted order
1070*4882a593Smuzhiyun */
BZ2_blockSort(EState * s)1071*4882a593Smuzhiyun void BZ2_blockSort ( EState* s )
1072*4882a593Smuzhiyun {
1073*4882a593Smuzhiyun    UInt32* ptr    = s->ptr;
1074*4882a593Smuzhiyun    UChar*  block  = s->block;
1075*4882a593Smuzhiyun    UInt32* ftab   = s->ftab;
1076*4882a593Smuzhiyun    Int32   nblock = s->nblock;
1077*4882a593Smuzhiyun    Int32   verb   = s->verbosity;
1078*4882a593Smuzhiyun    Int32   wfact  = s->workFactor;
1079*4882a593Smuzhiyun    UInt16* quadrant;
1080*4882a593Smuzhiyun    Int32   budget;
1081*4882a593Smuzhiyun    Int32   budgetInit;
1082*4882a593Smuzhiyun    Int32   i;
1083*4882a593Smuzhiyun 
1084*4882a593Smuzhiyun    if (nblock < 10000) {
1085*4882a593Smuzhiyun       fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
1086*4882a593Smuzhiyun    } else {
1087*4882a593Smuzhiyun       /* Calculate the location for quadrant, remembering to get
1088*4882a593Smuzhiyun          the alignment right.  Assumes that &(block[0]) is at least
1089*4882a593Smuzhiyun          2-byte aligned -- this should be ok since block is really
1090*4882a593Smuzhiyun          the first section of arr2.
1091*4882a593Smuzhiyun       */
1092*4882a593Smuzhiyun       i = nblock+BZ_N_OVERSHOOT;
1093*4882a593Smuzhiyun       if (i & 1) i++;
1094*4882a593Smuzhiyun       quadrant = (UInt16*)(&(block[i]));
1095*4882a593Smuzhiyun 
1096*4882a593Smuzhiyun       /* (wfact-1) / 3 puts the default-factor-30
1097*4882a593Smuzhiyun          transition point at very roughly the same place as
1098*4882a593Smuzhiyun          with v0.1 and v0.9.0.
1099*4882a593Smuzhiyun          Not that it particularly matters any more, since the
1100*4882a593Smuzhiyun          resulting compressed stream is now the same regardless
1101*4882a593Smuzhiyun          of whether or not we use the main sort or fallback sort.
1102*4882a593Smuzhiyun       */
1103*4882a593Smuzhiyun       if (wfact < 1  ) wfact = 1;
1104*4882a593Smuzhiyun       if (wfact > 100) wfact = 100;
1105*4882a593Smuzhiyun       budgetInit = nblock * ((wfact-1) / 3);
1106*4882a593Smuzhiyun       budget = budgetInit;
1107*4882a593Smuzhiyun 
1108*4882a593Smuzhiyun       mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget );
1109*4882a593Smuzhiyun       if (verb >= 3)
1110*4882a593Smuzhiyun          VPrintf3 ( "      %d work, %d block, ratio %5.2f\n",
1111*4882a593Smuzhiyun                     budgetInit - budget,
1112*4882a593Smuzhiyun                     nblock,
1113*4882a593Smuzhiyun                     (float)(budgetInit - budget) /
1114*4882a593Smuzhiyun                     (float)(nblock==0 ? 1 : nblock) );
1115*4882a593Smuzhiyun       if (budget < 0) {
1116*4882a593Smuzhiyun          if (verb >= 2)
1117*4882a593Smuzhiyun             VPrintf0 ( "    too repetitive; using fallback"
1118*4882a593Smuzhiyun                        " sorting algorithm\n" );
1119*4882a593Smuzhiyun          fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
1120*4882a593Smuzhiyun       }
1121*4882a593Smuzhiyun    }
1122*4882a593Smuzhiyun 
1123*4882a593Smuzhiyun    s->origPtr = -1;
1124*4882a593Smuzhiyun    for (i = 0; i < s->nblock; i++)
1125*4882a593Smuzhiyun       if (ptr[i] == 0)
1126*4882a593Smuzhiyun          { s->origPtr = i; break; };
1127*4882a593Smuzhiyun 
1128*4882a593Smuzhiyun    AssertH( s->origPtr != -1, 1003 );
1129*4882a593Smuzhiyun }
1130*4882a593Smuzhiyun 
1131*4882a593Smuzhiyun 
1132*4882a593Smuzhiyun /*-------------------------------------------------------------*/
1133*4882a593Smuzhiyun /*--- end                                       blocksort.c ---*/
1134*4882a593Smuzhiyun /*-------------------------------------------------------------*/
1135