1*cf204528SSimon Glass
2*cf204528SSimon Glass /*-------------------------------------------------------------*/
3*cf204528SSimon Glass /*--- Block sorting machinery ---*/
4*cf204528SSimon Glass /*--- blocksort.c ---*/
5*cf204528SSimon Glass /*-------------------------------------------------------------*/
6*cf204528SSimon Glass
7*cf204528SSimon Glass /*--
8*cf204528SSimon Glass This file is a part of bzip2 and/or libbzip2, a program and
9*cf204528SSimon Glass library for lossless, block-sorting data compression.
10*cf204528SSimon Glass
11*cf204528SSimon Glass Copyright (C) 1996-2002 Julian R Seward. All rights reserved.
12*cf204528SSimon Glass
13*cf204528SSimon Glass Redistribution and use in source and binary forms, with or without
14*cf204528SSimon Glass modification, are permitted provided that the following conditions
15*cf204528SSimon Glass are met:
16*cf204528SSimon Glass
17*cf204528SSimon Glass 1. Redistributions of source code must retain the above copyright
18*cf204528SSimon Glass notice, this list of conditions and the following disclaimer.
19*cf204528SSimon Glass
20*cf204528SSimon Glass 2. The origin of this software must not be misrepresented; you must
21*cf204528SSimon Glass not claim that you wrote the original software. If you use this
22*cf204528SSimon Glass software in a product, an acknowledgment in the product
23*cf204528SSimon Glass documentation would be appreciated but is not required.
24*cf204528SSimon Glass
25*cf204528SSimon Glass 3. Altered source versions must be plainly marked as such, and must
26*cf204528SSimon Glass not be misrepresented as being the original software.
27*cf204528SSimon Glass
28*cf204528SSimon Glass 4. The name of the author may not be used to endorse or promote
29*cf204528SSimon Glass products derived from this software without specific prior written
30*cf204528SSimon Glass permission.
31*cf204528SSimon Glass
32*cf204528SSimon Glass THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
33*cf204528SSimon Glass OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
34*cf204528SSimon Glass WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
35*cf204528SSimon Glass ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
36*cf204528SSimon Glass DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
37*cf204528SSimon Glass DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
38*cf204528SSimon Glass GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
39*cf204528SSimon Glass INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
40*cf204528SSimon Glass WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
41*cf204528SSimon Glass NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
42*cf204528SSimon Glass SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43*cf204528SSimon Glass
44*cf204528SSimon Glass Julian Seward, Cambridge, UK.
45*cf204528SSimon Glass jseward@acm.org
46*cf204528SSimon Glass bzip2/libbzip2 version 1.0.6 of 6 September 2010
47*cf204528SSimon Glass Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org>
48*cf204528SSimon Glass
49*cf204528SSimon Glass This program is based on (at least) the work of:
50*cf204528SSimon Glass Mike Burrows
51*cf204528SSimon Glass David Wheeler
52*cf204528SSimon Glass Peter Fenwick
53*cf204528SSimon Glass Alistair Moffat
54*cf204528SSimon Glass Radford Neal
55*cf204528SSimon Glass Ian H. Witten
56*cf204528SSimon Glass Robert Sedgewick
57*cf204528SSimon Glass Jon L. Bentley
58*cf204528SSimon Glass
59*cf204528SSimon Glass For more information on these sources, see the manual.
60*cf204528SSimon Glass --*/
61*cf204528SSimon Glass
62*cf204528SSimon Glass #include "bzlib_private.h"
63*cf204528SSimon Glass
64*cf204528SSimon Glass /*---------------------------------------------*/
65*cf204528SSimon Glass /*--- Fallback O(N log(N)^2) sorting ---*/
66*cf204528SSimon Glass /*--- algorithm, for repetitive blocks ---*/
67*cf204528SSimon Glass /*---------------------------------------------*/
68*cf204528SSimon Glass
69*cf204528SSimon Glass /*---------------------------------------------*/
70*cf204528SSimon Glass static
71*cf204528SSimon Glass __inline__
fallbackSimpleSort(UInt32 * fmap,UInt32 * eclass,Int32 lo,Int32 hi)72*cf204528SSimon Glass void fallbackSimpleSort ( UInt32* fmap,
73*cf204528SSimon Glass UInt32* eclass,
74*cf204528SSimon Glass Int32 lo,
75*cf204528SSimon Glass Int32 hi )
76*cf204528SSimon Glass {
77*cf204528SSimon Glass Int32 i, j, tmp;
78*cf204528SSimon Glass UInt32 ec_tmp;
79*cf204528SSimon Glass
80*cf204528SSimon Glass if (lo == hi) return;
81*cf204528SSimon Glass
82*cf204528SSimon Glass if (hi - lo > 3) {
83*cf204528SSimon Glass for ( i = hi-4; i >= lo; i-- ) {
84*cf204528SSimon Glass tmp = fmap[i];
85*cf204528SSimon Glass ec_tmp = eclass[tmp];
86*cf204528SSimon Glass for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 )
87*cf204528SSimon Glass fmap[j-4] = fmap[j];
88*cf204528SSimon Glass fmap[j-4] = tmp;
89*cf204528SSimon Glass }
90*cf204528SSimon Glass }
91*cf204528SSimon Glass
92*cf204528SSimon Glass for ( i = hi-1; i >= lo; i-- ) {
93*cf204528SSimon Glass tmp = fmap[i];
94*cf204528SSimon Glass ec_tmp = eclass[tmp];
95*cf204528SSimon Glass for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ )
96*cf204528SSimon Glass fmap[j-1] = fmap[j];
97*cf204528SSimon Glass fmap[j-1] = tmp;
98*cf204528SSimon Glass }
99*cf204528SSimon Glass }
100*cf204528SSimon Glass
101*cf204528SSimon Glass
102*cf204528SSimon Glass /*---------------------------------------------*/
103*cf204528SSimon Glass #define fswap(zz1, zz2) \
104*cf204528SSimon Glass { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
105*cf204528SSimon Glass
106*cf204528SSimon Glass #define fvswap(zzp1, zzp2, zzn) \
107*cf204528SSimon Glass { \
108*cf204528SSimon Glass Int32 yyp1 = (zzp1); \
109*cf204528SSimon Glass Int32 yyp2 = (zzp2); \
110*cf204528SSimon Glass Int32 yyn = (zzn); \
111*cf204528SSimon Glass while (yyn > 0) { \
112*cf204528SSimon Glass fswap(fmap[yyp1], fmap[yyp2]); \
113*cf204528SSimon Glass yyp1++; yyp2++; yyn--; \
114*cf204528SSimon Glass } \
115*cf204528SSimon Glass }
116*cf204528SSimon Glass
117*cf204528SSimon Glass
118*cf204528SSimon Glass #define fmin(a,b) ((a) < (b)) ? (a) : (b)
119*cf204528SSimon Glass
120*cf204528SSimon Glass #define fpush(lz,hz) { stackLo[sp] = lz; \
121*cf204528SSimon Glass stackHi[sp] = hz; \
122*cf204528SSimon Glass sp++; }
123*cf204528SSimon Glass
124*cf204528SSimon Glass #define fpop(lz,hz) { sp--; \
125*cf204528SSimon Glass lz = stackLo[sp]; \
126*cf204528SSimon Glass hz = stackHi[sp]; }
127*cf204528SSimon Glass
128*cf204528SSimon Glass #define FALLBACK_QSORT_SMALL_THRESH 10
129*cf204528SSimon Glass #define FALLBACK_QSORT_STACK_SIZE 100
130*cf204528SSimon Glass
131*cf204528SSimon Glass
132*cf204528SSimon Glass static
fallbackQSort3(UInt32 * fmap,UInt32 * eclass,Int32 loSt,Int32 hiSt)133*cf204528SSimon Glass void fallbackQSort3 ( UInt32* fmap,
134*cf204528SSimon Glass UInt32* eclass,
135*cf204528SSimon Glass Int32 loSt,
136*cf204528SSimon Glass Int32 hiSt )
137*cf204528SSimon Glass {
138*cf204528SSimon Glass Int32 unLo, unHi, ltLo, gtHi, n, m;
139*cf204528SSimon Glass Int32 sp, lo, hi;
140*cf204528SSimon Glass UInt32 med, r, r3;
141*cf204528SSimon Glass Int32 stackLo[FALLBACK_QSORT_STACK_SIZE];
142*cf204528SSimon Glass Int32 stackHi[FALLBACK_QSORT_STACK_SIZE];
143*cf204528SSimon Glass
144*cf204528SSimon Glass r = 0;
145*cf204528SSimon Glass
146*cf204528SSimon Glass sp = 0;
147*cf204528SSimon Glass fpush ( loSt, hiSt );
148*cf204528SSimon Glass
149*cf204528SSimon Glass while (sp > 0) {
150*cf204528SSimon Glass
151*cf204528SSimon Glass AssertH ( sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004 );
152*cf204528SSimon Glass
153*cf204528SSimon Glass fpop ( lo, hi );
154*cf204528SSimon Glass if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
155*cf204528SSimon Glass fallbackSimpleSort ( fmap, eclass, lo, hi );
156*cf204528SSimon Glass continue;
157*cf204528SSimon Glass }
158*cf204528SSimon Glass
159*cf204528SSimon Glass /* Random partitioning. Median of 3 sometimes fails to
160*cf204528SSimon Glass avoid bad cases. Median of 9 seems to help but
161*cf204528SSimon Glass looks rather expensive. This too seems to work but
162*cf204528SSimon Glass is cheaper. Guidance for the magic constants
163*cf204528SSimon Glass 7621 and 32768 is taken from Sedgewick's algorithms
164*cf204528SSimon Glass book, chapter 35.
165*cf204528SSimon Glass */
166*cf204528SSimon Glass r = ((r * 7621) + 1) % 32768;
167*cf204528SSimon Glass r3 = r % 3;
168*cf204528SSimon Glass if (r3 == 0) med = eclass[fmap[lo]]; else
169*cf204528SSimon Glass if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else
170*cf204528SSimon Glass med = eclass[fmap[hi]];
171*cf204528SSimon Glass
172*cf204528SSimon Glass unLo = ltLo = lo;
173*cf204528SSimon Glass unHi = gtHi = hi;
174*cf204528SSimon Glass
175*cf204528SSimon Glass while (1) {
176*cf204528SSimon Glass while (1) {
177*cf204528SSimon Glass if (unLo > unHi) break;
178*cf204528SSimon Glass n = (Int32)eclass[fmap[unLo]] - (Int32)med;
179*cf204528SSimon Glass if (n == 0) {
180*cf204528SSimon Glass fswap(fmap[unLo], fmap[ltLo]);
181*cf204528SSimon Glass ltLo++; unLo++;
182*cf204528SSimon Glass continue;
183*cf204528SSimon Glass };
184*cf204528SSimon Glass if (n > 0) break;
185*cf204528SSimon Glass unLo++;
186*cf204528SSimon Glass }
187*cf204528SSimon Glass while (1) {
188*cf204528SSimon Glass if (unLo > unHi) break;
189*cf204528SSimon Glass n = (Int32)eclass[fmap[unHi]] - (Int32)med;
190*cf204528SSimon Glass if (n == 0) {
191*cf204528SSimon Glass fswap(fmap[unHi], fmap[gtHi]);
192*cf204528SSimon Glass gtHi--; unHi--;
193*cf204528SSimon Glass continue;
194*cf204528SSimon Glass };
195*cf204528SSimon Glass if (n < 0) break;
196*cf204528SSimon Glass unHi--;
197*cf204528SSimon Glass }
198*cf204528SSimon Glass if (unLo > unHi) break;
199*cf204528SSimon Glass fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
200*cf204528SSimon Glass }
201*cf204528SSimon Glass
202*cf204528SSimon Glass AssertD ( unHi == unLo-1, "fallbackQSort3(2)" );
203*cf204528SSimon Glass
204*cf204528SSimon Glass if (gtHi < ltLo) continue;
205*cf204528SSimon Glass
206*cf204528SSimon Glass n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n);
207*cf204528SSimon Glass m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m);
208*cf204528SSimon Glass
209*cf204528SSimon Glass n = lo + unLo - ltLo - 1;
210*cf204528SSimon Glass m = hi - (gtHi - unHi) + 1;
211*cf204528SSimon Glass
212*cf204528SSimon Glass if (n - lo > hi - m) {
213*cf204528SSimon Glass fpush ( lo, n );
214*cf204528SSimon Glass fpush ( m, hi );
215*cf204528SSimon Glass } else {
216*cf204528SSimon Glass fpush ( m, hi );
217*cf204528SSimon Glass fpush ( lo, n );
218*cf204528SSimon Glass }
219*cf204528SSimon Glass }
220*cf204528SSimon Glass }
221*cf204528SSimon Glass
222*cf204528SSimon Glass #undef fmin
223*cf204528SSimon Glass #undef fpush
224*cf204528SSimon Glass #undef fpop
225*cf204528SSimon Glass #undef fswap
226*cf204528SSimon Glass #undef fvswap
227*cf204528SSimon Glass #undef FALLBACK_QSORT_SMALL_THRESH
228*cf204528SSimon Glass #undef FALLBACK_QSORT_STACK_SIZE
229*cf204528SSimon Glass
230*cf204528SSimon Glass
231*cf204528SSimon Glass /*---------------------------------------------*/
232*cf204528SSimon Glass /* Pre:
233*cf204528SSimon Glass nblock > 0
234*cf204528SSimon Glass eclass exists for [0 .. nblock-1]
235*cf204528SSimon Glass ((UChar*)eclass) [0 .. nblock-1] holds block
236*cf204528SSimon Glass ptr exists for [0 .. nblock-1]
237*cf204528SSimon Glass
238*cf204528SSimon Glass Post:
239*cf204528SSimon Glass ((UChar*)eclass) [0 .. nblock-1] holds block
240*cf204528SSimon Glass All other areas of eclass destroyed
241*cf204528SSimon Glass fmap [0 .. nblock-1] holds sorted order
242*cf204528SSimon Glass bhtab [ 0 .. 2+(nblock/32) ] destroyed
243*cf204528SSimon Glass */
244*cf204528SSimon Glass
245*cf204528SSimon Glass #define SET_BH(zz) bhtab[(zz) >> 5] |= (1 << ((zz) & 31))
246*cf204528SSimon Glass #define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31))
247*cf204528SSimon Glass #define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz) & 31)))
248*cf204528SSimon Glass #define WORD_BH(zz) bhtab[(zz) >> 5]
249*cf204528SSimon Glass #define UNALIGNED_BH(zz) ((zz) & 0x01f)
250*cf204528SSimon Glass
251*cf204528SSimon Glass static
fallbackSort(UInt32 * fmap,UInt32 * eclass,UInt32 * bhtab,Int32 nblock,Int32 verb)252*cf204528SSimon Glass void fallbackSort ( UInt32* fmap,
253*cf204528SSimon Glass UInt32* eclass,
254*cf204528SSimon Glass UInt32* bhtab,
255*cf204528SSimon Glass Int32 nblock,
256*cf204528SSimon Glass Int32 verb )
257*cf204528SSimon Glass {
258*cf204528SSimon Glass Int32 ftab[257];
259*cf204528SSimon Glass Int32 ftabCopy[256];
260*cf204528SSimon Glass Int32 H, i, j, k, l, r, cc, cc1;
261*cf204528SSimon Glass Int32 nNotDone;
262*cf204528SSimon Glass Int32 nBhtab;
263*cf204528SSimon Glass UChar* eclass8 = (UChar*)eclass;
264*cf204528SSimon Glass
265*cf204528SSimon Glass /*--
266*cf204528SSimon Glass Initial 1-char radix sort to generate
267*cf204528SSimon Glass initial fmap and initial BH bits.
268*cf204528SSimon Glass --*/
269*cf204528SSimon Glass if (verb >= 4)
270*cf204528SSimon Glass VPrintf0 ( " bucket sorting ...\n" );
271*cf204528SSimon Glass for (i = 0; i < 257; i++) ftab[i] = 0;
272*cf204528SSimon Glass for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
273*cf204528SSimon Glass for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i];
274*cf204528SSimon Glass for (i = 1; i < 257; i++) ftab[i] += ftab[i-1];
275*cf204528SSimon Glass
276*cf204528SSimon Glass for (i = 0; i < nblock; i++) {
277*cf204528SSimon Glass j = eclass8[i];
278*cf204528SSimon Glass k = ftab[j] - 1;
279*cf204528SSimon Glass ftab[j] = k;
280*cf204528SSimon Glass fmap[k] = i;
281*cf204528SSimon Glass }
282*cf204528SSimon Glass
283*cf204528SSimon Glass nBhtab = 2 + (nblock / 32);
284*cf204528SSimon Glass for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
285*cf204528SSimon Glass for (i = 0; i < 256; i++) SET_BH(ftab[i]);
286*cf204528SSimon Glass
287*cf204528SSimon Glass /*--
288*cf204528SSimon Glass Inductively refine the buckets. Kind-of an
289*cf204528SSimon Glass "exponential radix sort" (!), inspired by the
290*cf204528SSimon Glass Manber-Myers suffix array construction algorithm.
291*cf204528SSimon Glass --*/
292*cf204528SSimon Glass
293*cf204528SSimon Glass /*-- set sentinel bits for block-end detection --*/
294*cf204528SSimon Glass for (i = 0; i < 32; i++) {
295*cf204528SSimon Glass SET_BH(nblock + 2*i);
296*cf204528SSimon Glass CLEAR_BH(nblock + 2*i + 1);
297*cf204528SSimon Glass }
298*cf204528SSimon Glass
299*cf204528SSimon Glass /*-- the log(N) loop --*/
300*cf204528SSimon Glass H = 1;
301*cf204528SSimon Glass while (1) {
302*cf204528SSimon Glass
303*cf204528SSimon Glass if (verb >= 4)
304*cf204528SSimon Glass VPrintf1 ( " depth %6d has ", H );
305*cf204528SSimon Glass
306*cf204528SSimon Glass j = 0;
307*cf204528SSimon Glass for (i = 0; i < nblock; i++) {
308*cf204528SSimon Glass if (ISSET_BH(i)) j = i;
309*cf204528SSimon Glass k = fmap[i] - H; if (k < 0) k += nblock;
310*cf204528SSimon Glass eclass[k] = j;
311*cf204528SSimon Glass }
312*cf204528SSimon Glass
313*cf204528SSimon Glass nNotDone = 0;
314*cf204528SSimon Glass r = -1;
315*cf204528SSimon Glass while (1) {
316*cf204528SSimon Glass
317*cf204528SSimon Glass /*-- find the next non-singleton bucket --*/
318*cf204528SSimon Glass k = r + 1;
319*cf204528SSimon Glass while (ISSET_BH(k) && UNALIGNED_BH(k)) k++;
320*cf204528SSimon Glass if (ISSET_BH(k)) {
321*cf204528SSimon Glass while (WORD_BH(k) == 0xffffffff) k += 32;
322*cf204528SSimon Glass while (ISSET_BH(k)) k++;
323*cf204528SSimon Glass }
324*cf204528SSimon Glass l = k - 1;
325*cf204528SSimon Glass if (l >= nblock) break;
326*cf204528SSimon Glass while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++;
327*cf204528SSimon Glass if (!ISSET_BH(k)) {
328*cf204528SSimon Glass while (WORD_BH(k) == 0x00000000) k += 32;
329*cf204528SSimon Glass while (!ISSET_BH(k)) k++;
330*cf204528SSimon Glass }
331*cf204528SSimon Glass r = k - 1;
332*cf204528SSimon Glass if (r >= nblock) break;
333*cf204528SSimon Glass
334*cf204528SSimon Glass /*-- now [l, r] bracket current bucket --*/
335*cf204528SSimon Glass if (r > l) {
336*cf204528SSimon Glass nNotDone += (r - l + 1);
337*cf204528SSimon Glass fallbackQSort3 ( fmap, eclass, l, r );
338*cf204528SSimon Glass
339*cf204528SSimon Glass /*-- scan bucket and generate header bits-- */
340*cf204528SSimon Glass cc = -1;
341*cf204528SSimon Glass for (i = l; i <= r; i++) {
342*cf204528SSimon Glass cc1 = eclass[fmap[i]];
343*cf204528SSimon Glass if (cc != cc1) { SET_BH(i); cc = cc1; };
344*cf204528SSimon Glass }
345*cf204528SSimon Glass }
346*cf204528SSimon Glass }
347*cf204528SSimon Glass
348*cf204528SSimon Glass if (verb >= 4)
349*cf204528SSimon Glass VPrintf1 ( "%6d unresolved strings\n", nNotDone );
350*cf204528SSimon Glass
351*cf204528SSimon Glass H *= 2;
352*cf204528SSimon Glass if (H > nblock || nNotDone == 0) break;
353*cf204528SSimon Glass }
354*cf204528SSimon Glass
355*cf204528SSimon Glass /*--
356*cf204528SSimon Glass Reconstruct the original block in
357*cf204528SSimon Glass eclass8 [0 .. nblock-1], since the
358*cf204528SSimon Glass previous phase destroyed it.
359*cf204528SSimon Glass --*/
360*cf204528SSimon Glass if (verb >= 4)
361*cf204528SSimon Glass VPrintf0 ( " reconstructing block ...\n" );
362*cf204528SSimon Glass j = 0;
363*cf204528SSimon Glass for (i = 0; i < nblock; i++) {
364*cf204528SSimon Glass while (ftabCopy[j] == 0) j++;
365*cf204528SSimon Glass ftabCopy[j]--;
366*cf204528SSimon Glass eclass8[fmap[i]] = (UChar)j;
367*cf204528SSimon Glass }
368*cf204528SSimon Glass AssertH ( j < 256, 1005 );
369*cf204528SSimon Glass }
370*cf204528SSimon Glass
371*cf204528SSimon Glass #undef SET_BH
372*cf204528SSimon Glass #undef CLEAR_BH
373*cf204528SSimon Glass #undef ISSET_BH
374*cf204528SSimon Glass #undef WORD_BH
375*cf204528SSimon Glass #undef UNALIGNED_BH
376*cf204528SSimon Glass
377*cf204528SSimon Glass
378*cf204528SSimon Glass /*---------------------------------------------*/
379*cf204528SSimon Glass /*--- The main, O(N^2 log(N)) sorting ---*/
380*cf204528SSimon Glass /*--- algorithm. Faster for "normal" ---*/
381*cf204528SSimon Glass /*--- non-repetitive blocks. ---*/
382*cf204528SSimon Glass /*---------------------------------------------*/
383*cf204528SSimon Glass
384*cf204528SSimon Glass /*---------------------------------------------*/
385*cf204528SSimon Glass static
386*cf204528SSimon Glass __inline__
mainGtU(UInt32 i1,UInt32 i2,UChar * block,UInt16 * quadrant,UInt32 nblock,Int32 * budget)387*cf204528SSimon Glass Bool mainGtU ( UInt32 i1,
388*cf204528SSimon Glass UInt32 i2,
389*cf204528SSimon Glass UChar* block,
390*cf204528SSimon Glass UInt16* quadrant,
391*cf204528SSimon Glass UInt32 nblock,
392*cf204528SSimon Glass Int32* budget )
393*cf204528SSimon Glass {
394*cf204528SSimon Glass Int32 k;
395*cf204528SSimon Glass UChar c1, c2;
396*cf204528SSimon Glass UInt16 s1, s2;
397*cf204528SSimon Glass
398*cf204528SSimon Glass AssertD ( i1 != i2, "mainGtU" );
399*cf204528SSimon Glass /* 1 */
400*cf204528SSimon Glass c1 = block[i1]; c2 = block[i2];
401*cf204528SSimon Glass if (c1 != c2) return (c1 > c2);
402*cf204528SSimon Glass i1++; i2++;
403*cf204528SSimon Glass /* 2 */
404*cf204528SSimon Glass c1 = block[i1]; c2 = block[i2];
405*cf204528SSimon Glass if (c1 != c2) return (c1 > c2);
406*cf204528SSimon Glass i1++; i2++;
407*cf204528SSimon Glass /* 3 */
408*cf204528SSimon Glass c1 = block[i1]; c2 = block[i2];
409*cf204528SSimon Glass if (c1 != c2) return (c1 > c2);
410*cf204528SSimon Glass i1++; i2++;
411*cf204528SSimon Glass /* 4 */
412*cf204528SSimon Glass c1 = block[i1]; c2 = block[i2];
413*cf204528SSimon Glass if (c1 != c2) return (c1 > c2);
414*cf204528SSimon Glass i1++; i2++;
415*cf204528SSimon Glass /* 5 */
416*cf204528SSimon Glass c1 = block[i1]; c2 = block[i2];
417*cf204528SSimon Glass if (c1 != c2) return (c1 > c2);
418*cf204528SSimon Glass i1++; i2++;
419*cf204528SSimon Glass /* 6 */
420*cf204528SSimon Glass c1 = block[i1]; c2 = block[i2];
421*cf204528SSimon Glass if (c1 != c2) return (c1 > c2);
422*cf204528SSimon Glass i1++; i2++;
423*cf204528SSimon Glass /* 7 */
424*cf204528SSimon Glass c1 = block[i1]; c2 = block[i2];
425*cf204528SSimon Glass if (c1 != c2) return (c1 > c2);
426*cf204528SSimon Glass i1++; i2++;
427*cf204528SSimon Glass /* 8 */
428*cf204528SSimon Glass c1 = block[i1]; c2 = block[i2];
429*cf204528SSimon Glass if (c1 != c2) return (c1 > c2);
430*cf204528SSimon Glass i1++; i2++;
431*cf204528SSimon Glass /* 9 */
432*cf204528SSimon Glass c1 = block[i1]; c2 = block[i2];
433*cf204528SSimon Glass if (c1 != c2) return (c1 > c2);
434*cf204528SSimon Glass i1++; i2++;
435*cf204528SSimon Glass /* 10 */
436*cf204528SSimon Glass c1 = block[i1]; c2 = block[i2];
437*cf204528SSimon Glass if (c1 != c2) return (c1 > c2);
438*cf204528SSimon Glass i1++; i2++;
439*cf204528SSimon Glass /* 11 */
440*cf204528SSimon Glass c1 = block[i1]; c2 = block[i2];
441*cf204528SSimon Glass if (c1 != c2) return (c1 > c2);
442*cf204528SSimon Glass i1++; i2++;
443*cf204528SSimon Glass /* 12 */
444*cf204528SSimon Glass c1 = block[i1]; c2 = block[i2];
445*cf204528SSimon Glass if (c1 != c2) return (c1 > c2);
446*cf204528SSimon Glass i1++; i2++;
447*cf204528SSimon Glass
448*cf204528SSimon Glass k = nblock + 8;
449*cf204528SSimon Glass
450*cf204528SSimon Glass do {
451*cf204528SSimon Glass /* 1 */
452*cf204528SSimon Glass c1 = block[i1]; c2 = block[i2];
453*cf204528SSimon Glass if (c1 != c2) return (c1 > c2);
454*cf204528SSimon Glass s1 = quadrant[i1]; s2 = quadrant[i2];
455*cf204528SSimon Glass if (s1 != s2) return (s1 > s2);
456*cf204528SSimon Glass i1++; i2++;
457*cf204528SSimon Glass /* 2 */
458*cf204528SSimon Glass c1 = block[i1]; c2 = block[i2];
459*cf204528SSimon Glass if (c1 != c2) return (c1 > c2);
460*cf204528SSimon Glass s1 = quadrant[i1]; s2 = quadrant[i2];
461*cf204528SSimon Glass if (s1 != s2) return (s1 > s2);
462*cf204528SSimon Glass i1++; i2++;
463*cf204528SSimon Glass /* 3 */
464*cf204528SSimon Glass c1 = block[i1]; c2 = block[i2];
465*cf204528SSimon Glass if (c1 != c2) return (c1 > c2);
466*cf204528SSimon Glass s1 = quadrant[i1]; s2 = quadrant[i2];
467*cf204528SSimon Glass if (s1 != s2) return (s1 > s2);
468*cf204528SSimon Glass i1++; i2++;
469*cf204528SSimon Glass /* 4 */
470*cf204528SSimon Glass c1 = block[i1]; c2 = block[i2];
471*cf204528SSimon Glass if (c1 != c2) return (c1 > c2);
472*cf204528SSimon Glass s1 = quadrant[i1]; s2 = quadrant[i2];
473*cf204528SSimon Glass if (s1 != s2) return (s1 > s2);
474*cf204528SSimon Glass i1++; i2++;
475*cf204528SSimon Glass /* 5 */
476*cf204528SSimon Glass c1 = block[i1]; c2 = block[i2];
477*cf204528SSimon Glass if (c1 != c2) return (c1 > c2);
478*cf204528SSimon Glass s1 = quadrant[i1]; s2 = quadrant[i2];
479*cf204528SSimon Glass if (s1 != s2) return (s1 > s2);
480*cf204528SSimon Glass i1++; i2++;
481*cf204528SSimon Glass /* 6 */
482*cf204528SSimon Glass c1 = block[i1]; c2 = block[i2];
483*cf204528SSimon Glass if (c1 != c2) return (c1 > c2);
484*cf204528SSimon Glass s1 = quadrant[i1]; s2 = quadrant[i2];
485*cf204528SSimon Glass if (s1 != s2) return (s1 > s2);
486*cf204528SSimon Glass i1++; i2++;
487*cf204528SSimon Glass /* 7 */
488*cf204528SSimon Glass c1 = block[i1]; c2 = block[i2];
489*cf204528SSimon Glass if (c1 != c2) return (c1 > c2);
490*cf204528SSimon Glass s1 = quadrant[i1]; s2 = quadrant[i2];
491*cf204528SSimon Glass if (s1 != s2) return (s1 > s2);
492*cf204528SSimon Glass i1++; i2++;
493*cf204528SSimon Glass /* 8 */
494*cf204528SSimon Glass c1 = block[i1]; c2 = block[i2];
495*cf204528SSimon Glass if (c1 != c2) return (c1 > c2);
496*cf204528SSimon Glass s1 = quadrant[i1]; s2 = quadrant[i2];
497*cf204528SSimon Glass if (s1 != s2) return (s1 > s2);
498*cf204528SSimon Glass i1++; i2++;
499*cf204528SSimon Glass
500*cf204528SSimon Glass if (i1 >= nblock) i1 -= nblock;
501*cf204528SSimon Glass if (i2 >= nblock) i2 -= nblock;
502*cf204528SSimon Glass
503*cf204528SSimon Glass k -= 8;
504*cf204528SSimon Glass (*budget)--;
505*cf204528SSimon Glass }
506*cf204528SSimon Glass while (k >= 0);
507*cf204528SSimon Glass
508*cf204528SSimon Glass return False;
509*cf204528SSimon Glass }
510*cf204528SSimon Glass
511*cf204528SSimon Glass
512*cf204528SSimon Glass /*---------------------------------------------*/
513*cf204528SSimon Glass /*--
514*cf204528SSimon Glass Knuth's increments seem to work better
515*cf204528SSimon Glass than Incerpi-Sedgewick here. Possibly
516*cf204528SSimon Glass because the number of elems to sort is
517*cf204528SSimon Glass usually small, typically <= 20.
518*cf204528SSimon Glass --*/
519*cf204528SSimon Glass static
520*cf204528SSimon Glass Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
521*cf204528SSimon Glass 9841, 29524, 88573, 265720,
522*cf204528SSimon Glass 797161, 2391484 };
523*cf204528SSimon Glass
524*cf204528SSimon Glass static
mainSimpleSort(UInt32 * ptr,UChar * block,UInt16 * quadrant,Int32 nblock,Int32 lo,Int32 hi,Int32 d,Int32 * budget)525*cf204528SSimon Glass void mainSimpleSort ( UInt32* ptr,
526*cf204528SSimon Glass UChar* block,
527*cf204528SSimon Glass UInt16* quadrant,
528*cf204528SSimon Glass Int32 nblock,
529*cf204528SSimon Glass Int32 lo,
530*cf204528SSimon Glass Int32 hi,
531*cf204528SSimon Glass Int32 d,
532*cf204528SSimon Glass Int32* budget )
533*cf204528SSimon Glass {
534*cf204528SSimon Glass Int32 i, j, h, bigN, hp;
535*cf204528SSimon Glass UInt32 v;
536*cf204528SSimon Glass
537*cf204528SSimon Glass bigN = hi - lo + 1;
538*cf204528SSimon Glass if (bigN < 2) return;
539*cf204528SSimon Glass
540*cf204528SSimon Glass hp = 0;
541*cf204528SSimon Glass while (incs[hp] < bigN) hp++;
542*cf204528SSimon Glass hp--;
543*cf204528SSimon Glass
544*cf204528SSimon Glass for (; hp >= 0; hp--) {
545*cf204528SSimon Glass h = incs[hp];
546*cf204528SSimon Glass
547*cf204528SSimon Glass i = lo + h;
548*cf204528SSimon Glass while (True) {
549*cf204528SSimon Glass
550*cf204528SSimon Glass /*-- copy 1 --*/
551*cf204528SSimon Glass if (i > hi) break;
552*cf204528SSimon Glass v = ptr[i];
553*cf204528SSimon Glass j = i;
554*cf204528SSimon Glass while ( mainGtU (
555*cf204528SSimon Glass ptr[j-h]+d, v+d, block, quadrant, nblock, budget
556*cf204528SSimon Glass ) ) {
557*cf204528SSimon Glass ptr[j] = ptr[j-h];
558*cf204528SSimon Glass j = j - h;
559*cf204528SSimon Glass if (j <= (lo + h - 1)) break;
560*cf204528SSimon Glass }
561*cf204528SSimon Glass ptr[j] = v;
562*cf204528SSimon Glass i++;
563*cf204528SSimon Glass
564*cf204528SSimon Glass /*-- copy 2 --*/
565*cf204528SSimon Glass if (i > hi) break;
566*cf204528SSimon Glass v = ptr[i];
567*cf204528SSimon Glass j = i;
568*cf204528SSimon Glass while ( mainGtU (
569*cf204528SSimon Glass ptr[j-h]+d, v+d, block, quadrant, nblock, budget
570*cf204528SSimon Glass ) ) {
571*cf204528SSimon Glass ptr[j] = ptr[j-h];
572*cf204528SSimon Glass j = j - h;
573*cf204528SSimon Glass if (j <= (lo + h - 1)) break;
574*cf204528SSimon Glass }
575*cf204528SSimon Glass ptr[j] = v;
576*cf204528SSimon Glass i++;
577*cf204528SSimon Glass
578*cf204528SSimon Glass /*-- copy 3 --*/
579*cf204528SSimon Glass if (i > hi) break;
580*cf204528SSimon Glass v = ptr[i];
581*cf204528SSimon Glass j = i;
582*cf204528SSimon Glass while ( mainGtU (
583*cf204528SSimon Glass ptr[j-h]+d, v+d, block, quadrant, nblock, budget
584*cf204528SSimon Glass ) ) {
585*cf204528SSimon Glass ptr[j] = ptr[j-h];
586*cf204528SSimon Glass j = j - h;
587*cf204528SSimon Glass if (j <= (lo + h - 1)) break;
588*cf204528SSimon Glass }
589*cf204528SSimon Glass ptr[j] = v;
590*cf204528SSimon Glass i++;
591*cf204528SSimon Glass
592*cf204528SSimon Glass if (*budget < 0) return;
593*cf204528SSimon Glass }
594*cf204528SSimon Glass }
595*cf204528SSimon Glass }
596*cf204528SSimon Glass
597*cf204528SSimon Glass
598*cf204528SSimon Glass /*---------------------------------------------*/
599*cf204528SSimon Glass /*--
600*cf204528SSimon Glass The following is an implementation of
601*cf204528SSimon Glass an elegant 3-way quicksort for strings,
602*cf204528SSimon Glass described in a paper "Fast Algorithms for
603*cf204528SSimon Glass Sorting and Searching Strings", by Robert
604*cf204528SSimon Glass Sedgewick and Jon L. Bentley.
605*cf204528SSimon Glass --*/
606*cf204528SSimon Glass
607*cf204528SSimon Glass #define mswap(zz1, zz2) \
608*cf204528SSimon Glass { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
609*cf204528SSimon Glass
610*cf204528SSimon Glass #define mvswap(zzp1, zzp2, zzn) \
611*cf204528SSimon Glass { \
612*cf204528SSimon Glass Int32 yyp1 = (zzp1); \
613*cf204528SSimon Glass Int32 yyp2 = (zzp2); \
614*cf204528SSimon Glass Int32 yyn = (zzn); \
615*cf204528SSimon Glass while (yyn > 0) { \
616*cf204528SSimon Glass mswap(ptr[yyp1], ptr[yyp2]); \
617*cf204528SSimon Glass yyp1++; yyp2++; yyn--; \
618*cf204528SSimon Glass } \
619*cf204528SSimon Glass }
620*cf204528SSimon Glass
621*cf204528SSimon Glass static
622*cf204528SSimon Glass __inline__
mmed3(UChar a,UChar b,UChar c)623*cf204528SSimon Glass UChar mmed3 ( UChar a, UChar b, UChar c )
624*cf204528SSimon Glass {
625*cf204528SSimon Glass UChar t;
626*cf204528SSimon Glass if (a > b) { t = a; a = b; b = t; };
627*cf204528SSimon Glass if (b > c) {
628*cf204528SSimon Glass b = c;
629*cf204528SSimon Glass if (a > b) b = a;
630*cf204528SSimon Glass }
631*cf204528SSimon Glass return b;
632*cf204528SSimon Glass }
633*cf204528SSimon Glass
634*cf204528SSimon Glass #define mmin(a,b) ((a) < (b)) ? (a) : (b)
635*cf204528SSimon Glass
636*cf204528SSimon Glass #define mpush(lz,hz,dz) { stackLo[sp] = lz; \
637*cf204528SSimon Glass stackHi[sp] = hz; \
638*cf204528SSimon Glass stackD [sp] = dz; \
639*cf204528SSimon Glass sp++; }
640*cf204528SSimon Glass
641*cf204528SSimon Glass #define mpop(lz,hz,dz) { sp--; \
642*cf204528SSimon Glass lz = stackLo[sp]; \
643*cf204528SSimon Glass hz = stackHi[sp]; \
644*cf204528SSimon Glass dz = stackD [sp]; }
645*cf204528SSimon Glass
646*cf204528SSimon Glass
647*cf204528SSimon Glass #define mnextsize(az) (nextHi[az]-nextLo[az])
648*cf204528SSimon Glass
649*cf204528SSimon Glass #define mnextswap(az,bz) \
650*cf204528SSimon Glass { Int32 tz; \
651*cf204528SSimon Glass tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
652*cf204528SSimon Glass tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
653*cf204528SSimon Glass tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; }
654*cf204528SSimon Glass
655*cf204528SSimon Glass
656*cf204528SSimon Glass #define MAIN_QSORT_SMALL_THRESH 20
657*cf204528SSimon Glass #define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
658*cf204528SSimon Glass #define MAIN_QSORT_STACK_SIZE 100
659*cf204528SSimon Glass
660*cf204528SSimon Glass static
mainQSort3(UInt32 * ptr,UChar * block,UInt16 * quadrant,Int32 nblock,Int32 loSt,Int32 hiSt,Int32 dSt,Int32 * budget)661*cf204528SSimon Glass void mainQSort3 ( UInt32* ptr,
662*cf204528SSimon Glass UChar* block,
663*cf204528SSimon Glass UInt16* quadrant,
664*cf204528SSimon Glass Int32 nblock,
665*cf204528SSimon Glass Int32 loSt,
666*cf204528SSimon Glass Int32 hiSt,
667*cf204528SSimon Glass Int32 dSt,
668*cf204528SSimon Glass Int32* budget )
669*cf204528SSimon Glass {
670*cf204528SSimon Glass Int32 unLo, unHi, ltLo, gtHi, n, m, med;
671*cf204528SSimon Glass Int32 sp, lo, hi, d;
672*cf204528SSimon Glass
673*cf204528SSimon Glass Int32 stackLo[MAIN_QSORT_STACK_SIZE];
674*cf204528SSimon Glass Int32 stackHi[MAIN_QSORT_STACK_SIZE];
675*cf204528SSimon Glass Int32 stackD [MAIN_QSORT_STACK_SIZE];
676*cf204528SSimon Glass
677*cf204528SSimon Glass Int32 nextLo[3];
678*cf204528SSimon Glass Int32 nextHi[3];
679*cf204528SSimon Glass Int32 nextD [3];
680*cf204528SSimon Glass
681*cf204528SSimon Glass sp = 0;
682*cf204528SSimon Glass mpush ( loSt, hiSt, dSt );
683*cf204528SSimon Glass
684*cf204528SSimon Glass while (sp > 0) {
685*cf204528SSimon Glass
686*cf204528SSimon Glass AssertH ( sp < MAIN_QSORT_STACK_SIZE - 2, 1001 );
687*cf204528SSimon Glass
688*cf204528SSimon Glass mpop ( lo, hi, d );
689*cf204528SSimon Glass if (hi - lo < MAIN_QSORT_SMALL_THRESH ||
690*cf204528SSimon Glass d > MAIN_QSORT_DEPTH_THRESH) {
691*cf204528SSimon Glass mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget );
692*cf204528SSimon Glass if (*budget < 0) return;
693*cf204528SSimon Glass continue;
694*cf204528SSimon Glass }
695*cf204528SSimon Glass
696*cf204528SSimon Glass med = (Int32)
697*cf204528SSimon Glass mmed3 ( block[ptr[ lo ]+d],
698*cf204528SSimon Glass block[ptr[ hi ]+d],
699*cf204528SSimon Glass block[ptr[ (lo+hi)>>1 ]+d] );
700*cf204528SSimon Glass
701*cf204528SSimon Glass unLo = ltLo = lo;
702*cf204528SSimon Glass unHi = gtHi = hi;
703*cf204528SSimon Glass
704*cf204528SSimon Glass while (True) {
705*cf204528SSimon Glass while (True) {
706*cf204528SSimon Glass if (unLo > unHi) break;
707*cf204528SSimon Glass n = ((Int32)block[ptr[unLo]+d]) - med;
708*cf204528SSimon Glass if (n == 0) {
709*cf204528SSimon Glass mswap(ptr[unLo], ptr[ltLo]);
710*cf204528SSimon Glass ltLo++; unLo++; continue;
711*cf204528SSimon Glass };
712*cf204528SSimon Glass if (n > 0) break;
713*cf204528SSimon Glass unLo++;
714*cf204528SSimon Glass }
715*cf204528SSimon Glass while (True) {
716*cf204528SSimon Glass if (unLo > unHi) break;
717*cf204528SSimon Glass n = ((Int32)block[ptr[unHi]+d]) - med;
718*cf204528SSimon Glass if (n == 0) {
719*cf204528SSimon Glass mswap(ptr[unHi], ptr[gtHi]);
720*cf204528SSimon Glass gtHi--; unHi--; continue;
721*cf204528SSimon Glass };
722*cf204528SSimon Glass if (n < 0) break;
723*cf204528SSimon Glass unHi--;
724*cf204528SSimon Glass }
725*cf204528SSimon Glass if (unLo > unHi) break;
726*cf204528SSimon Glass mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--;
727*cf204528SSimon Glass }
728*cf204528SSimon Glass
729*cf204528SSimon Glass AssertD ( unHi == unLo-1, "mainQSort3(2)" );
730*cf204528SSimon Glass
731*cf204528SSimon Glass if (gtHi < ltLo) {
732*cf204528SSimon Glass mpush(lo, hi, d+1 );
733*cf204528SSimon Glass continue;
734*cf204528SSimon Glass }
735*cf204528SSimon Glass
736*cf204528SSimon Glass n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n);
737*cf204528SSimon Glass m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m);
738*cf204528SSimon Glass
739*cf204528SSimon Glass n = lo + unLo - ltLo - 1;
740*cf204528SSimon Glass m = hi - (gtHi - unHi) + 1;
741*cf204528SSimon Glass
742*cf204528SSimon Glass nextLo[0] = lo; nextHi[0] = n; nextD[0] = d;
743*cf204528SSimon Glass nextLo[1] = m; nextHi[1] = hi; nextD[1] = d;
744*cf204528SSimon Glass nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
745*cf204528SSimon Glass
746*cf204528SSimon Glass if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
747*cf204528SSimon Glass if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
748*cf204528SSimon Glass if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
749*cf204528SSimon Glass
750*cf204528SSimon Glass AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" );
751*cf204528SSimon Glass AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" );
752*cf204528SSimon Glass
753*cf204528SSimon Glass mpush (nextLo[0], nextHi[0], nextD[0]);
754*cf204528SSimon Glass mpush (nextLo[1], nextHi[1], nextD[1]);
755*cf204528SSimon Glass mpush (nextLo[2], nextHi[2], nextD[2]);
756*cf204528SSimon Glass }
757*cf204528SSimon Glass }
758*cf204528SSimon Glass
759*cf204528SSimon Glass #undef mswap
760*cf204528SSimon Glass #undef mvswap
761*cf204528SSimon Glass #undef mpush
762*cf204528SSimon Glass #undef mpop
763*cf204528SSimon Glass #undef mmin
764*cf204528SSimon Glass #undef mnextsize
765*cf204528SSimon Glass #undef mnextswap
766*cf204528SSimon Glass #undef MAIN_QSORT_SMALL_THRESH
767*cf204528SSimon Glass #undef MAIN_QSORT_DEPTH_THRESH
768*cf204528SSimon Glass #undef MAIN_QSORT_STACK_SIZE
769*cf204528SSimon Glass
770*cf204528SSimon Glass
771*cf204528SSimon Glass /*---------------------------------------------*/
772*cf204528SSimon Glass /* Pre:
773*cf204528SSimon Glass nblock > N_OVERSHOOT
774*cf204528SSimon Glass block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
775*cf204528SSimon Glass ((UChar*)block32) [0 .. nblock-1] holds block
776*cf204528SSimon Glass ptr exists for [0 .. nblock-1]
777*cf204528SSimon Glass
778*cf204528SSimon Glass Post:
779*cf204528SSimon Glass ((UChar*)block32) [0 .. nblock-1] holds block
780*cf204528SSimon Glass All other areas of block32 destroyed
781*cf204528SSimon Glass ftab [0 .. 65536 ] destroyed
782*cf204528SSimon Glass ptr [0 .. nblock-1] holds sorted order
783*cf204528SSimon Glass if (*budget < 0), sorting was abandoned
784*cf204528SSimon Glass */
785*cf204528SSimon Glass
786*cf204528SSimon Glass #define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
787*cf204528SSimon Glass #define SETMASK (1 << 21)
788*cf204528SSimon Glass #define CLEARMASK (~(SETMASK))
789*cf204528SSimon Glass
790*cf204528SSimon Glass static
mainSort(UInt32 * ptr,UChar * block,UInt16 * quadrant,UInt32 * ftab,Int32 nblock,Int32 verb,Int32 * budget)791*cf204528SSimon Glass void mainSort ( UInt32* ptr,
792*cf204528SSimon Glass UChar* block,
793*cf204528SSimon Glass UInt16* quadrant,
794*cf204528SSimon Glass UInt32* ftab,
795*cf204528SSimon Glass Int32 nblock,
796*cf204528SSimon Glass Int32 verb,
797*cf204528SSimon Glass Int32* budget )
798*cf204528SSimon Glass {
799*cf204528SSimon Glass Int32 i, j, k, ss, sb;
800*cf204528SSimon Glass Int32 runningOrder[256];
801*cf204528SSimon Glass Bool bigDone[256];
802*cf204528SSimon Glass Int32 copyStart[256];
803*cf204528SSimon Glass Int32 copyEnd [256];
804*cf204528SSimon Glass UChar c1;
805*cf204528SSimon Glass Int32 numQSorted;
806*cf204528SSimon Glass UInt16 s;
807*cf204528SSimon Glass if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" );
808*cf204528SSimon Glass
809*cf204528SSimon Glass /*-- set up the 2-byte frequency table --*/
810*cf204528SSimon Glass for (i = 65536; i >= 0; i--) ftab[i] = 0;
811*cf204528SSimon Glass
812*cf204528SSimon Glass j = block[0] << 8;
813*cf204528SSimon Glass i = nblock-1;
814*cf204528SSimon Glass for (; i >= 3; i -= 4) {
815*cf204528SSimon Glass quadrant[i] = 0;
816*cf204528SSimon Glass j = (j >> 8) | ( ((UInt16)block[i]) << 8);
817*cf204528SSimon Glass ftab[j]++;
818*cf204528SSimon Glass quadrant[i-1] = 0;
819*cf204528SSimon Glass j = (j >> 8) | ( ((UInt16)block[i-1]) << 8);
820*cf204528SSimon Glass ftab[j]++;
821*cf204528SSimon Glass quadrant[i-2] = 0;
822*cf204528SSimon Glass j = (j >> 8) | ( ((UInt16)block[i-2]) << 8);
823*cf204528SSimon Glass ftab[j]++;
824*cf204528SSimon Glass quadrant[i-3] = 0;
825*cf204528SSimon Glass j = (j >> 8) | ( ((UInt16)block[i-3]) << 8);
826*cf204528SSimon Glass ftab[j]++;
827*cf204528SSimon Glass }
828*cf204528SSimon Glass for (; i >= 0; i--) {
829*cf204528SSimon Glass quadrant[i] = 0;
830*cf204528SSimon Glass j = (j >> 8) | ( ((UInt16)block[i]) << 8);
831*cf204528SSimon Glass ftab[j]++;
832*cf204528SSimon Glass }
833*cf204528SSimon Glass
834*cf204528SSimon Glass /*-- (emphasises close relationship of block & quadrant) --*/
835*cf204528SSimon Glass for (i = 0; i < BZ_N_OVERSHOOT; i++) {
836*cf204528SSimon Glass block [nblock+i] = block[i];
837*cf204528SSimon Glass quadrant[nblock+i] = 0;
838*cf204528SSimon Glass }
839*cf204528SSimon Glass
840*cf204528SSimon Glass if (verb >= 4) VPrintf0 ( " bucket sorting ...\n" );
841*cf204528SSimon Glass
842*cf204528SSimon Glass /*-- Complete the initial radix sort --*/
843*cf204528SSimon Glass for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
844*cf204528SSimon Glass
845*cf204528SSimon Glass s = block[0] << 8;
846*cf204528SSimon Glass i = nblock-1;
847*cf204528SSimon Glass for (; i >= 3; i -= 4) {
848*cf204528SSimon Glass s = (s >> 8) | (block[i] << 8);
849*cf204528SSimon Glass j = ftab[s] -1;
850*cf204528SSimon Glass ftab[s] = j;
851*cf204528SSimon Glass ptr[j] = i;
852*cf204528SSimon Glass s = (s >> 8) | (block[i-1] << 8);
853*cf204528SSimon Glass j = ftab[s] -1;
854*cf204528SSimon Glass ftab[s] = j;
855*cf204528SSimon Glass ptr[j] = i-1;
856*cf204528SSimon Glass s = (s >> 8) | (block[i-2] << 8);
857*cf204528SSimon Glass j = ftab[s] -1;
858*cf204528SSimon Glass ftab[s] = j;
859*cf204528SSimon Glass ptr[j] = i-2;
860*cf204528SSimon Glass s = (s >> 8) | (block[i-3] << 8);
861*cf204528SSimon Glass j = ftab[s] -1;
862*cf204528SSimon Glass ftab[s] = j;
863*cf204528SSimon Glass ptr[j] = i-3;
864*cf204528SSimon Glass }
865*cf204528SSimon Glass for (; i >= 0; i--) {
866*cf204528SSimon Glass s = (s >> 8) | (block[i] << 8);
867*cf204528SSimon Glass j = ftab[s] -1;
868*cf204528SSimon Glass ftab[s] = j;
869*cf204528SSimon Glass ptr[j] = i;
870*cf204528SSimon Glass }
871*cf204528SSimon Glass
872*cf204528SSimon Glass /*--
873*cf204528SSimon Glass Now ftab contains the first loc of every small bucket.
874*cf204528SSimon Glass Calculate the running order, from smallest to largest
875*cf204528SSimon Glass big bucket.
876*cf204528SSimon Glass --*/
877*cf204528SSimon Glass for (i = 0; i <= 255; i++) {
878*cf204528SSimon Glass bigDone [i] = False;
879*cf204528SSimon Glass runningOrder[i] = i;
880*cf204528SSimon Glass }
881*cf204528SSimon Glass
882*cf204528SSimon Glass {
883*cf204528SSimon Glass Int32 vv;
884*cf204528SSimon Glass Int32 h = 1;
885*cf204528SSimon Glass do h = 3 * h + 1; while (h <= 256);
886*cf204528SSimon Glass do {
887*cf204528SSimon Glass h = h / 3;
888*cf204528SSimon Glass for (i = h; i <= 255; i++) {
889*cf204528SSimon Glass vv = runningOrder[i];
890*cf204528SSimon Glass j = i;
891*cf204528SSimon Glass while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) {
892*cf204528SSimon Glass runningOrder[j] = runningOrder[j-h];
893*cf204528SSimon Glass j = j - h;
894*cf204528SSimon Glass if (j <= (h - 1)) goto zero;
895*cf204528SSimon Glass }
896*cf204528SSimon Glass zero:
897*cf204528SSimon Glass runningOrder[j] = vv;
898*cf204528SSimon Glass }
899*cf204528SSimon Glass } while (h != 1);
900*cf204528SSimon Glass }
901*cf204528SSimon Glass
902*cf204528SSimon Glass /*--
903*cf204528SSimon Glass The main sorting loop.
904*cf204528SSimon Glass --*/
905*cf204528SSimon Glass
906*cf204528SSimon Glass numQSorted = 0;
907*cf204528SSimon Glass
908*cf204528SSimon Glass for (i = 0; i <= 255; i++) {
909*cf204528SSimon Glass
910*cf204528SSimon Glass /*--
911*cf204528SSimon Glass Process big buckets, starting with the least full.
912*cf204528SSimon Glass Basically this is a 3-step process in which we call
913*cf204528SSimon Glass mainQSort3 to sort the small buckets [ss, j], but
914*cf204528SSimon Glass also make a big effort to avoid the calls if we can.
915*cf204528SSimon Glass --*/
916*cf204528SSimon Glass ss = runningOrder[i];
917*cf204528SSimon Glass
918*cf204528SSimon Glass /*--
919*cf204528SSimon Glass Step 1:
920*cf204528SSimon Glass Complete the big bucket [ss] by quicksorting
921*cf204528SSimon Glass any unsorted small buckets [ss, j], for j != ss.
922*cf204528SSimon Glass Hopefully previous pointer-scanning phases have already
923*cf204528SSimon Glass completed many of the small buckets [ss, j], so
924*cf204528SSimon Glass we don't have to sort them at all.
925*cf204528SSimon Glass --*/
926*cf204528SSimon Glass for (j = 0; j <= 255; j++) {
927*cf204528SSimon Glass if (j != ss) {
928*cf204528SSimon Glass sb = (ss << 8) + j;
929*cf204528SSimon Glass if ( ! (ftab[sb] & SETMASK) ) {
930*cf204528SSimon Glass Int32 lo = ftab[sb] & CLEARMASK;
931*cf204528SSimon Glass Int32 hi = (ftab[sb+1] & CLEARMASK) - 1;
932*cf204528SSimon Glass if (hi > lo) {
933*cf204528SSimon Glass if (verb >= 4)
934*cf204528SSimon Glass VPrintf4 ( " qsort [0x%x, 0x%x] "
935*cf204528SSimon Glass "done %d this %d\n",
936*cf204528SSimon Glass ss, j, numQSorted, hi - lo + 1 );
937*cf204528SSimon Glass mainQSort3 (
938*cf204528SSimon Glass ptr, block, quadrant, nblock,
939*cf204528SSimon Glass lo, hi, BZ_N_RADIX, budget
940*cf204528SSimon Glass );
941*cf204528SSimon Glass numQSorted += (hi - lo + 1);
942*cf204528SSimon Glass if (*budget < 0) return;
943*cf204528SSimon Glass }
944*cf204528SSimon Glass }
945*cf204528SSimon Glass ftab[sb] |= SETMASK;
946*cf204528SSimon Glass }
947*cf204528SSimon Glass }
948*cf204528SSimon Glass
949*cf204528SSimon Glass AssertH ( !bigDone[ss], 1006 );
950*cf204528SSimon Glass
951*cf204528SSimon Glass /*--
952*cf204528SSimon Glass Step 2:
953*cf204528SSimon Glass Now scan this big bucket [ss] so as to synthesise the
954*cf204528SSimon Glass sorted order for small buckets [t, ss] for all t,
955*cf204528SSimon Glass including, magically, the bucket [ss,ss] too.
956*cf204528SSimon Glass This will avoid doing Real Work in subsequent Step 1's.
957*cf204528SSimon Glass --*/
958*cf204528SSimon Glass {
959*cf204528SSimon Glass for (j = 0; j <= 255; j++) {
960*cf204528SSimon Glass copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK;
961*cf204528SSimon Glass copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
962*cf204528SSimon Glass }
963*cf204528SSimon Glass for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
964*cf204528SSimon Glass k = ptr[j]-1; if (k < 0) k += nblock;
965*cf204528SSimon Glass c1 = block[k];
966*cf204528SSimon Glass if (!bigDone[c1])
967*cf204528SSimon Glass ptr[ copyStart[c1]++ ] = k;
968*cf204528SSimon Glass }
969*cf204528SSimon Glass for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
970*cf204528SSimon Glass k = ptr[j]-1; if (k < 0) k += nblock;
971*cf204528SSimon Glass c1 = block[k];
972*cf204528SSimon Glass if (!bigDone[c1])
973*cf204528SSimon Glass ptr[ copyEnd[c1]-- ] = k;
974*cf204528SSimon Glass }
975*cf204528SSimon Glass }
976*cf204528SSimon Glass
977*cf204528SSimon Glass AssertH ( (copyStart[ss]-1 == copyEnd[ss])
978*cf204528SSimon Glass ||
979*cf204528SSimon Glass /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1.
980*cf204528SSimon Glass Necessity for this case is demonstrated by compressing
981*cf204528SSimon Glass a sequence of approximately 48.5 million of character
982*cf204528SSimon Glass 251; 1.0.0/1.0.1 will then die here. */
983*cf204528SSimon Glass (copyStart[ss] == 0 && copyEnd[ss] == nblock-1),
984*cf204528SSimon Glass 1007 )
985*cf204528SSimon Glass
986*cf204528SSimon Glass for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
987*cf204528SSimon Glass
988*cf204528SSimon Glass /*--
989*cf204528SSimon Glass Step 3:
990*cf204528SSimon Glass The [ss] big bucket is now done. Record this fact,
991*cf204528SSimon Glass and update the quadrant descriptors. Remember to
992*cf204528SSimon Glass update quadrants in the overshoot area too, if
993*cf204528SSimon Glass necessary. The "if (i < 255)" test merely skips
994*cf204528SSimon Glass this updating for the last bucket processed, since
995*cf204528SSimon Glass updating for the last bucket is pointless.
996*cf204528SSimon Glass
997*cf204528SSimon Glass The quadrant array provides a way to incrementally
998*cf204528SSimon Glass cache sort orderings, as they appear, so as to
999*cf204528SSimon Glass make subsequent comparisons in fullGtU() complete
1000*cf204528SSimon Glass faster. For repetitive blocks this makes a big
1001*cf204528SSimon Glass difference (but not big enough to be able to avoid
1002*cf204528SSimon Glass the fallback sorting mechanism, exponential radix sort).
1003*cf204528SSimon Glass
1004*cf204528SSimon Glass The precise meaning is: at all times:
1005*cf204528SSimon Glass
1006*cf204528SSimon Glass for 0 <= i < nblock and 0 <= j <= nblock
1007*cf204528SSimon Glass
1008*cf204528SSimon Glass if block[i] != block[j],
1009*cf204528SSimon Glass
1010*cf204528SSimon Glass then the relative values of quadrant[i] and
1011*cf204528SSimon Glass quadrant[j] are meaningless.
1012*cf204528SSimon Glass
1013*cf204528SSimon Glass else {
1014*cf204528SSimon Glass if quadrant[i] < quadrant[j]
1015*cf204528SSimon Glass then the string starting at i lexicographically
1016*cf204528SSimon Glass precedes the string starting at j
1017*cf204528SSimon Glass
1018*cf204528SSimon Glass else if quadrant[i] > quadrant[j]
1019*cf204528SSimon Glass then the string starting at j lexicographically
1020*cf204528SSimon Glass precedes the string starting at i
1021*cf204528SSimon Glass
1022*cf204528SSimon Glass else
1023*cf204528SSimon Glass the relative ordering of the strings starting
1024*cf204528SSimon Glass at i and j has not yet been determined.
1025*cf204528SSimon Glass }
1026*cf204528SSimon Glass --*/
1027*cf204528SSimon Glass bigDone[ss] = True;
1028*cf204528SSimon Glass
1029*cf204528SSimon Glass if (i < 255) {
1030*cf204528SSimon Glass Int32 bbStart = ftab[ss << 8] & CLEARMASK;
1031*cf204528SSimon Glass Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
1032*cf204528SSimon Glass Int32 shifts = 0;
1033*cf204528SSimon Glass
1034*cf204528SSimon Glass while ((bbSize >> shifts) > 65534) shifts++;
1035*cf204528SSimon Glass
1036*cf204528SSimon Glass for (j = bbSize-1; j >= 0; j--) {
1037*cf204528SSimon Glass Int32 a2update = ptr[bbStart + j];
1038*cf204528SSimon Glass UInt16 qVal = (UInt16)(j >> shifts);
1039*cf204528SSimon Glass quadrant[a2update] = qVal;
1040*cf204528SSimon Glass if (a2update < BZ_N_OVERSHOOT)
1041*cf204528SSimon Glass quadrant[a2update + nblock] = qVal;
1042*cf204528SSimon Glass }
1043*cf204528SSimon Glass AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 );
1044*cf204528SSimon Glass }
1045*cf204528SSimon Glass
1046*cf204528SSimon Glass }
1047*cf204528SSimon Glass
1048*cf204528SSimon Glass if (verb >= 4)
1049*cf204528SSimon Glass VPrintf3 ( " %d pointers, %d sorted, %d scanned\n",
1050*cf204528SSimon Glass nblock, numQSorted, nblock - numQSorted );
1051*cf204528SSimon Glass }
1052*cf204528SSimon Glass
1053*cf204528SSimon Glass #undef BIGFREQ
1054*cf204528SSimon Glass #undef SETMASK
1055*cf204528SSimon Glass #undef CLEARMASK
1056*cf204528SSimon Glass
1057*cf204528SSimon Glass
1058*cf204528SSimon Glass /*---------------------------------------------*/
1059*cf204528SSimon Glass /* Pre:
1060*cf204528SSimon Glass nblock > 0
1061*cf204528SSimon Glass arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
1062*cf204528SSimon Glass ((UChar*)arr2) [0 .. nblock-1] holds block
1063*cf204528SSimon Glass arr1 exists for [0 .. nblock-1]
1064*cf204528SSimon Glass
1065*cf204528SSimon Glass Post:
1066*cf204528SSimon Glass ((UChar*)arr2) [0 .. nblock-1] holds block
1067*cf204528SSimon Glass All other areas of block destroyed
1068*cf204528SSimon Glass ftab [ 0 .. 65536 ] destroyed
1069*cf204528SSimon Glass arr1 [0 .. nblock-1] holds sorted order
1070*cf204528SSimon Glass */
BZ2_blockSort(EState * s)1071*cf204528SSimon Glass void BZ2_blockSort ( EState* s )
1072*cf204528SSimon Glass {
1073*cf204528SSimon Glass UInt32* ptr = s->ptr;
1074*cf204528SSimon Glass UChar* block = s->block;
1075*cf204528SSimon Glass UInt32* ftab = s->ftab;
1076*cf204528SSimon Glass Int32 nblock = s->nblock;
1077*cf204528SSimon Glass Int32 verb = s->verbosity;
1078*cf204528SSimon Glass Int32 wfact = s->workFactor;
1079*cf204528SSimon Glass UInt16* quadrant;
1080*cf204528SSimon Glass Int32 budget;
1081*cf204528SSimon Glass Int32 budgetInit;
1082*cf204528SSimon Glass Int32 i;
1083*cf204528SSimon Glass
1084*cf204528SSimon Glass if (nblock < 10000) {
1085*cf204528SSimon Glass fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
1086*cf204528SSimon Glass } else {
1087*cf204528SSimon Glass /* Calculate the location for quadrant, remembering to get
1088*cf204528SSimon Glass the alignment right. Assumes that &(block[0]) is at least
1089*cf204528SSimon Glass 2-byte aligned -- this should be ok since block is really
1090*cf204528SSimon Glass the first section of arr2.
1091*cf204528SSimon Glass */
1092*cf204528SSimon Glass i = nblock+BZ_N_OVERSHOOT;
1093*cf204528SSimon Glass if (i & 1) i++;
1094*cf204528SSimon Glass quadrant = (UInt16*)(&(block[i]));
1095*cf204528SSimon Glass
1096*cf204528SSimon Glass /* (wfact-1) / 3 puts the default-factor-30
1097*cf204528SSimon Glass transition point at very roughly the same place as
1098*cf204528SSimon Glass with v0.1 and v0.9.0.
1099*cf204528SSimon Glass Not that it particularly matters any more, since the
1100*cf204528SSimon Glass resulting compressed stream is now the same regardless
1101*cf204528SSimon Glass of whether or not we use the main sort or fallback sort.
1102*cf204528SSimon Glass */
1103*cf204528SSimon Glass if (wfact < 1 ) wfact = 1;
1104*cf204528SSimon Glass if (wfact > 100) wfact = 100;
1105*cf204528SSimon Glass budgetInit = nblock * ((wfact-1) / 3);
1106*cf204528SSimon Glass budget = budgetInit;
1107*cf204528SSimon Glass
1108*cf204528SSimon Glass mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget );
1109*cf204528SSimon Glass if (verb >= 3)
1110*cf204528SSimon Glass VPrintf3 ( " %d work, %d block, ratio %5.2f\n",
1111*cf204528SSimon Glass budgetInit - budget,
1112*cf204528SSimon Glass nblock,
1113*cf204528SSimon Glass (float)(budgetInit - budget) /
1114*cf204528SSimon Glass (float)(nblock==0 ? 1 : nblock) );
1115*cf204528SSimon Glass if (budget < 0) {
1116*cf204528SSimon Glass if (verb >= 2)
1117*cf204528SSimon Glass VPrintf0 ( " too repetitive; using fallback"
1118*cf204528SSimon Glass " sorting algorithm\n" );
1119*cf204528SSimon Glass fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
1120*cf204528SSimon Glass }
1121*cf204528SSimon Glass }
1122*cf204528SSimon Glass
1123*cf204528SSimon Glass s->origPtr = -1;
1124*cf204528SSimon Glass for (i = 0; i < s->nblock; i++)
1125*cf204528SSimon Glass if (ptr[i] == 0)
1126*cf204528SSimon Glass { s->origPtr = i; break; };
1127*cf204528SSimon Glass
1128*cf204528SSimon Glass AssertH( s->origPtr != -1, 1003 );
1129*cf204528SSimon Glass }
1130*cf204528SSimon Glass
1131*cf204528SSimon Glass
1132*cf204528SSimon Glass /*-------------------------------------------------------------*/
1133*cf204528SSimon Glass /*--- end blocksort.c ---*/
1134*cf204528SSimon Glass /*-------------------------------------------------------------*/
1135