1*cf204528SSimon Glass 2*cf204528SSimon Glass /*-------------------------------------------------------------*/ 3*cf204528SSimon Glass /*--- Compression machinery (not incl block sorting) ---*/ 4*cf204528SSimon Glass /*--- compress.c ---*/ 5*cf204528SSimon Glass /*-------------------------------------------------------------*/ 6*cf204528SSimon Glass 7*cf204528SSimon Glass /*-- 8*cf204528SSimon Glass This file is a part of bzip2 and/or libbzip2, a program and 9*cf204528SSimon Glass library for lossless, block-sorting data compression. 10*cf204528SSimon Glass 11*cf204528SSimon Glass Copyright (C) 1996-2002 Julian R Seward. All rights reserved. 12*cf204528SSimon Glass 13*cf204528SSimon Glass Redistribution and use in source and binary forms, with or without 14*cf204528SSimon Glass modification, are permitted provided that the following conditions 15*cf204528SSimon Glass are met: 16*cf204528SSimon Glass 17*cf204528SSimon Glass 1. Redistributions of source code must retain the above copyright 18*cf204528SSimon Glass notice, this list of conditions and the following disclaimer. 19*cf204528SSimon Glass 20*cf204528SSimon Glass 2. The origin of this software must not be misrepresented; you must 21*cf204528SSimon Glass not claim that you wrote the original software. If you use this 22*cf204528SSimon Glass software in a product, an acknowledgment in the product 23*cf204528SSimon Glass documentation would be appreciated but is not required. 24*cf204528SSimon Glass 25*cf204528SSimon Glass 3. Altered source versions must be plainly marked as such, and must 26*cf204528SSimon Glass not be misrepresented as being the original software. 27*cf204528SSimon Glass 28*cf204528SSimon Glass 4. The name of the author may not be used to endorse or promote 29*cf204528SSimon Glass products derived from this software without specific prior written 30*cf204528SSimon Glass permission. 31*cf204528SSimon Glass 32*cf204528SSimon Glass THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 33*cf204528SSimon Glass OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 34*cf204528SSimon Glass WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 35*cf204528SSimon Glass ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY 36*cf204528SSimon Glass DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 37*cf204528SSimon Glass DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 38*cf204528SSimon Glass GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 39*cf204528SSimon Glass INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 40*cf204528SSimon Glass WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 41*cf204528SSimon Glass NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 42*cf204528SSimon Glass SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 43*cf204528SSimon Glass 44*cf204528SSimon Glass Julian Seward, Cambridge, UK. 45*cf204528SSimon Glass jseward@acm.org 46*cf204528SSimon Glass bzip2/libbzip2 version 1.0.6 of 6 September 2010 47*cf204528SSimon Glass Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org> 48*cf204528SSimon Glass 49*cf204528SSimon Glass This program is based on (at least) the work of: 50*cf204528SSimon Glass Mike Burrows 51*cf204528SSimon Glass David Wheeler 52*cf204528SSimon Glass Peter Fenwick 53*cf204528SSimon Glass Alistair Moffat 54*cf204528SSimon Glass Radford Neal 55*cf204528SSimon Glass Ian H. Witten 56*cf204528SSimon Glass Robert Sedgewick 57*cf204528SSimon Glass Jon L. Bentley 58*cf204528SSimon Glass 59*cf204528SSimon Glass For more information on these sources, see the manual. 60*cf204528SSimon Glass --*/ 61*cf204528SSimon Glass 62*cf204528SSimon Glass /* CHANGES 63*cf204528SSimon Glass 0.9.0 -- original version. 64*cf204528SSimon Glass 0.9.0a/b -- no changes in this file. 65*cf204528SSimon Glass 0.9.0c -- changed setting of nGroups in sendMTFValues() 66*cf204528SSimon Glass so as to do a bit better on small files 67*cf204528SSimon Glass */ 68*cf204528SSimon Glass 69*cf204528SSimon Glass #include "bzlib_private.h" 70*cf204528SSimon Glass 71*cf204528SSimon Glass 72*cf204528SSimon Glass /*---------------------------------------------------*/ 73*cf204528SSimon Glass /*--- Bit stream I/O ---*/ 74*cf204528SSimon Glass /*---------------------------------------------------*/ 75*cf204528SSimon Glass 76*cf204528SSimon Glass /*---------------------------------------------------*/ 77*cf204528SSimon Glass void BZ2_bsInitWrite ( EState* s ) 78*cf204528SSimon Glass { 79*cf204528SSimon Glass s->bsLive = 0; 80*cf204528SSimon Glass s->bsBuff = 0; 81*cf204528SSimon Glass } 82*cf204528SSimon Glass 83*cf204528SSimon Glass 84*cf204528SSimon Glass /*---------------------------------------------------*/ 85*cf204528SSimon Glass static 86*cf204528SSimon Glass void bsFinishWrite ( EState* s ) 87*cf204528SSimon Glass { 88*cf204528SSimon Glass while (s->bsLive > 0) { 89*cf204528SSimon Glass s->zbits[s->numZ] = (UChar)(s->bsBuff >> 24); 90*cf204528SSimon Glass s->numZ++; 91*cf204528SSimon Glass s->bsBuff <<= 8; 92*cf204528SSimon Glass s->bsLive -= 8; 93*cf204528SSimon Glass } 94*cf204528SSimon Glass } 95*cf204528SSimon Glass 96*cf204528SSimon Glass 97*cf204528SSimon Glass /*---------------------------------------------------*/ 98*cf204528SSimon Glass #define bsNEEDW(nz) \ 99*cf204528SSimon Glass { \ 100*cf204528SSimon Glass while (s->bsLive >= 8) { \ 101*cf204528SSimon Glass s->zbits[s->numZ] \ 102*cf204528SSimon Glass = (UChar)(s->bsBuff >> 24); \ 103*cf204528SSimon Glass s->numZ++; \ 104*cf204528SSimon Glass s->bsBuff <<= 8; \ 105*cf204528SSimon Glass s->bsLive -= 8; \ 106*cf204528SSimon Glass } \ 107*cf204528SSimon Glass } 108*cf204528SSimon Glass 109*cf204528SSimon Glass 110*cf204528SSimon Glass /*---------------------------------------------------*/ 111*cf204528SSimon Glass static 112*cf204528SSimon Glass __inline__ 113*cf204528SSimon Glass void bsW ( EState* s, Int32 n, UInt32 v ) 114*cf204528SSimon Glass { 115*cf204528SSimon Glass bsNEEDW ( n ); 116*cf204528SSimon Glass s->bsBuff |= (v << (32 - s->bsLive - n)); 117*cf204528SSimon Glass s->bsLive += n; 118*cf204528SSimon Glass } 119*cf204528SSimon Glass 120*cf204528SSimon Glass 121*cf204528SSimon Glass /*---------------------------------------------------*/ 122*cf204528SSimon Glass static 123*cf204528SSimon Glass void bsPutUInt32 ( EState* s, UInt32 u ) 124*cf204528SSimon Glass { 125*cf204528SSimon Glass bsW ( s, 8, (u >> 24) & 0xffL ); 126*cf204528SSimon Glass bsW ( s, 8, (u >> 16) & 0xffL ); 127*cf204528SSimon Glass bsW ( s, 8, (u >> 8) & 0xffL ); 128*cf204528SSimon Glass bsW ( s, 8, u & 0xffL ); 129*cf204528SSimon Glass } 130*cf204528SSimon Glass 131*cf204528SSimon Glass 132*cf204528SSimon Glass /*---------------------------------------------------*/ 133*cf204528SSimon Glass static 134*cf204528SSimon Glass void bsPutUChar ( EState* s, UChar c ) 135*cf204528SSimon Glass { 136*cf204528SSimon Glass bsW( s, 8, (UInt32)c ); 137*cf204528SSimon Glass } 138*cf204528SSimon Glass 139*cf204528SSimon Glass 140*cf204528SSimon Glass /*---------------------------------------------------*/ 141*cf204528SSimon Glass /*--- The back end proper ---*/ 142*cf204528SSimon Glass /*---------------------------------------------------*/ 143*cf204528SSimon Glass 144*cf204528SSimon Glass /*---------------------------------------------------*/ 145*cf204528SSimon Glass static 146*cf204528SSimon Glass void makeMaps_e ( EState* s ) 147*cf204528SSimon Glass { 148*cf204528SSimon Glass Int32 i; 149*cf204528SSimon Glass s->nInUse = 0; 150*cf204528SSimon Glass for (i = 0; i < 256; i++) 151*cf204528SSimon Glass if (s->inUse[i]) { 152*cf204528SSimon Glass s->unseqToSeq[i] = s->nInUse; 153*cf204528SSimon Glass s->nInUse++; 154*cf204528SSimon Glass } 155*cf204528SSimon Glass } 156*cf204528SSimon Glass 157*cf204528SSimon Glass 158*cf204528SSimon Glass /*---------------------------------------------------*/ 159*cf204528SSimon Glass static 160*cf204528SSimon Glass void generateMTFValues ( EState* s ) 161*cf204528SSimon Glass { 162*cf204528SSimon Glass UChar yy[256]; 163*cf204528SSimon Glass Int32 i, j; 164*cf204528SSimon Glass Int32 zPend; 165*cf204528SSimon Glass Int32 wr; 166*cf204528SSimon Glass Int32 EOB; 167*cf204528SSimon Glass 168*cf204528SSimon Glass /* 169*cf204528SSimon Glass After sorting (eg, here), 170*cf204528SSimon Glass s->arr1 [ 0 .. s->nblock-1 ] holds sorted order, 171*cf204528SSimon Glass and 172*cf204528SSimon Glass ((UChar*)s->arr2) [ 0 .. s->nblock-1 ] 173*cf204528SSimon Glass holds the original block data. 174*cf204528SSimon Glass 175*cf204528SSimon Glass The first thing to do is generate the MTF values, 176*cf204528SSimon Glass and put them in 177*cf204528SSimon Glass ((UInt16*)s->arr1) [ 0 .. s->nblock-1 ]. 178*cf204528SSimon Glass Because there are strictly fewer or equal MTF values 179*cf204528SSimon Glass than block values, ptr values in this area are overwritten 180*cf204528SSimon Glass with MTF values only when they are no longer needed. 181*cf204528SSimon Glass 182*cf204528SSimon Glass The final compressed bitstream is generated into the 183*cf204528SSimon Glass area starting at 184*cf204528SSimon Glass (UChar*) (&((UChar*)s->arr2)[s->nblock]) 185*cf204528SSimon Glass 186*cf204528SSimon Glass These storage aliases are set up in bzCompressInit(), 187*cf204528SSimon Glass except for the last one, which is arranged in 188*cf204528SSimon Glass compressBlock(). 189*cf204528SSimon Glass */ 190*cf204528SSimon Glass UInt32* ptr = s->ptr; 191*cf204528SSimon Glass UChar* block = s->block; 192*cf204528SSimon Glass UInt16* mtfv = s->mtfv; 193*cf204528SSimon Glass 194*cf204528SSimon Glass makeMaps_e ( s ); 195*cf204528SSimon Glass EOB = s->nInUse+1; 196*cf204528SSimon Glass 197*cf204528SSimon Glass for (i = 0; i <= EOB; i++) s->mtfFreq[i] = 0; 198*cf204528SSimon Glass 199*cf204528SSimon Glass wr = 0; 200*cf204528SSimon Glass zPend = 0; 201*cf204528SSimon Glass for (i = 0; i < s->nInUse; i++) yy[i] = (UChar) i; 202*cf204528SSimon Glass 203*cf204528SSimon Glass for (i = 0; i < s->nblock; i++) { 204*cf204528SSimon Glass UChar ll_i; 205*cf204528SSimon Glass AssertD ( wr <= i, "generateMTFValues(1)" ); 206*cf204528SSimon Glass j = ptr[i]-1; if (j < 0) j += s->nblock; 207*cf204528SSimon Glass ll_i = s->unseqToSeq[block[j]]; 208*cf204528SSimon Glass AssertD ( ll_i < s->nInUse, "generateMTFValues(2a)" ); 209*cf204528SSimon Glass 210*cf204528SSimon Glass if (yy[0] == ll_i) { 211*cf204528SSimon Glass zPend++; 212*cf204528SSimon Glass } else { 213*cf204528SSimon Glass 214*cf204528SSimon Glass if (zPend > 0) { 215*cf204528SSimon Glass zPend--; 216*cf204528SSimon Glass while (True) { 217*cf204528SSimon Glass if (zPend & 1) { 218*cf204528SSimon Glass mtfv[wr] = BZ_RUNB; wr++; 219*cf204528SSimon Glass s->mtfFreq[BZ_RUNB]++; 220*cf204528SSimon Glass } else { 221*cf204528SSimon Glass mtfv[wr] = BZ_RUNA; wr++; 222*cf204528SSimon Glass s->mtfFreq[BZ_RUNA]++; 223*cf204528SSimon Glass } 224*cf204528SSimon Glass if (zPend < 2) break; 225*cf204528SSimon Glass zPend = (zPend - 2) / 2; 226*cf204528SSimon Glass }; 227*cf204528SSimon Glass zPend = 0; 228*cf204528SSimon Glass } 229*cf204528SSimon Glass { 230*cf204528SSimon Glass register UChar rtmp; 231*cf204528SSimon Glass register UChar* ryy_j; 232*cf204528SSimon Glass register UChar rll_i; 233*cf204528SSimon Glass rtmp = yy[1]; 234*cf204528SSimon Glass yy[1] = yy[0]; 235*cf204528SSimon Glass ryy_j = &(yy[1]); 236*cf204528SSimon Glass rll_i = ll_i; 237*cf204528SSimon Glass while ( rll_i != rtmp ) { 238*cf204528SSimon Glass register UChar rtmp2; 239*cf204528SSimon Glass ryy_j++; 240*cf204528SSimon Glass rtmp2 = rtmp; 241*cf204528SSimon Glass rtmp = *ryy_j; 242*cf204528SSimon Glass *ryy_j = rtmp2; 243*cf204528SSimon Glass }; 244*cf204528SSimon Glass yy[0] = rtmp; 245*cf204528SSimon Glass j = ryy_j - &(yy[0]); 246*cf204528SSimon Glass mtfv[wr] = j+1; wr++; s->mtfFreq[j+1]++; 247*cf204528SSimon Glass } 248*cf204528SSimon Glass 249*cf204528SSimon Glass } 250*cf204528SSimon Glass } 251*cf204528SSimon Glass 252*cf204528SSimon Glass if (zPend > 0) { 253*cf204528SSimon Glass zPend--; 254*cf204528SSimon Glass while (True) { 255*cf204528SSimon Glass if (zPend & 1) { 256*cf204528SSimon Glass mtfv[wr] = BZ_RUNB; wr++; 257*cf204528SSimon Glass s->mtfFreq[BZ_RUNB]++; 258*cf204528SSimon Glass } else { 259*cf204528SSimon Glass mtfv[wr] = BZ_RUNA; wr++; 260*cf204528SSimon Glass s->mtfFreq[BZ_RUNA]++; 261*cf204528SSimon Glass } 262*cf204528SSimon Glass if (zPend < 2) break; 263*cf204528SSimon Glass zPend = (zPend - 2) / 2; 264*cf204528SSimon Glass }; 265*cf204528SSimon Glass zPend = 0; 266*cf204528SSimon Glass } 267*cf204528SSimon Glass 268*cf204528SSimon Glass mtfv[wr] = EOB; wr++; s->mtfFreq[EOB]++; 269*cf204528SSimon Glass 270*cf204528SSimon Glass s->nMTF = wr; 271*cf204528SSimon Glass } 272*cf204528SSimon Glass 273*cf204528SSimon Glass 274*cf204528SSimon Glass /*---------------------------------------------------*/ 275*cf204528SSimon Glass #define BZ_LESSER_ICOST 0 276*cf204528SSimon Glass #define BZ_GREATER_ICOST 15 277*cf204528SSimon Glass 278*cf204528SSimon Glass static 279*cf204528SSimon Glass void sendMTFValues ( EState* s ) 280*cf204528SSimon Glass { 281*cf204528SSimon Glass Int32 v, t, i, j, gs, ge, totc, bt, bc, iter; 282*cf204528SSimon Glass Int32 nSelectors, alphaSize, minLen, maxLen, selCtr; 283*cf204528SSimon Glass Int32 nGroups, nBytes; 284*cf204528SSimon Glass 285*cf204528SSimon Glass /*-- 286*cf204528SSimon Glass UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; 287*cf204528SSimon Glass is a global since the decoder also needs it. 288*cf204528SSimon Glass 289*cf204528SSimon Glass Int32 code[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; 290*cf204528SSimon Glass Int32 rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; 291*cf204528SSimon Glass are also globals only used in this proc. 292*cf204528SSimon Glass Made global to keep stack frame size small. 293*cf204528SSimon Glass --*/ 294*cf204528SSimon Glass 295*cf204528SSimon Glass 296*cf204528SSimon Glass UInt16 cost[BZ_N_GROUPS]; 297*cf204528SSimon Glass Int32 fave[BZ_N_GROUPS]; 298*cf204528SSimon Glass 299*cf204528SSimon Glass UInt16* mtfv = s->mtfv; 300*cf204528SSimon Glass 301*cf204528SSimon Glass if (s->verbosity >= 3) 302*cf204528SSimon Glass VPrintf3( " %d in block, %d after MTF & 1-2 coding, " 303*cf204528SSimon Glass "%d+2 syms in use\n", 304*cf204528SSimon Glass s->nblock, s->nMTF, s->nInUse ); 305*cf204528SSimon Glass 306*cf204528SSimon Glass alphaSize = s->nInUse+2; 307*cf204528SSimon Glass for (t = 0; t < BZ_N_GROUPS; t++) 308*cf204528SSimon Glass for (v = 0; v < alphaSize; v++) 309*cf204528SSimon Glass s->len[t][v] = BZ_GREATER_ICOST; 310*cf204528SSimon Glass 311*cf204528SSimon Glass /*--- Decide how many coding tables to use ---*/ 312*cf204528SSimon Glass AssertH ( s->nMTF > 0, 3001 ); 313*cf204528SSimon Glass if (s->nMTF < 200) nGroups = 2; else 314*cf204528SSimon Glass if (s->nMTF < 600) nGroups = 3; else 315*cf204528SSimon Glass if (s->nMTF < 1200) nGroups = 4; else 316*cf204528SSimon Glass if (s->nMTF < 2400) nGroups = 5; else 317*cf204528SSimon Glass nGroups = 6; 318*cf204528SSimon Glass 319*cf204528SSimon Glass /*--- Generate an initial set of coding tables ---*/ 320*cf204528SSimon Glass { 321*cf204528SSimon Glass Int32 nPart, remF, tFreq, aFreq; 322*cf204528SSimon Glass 323*cf204528SSimon Glass nPart = nGroups; 324*cf204528SSimon Glass remF = s->nMTF; 325*cf204528SSimon Glass gs = 0; 326*cf204528SSimon Glass while (nPart > 0) { 327*cf204528SSimon Glass tFreq = remF / nPart; 328*cf204528SSimon Glass ge = gs-1; 329*cf204528SSimon Glass aFreq = 0; 330*cf204528SSimon Glass while (aFreq < tFreq && ge < alphaSize-1) { 331*cf204528SSimon Glass ge++; 332*cf204528SSimon Glass aFreq += s->mtfFreq[ge]; 333*cf204528SSimon Glass } 334*cf204528SSimon Glass 335*cf204528SSimon Glass if (ge > gs 336*cf204528SSimon Glass && nPart != nGroups && nPart != 1 337*cf204528SSimon Glass && ((nGroups-nPart) % 2 == 1)) { 338*cf204528SSimon Glass aFreq -= s->mtfFreq[ge]; 339*cf204528SSimon Glass ge--; 340*cf204528SSimon Glass } 341*cf204528SSimon Glass 342*cf204528SSimon Glass if (s->verbosity >= 3) 343*cf204528SSimon Glass VPrintf5( " initial group %d, [%d .. %d], " 344*cf204528SSimon Glass "has %d syms (%4.1f%%)\n", 345*cf204528SSimon Glass nPart, gs, ge, aFreq, 346*cf204528SSimon Glass (100.0 * (float)aFreq) / (float)(s->nMTF) ); 347*cf204528SSimon Glass 348*cf204528SSimon Glass for (v = 0; v < alphaSize; v++) 349*cf204528SSimon Glass if (v >= gs && v <= ge) 350*cf204528SSimon Glass s->len[nPart-1][v] = BZ_LESSER_ICOST; else 351*cf204528SSimon Glass s->len[nPart-1][v] = BZ_GREATER_ICOST; 352*cf204528SSimon Glass 353*cf204528SSimon Glass nPart--; 354*cf204528SSimon Glass gs = ge+1; 355*cf204528SSimon Glass remF -= aFreq; 356*cf204528SSimon Glass } 357*cf204528SSimon Glass } 358*cf204528SSimon Glass 359*cf204528SSimon Glass /*--- 360*cf204528SSimon Glass Iterate up to BZ_N_ITERS times to improve the tables. 361*cf204528SSimon Glass ---*/ 362*cf204528SSimon Glass for (iter = 0; iter < BZ_N_ITERS; iter++) { 363*cf204528SSimon Glass 364*cf204528SSimon Glass for (t = 0; t < nGroups; t++) fave[t] = 0; 365*cf204528SSimon Glass 366*cf204528SSimon Glass for (t = 0; t < nGroups; t++) 367*cf204528SSimon Glass for (v = 0; v < alphaSize; v++) 368*cf204528SSimon Glass s->rfreq[t][v] = 0; 369*cf204528SSimon Glass 370*cf204528SSimon Glass /*--- 371*cf204528SSimon Glass Set up an auxiliary length table which is used to fast-track 372*cf204528SSimon Glass the common case (nGroups == 6). 373*cf204528SSimon Glass ---*/ 374*cf204528SSimon Glass if (nGroups == 6) { 375*cf204528SSimon Glass for (v = 0; v < alphaSize; v++) { 376*cf204528SSimon Glass s->len_pack[v][0] = (s->len[1][v] << 16) | s->len[0][v]; 377*cf204528SSimon Glass s->len_pack[v][1] = (s->len[3][v] << 16) | s->len[2][v]; 378*cf204528SSimon Glass s->len_pack[v][2] = (s->len[5][v] << 16) | s->len[4][v]; 379*cf204528SSimon Glass } 380*cf204528SSimon Glass } 381*cf204528SSimon Glass 382*cf204528SSimon Glass nSelectors = 0; 383*cf204528SSimon Glass totc = 0; 384*cf204528SSimon Glass gs = 0; 385*cf204528SSimon Glass while (True) { 386*cf204528SSimon Glass 387*cf204528SSimon Glass /*--- Set group start & end marks. --*/ 388*cf204528SSimon Glass if (gs >= s->nMTF) break; 389*cf204528SSimon Glass ge = gs + BZ_G_SIZE - 1; 390*cf204528SSimon Glass if (ge >= s->nMTF) ge = s->nMTF-1; 391*cf204528SSimon Glass 392*cf204528SSimon Glass /*-- 393*cf204528SSimon Glass Calculate the cost of this group as coded 394*cf204528SSimon Glass by each of the coding tables. 395*cf204528SSimon Glass --*/ 396*cf204528SSimon Glass for (t = 0; t < nGroups; t++) cost[t] = 0; 397*cf204528SSimon Glass 398*cf204528SSimon Glass if (nGroups == 6 && 50 == ge-gs+1) { 399*cf204528SSimon Glass /*--- fast track the common case ---*/ 400*cf204528SSimon Glass register UInt32 cost01, cost23, cost45; 401*cf204528SSimon Glass register UInt16 icv; 402*cf204528SSimon Glass cost01 = cost23 = cost45 = 0; 403*cf204528SSimon Glass 404*cf204528SSimon Glass # define BZ_ITER(nn) \ 405*cf204528SSimon Glass icv = mtfv[gs+(nn)]; \ 406*cf204528SSimon Glass cost01 += s->len_pack[icv][0]; \ 407*cf204528SSimon Glass cost23 += s->len_pack[icv][1]; \ 408*cf204528SSimon Glass cost45 += s->len_pack[icv][2]; \ 409*cf204528SSimon Glass 410*cf204528SSimon Glass BZ_ITER(0); BZ_ITER(1); BZ_ITER(2); BZ_ITER(3); BZ_ITER(4); 411*cf204528SSimon Glass BZ_ITER(5); BZ_ITER(6); BZ_ITER(7); BZ_ITER(8); BZ_ITER(9); 412*cf204528SSimon Glass BZ_ITER(10); BZ_ITER(11); BZ_ITER(12); BZ_ITER(13); BZ_ITER(14); 413*cf204528SSimon Glass BZ_ITER(15); BZ_ITER(16); BZ_ITER(17); BZ_ITER(18); BZ_ITER(19); 414*cf204528SSimon Glass BZ_ITER(20); BZ_ITER(21); BZ_ITER(22); BZ_ITER(23); BZ_ITER(24); 415*cf204528SSimon Glass BZ_ITER(25); BZ_ITER(26); BZ_ITER(27); BZ_ITER(28); BZ_ITER(29); 416*cf204528SSimon Glass BZ_ITER(30); BZ_ITER(31); BZ_ITER(32); BZ_ITER(33); BZ_ITER(34); 417*cf204528SSimon Glass BZ_ITER(35); BZ_ITER(36); BZ_ITER(37); BZ_ITER(38); BZ_ITER(39); 418*cf204528SSimon Glass BZ_ITER(40); BZ_ITER(41); BZ_ITER(42); BZ_ITER(43); BZ_ITER(44); 419*cf204528SSimon Glass BZ_ITER(45); BZ_ITER(46); BZ_ITER(47); BZ_ITER(48); BZ_ITER(49); 420*cf204528SSimon Glass 421*cf204528SSimon Glass # undef BZ_ITER 422*cf204528SSimon Glass 423*cf204528SSimon Glass cost[0] = cost01 & 0xffff; cost[1] = cost01 >> 16; 424*cf204528SSimon Glass cost[2] = cost23 & 0xffff; cost[3] = cost23 >> 16; 425*cf204528SSimon Glass cost[4] = cost45 & 0xffff; cost[5] = cost45 >> 16; 426*cf204528SSimon Glass 427*cf204528SSimon Glass } else { 428*cf204528SSimon Glass /*--- slow version which correctly handles all situations ---*/ 429*cf204528SSimon Glass for (i = gs; i <= ge; i++) { 430*cf204528SSimon Glass UInt16 icv = mtfv[i]; 431*cf204528SSimon Glass for (t = 0; t < nGroups; t++) cost[t] += s->len[t][icv]; 432*cf204528SSimon Glass } 433*cf204528SSimon Glass } 434*cf204528SSimon Glass 435*cf204528SSimon Glass /*-- 436*cf204528SSimon Glass Find the coding table which is best for this group, 437*cf204528SSimon Glass and record its identity in the selector table. 438*cf204528SSimon Glass --*/ 439*cf204528SSimon Glass bc = 999999999; bt = -1; 440*cf204528SSimon Glass for (t = 0; t < nGroups; t++) 441*cf204528SSimon Glass if (cost[t] < bc) { bc = cost[t]; bt = t; }; 442*cf204528SSimon Glass totc += bc; 443*cf204528SSimon Glass fave[bt]++; 444*cf204528SSimon Glass s->selector[nSelectors] = bt; 445*cf204528SSimon Glass nSelectors++; 446*cf204528SSimon Glass 447*cf204528SSimon Glass /*-- 448*cf204528SSimon Glass Increment the symbol frequencies for the selected table. 449*cf204528SSimon Glass --*/ 450*cf204528SSimon Glass if (nGroups == 6 && 50 == ge-gs+1) { 451*cf204528SSimon Glass /*--- fast track the common case ---*/ 452*cf204528SSimon Glass 453*cf204528SSimon Glass # define BZ_ITUR(nn) s->rfreq[bt][ mtfv[gs+(nn)] ]++ 454*cf204528SSimon Glass 455*cf204528SSimon Glass BZ_ITUR(0); BZ_ITUR(1); BZ_ITUR(2); BZ_ITUR(3); BZ_ITUR(4); 456*cf204528SSimon Glass BZ_ITUR(5); BZ_ITUR(6); BZ_ITUR(7); BZ_ITUR(8); BZ_ITUR(9); 457*cf204528SSimon Glass BZ_ITUR(10); BZ_ITUR(11); BZ_ITUR(12); BZ_ITUR(13); BZ_ITUR(14); 458*cf204528SSimon Glass BZ_ITUR(15); BZ_ITUR(16); BZ_ITUR(17); BZ_ITUR(18); BZ_ITUR(19); 459*cf204528SSimon Glass BZ_ITUR(20); BZ_ITUR(21); BZ_ITUR(22); BZ_ITUR(23); BZ_ITUR(24); 460*cf204528SSimon Glass BZ_ITUR(25); BZ_ITUR(26); BZ_ITUR(27); BZ_ITUR(28); BZ_ITUR(29); 461*cf204528SSimon Glass BZ_ITUR(30); BZ_ITUR(31); BZ_ITUR(32); BZ_ITUR(33); BZ_ITUR(34); 462*cf204528SSimon Glass BZ_ITUR(35); BZ_ITUR(36); BZ_ITUR(37); BZ_ITUR(38); BZ_ITUR(39); 463*cf204528SSimon Glass BZ_ITUR(40); BZ_ITUR(41); BZ_ITUR(42); BZ_ITUR(43); BZ_ITUR(44); 464*cf204528SSimon Glass BZ_ITUR(45); BZ_ITUR(46); BZ_ITUR(47); BZ_ITUR(48); BZ_ITUR(49); 465*cf204528SSimon Glass 466*cf204528SSimon Glass # undef BZ_ITUR 467*cf204528SSimon Glass 468*cf204528SSimon Glass } else { 469*cf204528SSimon Glass /*--- slow version which correctly handles all situations ---*/ 470*cf204528SSimon Glass for (i = gs; i <= ge; i++) 471*cf204528SSimon Glass s->rfreq[bt][ mtfv[i] ]++; 472*cf204528SSimon Glass } 473*cf204528SSimon Glass 474*cf204528SSimon Glass gs = ge+1; 475*cf204528SSimon Glass } 476*cf204528SSimon Glass if (s->verbosity >= 3) { 477*cf204528SSimon Glass VPrintf2 ( " pass %d: size is %d, grp uses are ", 478*cf204528SSimon Glass iter+1, totc/8 ); 479*cf204528SSimon Glass for (t = 0; t < nGroups; t++) 480*cf204528SSimon Glass VPrintf1 ( "%d ", fave[t] ); 481*cf204528SSimon Glass VPrintf0 ( "\n" ); 482*cf204528SSimon Glass } 483*cf204528SSimon Glass 484*cf204528SSimon Glass /*-- 485*cf204528SSimon Glass Recompute the tables based on the accumulated frequencies. 486*cf204528SSimon Glass --*/ 487*cf204528SSimon Glass /* maxLen was changed from 20 to 17 in bzip2-1.0.3. See 488*cf204528SSimon Glass comment in huffman.c for details. */ 489*cf204528SSimon Glass for (t = 0; t < nGroups; t++) 490*cf204528SSimon Glass BZ2_hbMakeCodeLengths ( &(s->len[t][0]), &(s->rfreq[t][0]), 491*cf204528SSimon Glass alphaSize, 17 /*20*/ ); 492*cf204528SSimon Glass } 493*cf204528SSimon Glass 494*cf204528SSimon Glass 495*cf204528SSimon Glass AssertH( nGroups < 8, 3002 ); 496*cf204528SSimon Glass AssertH( nSelectors < 32768 && 497*cf204528SSimon Glass nSelectors <= (2 + (900000 / BZ_G_SIZE)), 498*cf204528SSimon Glass 3003 ); 499*cf204528SSimon Glass 500*cf204528SSimon Glass 501*cf204528SSimon Glass /*--- Compute MTF values for the selectors. ---*/ 502*cf204528SSimon Glass { 503*cf204528SSimon Glass UChar pos[BZ_N_GROUPS], ll_i, tmp2, tmp; 504*cf204528SSimon Glass for (i = 0; i < nGroups; i++) pos[i] = i; 505*cf204528SSimon Glass for (i = 0; i < nSelectors; i++) { 506*cf204528SSimon Glass ll_i = s->selector[i]; 507*cf204528SSimon Glass j = 0; 508*cf204528SSimon Glass tmp = pos[j]; 509*cf204528SSimon Glass while ( ll_i != tmp ) { 510*cf204528SSimon Glass j++; 511*cf204528SSimon Glass tmp2 = tmp; 512*cf204528SSimon Glass tmp = pos[j]; 513*cf204528SSimon Glass pos[j] = tmp2; 514*cf204528SSimon Glass }; 515*cf204528SSimon Glass pos[0] = tmp; 516*cf204528SSimon Glass s->selectorMtf[i] = j; 517*cf204528SSimon Glass } 518*cf204528SSimon Glass }; 519*cf204528SSimon Glass 520*cf204528SSimon Glass /*--- Assign actual codes for the tables. --*/ 521*cf204528SSimon Glass for (t = 0; t < nGroups; t++) { 522*cf204528SSimon Glass minLen = 32; 523*cf204528SSimon Glass maxLen = 0; 524*cf204528SSimon Glass for (i = 0; i < alphaSize; i++) { 525*cf204528SSimon Glass if (s->len[t][i] > maxLen) maxLen = s->len[t][i]; 526*cf204528SSimon Glass if (s->len[t][i] < minLen) minLen = s->len[t][i]; 527*cf204528SSimon Glass } 528*cf204528SSimon Glass AssertH ( !(maxLen > 17 /*20*/ ), 3004 ); 529*cf204528SSimon Glass AssertH ( !(minLen < 1), 3005 ); 530*cf204528SSimon Glass BZ2_hbAssignCodes ( &(s->code[t][0]), &(s->len[t][0]), 531*cf204528SSimon Glass minLen, maxLen, alphaSize ); 532*cf204528SSimon Glass } 533*cf204528SSimon Glass 534*cf204528SSimon Glass /*--- Transmit the mapping table. ---*/ 535*cf204528SSimon Glass { 536*cf204528SSimon Glass Bool inUse16[16]; 537*cf204528SSimon Glass for (i = 0; i < 16; i++) { 538*cf204528SSimon Glass inUse16[i] = False; 539*cf204528SSimon Glass for (j = 0; j < 16; j++) 540*cf204528SSimon Glass if (s->inUse[i * 16 + j]) inUse16[i] = True; 541*cf204528SSimon Glass } 542*cf204528SSimon Glass 543*cf204528SSimon Glass nBytes = s->numZ; 544*cf204528SSimon Glass for (i = 0; i < 16; i++) 545*cf204528SSimon Glass if (inUse16[i]) bsW(s,1,1); else bsW(s,1,0); 546*cf204528SSimon Glass 547*cf204528SSimon Glass for (i = 0; i < 16; i++) 548*cf204528SSimon Glass if (inUse16[i]) 549*cf204528SSimon Glass for (j = 0; j < 16; j++) { 550*cf204528SSimon Glass if (s->inUse[i * 16 + j]) bsW(s,1,1); else bsW(s,1,0); 551*cf204528SSimon Glass } 552*cf204528SSimon Glass 553*cf204528SSimon Glass if (s->verbosity >= 3) 554*cf204528SSimon Glass VPrintf1( " bytes: mapping %d, ", s->numZ-nBytes ); 555*cf204528SSimon Glass } 556*cf204528SSimon Glass 557*cf204528SSimon Glass /*--- Now the selectors. ---*/ 558*cf204528SSimon Glass nBytes = s->numZ; 559*cf204528SSimon Glass bsW ( s, 3, nGroups ); 560*cf204528SSimon Glass bsW ( s, 15, nSelectors ); 561*cf204528SSimon Glass for (i = 0; i < nSelectors; i++) { 562*cf204528SSimon Glass for (j = 0; j < s->selectorMtf[i]; j++) bsW(s,1,1); 563*cf204528SSimon Glass bsW(s,1,0); 564*cf204528SSimon Glass } 565*cf204528SSimon Glass if (s->verbosity >= 3) 566*cf204528SSimon Glass VPrintf1( "selectors %d, ", s->numZ-nBytes ); 567*cf204528SSimon Glass 568*cf204528SSimon Glass /*--- Now the coding tables. ---*/ 569*cf204528SSimon Glass nBytes = s->numZ; 570*cf204528SSimon Glass 571*cf204528SSimon Glass for (t = 0; t < nGroups; t++) { 572*cf204528SSimon Glass Int32 curr = s->len[t][0]; 573*cf204528SSimon Glass bsW ( s, 5, curr ); 574*cf204528SSimon Glass for (i = 0; i < alphaSize; i++) { 575*cf204528SSimon Glass while (curr < s->len[t][i]) { bsW(s,2,2); curr++; /* 10 */ }; 576*cf204528SSimon Glass while (curr > s->len[t][i]) { bsW(s,2,3); curr--; /* 11 */ }; 577*cf204528SSimon Glass bsW ( s, 1, 0 ); 578*cf204528SSimon Glass } 579*cf204528SSimon Glass } 580*cf204528SSimon Glass 581*cf204528SSimon Glass if (s->verbosity >= 3) 582*cf204528SSimon Glass VPrintf1 ( "code lengths %d, ", s->numZ-nBytes ); 583*cf204528SSimon Glass 584*cf204528SSimon Glass /*--- And finally, the block data proper ---*/ 585*cf204528SSimon Glass nBytes = s->numZ; 586*cf204528SSimon Glass selCtr = 0; 587*cf204528SSimon Glass gs = 0; 588*cf204528SSimon Glass while (True) { 589*cf204528SSimon Glass if (gs >= s->nMTF) break; 590*cf204528SSimon Glass ge = gs + BZ_G_SIZE - 1; 591*cf204528SSimon Glass if (ge >= s->nMTF) ge = s->nMTF-1; 592*cf204528SSimon Glass AssertH ( s->selector[selCtr] < nGroups, 3006 ); 593*cf204528SSimon Glass 594*cf204528SSimon Glass if (nGroups == 6 && 50 == ge-gs+1) { 595*cf204528SSimon Glass /*--- fast track the common case ---*/ 596*cf204528SSimon Glass UInt16 mtfv_i; 597*cf204528SSimon Glass UChar* s_len_sel_selCtr 598*cf204528SSimon Glass = &(s->len[s->selector[selCtr]][0]); 599*cf204528SSimon Glass Int32* s_code_sel_selCtr 600*cf204528SSimon Glass = &(s->code[s->selector[selCtr]][0]); 601*cf204528SSimon Glass 602*cf204528SSimon Glass # define BZ_ITAH(nn) \ 603*cf204528SSimon Glass mtfv_i = mtfv[gs+(nn)]; \ 604*cf204528SSimon Glass bsW ( s, \ 605*cf204528SSimon Glass s_len_sel_selCtr[mtfv_i], \ 606*cf204528SSimon Glass s_code_sel_selCtr[mtfv_i] ) 607*cf204528SSimon Glass 608*cf204528SSimon Glass BZ_ITAH(0); BZ_ITAH(1); BZ_ITAH(2); BZ_ITAH(3); BZ_ITAH(4); 609*cf204528SSimon Glass BZ_ITAH(5); BZ_ITAH(6); BZ_ITAH(7); BZ_ITAH(8); BZ_ITAH(9); 610*cf204528SSimon Glass BZ_ITAH(10); BZ_ITAH(11); BZ_ITAH(12); BZ_ITAH(13); BZ_ITAH(14); 611*cf204528SSimon Glass BZ_ITAH(15); BZ_ITAH(16); BZ_ITAH(17); BZ_ITAH(18); BZ_ITAH(19); 612*cf204528SSimon Glass BZ_ITAH(20); BZ_ITAH(21); BZ_ITAH(22); BZ_ITAH(23); BZ_ITAH(24); 613*cf204528SSimon Glass BZ_ITAH(25); BZ_ITAH(26); BZ_ITAH(27); BZ_ITAH(28); BZ_ITAH(29); 614*cf204528SSimon Glass BZ_ITAH(30); BZ_ITAH(31); BZ_ITAH(32); BZ_ITAH(33); BZ_ITAH(34); 615*cf204528SSimon Glass BZ_ITAH(35); BZ_ITAH(36); BZ_ITAH(37); BZ_ITAH(38); BZ_ITAH(39); 616*cf204528SSimon Glass BZ_ITAH(40); BZ_ITAH(41); BZ_ITAH(42); BZ_ITAH(43); BZ_ITAH(44); 617*cf204528SSimon Glass BZ_ITAH(45); BZ_ITAH(46); BZ_ITAH(47); BZ_ITAH(48); BZ_ITAH(49); 618*cf204528SSimon Glass 619*cf204528SSimon Glass # undef BZ_ITAH 620*cf204528SSimon Glass 621*cf204528SSimon Glass } else { 622*cf204528SSimon Glass /*--- slow version which correctly handles all situations ---*/ 623*cf204528SSimon Glass for (i = gs; i <= ge; i++) { 624*cf204528SSimon Glass bsW ( s, 625*cf204528SSimon Glass s->len [s->selector[selCtr]] [mtfv[i]], 626*cf204528SSimon Glass s->code [s->selector[selCtr]] [mtfv[i]] ); 627*cf204528SSimon Glass } 628*cf204528SSimon Glass } 629*cf204528SSimon Glass 630*cf204528SSimon Glass 631*cf204528SSimon Glass gs = ge+1; 632*cf204528SSimon Glass selCtr++; 633*cf204528SSimon Glass } 634*cf204528SSimon Glass AssertH( selCtr == nSelectors, 3007 ); 635*cf204528SSimon Glass 636*cf204528SSimon Glass if (s->verbosity >= 3) 637*cf204528SSimon Glass VPrintf1( "codes %d\n", s->numZ-nBytes ); 638*cf204528SSimon Glass else /* squash compiler 'used but not set' warning */ 639*cf204528SSimon Glass nBytes = nBytes; 640*cf204528SSimon Glass } 641*cf204528SSimon Glass 642*cf204528SSimon Glass 643*cf204528SSimon Glass /*---------------------------------------------------*/ 644*cf204528SSimon Glass void BZ2_compressBlock ( EState* s, Bool is_last_block ) 645*cf204528SSimon Glass { 646*cf204528SSimon Glass if (s->nblock > 0) { 647*cf204528SSimon Glass 648*cf204528SSimon Glass BZ_FINALISE_CRC ( s->blockCRC ); 649*cf204528SSimon Glass s->combinedCRC = (s->combinedCRC << 1) | (s->combinedCRC >> 31); 650*cf204528SSimon Glass s->combinedCRC ^= s->blockCRC; 651*cf204528SSimon Glass if (s->blockNo > 1) s->numZ = 0; 652*cf204528SSimon Glass 653*cf204528SSimon Glass if (s->verbosity >= 2) 654*cf204528SSimon Glass VPrintf4( " block %d: crc = 0x%08x, " 655*cf204528SSimon Glass "combined CRC = 0x%08x, size = %d\n", 656*cf204528SSimon Glass s->blockNo, s->blockCRC, s->combinedCRC, s->nblock ); 657*cf204528SSimon Glass 658*cf204528SSimon Glass BZ2_blockSort ( s ); 659*cf204528SSimon Glass } 660*cf204528SSimon Glass 661*cf204528SSimon Glass s->zbits = (UChar*) (&((UChar*)s->arr2)[s->nblock]); 662*cf204528SSimon Glass 663*cf204528SSimon Glass /*-- If this is the first block, create the stream header. --*/ 664*cf204528SSimon Glass if (s->blockNo == 1) { 665*cf204528SSimon Glass BZ2_bsInitWrite ( s ); 666*cf204528SSimon Glass bsPutUChar ( s, BZ_HDR_B ); 667*cf204528SSimon Glass bsPutUChar ( s, BZ_HDR_Z ); 668*cf204528SSimon Glass bsPutUChar ( s, BZ_HDR_h ); 669*cf204528SSimon Glass bsPutUChar ( s, (UChar)(BZ_HDR_0 + s->blockSize100k) ); 670*cf204528SSimon Glass } 671*cf204528SSimon Glass 672*cf204528SSimon Glass if (s->nblock > 0) { 673*cf204528SSimon Glass 674*cf204528SSimon Glass bsPutUChar ( s, 0x31 ); bsPutUChar ( s, 0x41 ); 675*cf204528SSimon Glass bsPutUChar ( s, 0x59 ); bsPutUChar ( s, 0x26 ); 676*cf204528SSimon Glass bsPutUChar ( s, 0x53 ); bsPutUChar ( s, 0x59 ); 677*cf204528SSimon Glass 678*cf204528SSimon Glass /*-- Now the block's CRC, so it is in a known place. --*/ 679*cf204528SSimon Glass bsPutUInt32 ( s, s->blockCRC ); 680*cf204528SSimon Glass 681*cf204528SSimon Glass /*-- 682*cf204528SSimon Glass Now a single bit indicating (non-)randomisation. 683*cf204528SSimon Glass As of version 0.9.5, we use a better sorting algorithm 684*cf204528SSimon Glass which makes randomisation unnecessary. So always set 685*cf204528SSimon Glass the randomised bit to 'no'. Of course, the decoder 686*cf204528SSimon Glass still needs to be able to handle randomised blocks 687*cf204528SSimon Glass so as to maintain backwards compatibility with 688*cf204528SSimon Glass older versions of bzip2. 689*cf204528SSimon Glass --*/ 690*cf204528SSimon Glass bsW(s,1,0); 691*cf204528SSimon Glass 692*cf204528SSimon Glass bsW ( s, 24, s->origPtr ); 693*cf204528SSimon Glass generateMTFValues ( s ); 694*cf204528SSimon Glass sendMTFValues ( s ); 695*cf204528SSimon Glass } 696*cf204528SSimon Glass 697*cf204528SSimon Glass 698*cf204528SSimon Glass /*-- If this is the last block, add the stream trailer. --*/ 699*cf204528SSimon Glass if (is_last_block) { 700*cf204528SSimon Glass 701*cf204528SSimon Glass bsPutUChar ( s, 0x17 ); bsPutUChar ( s, 0x72 ); 702*cf204528SSimon Glass bsPutUChar ( s, 0x45 ); bsPutUChar ( s, 0x38 ); 703*cf204528SSimon Glass bsPutUChar ( s, 0x50 ); bsPutUChar ( s, 0x90 ); 704*cf204528SSimon Glass bsPutUInt32 ( s, s->combinedCRC ); 705*cf204528SSimon Glass if (s->verbosity >= 2) 706*cf204528SSimon Glass VPrintf1( " final combined CRC = 0x%08x\n ", s->combinedCRC ); 707*cf204528SSimon Glass bsFinishWrite ( s ); 708*cf204528SSimon Glass } 709*cf204528SSimon Glass } 710*cf204528SSimon Glass 711*cf204528SSimon Glass 712*cf204528SSimon Glass /*-------------------------------------------------------------*/ 713*cf204528SSimon Glass /*--- end compress.c ---*/ 714*cf204528SSimon Glass /*-------------------------------------------------------------*/ 715