1*b3be2f66SJerome Forissier /* inftrees.c -- generate Huffman trees for efficient decoding 2*b3be2f66SJerome Forissier * Copyright (C) 1995-2017 Mark Adler 3*b3be2f66SJerome Forissier * For conditions of distribution and use, see copyright notice in zlib.h 4*b3be2f66SJerome Forissier */ 5*b3be2f66SJerome Forissier 6*b3be2f66SJerome Forissier #include "zutil.h" 7*b3be2f66SJerome Forissier #include "inftrees.h" 8*b3be2f66SJerome Forissier 9*b3be2f66SJerome Forissier #define MAXBITS 15 10*b3be2f66SJerome Forissier 11*b3be2f66SJerome Forissier const char inflate_copyright[] = 12*b3be2f66SJerome Forissier " inflate 1.2.11 Copyright 1995-2017 Mark Adler "; 13*b3be2f66SJerome Forissier /* 14*b3be2f66SJerome Forissier If you use the zlib library in a product, an acknowledgment is welcome 15*b3be2f66SJerome Forissier in the documentation of your product. If for some reason you cannot 16*b3be2f66SJerome Forissier include such an acknowledgment, I would appreciate that you keep this 17*b3be2f66SJerome Forissier copyright string in the executable of your product. 18*b3be2f66SJerome Forissier */ 19*b3be2f66SJerome Forissier 20*b3be2f66SJerome Forissier /* 21*b3be2f66SJerome Forissier Build a set of tables to decode the provided canonical Huffman code. 22*b3be2f66SJerome Forissier The code lengths are lens[0..codes-1]. The result starts at *table, 23*b3be2f66SJerome Forissier whose indices are 0..2^bits-1. work is a writable array of at least 24*b3be2f66SJerome Forissier lens shorts, which is used as a work area. type is the type of code 25*b3be2f66SJerome Forissier to be generated, CODES, LENS, or DISTS. On return, zero is success, 26*b3be2f66SJerome Forissier -1 is an invalid code, and +1 means that ENOUGH isn't enough. table 27*b3be2f66SJerome Forissier on return points to the next available entry's address. bits is the 28*b3be2f66SJerome Forissier requested root table index bits, and on return it is the actual root 29*b3be2f66SJerome Forissier table index bits. It will differ if the request is greater than the 30*b3be2f66SJerome Forissier longest code or if it is less than the shortest code. 31*b3be2f66SJerome Forissier */ 32*b3be2f66SJerome Forissier int ZLIB_INTERNAL inflate_table(type, lens, codes, table, bits, work) 33*b3be2f66SJerome Forissier codetype type; 34*b3be2f66SJerome Forissier unsigned short FAR *lens; 35*b3be2f66SJerome Forissier unsigned codes; 36*b3be2f66SJerome Forissier code FAR * FAR *table; 37*b3be2f66SJerome Forissier unsigned FAR *bits; 38*b3be2f66SJerome Forissier unsigned short FAR *work; 39*b3be2f66SJerome Forissier { 40*b3be2f66SJerome Forissier unsigned len; /* a code's length in bits */ 41*b3be2f66SJerome Forissier unsigned sym; /* index of code symbols */ 42*b3be2f66SJerome Forissier unsigned min, max; /* minimum and maximum code lengths */ 43*b3be2f66SJerome Forissier unsigned root; /* number of index bits for root table */ 44*b3be2f66SJerome Forissier unsigned curr; /* number of index bits for current table */ 45*b3be2f66SJerome Forissier unsigned drop; /* code bits to drop for sub-table */ 46*b3be2f66SJerome Forissier int left; /* number of prefix codes available */ 47*b3be2f66SJerome Forissier unsigned used; /* code entries in table used */ 48*b3be2f66SJerome Forissier unsigned huff; /* Huffman code */ 49*b3be2f66SJerome Forissier unsigned incr; /* for incrementing code, index */ 50*b3be2f66SJerome Forissier unsigned fill; /* index for replicating entries */ 51*b3be2f66SJerome Forissier unsigned low; /* low bits for current root entry */ 52*b3be2f66SJerome Forissier unsigned mask; /* mask for low root bits */ 53*b3be2f66SJerome Forissier code here; /* table entry for duplication */ 54*b3be2f66SJerome Forissier code FAR *next; /* next available space in table */ 55*b3be2f66SJerome Forissier const unsigned short FAR *base; /* base value table to use */ 56*b3be2f66SJerome Forissier const unsigned short FAR *extra; /* extra bits table to use */ 57*b3be2f66SJerome Forissier unsigned match; /* use base and extra for symbol >= match */ 58*b3be2f66SJerome Forissier unsigned short count[MAXBITS+1]; /* number of codes of each length */ 59*b3be2f66SJerome Forissier unsigned short offs[MAXBITS+1]; /* offsets in table for each length */ 60*b3be2f66SJerome Forissier static const unsigned short lbase[31] = { /* Length codes 257..285 base */ 61*b3be2f66SJerome Forissier 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 62*b3be2f66SJerome Forissier 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0}; 63*b3be2f66SJerome Forissier static const unsigned short lext[31] = { /* Length codes 257..285 extra */ 64*b3be2f66SJerome Forissier 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, 65*b3be2f66SJerome Forissier 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 77, 202}; 66*b3be2f66SJerome Forissier static const unsigned short dbase[32] = { /* Distance codes 0..29 base */ 67*b3be2f66SJerome Forissier 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 68*b3be2f66SJerome Forissier 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, 69*b3be2f66SJerome Forissier 8193, 12289, 16385, 24577, 0, 0}; 70*b3be2f66SJerome Forissier static const unsigned short dext[32] = { /* Distance codes 0..29 extra */ 71*b3be2f66SJerome Forissier 16, 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, 72*b3be2f66SJerome Forissier 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 73*b3be2f66SJerome Forissier 28, 28, 29, 29, 64, 64}; 74*b3be2f66SJerome Forissier 75*b3be2f66SJerome Forissier /* 76*b3be2f66SJerome Forissier Process a set of code lengths to create a canonical Huffman code. The 77*b3be2f66SJerome Forissier code lengths are lens[0..codes-1]. Each length corresponds to the 78*b3be2f66SJerome Forissier symbols 0..codes-1. The Huffman code is generated by first sorting the 79*b3be2f66SJerome Forissier symbols by length from short to long, and retaining the symbol order 80*b3be2f66SJerome Forissier for codes with equal lengths. Then the code starts with all zero bits 81*b3be2f66SJerome Forissier for the first code of the shortest length, and the codes are integer 82*b3be2f66SJerome Forissier increments for the same length, and zeros are appended as the length 83*b3be2f66SJerome Forissier increases. For the deflate format, these bits are stored backwards 84*b3be2f66SJerome Forissier from their more natural integer increment ordering, and so when the 85*b3be2f66SJerome Forissier decoding tables are built in the large loop below, the integer codes 86*b3be2f66SJerome Forissier are incremented backwards. 87*b3be2f66SJerome Forissier 88*b3be2f66SJerome Forissier This routine assumes, but does not check, that all of the entries in 89*b3be2f66SJerome Forissier lens[] are in the range 0..MAXBITS. The caller must assure this. 90*b3be2f66SJerome Forissier 1..MAXBITS is interpreted as that code length. zero means that that 91*b3be2f66SJerome Forissier symbol does not occur in this code. 92*b3be2f66SJerome Forissier 93*b3be2f66SJerome Forissier The codes are sorted by computing a count of codes for each length, 94*b3be2f66SJerome Forissier creating from that a table of starting indices for each length in the 95*b3be2f66SJerome Forissier sorted table, and then entering the symbols in order in the sorted 96*b3be2f66SJerome Forissier table. The sorted table is work[], with that space being provided by 97*b3be2f66SJerome Forissier the caller. 98*b3be2f66SJerome Forissier 99*b3be2f66SJerome Forissier The length counts are used for other purposes as well, i.e. finding 100*b3be2f66SJerome Forissier the minimum and maximum length codes, determining if there are any 101*b3be2f66SJerome Forissier codes at all, checking for a valid set of lengths, and looking ahead 102*b3be2f66SJerome Forissier at length counts to determine sub-table sizes when building the 103*b3be2f66SJerome Forissier decoding tables. 104*b3be2f66SJerome Forissier */ 105*b3be2f66SJerome Forissier 106*b3be2f66SJerome Forissier /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */ 107*b3be2f66SJerome Forissier for (len = 0; len <= MAXBITS; len++) 108*b3be2f66SJerome Forissier count[len] = 0; 109*b3be2f66SJerome Forissier for (sym = 0; sym < codes; sym++) 110*b3be2f66SJerome Forissier count[lens[sym]]++; 111*b3be2f66SJerome Forissier 112*b3be2f66SJerome Forissier /* bound code lengths, force root to be within code lengths */ 113*b3be2f66SJerome Forissier root = *bits; 114*b3be2f66SJerome Forissier for (max = MAXBITS; max >= 1; max--) 115*b3be2f66SJerome Forissier if (count[max] != 0) break; 116*b3be2f66SJerome Forissier if (root > max) root = max; 117*b3be2f66SJerome Forissier if (max == 0) { /* no symbols to code at all */ 118*b3be2f66SJerome Forissier here.op = (unsigned char)64; /* invalid code marker */ 119*b3be2f66SJerome Forissier here.bits = (unsigned char)1; 120*b3be2f66SJerome Forissier here.val = (unsigned short)0; 121*b3be2f66SJerome Forissier *(*table)++ = here; /* make a table to force an error */ 122*b3be2f66SJerome Forissier *(*table)++ = here; 123*b3be2f66SJerome Forissier *bits = 1; 124*b3be2f66SJerome Forissier return 0; /* no symbols, but wait for decoding to report error */ 125*b3be2f66SJerome Forissier } 126*b3be2f66SJerome Forissier for (min = 1; min < max; min++) 127*b3be2f66SJerome Forissier if (count[min] != 0) break; 128*b3be2f66SJerome Forissier if (root < min) root = min; 129*b3be2f66SJerome Forissier 130*b3be2f66SJerome Forissier /* check for an over-subscribed or incomplete set of lengths */ 131*b3be2f66SJerome Forissier left = 1; 132*b3be2f66SJerome Forissier for (len = 1; len <= MAXBITS; len++) { 133*b3be2f66SJerome Forissier left <<= 1; 134*b3be2f66SJerome Forissier left -= count[len]; 135*b3be2f66SJerome Forissier if (left < 0) return -1; /* over-subscribed */ 136*b3be2f66SJerome Forissier } 137*b3be2f66SJerome Forissier if (left > 0 && (type == CODES || max != 1)) 138*b3be2f66SJerome Forissier return -1; /* incomplete set */ 139*b3be2f66SJerome Forissier 140*b3be2f66SJerome Forissier /* generate offsets into symbol table for each length for sorting */ 141*b3be2f66SJerome Forissier offs[1] = 0; 142*b3be2f66SJerome Forissier for (len = 1; len < MAXBITS; len++) 143*b3be2f66SJerome Forissier offs[len + 1] = offs[len] + count[len]; 144*b3be2f66SJerome Forissier 145*b3be2f66SJerome Forissier /* sort symbols by length, by symbol order within each length */ 146*b3be2f66SJerome Forissier for (sym = 0; sym < codes; sym++) 147*b3be2f66SJerome Forissier if (lens[sym] != 0) work[offs[lens[sym]]++] = (unsigned short)sym; 148*b3be2f66SJerome Forissier 149*b3be2f66SJerome Forissier /* 150*b3be2f66SJerome Forissier Create and fill in decoding tables. In this loop, the table being 151*b3be2f66SJerome Forissier filled is at next and has curr index bits. The code being used is huff 152*b3be2f66SJerome Forissier with length len. That code is converted to an index by dropping drop 153*b3be2f66SJerome Forissier bits off of the bottom. For codes where len is less than drop + curr, 154*b3be2f66SJerome Forissier those top drop + curr - len bits are incremented through all values to 155*b3be2f66SJerome Forissier fill the table with replicated entries. 156*b3be2f66SJerome Forissier 157*b3be2f66SJerome Forissier root is the number of index bits for the root table. When len exceeds 158*b3be2f66SJerome Forissier root, sub-tables are created pointed to by the root entry with an index 159*b3be2f66SJerome Forissier of the low root bits of huff. This is saved in low to check for when a 160*b3be2f66SJerome Forissier new sub-table should be started. drop is zero when the root table is 161*b3be2f66SJerome Forissier being filled, and drop is root when sub-tables are being filled. 162*b3be2f66SJerome Forissier 163*b3be2f66SJerome Forissier When a new sub-table is needed, it is necessary to look ahead in the 164*b3be2f66SJerome Forissier code lengths to determine what size sub-table is needed. The length 165*b3be2f66SJerome Forissier counts are used for this, and so count[] is decremented as codes are 166*b3be2f66SJerome Forissier entered in the tables. 167*b3be2f66SJerome Forissier 168*b3be2f66SJerome Forissier used keeps track of how many table entries have been allocated from the 169*b3be2f66SJerome Forissier provided *table space. It is checked for LENS and DIST tables against 170*b3be2f66SJerome Forissier the constants ENOUGH_LENS and ENOUGH_DISTS to guard against changes in 171*b3be2f66SJerome Forissier the initial root table size constants. See the comments in inftrees.h 172*b3be2f66SJerome Forissier for more information. 173*b3be2f66SJerome Forissier 174*b3be2f66SJerome Forissier sym increments through all symbols, and the loop terminates when 175*b3be2f66SJerome Forissier all codes of length max, i.e. all codes, have been processed. This 176*b3be2f66SJerome Forissier routine permits incomplete codes, so another loop after this one fills 177*b3be2f66SJerome Forissier in the rest of the decoding tables with invalid code markers. 178*b3be2f66SJerome Forissier */ 179*b3be2f66SJerome Forissier 180*b3be2f66SJerome Forissier /* set up for code type */ 181*b3be2f66SJerome Forissier switch (type) { 182*b3be2f66SJerome Forissier case CODES: 183*b3be2f66SJerome Forissier base = extra = work; /* dummy value--not used */ 184*b3be2f66SJerome Forissier match = 20; 185*b3be2f66SJerome Forissier break; 186*b3be2f66SJerome Forissier case LENS: 187*b3be2f66SJerome Forissier base = lbase; 188*b3be2f66SJerome Forissier extra = lext; 189*b3be2f66SJerome Forissier match = 257; 190*b3be2f66SJerome Forissier break; 191*b3be2f66SJerome Forissier default: /* DISTS */ 192*b3be2f66SJerome Forissier base = dbase; 193*b3be2f66SJerome Forissier extra = dext; 194*b3be2f66SJerome Forissier match = 0; 195*b3be2f66SJerome Forissier } 196*b3be2f66SJerome Forissier 197*b3be2f66SJerome Forissier /* initialize state for loop */ 198*b3be2f66SJerome Forissier huff = 0; /* starting code */ 199*b3be2f66SJerome Forissier sym = 0; /* starting code symbol */ 200*b3be2f66SJerome Forissier len = min; /* starting code length */ 201*b3be2f66SJerome Forissier next = *table; /* current table to fill in */ 202*b3be2f66SJerome Forissier curr = root; /* current table index bits */ 203*b3be2f66SJerome Forissier drop = 0; /* current bits to drop from code for index */ 204*b3be2f66SJerome Forissier low = (unsigned)(-1); /* trigger new sub-table when len > root */ 205*b3be2f66SJerome Forissier used = 1U << root; /* use root table entries */ 206*b3be2f66SJerome Forissier mask = used - 1; /* mask for comparing low */ 207*b3be2f66SJerome Forissier 208*b3be2f66SJerome Forissier /* check available table space */ 209*b3be2f66SJerome Forissier if ((type == LENS && used > ENOUGH_LENS) || 210*b3be2f66SJerome Forissier (type == DISTS && used > ENOUGH_DISTS)) 211*b3be2f66SJerome Forissier return 1; 212*b3be2f66SJerome Forissier 213*b3be2f66SJerome Forissier /* process all codes and make table entries */ 214*b3be2f66SJerome Forissier for (;;) { 215*b3be2f66SJerome Forissier /* create table entry */ 216*b3be2f66SJerome Forissier here.bits = (unsigned char)(len - drop); 217*b3be2f66SJerome Forissier if (work[sym] + 1U < match) { 218*b3be2f66SJerome Forissier here.op = (unsigned char)0; 219*b3be2f66SJerome Forissier here.val = work[sym]; 220*b3be2f66SJerome Forissier } 221*b3be2f66SJerome Forissier else if (work[sym] >= match) { 222*b3be2f66SJerome Forissier here.op = (unsigned char)(extra[work[sym] - match]); 223*b3be2f66SJerome Forissier here.val = base[work[sym] - match]; 224*b3be2f66SJerome Forissier } 225*b3be2f66SJerome Forissier else { 226*b3be2f66SJerome Forissier here.op = (unsigned char)(32 + 64); /* end of block */ 227*b3be2f66SJerome Forissier here.val = 0; 228*b3be2f66SJerome Forissier } 229*b3be2f66SJerome Forissier 230*b3be2f66SJerome Forissier /* replicate for those indices with low len bits equal to huff */ 231*b3be2f66SJerome Forissier incr = 1U << (len - drop); 232*b3be2f66SJerome Forissier fill = 1U << curr; 233*b3be2f66SJerome Forissier min = fill; /* save offset to next table */ 234*b3be2f66SJerome Forissier do { 235*b3be2f66SJerome Forissier fill -= incr; 236*b3be2f66SJerome Forissier next[(huff >> drop) + fill] = here; 237*b3be2f66SJerome Forissier } while (fill != 0); 238*b3be2f66SJerome Forissier 239*b3be2f66SJerome Forissier /* backwards increment the len-bit code huff */ 240*b3be2f66SJerome Forissier incr = 1U << (len - 1); 241*b3be2f66SJerome Forissier while (huff & incr) 242*b3be2f66SJerome Forissier incr >>= 1; 243*b3be2f66SJerome Forissier if (incr != 0) { 244*b3be2f66SJerome Forissier huff &= incr - 1; 245*b3be2f66SJerome Forissier huff += incr; 246*b3be2f66SJerome Forissier } 247*b3be2f66SJerome Forissier else 248*b3be2f66SJerome Forissier huff = 0; 249*b3be2f66SJerome Forissier 250*b3be2f66SJerome Forissier /* go to next symbol, update count, len */ 251*b3be2f66SJerome Forissier sym++; 252*b3be2f66SJerome Forissier if (--(count[len]) == 0) { 253*b3be2f66SJerome Forissier if (len == max) break; 254*b3be2f66SJerome Forissier len = lens[work[sym]]; 255*b3be2f66SJerome Forissier } 256*b3be2f66SJerome Forissier 257*b3be2f66SJerome Forissier /* create new sub-table if needed */ 258*b3be2f66SJerome Forissier if (len > root && (huff & mask) != low) { 259*b3be2f66SJerome Forissier /* if first time, transition to sub-tables */ 260*b3be2f66SJerome Forissier if (drop == 0) 261*b3be2f66SJerome Forissier drop = root; 262*b3be2f66SJerome Forissier 263*b3be2f66SJerome Forissier /* increment past last table */ 264*b3be2f66SJerome Forissier next += min; /* here min is 1 << curr */ 265*b3be2f66SJerome Forissier 266*b3be2f66SJerome Forissier /* determine length of next table */ 267*b3be2f66SJerome Forissier curr = len - drop; 268*b3be2f66SJerome Forissier left = (int)(1 << curr); 269*b3be2f66SJerome Forissier while (curr + drop < max) { 270*b3be2f66SJerome Forissier left -= count[curr + drop]; 271*b3be2f66SJerome Forissier if (left <= 0) break; 272*b3be2f66SJerome Forissier curr++; 273*b3be2f66SJerome Forissier left <<= 1; 274*b3be2f66SJerome Forissier } 275*b3be2f66SJerome Forissier 276*b3be2f66SJerome Forissier /* check for enough space */ 277*b3be2f66SJerome Forissier used += 1U << curr; 278*b3be2f66SJerome Forissier if ((type == LENS && used > ENOUGH_LENS) || 279*b3be2f66SJerome Forissier (type == DISTS && used > ENOUGH_DISTS)) 280*b3be2f66SJerome Forissier return 1; 281*b3be2f66SJerome Forissier 282*b3be2f66SJerome Forissier /* point entry in root table to sub-table */ 283*b3be2f66SJerome Forissier low = huff & mask; 284*b3be2f66SJerome Forissier (*table)[low].op = (unsigned char)curr; 285*b3be2f66SJerome Forissier (*table)[low].bits = (unsigned char)root; 286*b3be2f66SJerome Forissier (*table)[low].val = (unsigned short)(next - *table); 287*b3be2f66SJerome Forissier } 288*b3be2f66SJerome Forissier } 289*b3be2f66SJerome Forissier 290*b3be2f66SJerome Forissier /* fill in remaining table entry if code is incomplete (guaranteed to have 291*b3be2f66SJerome Forissier at most one remaining entry, since if the code is incomplete, the 292*b3be2f66SJerome Forissier maximum code length that was allowed to get this far is one bit) */ 293*b3be2f66SJerome Forissier if (huff != 0) { 294*b3be2f66SJerome Forissier here.op = (unsigned char)64; /* invalid code marker */ 295*b3be2f66SJerome Forissier here.bits = (unsigned char)(len - drop); 296*b3be2f66SJerome Forissier here.val = (unsigned short)0; 297*b3be2f66SJerome Forissier next[huff] = here; 298*b3be2f66SJerome Forissier } 299*b3be2f66SJerome Forissier 300*b3be2f66SJerome Forissier /* set return parameters */ 301*b3be2f66SJerome Forissier *table += used; 302*b3be2f66SJerome Forissier *bits = root; 303*b3be2f66SJerome Forissier return 0; 304*b3be2f66SJerome Forissier } 305