11bb92983SJerome Forissier // SPDX-License-Identifier: Zlib
2b3be2f66SJerome Forissier /* inftrees.c -- generate Huffman trees for efficient decoding
3*dd65d970SJerome Forissier * Copyright (C) 1995-2022 Mark Adler
4b3be2f66SJerome Forissier * For conditions of distribution and use, see copyright notice in zlib.h
5b3be2f66SJerome Forissier */
6b3be2f66SJerome Forissier
7b3be2f66SJerome Forissier #include "zutil.h"
8b3be2f66SJerome Forissier #include "inftrees.h"
9b3be2f66SJerome Forissier
10b3be2f66SJerome Forissier #define MAXBITS 15
11b3be2f66SJerome Forissier
12b3be2f66SJerome Forissier const char inflate_copyright[] =
13*dd65d970SJerome Forissier " inflate 1.2.12 Copyright 1995-2022 Mark Adler ";
14b3be2f66SJerome Forissier /*
15b3be2f66SJerome Forissier If you use the zlib library in a product, an acknowledgment is welcome
16b3be2f66SJerome Forissier in the documentation of your product. If for some reason you cannot
17b3be2f66SJerome Forissier include such an acknowledgment, I would appreciate that you keep this
18b3be2f66SJerome Forissier copyright string in the executable of your product.
19b3be2f66SJerome Forissier */
20b3be2f66SJerome Forissier
21b3be2f66SJerome Forissier /*
22b3be2f66SJerome Forissier Build a set of tables to decode the provided canonical Huffman code.
23b3be2f66SJerome Forissier The code lengths are lens[0..codes-1]. The result starts at *table,
24b3be2f66SJerome Forissier whose indices are 0..2^bits-1. work is a writable array of at least
25b3be2f66SJerome Forissier lens shorts, which is used as a work area. type is the type of code
26b3be2f66SJerome Forissier to be generated, CODES, LENS, or DISTS. On return, zero is success,
27b3be2f66SJerome Forissier -1 is an invalid code, and +1 means that ENOUGH isn't enough. table
28b3be2f66SJerome Forissier on return points to the next available entry's address. bits is the
29b3be2f66SJerome Forissier requested root table index bits, and on return it is the actual root
30b3be2f66SJerome Forissier table index bits. It will differ if the request is greater than the
31b3be2f66SJerome Forissier longest code or if it is less than the shortest code.
32b3be2f66SJerome Forissier */
inflate_table(type,lens,codes,table,bits,work)33b3be2f66SJerome Forissier int ZLIB_INTERNAL inflate_table(type, lens, codes, table, bits, work)
34b3be2f66SJerome Forissier codetype type;
35b3be2f66SJerome Forissier unsigned short FAR *lens;
36b3be2f66SJerome Forissier unsigned codes;
37b3be2f66SJerome Forissier code FAR * FAR *table;
38b3be2f66SJerome Forissier unsigned FAR *bits;
39b3be2f66SJerome Forissier unsigned short FAR *work;
40b3be2f66SJerome Forissier {
41b3be2f66SJerome Forissier unsigned len; /* a code's length in bits */
42b3be2f66SJerome Forissier unsigned sym; /* index of code symbols */
43b3be2f66SJerome Forissier unsigned min, max; /* minimum and maximum code lengths */
44b3be2f66SJerome Forissier unsigned root; /* number of index bits for root table */
45b3be2f66SJerome Forissier unsigned curr; /* number of index bits for current table */
46b3be2f66SJerome Forissier unsigned drop; /* code bits to drop for sub-table */
47b3be2f66SJerome Forissier int left; /* number of prefix codes available */
48b3be2f66SJerome Forissier unsigned used; /* code entries in table used */
49b3be2f66SJerome Forissier unsigned huff; /* Huffman code */
50b3be2f66SJerome Forissier unsigned incr; /* for incrementing code, index */
51b3be2f66SJerome Forissier unsigned fill; /* index for replicating entries */
52b3be2f66SJerome Forissier unsigned low; /* low bits for current root entry */
53b3be2f66SJerome Forissier unsigned mask; /* mask for low root bits */
54b3be2f66SJerome Forissier code here; /* table entry for duplication */
55b3be2f66SJerome Forissier code FAR *next; /* next available space in table */
56b3be2f66SJerome Forissier const unsigned short FAR *base; /* base value table to use */
57b3be2f66SJerome Forissier const unsigned short FAR *extra; /* extra bits table to use */
58b3be2f66SJerome Forissier unsigned match; /* use base and extra for symbol >= match */
59b3be2f66SJerome Forissier unsigned short count[MAXBITS+1]; /* number of codes of each length */
60b3be2f66SJerome Forissier unsigned short offs[MAXBITS+1]; /* offsets in table for each length */
61b3be2f66SJerome Forissier static const unsigned short lbase[31] = { /* Length codes 257..285 base */
62b3be2f66SJerome Forissier 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
63b3be2f66SJerome Forissier 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0};
64b3be2f66SJerome Forissier static const unsigned short lext[31] = { /* Length codes 257..285 extra */
65b3be2f66SJerome Forissier 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18,
66*dd65d970SJerome Forissier 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 199, 202};
67b3be2f66SJerome Forissier static const unsigned short dbase[32] = { /* Distance codes 0..29 base */
68b3be2f66SJerome Forissier 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
69b3be2f66SJerome Forissier 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
70b3be2f66SJerome Forissier 8193, 12289, 16385, 24577, 0, 0};
71b3be2f66SJerome Forissier static const unsigned short dext[32] = { /* Distance codes 0..29 extra */
72b3be2f66SJerome Forissier 16, 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22,
73b3be2f66SJerome Forissier 23, 23, 24, 24, 25, 25, 26, 26, 27, 27,
74b3be2f66SJerome Forissier 28, 28, 29, 29, 64, 64};
75b3be2f66SJerome Forissier
76b3be2f66SJerome Forissier /*
77b3be2f66SJerome Forissier Process a set of code lengths to create a canonical Huffman code. The
78b3be2f66SJerome Forissier code lengths are lens[0..codes-1]. Each length corresponds to the
79b3be2f66SJerome Forissier symbols 0..codes-1. The Huffman code is generated by first sorting the
80b3be2f66SJerome Forissier symbols by length from short to long, and retaining the symbol order
81b3be2f66SJerome Forissier for codes with equal lengths. Then the code starts with all zero bits
82b3be2f66SJerome Forissier for the first code of the shortest length, and the codes are integer
83b3be2f66SJerome Forissier increments for the same length, and zeros are appended as the length
84b3be2f66SJerome Forissier increases. For the deflate format, these bits are stored backwards
85b3be2f66SJerome Forissier from their more natural integer increment ordering, and so when the
86b3be2f66SJerome Forissier decoding tables are built in the large loop below, the integer codes
87b3be2f66SJerome Forissier are incremented backwards.
88b3be2f66SJerome Forissier
89b3be2f66SJerome Forissier This routine assumes, but does not check, that all of the entries in
90b3be2f66SJerome Forissier lens[] are in the range 0..MAXBITS. The caller must assure this.
91b3be2f66SJerome Forissier 1..MAXBITS is interpreted as that code length. zero means that that
92b3be2f66SJerome Forissier symbol does not occur in this code.
93b3be2f66SJerome Forissier
94b3be2f66SJerome Forissier The codes are sorted by computing a count of codes for each length,
95b3be2f66SJerome Forissier creating from that a table of starting indices for each length in the
96b3be2f66SJerome Forissier sorted table, and then entering the symbols in order in the sorted
97b3be2f66SJerome Forissier table. The sorted table is work[], with that space being provided by
98b3be2f66SJerome Forissier the caller.
99b3be2f66SJerome Forissier
100b3be2f66SJerome Forissier The length counts are used for other purposes as well, i.e. finding
101b3be2f66SJerome Forissier the minimum and maximum length codes, determining if there are any
102b3be2f66SJerome Forissier codes at all, checking for a valid set of lengths, and looking ahead
103b3be2f66SJerome Forissier at length counts to determine sub-table sizes when building the
104b3be2f66SJerome Forissier decoding tables.
105b3be2f66SJerome Forissier */
106b3be2f66SJerome Forissier
107b3be2f66SJerome Forissier /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */
108b3be2f66SJerome Forissier for (len = 0; len <= MAXBITS; len++)
109b3be2f66SJerome Forissier count[len] = 0;
110b3be2f66SJerome Forissier for (sym = 0; sym < codes; sym++)
111b3be2f66SJerome Forissier count[lens[sym]]++;
112b3be2f66SJerome Forissier
113b3be2f66SJerome Forissier /* bound code lengths, force root to be within code lengths */
114b3be2f66SJerome Forissier root = *bits;
115b3be2f66SJerome Forissier for (max = MAXBITS; max >= 1; max--)
116b3be2f66SJerome Forissier if (count[max] != 0) break;
117b3be2f66SJerome Forissier if (root > max) root = max;
118b3be2f66SJerome Forissier if (max == 0) { /* no symbols to code at all */
119b3be2f66SJerome Forissier here.op = (unsigned char)64; /* invalid code marker */
120b3be2f66SJerome Forissier here.bits = (unsigned char)1;
121b3be2f66SJerome Forissier here.val = (unsigned short)0;
122b3be2f66SJerome Forissier *(*table)++ = here; /* make a table to force an error */
123b3be2f66SJerome Forissier *(*table)++ = here;
124b3be2f66SJerome Forissier *bits = 1;
125b3be2f66SJerome Forissier return 0; /* no symbols, but wait for decoding to report error */
126b3be2f66SJerome Forissier }
127b3be2f66SJerome Forissier for (min = 1; min < max; min++)
128b3be2f66SJerome Forissier if (count[min] != 0) break;
129b3be2f66SJerome Forissier if (root < min) root = min;
130b3be2f66SJerome Forissier
131b3be2f66SJerome Forissier /* check for an over-subscribed or incomplete set of lengths */
132b3be2f66SJerome Forissier left = 1;
133b3be2f66SJerome Forissier for (len = 1; len <= MAXBITS; len++) {
134b3be2f66SJerome Forissier left <<= 1;
135b3be2f66SJerome Forissier left -= count[len];
136b3be2f66SJerome Forissier if (left < 0) return -1; /* over-subscribed */
137b3be2f66SJerome Forissier }
138b3be2f66SJerome Forissier if (left > 0 && (type == CODES || max != 1))
139b3be2f66SJerome Forissier return -1; /* incomplete set */
140b3be2f66SJerome Forissier
141b3be2f66SJerome Forissier /* generate offsets into symbol table for each length for sorting */
142b3be2f66SJerome Forissier offs[1] = 0;
143b3be2f66SJerome Forissier for (len = 1; len < MAXBITS; len++)
144b3be2f66SJerome Forissier offs[len + 1] = offs[len] + count[len];
145b3be2f66SJerome Forissier
146b3be2f66SJerome Forissier /* sort symbols by length, by symbol order within each length */
147b3be2f66SJerome Forissier for (sym = 0; sym < codes; sym++)
148b3be2f66SJerome Forissier if (lens[sym] != 0) work[offs[lens[sym]]++] = (unsigned short)sym;
149b3be2f66SJerome Forissier
150b3be2f66SJerome Forissier /*
151b3be2f66SJerome Forissier Create and fill in decoding tables. In this loop, the table being
152b3be2f66SJerome Forissier filled is at next and has curr index bits. The code being used is huff
153b3be2f66SJerome Forissier with length len. That code is converted to an index by dropping drop
154b3be2f66SJerome Forissier bits off of the bottom. For codes where len is less than drop + curr,
155b3be2f66SJerome Forissier those top drop + curr - len bits are incremented through all values to
156b3be2f66SJerome Forissier fill the table with replicated entries.
157b3be2f66SJerome Forissier
158b3be2f66SJerome Forissier root is the number of index bits for the root table. When len exceeds
159b3be2f66SJerome Forissier root, sub-tables are created pointed to by the root entry with an index
160b3be2f66SJerome Forissier of the low root bits of huff. This is saved in low to check for when a
161b3be2f66SJerome Forissier new sub-table should be started. drop is zero when the root table is
162b3be2f66SJerome Forissier being filled, and drop is root when sub-tables are being filled.
163b3be2f66SJerome Forissier
164b3be2f66SJerome Forissier When a new sub-table is needed, it is necessary to look ahead in the
165b3be2f66SJerome Forissier code lengths to determine what size sub-table is needed. The length
166b3be2f66SJerome Forissier counts are used for this, and so count[] is decremented as codes are
167b3be2f66SJerome Forissier entered in the tables.
168b3be2f66SJerome Forissier
169b3be2f66SJerome Forissier used keeps track of how many table entries have been allocated from the
170b3be2f66SJerome Forissier provided *table space. It is checked for LENS and DIST tables against
171b3be2f66SJerome Forissier the constants ENOUGH_LENS and ENOUGH_DISTS to guard against changes in
172b3be2f66SJerome Forissier the initial root table size constants. See the comments in inftrees.h
173b3be2f66SJerome Forissier for more information.
174b3be2f66SJerome Forissier
175b3be2f66SJerome Forissier sym increments through all symbols, and the loop terminates when
176b3be2f66SJerome Forissier all codes of length max, i.e. all codes, have been processed. This
177b3be2f66SJerome Forissier routine permits incomplete codes, so another loop after this one fills
178b3be2f66SJerome Forissier in the rest of the decoding tables with invalid code markers.
179b3be2f66SJerome Forissier */
180b3be2f66SJerome Forissier
181b3be2f66SJerome Forissier /* set up for code type */
182b3be2f66SJerome Forissier switch (type) {
183b3be2f66SJerome Forissier case CODES:
184b3be2f66SJerome Forissier base = extra = work; /* dummy value--not used */
185b3be2f66SJerome Forissier match = 20;
186b3be2f66SJerome Forissier break;
187b3be2f66SJerome Forissier case LENS:
188b3be2f66SJerome Forissier base = lbase;
189b3be2f66SJerome Forissier extra = lext;
190b3be2f66SJerome Forissier match = 257;
191b3be2f66SJerome Forissier break;
192b3be2f66SJerome Forissier default: /* DISTS */
193b3be2f66SJerome Forissier base = dbase;
194b3be2f66SJerome Forissier extra = dext;
195b3be2f66SJerome Forissier match = 0;
196b3be2f66SJerome Forissier }
197b3be2f66SJerome Forissier
198b3be2f66SJerome Forissier /* initialize state for loop */
199b3be2f66SJerome Forissier huff = 0; /* starting code */
200b3be2f66SJerome Forissier sym = 0; /* starting code symbol */
201b3be2f66SJerome Forissier len = min; /* starting code length */
202b3be2f66SJerome Forissier next = *table; /* current table to fill in */
203b3be2f66SJerome Forissier curr = root; /* current table index bits */
204b3be2f66SJerome Forissier drop = 0; /* current bits to drop from code for index */
205b3be2f66SJerome Forissier low = (unsigned)(-1); /* trigger new sub-table when len > root */
206b3be2f66SJerome Forissier used = 1U << root; /* use root table entries */
207b3be2f66SJerome Forissier mask = used - 1; /* mask for comparing low */
208b3be2f66SJerome Forissier
209b3be2f66SJerome Forissier /* check available table space */
210b3be2f66SJerome Forissier if ((type == LENS && used > ENOUGH_LENS) ||
211b3be2f66SJerome Forissier (type == DISTS && used > ENOUGH_DISTS))
212b3be2f66SJerome Forissier return 1;
213b3be2f66SJerome Forissier
214b3be2f66SJerome Forissier /* process all codes and make table entries */
215b3be2f66SJerome Forissier for (;;) {
216b3be2f66SJerome Forissier /* create table entry */
217b3be2f66SJerome Forissier here.bits = (unsigned char)(len - drop);
218b3be2f66SJerome Forissier if (work[sym] + 1U < match) {
219b3be2f66SJerome Forissier here.op = (unsigned char)0;
220b3be2f66SJerome Forissier here.val = work[sym];
221b3be2f66SJerome Forissier }
222b3be2f66SJerome Forissier else if (work[sym] >= match) {
223b3be2f66SJerome Forissier here.op = (unsigned char)(extra[work[sym] - match]);
224b3be2f66SJerome Forissier here.val = base[work[sym] - match];
225b3be2f66SJerome Forissier }
226b3be2f66SJerome Forissier else {
227b3be2f66SJerome Forissier here.op = (unsigned char)(32 + 64); /* end of block */
228b3be2f66SJerome Forissier here.val = 0;
229b3be2f66SJerome Forissier }
230b3be2f66SJerome Forissier
231b3be2f66SJerome Forissier /* replicate for those indices with low len bits equal to huff */
232b3be2f66SJerome Forissier incr = 1U << (len - drop);
233b3be2f66SJerome Forissier fill = 1U << curr;
234b3be2f66SJerome Forissier min = fill; /* save offset to next table */
235b3be2f66SJerome Forissier do {
236b3be2f66SJerome Forissier fill -= incr;
237b3be2f66SJerome Forissier next[(huff >> drop) + fill] = here;
238b3be2f66SJerome Forissier } while (fill != 0);
239b3be2f66SJerome Forissier
240b3be2f66SJerome Forissier /* backwards increment the len-bit code huff */
241b3be2f66SJerome Forissier incr = 1U << (len - 1);
242b3be2f66SJerome Forissier while (huff & incr)
243b3be2f66SJerome Forissier incr >>= 1;
244b3be2f66SJerome Forissier if (incr != 0) {
245b3be2f66SJerome Forissier huff &= incr - 1;
246b3be2f66SJerome Forissier huff += incr;
247b3be2f66SJerome Forissier }
248b3be2f66SJerome Forissier else
249b3be2f66SJerome Forissier huff = 0;
250b3be2f66SJerome Forissier
251b3be2f66SJerome Forissier /* go to next symbol, update count, len */
252b3be2f66SJerome Forissier sym++;
253b3be2f66SJerome Forissier if (--(count[len]) == 0) {
254b3be2f66SJerome Forissier if (len == max) break;
255b3be2f66SJerome Forissier len = lens[work[sym]];
256b3be2f66SJerome Forissier }
257b3be2f66SJerome Forissier
258b3be2f66SJerome Forissier /* create new sub-table if needed */
259b3be2f66SJerome Forissier if (len > root && (huff & mask) != low) {
260b3be2f66SJerome Forissier /* if first time, transition to sub-tables */
261b3be2f66SJerome Forissier if (drop == 0)
262b3be2f66SJerome Forissier drop = root;
263b3be2f66SJerome Forissier
264b3be2f66SJerome Forissier /* increment past last table */
265b3be2f66SJerome Forissier next += min; /* here min is 1 << curr */
266b3be2f66SJerome Forissier
267b3be2f66SJerome Forissier /* determine length of next table */
268b3be2f66SJerome Forissier curr = len - drop;
269b3be2f66SJerome Forissier left = (int)(1 << curr);
270b3be2f66SJerome Forissier while (curr + drop < max) {
271b3be2f66SJerome Forissier left -= count[curr + drop];
272b3be2f66SJerome Forissier if (left <= 0) break;
273b3be2f66SJerome Forissier curr++;
274b3be2f66SJerome Forissier left <<= 1;
275b3be2f66SJerome Forissier }
276b3be2f66SJerome Forissier
277b3be2f66SJerome Forissier /* check for enough space */
278b3be2f66SJerome Forissier used += 1U << curr;
279b3be2f66SJerome Forissier if ((type == LENS && used > ENOUGH_LENS) ||
280b3be2f66SJerome Forissier (type == DISTS && used > ENOUGH_DISTS))
281b3be2f66SJerome Forissier return 1;
282b3be2f66SJerome Forissier
283b3be2f66SJerome Forissier /* point entry in root table to sub-table */
284b3be2f66SJerome Forissier low = huff & mask;
285b3be2f66SJerome Forissier (*table)[low].op = (unsigned char)curr;
286b3be2f66SJerome Forissier (*table)[low].bits = (unsigned char)root;
287b3be2f66SJerome Forissier (*table)[low].val = (unsigned short)(next - *table);
288b3be2f66SJerome Forissier }
289b3be2f66SJerome Forissier }
290b3be2f66SJerome Forissier
291b3be2f66SJerome Forissier /* fill in remaining table entry if code is incomplete (guaranteed to have
292b3be2f66SJerome Forissier at most one remaining entry, since if the code is incomplete, the
293b3be2f66SJerome Forissier maximum code length that was allowed to get this far is one bit) */
294b3be2f66SJerome Forissier if (huff != 0) {
295b3be2f66SJerome Forissier here.op = (unsigned char)64; /* invalid code marker */
296b3be2f66SJerome Forissier here.bits = (unsigned char)(len - drop);
297b3be2f66SJerome Forissier here.val = (unsigned short)0;
298b3be2f66SJerome Forissier next[huff] = here;
299b3be2f66SJerome Forissier }
300b3be2f66SJerome Forissier
301b3be2f66SJerome Forissier /* set return parameters */
302b3be2f66SJerome Forissier *table += used;
303b3be2f66SJerome Forissier *bits = root;
304b3be2f66SJerome Forissier return 0;
305b3be2f66SJerome Forissier }
306