11bb92983SJerome Forissier // SPDX-License-Identifier: Zlib
2b3be2f66SJerome Forissier /* inffast.c -- fast decoding
3b3be2f66SJerome Forissier * Copyright (C) 1995-2017 Mark Adler
4b3be2f66SJerome Forissier * For conditions of distribution and use, see copyright notice in zlib.h
5b3be2f66SJerome Forissier */
6b3be2f66SJerome Forissier
7b3be2f66SJerome Forissier #include "zutil.h"
8b3be2f66SJerome Forissier #include "inftrees.h"
9b3be2f66SJerome Forissier #include "inflate.h"
10b3be2f66SJerome Forissier #include "inffast.h"
11b3be2f66SJerome Forissier
12b3be2f66SJerome Forissier #ifdef ASMINF
13b3be2f66SJerome Forissier # pragma message("Assembler code may have bugs -- use at your own risk")
14b3be2f66SJerome Forissier #else
15b3be2f66SJerome Forissier
16b3be2f66SJerome Forissier /*
17b3be2f66SJerome Forissier Decode literal, length, and distance codes and write out the resulting
18b3be2f66SJerome Forissier literal and match bytes until either not enough input or output is
19b3be2f66SJerome Forissier available, an end-of-block is encountered, or a data error is encountered.
20b3be2f66SJerome Forissier When large enough input and output buffers are supplied to inflate(), for
21b3be2f66SJerome Forissier example, a 16K input buffer and a 64K output buffer, more than 95% of the
22b3be2f66SJerome Forissier inflate execution time is spent in this routine.
23b3be2f66SJerome Forissier
24b3be2f66SJerome Forissier Entry assumptions:
25b3be2f66SJerome Forissier
26b3be2f66SJerome Forissier state->mode == LEN
27b3be2f66SJerome Forissier strm->avail_in >= 6
28b3be2f66SJerome Forissier strm->avail_out >= 258
29b3be2f66SJerome Forissier start >= strm->avail_out
30b3be2f66SJerome Forissier state->bits < 8
31b3be2f66SJerome Forissier
32b3be2f66SJerome Forissier On return, state->mode is one of:
33b3be2f66SJerome Forissier
34b3be2f66SJerome Forissier LEN -- ran out of enough output space or enough available input
35b3be2f66SJerome Forissier TYPE -- reached end of block code, inflate() to interpret next block
36b3be2f66SJerome Forissier BAD -- error in block data
37b3be2f66SJerome Forissier
38b3be2f66SJerome Forissier Notes:
39b3be2f66SJerome Forissier
40b3be2f66SJerome Forissier - The maximum input bits used by a length/distance pair is 15 bits for the
41b3be2f66SJerome Forissier length code, 5 bits for the length extra, 15 bits for the distance code,
42b3be2f66SJerome Forissier and 13 bits for the distance extra. This totals 48 bits, or six bytes.
43b3be2f66SJerome Forissier Therefore if strm->avail_in >= 6, then there is enough input to avoid
44b3be2f66SJerome Forissier checking for available input while decoding.
45b3be2f66SJerome Forissier
46b3be2f66SJerome Forissier - The maximum bytes that a single length/distance pair can output is 258
47b3be2f66SJerome Forissier bytes, which is the maximum length that can be coded. inflate_fast()
48b3be2f66SJerome Forissier requires strm->avail_out >= 258 for each loop to avoid checking for
49b3be2f66SJerome Forissier output space.
50b3be2f66SJerome Forissier */
inflate_fast(strm,start)51b3be2f66SJerome Forissier void ZLIB_INTERNAL inflate_fast(strm, start)
52b3be2f66SJerome Forissier z_streamp strm;
53b3be2f66SJerome Forissier unsigned start; /* inflate()'s starting value for strm->avail_out */
54b3be2f66SJerome Forissier {
55b3be2f66SJerome Forissier struct inflate_state FAR *state;
56b3be2f66SJerome Forissier z_const unsigned char FAR *in; /* local strm->next_in */
57b3be2f66SJerome Forissier z_const unsigned char FAR *last; /* have enough input while in < last */
58b3be2f66SJerome Forissier unsigned char FAR *out; /* local strm->next_out */
59b3be2f66SJerome Forissier unsigned char FAR *beg; /* inflate()'s initial strm->next_out */
60b3be2f66SJerome Forissier unsigned char FAR *end; /* while out < end, enough space available */
61b3be2f66SJerome Forissier #ifdef INFLATE_STRICT
62b3be2f66SJerome Forissier unsigned dmax; /* maximum distance from zlib header */
63b3be2f66SJerome Forissier #endif
64b3be2f66SJerome Forissier unsigned wsize; /* window size or zero if not using window */
65b3be2f66SJerome Forissier unsigned whave; /* valid bytes in the window */
66b3be2f66SJerome Forissier unsigned wnext; /* window write index */
67b3be2f66SJerome Forissier unsigned char FAR *window; /* allocated sliding window, if wsize != 0 */
68b3be2f66SJerome Forissier unsigned long hold; /* local strm->hold */
69b3be2f66SJerome Forissier unsigned bits; /* local strm->bits */
70b3be2f66SJerome Forissier code const FAR *lcode; /* local strm->lencode */
71b3be2f66SJerome Forissier code const FAR *dcode; /* local strm->distcode */
72b3be2f66SJerome Forissier unsigned lmask; /* mask for first level of length codes */
73b3be2f66SJerome Forissier unsigned dmask; /* mask for first level of distance codes */
74*dd65d970SJerome Forissier code const *here; /* retrieved table entry */
75b3be2f66SJerome Forissier unsigned op; /* code bits, operation, extra bits, or */
76b3be2f66SJerome Forissier /* window position, window bytes to copy */
77b3be2f66SJerome Forissier unsigned len; /* match length, unused bytes */
78b3be2f66SJerome Forissier unsigned dist; /* match distance */
79b3be2f66SJerome Forissier unsigned char FAR *from; /* where to copy match from */
80b3be2f66SJerome Forissier
81b3be2f66SJerome Forissier /* copy state to local variables */
82b3be2f66SJerome Forissier state = (struct inflate_state FAR *)strm->state;
83b3be2f66SJerome Forissier in = strm->next_in;
84b3be2f66SJerome Forissier last = in + (strm->avail_in - 5);
85b3be2f66SJerome Forissier out = strm->next_out;
86b3be2f66SJerome Forissier beg = out - (start - strm->avail_out);
87b3be2f66SJerome Forissier end = out + (strm->avail_out - 257);
88b3be2f66SJerome Forissier #ifdef INFLATE_STRICT
89b3be2f66SJerome Forissier dmax = state->dmax;
90b3be2f66SJerome Forissier #endif
91b3be2f66SJerome Forissier wsize = state->wsize;
92b3be2f66SJerome Forissier whave = state->whave;
93b3be2f66SJerome Forissier wnext = state->wnext;
94b3be2f66SJerome Forissier window = state->window;
95b3be2f66SJerome Forissier hold = state->hold;
96b3be2f66SJerome Forissier bits = state->bits;
97b3be2f66SJerome Forissier lcode = state->lencode;
98b3be2f66SJerome Forissier dcode = state->distcode;
99b3be2f66SJerome Forissier lmask = (1U << state->lenbits) - 1;
100b3be2f66SJerome Forissier dmask = (1U << state->distbits) - 1;
101b3be2f66SJerome Forissier
102b3be2f66SJerome Forissier /* decode literals and length/distances until end-of-block or not enough
103b3be2f66SJerome Forissier input data or output space */
104b3be2f66SJerome Forissier do {
105b3be2f66SJerome Forissier if (bits < 15) {
106b3be2f66SJerome Forissier hold += (unsigned long)(*in++) << bits;
107b3be2f66SJerome Forissier bits += 8;
108b3be2f66SJerome Forissier hold += (unsigned long)(*in++) << bits;
109b3be2f66SJerome Forissier bits += 8;
110b3be2f66SJerome Forissier }
111*dd65d970SJerome Forissier here = lcode + (hold & lmask);
112b3be2f66SJerome Forissier dolen:
113*dd65d970SJerome Forissier op = (unsigned)(here->bits);
114b3be2f66SJerome Forissier hold >>= op;
115b3be2f66SJerome Forissier bits -= op;
116*dd65d970SJerome Forissier op = (unsigned)(here->op);
117b3be2f66SJerome Forissier if (op == 0) { /* literal */
118*dd65d970SJerome Forissier Tracevv((stderr, here->val >= 0x20 && here->val < 0x7f ?
119b3be2f66SJerome Forissier "inflate: literal '%c'\n" :
120*dd65d970SJerome Forissier "inflate: literal 0x%02x\n", here->val));
121*dd65d970SJerome Forissier *out++ = (unsigned char)(here->val);
122b3be2f66SJerome Forissier }
123b3be2f66SJerome Forissier else if (op & 16) { /* length base */
124*dd65d970SJerome Forissier len = (unsigned)(here->val);
125b3be2f66SJerome Forissier op &= 15; /* number of extra bits */
126b3be2f66SJerome Forissier if (op) {
127b3be2f66SJerome Forissier if (bits < op) {
128b3be2f66SJerome Forissier hold += (unsigned long)(*in++) << bits;
129b3be2f66SJerome Forissier bits += 8;
130b3be2f66SJerome Forissier }
131b3be2f66SJerome Forissier len += (unsigned)hold & ((1U << op) - 1);
132b3be2f66SJerome Forissier hold >>= op;
133b3be2f66SJerome Forissier bits -= op;
134b3be2f66SJerome Forissier }
135b3be2f66SJerome Forissier Tracevv((stderr, "inflate: length %u\n", len));
136b3be2f66SJerome Forissier if (bits < 15) {
137b3be2f66SJerome Forissier hold += (unsigned long)(*in++) << bits;
138b3be2f66SJerome Forissier bits += 8;
139b3be2f66SJerome Forissier hold += (unsigned long)(*in++) << bits;
140b3be2f66SJerome Forissier bits += 8;
141b3be2f66SJerome Forissier }
142*dd65d970SJerome Forissier here = dcode + (hold & dmask);
143b3be2f66SJerome Forissier dodist:
144*dd65d970SJerome Forissier op = (unsigned)(here->bits);
145b3be2f66SJerome Forissier hold >>= op;
146b3be2f66SJerome Forissier bits -= op;
147*dd65d970SJerome Forissier op = (unsigned)(here->op);
148b3be2f66SJerome Forissier if (op & 16) { /* distance base */
149*dd65d970SJerome Forissier dist = (unsigned)(here->val);
150b3be2f66SJerome Forissier op &= 15; /* number of extra bits */
151b3be2f66SJerome Forissier if (bits < op) {
152b3be2f66SJerome Forissier hold += (unsigned long)(*in++) << bits;
153b3be2f66SJerome Forissier bits += 8;
154b3be2f66SJerome Forissier if (bits < op) {
155b3be2f66SJerome Forissier hold += (unsigned long)(*in++) << bits;
156b3be2f66SJerome Forissier bits += 8;
157b3be2f66SJerome Forissier }
158b3be2f66SJerome Forissier }
159b3be2f66SJerome Forissier dist += (unsigned)hold & ((1U << op) - 1);
160b3be2f66SJerome Forissier #ifdef INFLATE_STRICT
161b3be2f66SJerome Forissier if (dist > dmax) {
162b3be2f66SJerome Forissier strm->msg = (char *)"invalid distance too far back";
163b3be2f66SJerome Forissier state->mode = BAD;
164b3be2f66SJerome Forissier break;
165b3be2f66SJerome Forissier }
166b3be2f66SJerome Forissier #endif
167b3be2f66SJerome Forissier hold >>= op;
168b3be2f66SJerome Forissier bits -= op;
169b3be2f66SJerome Forissier Tracevv((stderr, "inflate: distance %u\n", dist));
170b3be2f66SJerome Forissier op = (unsigned)(out - beg); /* max distance in output */
171b3be2f66SJerome Forissier if (dist > op) { /* see if copy from window */
172b3be2f66SJerome Forissier op = dist - op; /* distance back in window */
173b3be2f66SJerome Forissier if (op > whave) {
174b3be2f66SJerome Forissier if (state->sane) {
175b3be2f66SJerome Forissier strm->msg =
176b3be2f66SJerome Forissier (char *)"invalid distance too far back";
177b3be2f66SJerome Forissier state->mode = BAD;
178b3be2f66SJerome Forissier break;
179b3be2f66SJerome Forissier }
180b3be2f66SJerome Forissier #ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR
181b3be2f66SJerome Forissier if (len <= op - whave) {
182b3be2f66SJerome Forissier do {
183b3be2f66SJerome Forissier *out++ = 0;
184b3be2f66SJerome Forissier } while (--len);
185b3be2f66SJerome Forissier continue;
186b3be2f66SJerome Forissier }
187b3be2f66SJerome Forissier len -= op - whave;
188b3be2f66SJerome Forissier do {
189b3be2f66SJerome Forissier *out++ = 0;
190b3be2f66SJerome Forissier } while (--op > whave);
191b3be2f66SJerome Forissier if (op == 0) {
192b3be2f66SJerome Forissier from = out - dist;
193b3be2f66SJerome Forissier do {
194b3be2f66SJerome Forissier *out++ = *from++;
195b3be2f66SJerome Forissier } while (--len);
196b3be2f66SJerome Forissier continue;
197b3be2f66SJerome Forissier }
198b3be2f66SJerome Forissier #endif
199b3be2f66SJerome Forissier }
200b3be2f66SJerome Forissier from = window;
201b3be2f66SJerome Forissier if (wnext == 0) { /* very common case */
202b3be2f66SJerome Forissier from += wsize - op;
203b3be2f66SJerome Forissier if (op < len) { /* some from window */
204b3be2f66SJerome Forissier len -= op;
205b3be2f66SJerome Forissier do {
206b3be2f66SJerome Forissier *out++ = *from++;
207b3be2f66SJerome Forissier } while (--op);
208b3be2f66SJerome Forissier from = out - dist; /* rest from output */
209b3be2f66SJerome Forissier }
210b3be2f66SJerome Forissier }
211b3be2f66SJerome Forissier else if (wnext < op) { /* wrap around window */
212b3be2f66SJerome Forissier from += wsize + wnext - op;
213b3be2f66SJerome Forissier op -= wnext;
214b3be2f66SJerome Forissier if (op < len) { /* some from end of window */
215b3be2f66SJerome Forissier len -= op;
216b3be2f66SJerome Forissier do {
217b3be2f66SJerome Forissier *out++ = *from++;
218b3be2f66SJerome Forissier } while (--op);
219b3be2f66SJerome Forissier from = window;
220b3be2f66SJerome Forissier if (wnext < len) { /* some from start of window */
221b3be2f66SJerome Forissier op = wnext;
222b3be2f66SJerome Forissier len -= op;
223b3be2f66SJerome Forissier do {
224b3be2f66SJerome Forissier *out++ = *from++;
225b3be2f66SJerome Forissier } while (--op);
226b3be2f66SJerome Forissier from = out - dist; /* rest from output */
227b3be2f66SJerome Forissier }
228b3be2f66SJerome Forissier }
229b3be2f66SJerome Forissier }
230b3be2f66SJerome Forissier else { /* contiguous in window */
231b3be2f66SJerome Forissier from += wnext - op;
232b3be2f66SJerome Forissier if (op < len) { /* some from window */
233b3be2f66SJerome Forissier len -= op;
234b3be2f66SJerome Forissier do {
235b3be2f66SJerome Forissier *out++ = *from++;
236b3be2f66SJerome Forissier } while (--op);
237b3be2f66SJerome Forissier from = out - dist; /* rest from output */
238b3be2f66SJerome Forissier }
239b3be2f66SJerome Forissier }
240b3be2f66SJerome Forissier while (len > 2) {
241b3be2f66SJerome Forissier *out++ = *from++;
242b3be2f66SJerome Forissier *out++ = *from++;
243b3be2f66SJerome Forissier *out++ = *from++;
244b3be2f66SJerome Forissier len -= 3;
245b3be2f66SJerome Forissier }
246b3be2f66SJerome Forissier if (len) {
247b3be2f66SJerome Forissier *out++ = *from++;
248b3be2f66SJerome Forissier if (len > 1)
249b3be2f66SJerome Forissier *out++ = *from++;
250b3be2f66SJerome Forissier }
251b3be2f66SJerome Forissier }
252b3be2f66SJerome Forissier else {
253b3be2f66SJerome Forissier from = out - dist; /* copy direct from output */
254b3be2f66SJerome Forissier do { /* minimum length is three */
255b3be2f66SJerome Forissier *out++ = *from++;
256b3be2f66SJerome Forissier *out++ = *from++;
257b3be2f66SJerome Forissier *out++ = *from++;
258b3be2f66SJerome Forissier len -= 3;
259b3be2f66SJerome Forissier } while (len > 2);
260b3be2f66SJerome Forissier if (len) {
261b3be2f66SJerome Forissier *out++ = *from++;
262b3be2f66SJerome Forissier if (len > 1)
263b3be2f66SJerome Forissier *out++ = *from++;
264b3be2f66SJerome Forissier }
265b3be2f66SJerome Forissier }
266b3be2f66SJerome Forissier }
267b3be2f66SJerome Forissier else if ((op & 64) == 0) { /* 2nd level distance code */
268*dd65d970SJerome Forissier here = dcode + here->val + (hold & ((1U << op) - 1));
269b3be2f66SJerome Forissier goto dodist;
270b3be2f66SJerome Forissier }
271b3be2f66SJerome Forissier else {
272b3be2f66SJerome Forissier strm->msg = (char *)"invalid distance code";
273b3be2f66SJerome Forissier state->mode = BAD;
274b3be2f66SJerome Forissier break;
275b3be2f66SJerome Forissier }
276b3be2f66SJerome Forissier }
277b3be2f66SJerome Forissier else if ((op & 64) == 0) { /* 2nd level length code */
278*dd65d970SJerome Forissier here = lcode + here->val + (hold & ((1U << op) - 1));
279b3be2f66SJerome Forissier goto dolen;
280b3be2f66SJerome Forissier }
281b3be2f66SJerome Forissier else if (op & 32) { /* end-of-block */
282b3be2f66SJerome Forissier Tracevv((stderr, "inflate: end of block\n"));
283b3be2f66SJerome Forissier state->mode = TYPE;
284b3be2f66SJerome Forissier break;
285b3be2f66SJerome Forissier }
286b3be2f66SJerome Forissier else {
287b3be2f66SJerome Forissier strm->msg = (char *)"invalid literal/length code";
288b3be2f66SJerome Forissier state->mode = BAD;
289b3be2f66SJerome Forissier break;
290b3be2f66SJerome Forissier }
291b3be2f66SJerome Forissier } while (in < last && out < end);
292b3be2f66SJerome Forissier
293b3be2f66SJerome Forissier /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
294b3be2f66SJerome Forissier len = bits >> 3;
295b3be2f66SJerome Forissier in -= len;
296b3be2f66SJerome Forissier bits -= len << 3;
297b3be2f66SJerome Forissier hold &= (1U << bits) - 1;
298b3be2f66SJerome Forissier
299b3be2f66SJerome Forissier /* update state and return */
300b3be2f66SJerome Forissier strm->next_in = in;
301b3be2f66SJerome Forissier strm->next_out = out;
302b3be2f66SJerome Forissier strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last));
303b3be2f66SJerome Forissier strm->avail_out = (unsigned)(out < end ?
304b3be2f66SJerome Forissier 257 + (end - out) : 257 - (out - end));
305b3be2f66SJerome Forissier state->hold = hold;
306b3be2f66SJerome Forissier state->bits = bits;
307b3be2f66SJerome Forissier return;
308b3be2f66SJerome Forissier }
309b3be2f66SJerome Forissier
310b3be2f66SJerome Forissier /*
311b3be2f66SJerome Forissier inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):
312b3be2f66SJerome Forissier - Using bit fields for code structure
313b3be2f66SJerome Forissier - Different op definition to avoid & for extra bits (do & for table bits)
314b3be2f66SJerome Forissier - Three separate decoding do-loops for direct, window, and wnext == 0
315b3be2f66SJerome Forissier - Special case for distance > 1 copies to do overlapped load and store copy
316b3be2f66SJerome Forissier - Explicit branch predictions (based on measured branch probabilities)
317b3be2f66SJerome Forissier - Deferring match copy and interspersed it with decoding subsequent codes
318b3be2f66SJerome Forissier - Swapping literal/length else
319b3be2f66SJerome Forissier - Swapping window/direct else
320b3be2f66SJerome Forissier - Larger unrolled copy loops (three is about right)
321b3be2f66SJerome Forissier - Moving len -= 3 statement into middle of loop
322b3be2f66SJerome Forissier */
323b3be2f66SJerome Forissier
324b3be2f66SJerome Forissier #endif /* !ASMINF */
325