xref: /rk3399_rockchip-uboot/lib/zlib/inffast.c (revision e89516f031dbf711b71e6ee4d131cdc8b9946fb0)
1*e89516f0SMike Frysinger /* inffast.c -- fast decoding
2*e89516f0SMike Frysinger  * Copyright (C) 1995-2004 Mark Adler
3*e89516f0SMike Frysinger  * For conditions of distribution and use, see copyright notice in zlib.h
4*e89516f0SMike Frysinger  */
5*e89516f0SMike Frysinger 
6*e89516f0SMike Frysinger /* U-boot: we already included these
7*e89516f0SMike Frysinger #include "zutil.h"
8*e89516f0SMike Frysinger #include "inftrees.h"
9*e89516f0SMike Frysinger #include "inflate.h"
10*e89516f0SMike Frysinger #include "inffast.h"
11*e89516f0SMike Frysinger */
12*e89516f0SMike Frysinger 
13*e89516f0SMike Frysinger #ifndef ASMINF
14*e89516f0SMike Frysinger 
15*e89516f0SMike Frysinger /* Allow machine dependent optimization for post-increment or pre-increment.
16*e89516f0SMike Frysinger    Based on testing to date,
17*e89516f0SMike Frysinger    Pre-increment preferred for:
18*e89516f0SMike Frysinger    - PowerPC G3 (Adler)
19*e89516f0SMike Frysinger    - MIPS R5000 (Randers-Pehrson)
20*e89516f0SMike Frysinger    Post-increment preferred for:
21*e89516f0SMike Frysinger    - none
22*e89516f0SMike Frysinger    No measurable difference:
23*e89516f0SMike Frysinger    - Pentium III (Anderson)
24*e89516f0SMike Frysinger    - M68060 (Nikl)
25*e89516f0SMike Frysinger  */
26*e89516f0SMike Frysinger #ifdef POSTINC
27*e89516f0SMike Frysinger #  define OFF 0
28*e89516f0SMike Frysinger #  define PUP(a) *(a)++
29*e89516f0SMike Frysinger #else
30*e89516f0SMike Frysinger #  define OFF 1
31*e89516f0SMike Frysinger #  define PUP(a) *++(a)
32*e89516f0SMike Frysinger #endif
33*e89516f0SMike Frysinger 
34*e89516f0SMike Frysinger /*
35*e89516f0SMike Frysinger    Decode literal, length, and distance codes and write out the resulting
36*e89516f0SMike Frysinger    literal and match bytes until either not enough input or output is
37*e89516f0SMike Frysinger    available, an end-of-block is encountered, or a data error is encountered.
38*e89516f0SMike Frysinger    When large enough input and output buffers are supplied to inflate(), for
39*e89516f0SMike Frysinger    example, a 16K input buffer and a 64K output buffer, more than 95% of the
40*e89516f0SMike Frysinger    inflate execution time is spent in this routine.
41*e89516f0SMike Frysinger 
42*e89516f0SMike Frysinger    Entry assumptions:
43*e89516f0SMike Frysinger 
44*e89516f0SMike Frysinger         state->mode == LEN
45*e89516f0SMike Frysinger         strm->avail_in >= 6
46*e89516f0SMike Frysinger         strm->avail_out >= 258
47*e89516f0SMike Frysinger         start >= strm->avail_out
48*e89516f0SMike Frysinger         state->bits < 8
49*e89516f0SMike Frysinger 
50*e89516f0SMike Frysinger    On return, state->mode is one of:
51*e89516f0SMike Frysinger 
52*e89516f0SMike Frysinger         LEN -- ran out of enough output space or enough available input
53*e89516f0SMike Frysinger         TYPE -- reached end of block code, inflate() to interpret next block
54*e89516f0SMike Frysinger         BAD -- error in block data
55*e89516f0SMike Frysinger 
56*e89516f0SMike Frysinger    Notes:
57*e89516f0SMike Frysinger 
58*e89516f0SMike Frysinger     - The maximum input bits used by a length/distance pair is 15 bits for the
59*e89516f0SMike Frysinger       length code, 5 bits for the length extra, 15 bits for the distance code,
60*e89516f0SMike Frysinger       and 13 bits for the distance extra.  This totals 48 bits, or six bytes.
61*e89516f0SMike Frysinger       Therefore if strm->avail_in >= 6, then there is enough input to avoid
62*e89516f0SMike Frysinger       checking for available input while decoding.
63*e89516f0SMike Frysinger 
64*e89516f0SMike Frysinger     - The maximum bytes that a single length/distance pair can output is 258
65*e89516f0SMike Frysinger       bytes, which is the maximum length that can be coded.  inflate_fast()
66*e89516f0SMike Frysinger       requires strm->avail_out >= 258 for each loop to avoid checking for
67*e89516f0SMike Frysinger       output space.
68*e89516f0SMike Frysinger  */
69*e89516f0SMike Frysinger void inflate_fast(strm, start)
70*e89516f0SMike Frysinger z_streamp strm;
71*e89516f0SMike Frysinger unsigned start;         /* inflate()'s starting value for strm->avail_out */
72*e89516f0SMike Frysinger {
73*e89516f0SMike Frysinger     struct inflate_state FAR *state;
74*e89516f0SMike Frysinger     unsigned char FAR *in;      /* local strm->next_in */
75*e89516f0SMike Frysinger     unsigned char FAR *last;    /* while in < last, enough input available */
76*e89516f0SMike Frysinger     unsigned char FAR *out;     /* local strm->next_out */
77*e89516f0SMike Frysinger     unsigned char FAR *beg;     /* inflate()'s initial strm->next_out */
78*e89516f0SMike Frysinger     unsigned char FAR *end;     /* while out < end, enough space available */
79*e89516f0SMike Frysinger #ifdef INFLATE_STRICT
80*e89516f0SMike Frysinger     unsigned dmax;              /* maximum distance from zlib header */
81*e89516f0SMike Frysinger #endif
82*e89516f0SMike Frysinger     unsigned wsize;             /* window size or zero if not using window */
83*e89516f0SMike Frysinger     unsigned whave;             /* valid bytes in the window */
84*e89516f0SMike Frysinger     unsigned write;             /* window write index */
85*e89516f0SMike Frysinger     unsigned char FAR *window;  /* allocated sliding window, if wsize != 0 */
86*e89516f0SMike Frysinger     unsigned long hold;         /* local strm->hold */
87*e89516f0SMike Frysinger     unsigned bits;              /* local strm->bits */
88*e89516f0SMike Frysinger     code const FAR *lcode;      /* local strm->lencode */
89*e89516f0SMike Frysinger     code const FAR *dcode;      /* local strm->distcode */
90*e89516f0SMike Frysinger     unsigned lmask;             /* mask for first level of length codes */
91*e89516f0SMike Frysinger     unsigned dmask;             /* mask for first level of distance codes */
92*e89516f0SMike Frysinger     code this;                  /* retrieved table entry */
93*e89516f0SMike Frysinger     unsigned op;                /* code bits, operation, extra bits, or */
94*e89516f0SMike Frysinger                                 /*  window position, window bytes to copy */
95*e89516f0SMike Frysinger     unsigned len;               /* match length, unused bytes */
96*e89516f0SMike Frysinger     unsigned dist;              /* match distance */
97*e89516f0SMike Frysinger     unsigned char FAR *from;    /* where to copy match from */
98*e89516f0SMike Frysinger 
99*e89516f0SMike Frysinger     /* copy state to local variables */
100*e89516f0SMike Frysinger     state = (struct inflate_state FAR *)strm->state;
101*e89516f0SMike Frysinger     in = strm->next_in - OFF;
102*e89516f0SMike Frysinger     last = in + (strm->avail_in - 5);
103*e89516f0SMike Frysinger     out = strm->next_out - OFF;
104*e89516f0SMike Frysinger     beg = out - (start - strm->avail_out);
105*e89516f0SMike Frysinger     end = out + (strm->avail_out - 257);
106*e89516f0SMike Frysinger #ifdef INFLATE_STRICT
107*e89516f0SMike Frysinger     dmax = state->dmax;
108*e89516f0SMike Frysinger #endif
109*e89516f0SMike Frysinger     wsize = state->wsize;
110*e89516f0SMike Frysinger     whave = state->whave;
111*e89516f0SMike Frysinger     write = state->write;
112*e89516f0SMike Frysinger     window = state->window;
113*e89516f0SMike Frysinger     hold = state->hold;
114*e89516f0SMike Frysinger     bits = state->bits;
115*e89516f0SMike Frysinger     lcode = state->lencode;
116*e89516f0SMike Frysinger     dcode = state->distcode;
117*e89516f0SMike Frysinger     lmask = (1U << state->lenbits) - 1;
118*e89516f0SMike Frysinger     dmask = (1U << state->distbits) - 1;
119*e89516f0SMike Frysinger 
120*e89516f0SMike Frysinger     /* decode literals and length/distances until end-of-block or not enough
121*e89516f0SMike Frysinger        input data or output space */
122*e89516f0SMike Frysinger     do {
123*e89516f0SMike Frysinger         if (bits < 15) {
124*e89516f0SMike Frysinger             hold += (unsigned long)(PUP(in)) << bits;
125*e89516f0SMike Frysinger             bits += 8;
126*e89516f0SMike Frysinger             hold += (unsigned long)(PUP(in)) << bits;
127*e89516f0SMike Frysinger             bits += 8;
128*e89516f0SMike Frysinger         }
129*e89516f0SMike Frysinger         this = lcode[hold & lmask];
130*e89516f0SMike Frysinger       dolen:
131*e89516f0SMike Frysinger         op = (unsigned)(this.bits);
132*e89516f0SMike Frysinger         hold >>= op;
133*e89516f0SMike Frysinger         bits -= op;
134*e89516f0SMike Frysinger         op = (unsigned)(this.op);
135*e89516f0SMike Frysinger         if (op == 0) {                          /* literal */
136*e89516f0SMike Frysinger             Tracevv((stderr, this.val >= 0x20 && this.val < 0x7f ?
137*e89516f0SMike Frysinger                     "inflate:         literal '%c'\n" :
138*e89516f0SMike Frysinger                     "inflate:         literal 0x%02x\n", this.val));
139*e89516f0SMike Frysinger             PUP(out) = (unsigned char)(this.val);
140*e89516f0SMike Frysinger         }
141*e89516f0SMike Frysinger         else if (op & 16) {                     /* length base */
142*e89516f0SMike Frysinger             len = (unsigned)(this.val);
143*e89516f0SMike Frysinger             op &= 15;                           /* number of extra bits */
144*e89516f0SMike Frysinger             if (op) {
145*e89516f0SMike Frysinger                 if (bits < op) {
146*e89516f0SMike Frysinger                     hold += (unsigned long)(PUP(in)) << bits;
147*e89516f0SMike Frysinger                     bits += 8;
148*e89516f0SMike Frysinger                 }
149*e89516f0SMike Frysinger                 len += (unsigned)hold & ((1U << op) - 1);
150*e89516f0SMike Frysinger                 hold >>= op;
151*e89516f0SMike Frysinger                 bits -= op;
152*e89516f0SMike Frysinger             }
153*e89516f0SMike Frysinger             Tracevv((stderr, "inflate:         length %u\n", len));
154*e89516f0SMike Frysinger             if (bits < 15) {
155*e89516f0SMike Frysinger                 hold += (unsigned long)(PUP(in)) << bits;
156*e89516f0SMike Frysinger                 bits += 8;
157*e89516f0SMike Frysinger                 hold += (unsigned long)(PUP(in)) << bits;
158*e89516f0SMike Frysinger                 bits += 8;
159*e89516f0SMike Frysinger             }
160*e89516f0SMike Frysinger             this = dcode[hold & dmask];
161*e89516f0SMike Frysinger           dodist:
162*e89516f0SMike Frysinger             op = (unsigned)(this.bits);
163*e89516f0SMike Frysinger             hold >>= op;
164*e89516f0SMike Frysinger             bits -= op;
165*e89516f0SMike Frysinger             op = (unsigned)(this.op);
166*e89516f0SMike Frysinger             if (op & 16) {                      /* distance base */
167*e89516f0SMike Frysinger                 dist = (unsigned)(this.val);
168*e89516f0SMike Frysinger                 op &= 15;                       /* number of extra bits */
169*e89516f0SMike Frysinger                 if (bits < op) {
170*e89516f0SMike Frysinger                     hold += (unsigned long)(PUP(in)) << bits;
171*e89516f0SMike Frysinger                     bits += 8;
172*e89516f0SMike Frysinger                     if (bits < op) {
173*e89516f0SMike Frysinger                         hold += (unsigned long)(PUP(in)) << bits;
174*e89516f0SMike Frysinger                         bits += 8;
175*e89516f0SMike Frysinger                     }
176*e89516f0SMike Frysinger                 }
177*e89516f0SMike Frysinger                 dist += (unsigned)hold & ((1U << op) - 1);
178*e89516f0SMike Frysinger #ifdef INFLATE_STRICT
179*e89516f0SMike Frysinger                 if (dist > dmax) {
180*e89516f0SMike Frysinger                     strm->msg = (char *)"invalid distance too far back";
181*e89516f0SMike Frysinger                     state->mode = BAD;
182*e89516f0SMike Frysinger                     break;
183*e89516f0SMike Frysinger                 }
184*e89516f0SMike Frysinger #endif
185*e89516f0SMike Frysinger                 hold >>= op;
186*e89516f0SMike Frysinger                 bits -= op;
187*e89516f0SMike Frysinger                 Tracevv((stderr, "inflate:         distance %u\n", dist));
188*e89516f0SMike Frysinger                 op = (unsigned)(out - beg);     /* max distance in output */
189*e89516f0SMike Frysinger                 if (dist > op) {                /* see if copy from window */
190*e89516f0SMike Frysinger                     op = dist - op;             /* distance back in window */
191*e89516f0SMike Frysinger                     if (op > whave) {
192*e89516f0SMike Frysinger                         strm->msg = (char *)"invalid distance too far back";
193*e89516f0SMike Frysinger                         state->mode = BAD;
194*e89516f0SMike Frysinger                         break;
195*e89516f0SMike Frysinger                     }
196*e89516f0SMike Frysinger                     from = window - OFF;
197*e89516f0SMike Frysinger                     if (write == 0) {           /* very common case */
198*e89516f0SMike Frysinger                         from += wsize - op;
199*e89516f0SMike Frysinger                         if (op < len) {         /* some from window */
200*e89516f0SMike Frysinger                             len -= op;
201*e89516f0SMike Frysinger                             do {
202*e89516f0SMike Frysinger                                 PUP(out) = PUP(from);
203*e89516f0SMike Frysinger                             } while (--op);
204*e89516f0SMike Frysinger                             from = out - dist;  /* rest from output */
205*e89516f0SMike Frysinger                         }
206*e89516f0SMike Frysinger                     }
207*e89516f0SMike Frysinger                     else if (write < op) {      /* wrap around window */
208*e89516f0SMike Frysinger                         from += wsize + write - op;
209*e89516f0SMike Frysinger                         op -= write;
210*e89516f0SMike Frysinger                         if (op < len) {         /* some from end of window */
211*e89516f0SMike Frysinger                             len -= op;
212*e89516f0SMike Frysinger                             do {
213*e89516f0SMike Frysinger                                 PUP(out) = PUP(from);
214*e89516f0SMike Frysinger                             } while (--op);
215*e89516f0SMike Frysinger                             from = window - OFF;
216*e89516f0SMike Frysinger                             if (write < len) {  /* some from start of window */
217*e89516f0SMike Frysinger                                 op = write;
218*e89516f0SMike Frysinger                                 len -= op;
219*e89516f0SMike Frysinger                                 do {
220*e89516f0SMike Frysinger                                     PUP(out) = PUP(from);
221*e89516f0SMike Frysinger                                 } while (--op);
222*e89516f0SMike Frysinger                                 from = out - dist;      /* rest from output */
223*e89516f0SMike Frysinger                             }
224*e89516f0SMike Frysinger                         }
225*e89516f0SMike Frysinger                     }
226*e89516f0SMike Frysinger                     else {                      /* contiguous in window */
227*e89516f0SMike Frysinger                         from += write - op;
228*e89516f0SMike Frysinger                         if (op < len) {         /* some from window */
229*e89516f0SMike Frysinger                             len -= op;
230*e89516f0SMike Frysinger                             do {
231*e89516f0SMike Frysinger                                 PUP(out) = PUP(from);
232*e89516f0SMike Frysinger                             } while (--op);
233*e89516f0SMike Frysinger                             from = out - dist;  /* rest from output */
234*e89516f0SMike Frysinger                         }
235*e89516f0SMike Frysinger                     }
236*e89516f0SMike Frysinger                     while (len > 2) {
237*e89516f0SMike Frysinger                         PUP(out) = PUP(from);
238*e89516f0SMike Frysinger                         PUP(out) = PUP(from);
239*e89516f0SMike Frysinger                         PUP(out) = PUP(from);
240*e89516f0SMike Frysinger                         len -= 3;
241*e89516f0SMike Frysinger                     }
242*e89516f0SMike Frysinger                     if (len) {
243*e89516f0SMike Frysinger                         PUP(out) = PUP(from);
244*e89516f0SMike Frysinger                         if (len > 1)
245*e89516f0SMike Frysinger                             PUP(out) = PUP(from);
246*e89516f0SMike Frysinger                     }
247*e89516f0SMike Frysinger                 }
248*e89516f0SMike Frysinger                 else {
249*e89516f0SMike Frysinger 		    unsigned short *sout;
250*e89516f0SMike Frysinger 		    unsigned long loops;
251*e89516f0SMike Frysinger 
252*e89516f0SMike Frysinger                     from = out - dist;          /* copy direct from output */
253*e89516f0SMike Frysinger                     /* minimum length is three */
254*e89516f0SMike Frysinger 		    /* Align out addr */
255*e89516f0SMike Frysinger 		    if (!((long)(out - 1 + OFF) & 1)) {
256*e89516f0SMike Frysinger 			PUP(out) = PUP(from);
257*e89516f0SMike Frysinger 			len--;
258*e89516f0SMike Frysinger 		    }
259*e89516f0SMike Frysinger 		    sout = (unsigned short *)(out - OFF);
260*e89516f0SMike Frysinger 		    if (dist > 2 ) {
261*e89516f0SMike Frysinger 			unsigned short *sfrom;
262*e89516f0SMike Frysinger 
263*e89516f0SMike Frysinger 			sfrom = (unsigned short *)(from - OFF);
264*e89516f0SMike Frysinger 			loops = len >> 1;
265*e89516f0SMike Frysinger 			do
266*e89516f0SMike Frysinger 			    PUP(sout) = get_unaligned(++sfrom);
267*e89516f0SMike Frysinger 			while (--loops);
268*e89516f0SMike Frysinger 			out = (unsigned char *)sout + OFF;
269*e89516f0SMike Frysinger 			from = (unsigned char *)sfrom + OFF;
270*e89516f0SMike Frysinger 		    } else { /* dist == 1 or dist == 2 */
271*e89516f0SMike Frysinger 			unsigned short pat16;
272*e89516f0SMike Frysinger 
273*e89516f0SMike Frysinger 			pat16 = *(sout-2+2*OFF);
274*e89516f0SMike Frysinger 			if (dist == 1)
275*e89516f0SMike Frysinger #if defined(__BIG_ENDIAN)
276*e89516f0SMike Frysinger 			    pat16 = (pat16 & 0xff) | ((pat16 & 0xff ) << 8);
277*e89516f0SMike Frysinger #elif defined(__LITTLE_ENDIAN)
278*e89516f0SMike Frysinger 			    pat16 = (pat16 & 0xff00) | ((pat16 & 0xff00 ) >> 8);
279*e89516f0SMike Frysinger #else
280*e89516f0SMike Frysinger #error __BIG_ENDIAN nor __LITTLE_ENDIAN is defined
281*e89516f0SMike Frysinger #endif
282*e89516f0SMike Frysinger 			loops = len >> 1;
283*e89516f0SMike Frysinger 			do
284*e89516f0SMike Frysinger 			    PUP(sout) = pat16;
285*e89516f0SMike Frysinger 			while (--loops);
286*e89516f0SMike Frysinger 			out = (unsigned char *)sout + OFF;
287*e89516f0SMike Frysinger 		    }
288*e89516f0SMike Frysinger 		    if (len & 1)
289*e89516f0SMike Frysinger 			PUP(out) = PUP(from);
290*e89516f0SMike Frysinger                 }
291*e89516f0SMike Frysinger             }
292*e89516f0SMike Frysinger             else if ((op & 64) == 0) {          /* 2nd level distance code */
293*e89516f0SMike Frysinger                 this = dcode[this.val + (hold & ((1U << op) - 1))];
294*e89516f0SMike Frysinger                 goto dodist;
295*e89516f0SMike Frysinger             }
296*e89516f0SMike Frysinger             else {
297*e89516f0SMike Frysinger                 strm->msg = (char *)"invalid distance code";
298*e89516f0SMike Frysinger                 state->mode = BAD;
299*e89516f0SMike Frysinger                 break;
300*e89516f0SMike Frysinger             }
301*e89516f0SMike Frysinger         }
302*e89516f0SMike Frysinger         else if ((op & 64) == 0) {              /* 2nd level length code */
303*e89516f0SMike Frysinger             this = lcode[this.val + (hold & ((1U << op) - 1))];
304*e89516f0SMike Frysinger             goto dolen;
305*e89516f0SMike Frysinger         }
306*e89516f0SMike Frysinger         else if (op & 32) {                     /* end-of-block */
307*e89516f0SMike Frysinger             Tracevv((stderr, "inflate:         end of block\n"));
308*e89516f0SMike Frysinger             state->mode = TYPE;
309*e89516f0SMike Frysinger             break;
310*e89516f0SMike Frysinger         }
311*e89516f0SMike Frysinger         else {
312*e89516f0SMike Frysinger             strm->msg = (char *)"invalid literal/length code";
313*e89516f0SMike Frysinger             state->mode = BAD;
314*e89516f0SMike Frysinger             break;
315*e89516f0SMike Frysinger         }
316*e89516f0SMike Frysinger     } while (in < last && out < end);
317*e89516f0SMike Frysinger 
318*e89516f0SMike Frysinger     /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
319*e89516f0SMike Frysinger     len = bits >> 3;
320*e89516f0SMike Frysinger     in -= len;
321*e89516f0SMike Frysinger     bits -= len << 3;
322*e89516f0SMike Frysinger     hold &= (1U << bits) - 1;
323*e89516f0SMike Frysinger 
324*e89516f0SMike Frysinger     /* update state and return */
325*e89516f0SMike Frysinger     strm->next_in = in + OFF;
326*e89516f0SMike Frysinger     strm->next_out = out + OFF;
327*e89516f0SMike Frysinger     strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last));
328*e89516f0SMike Frysinger     strm->avail_out = (unsigned)(out < end ?
329*e89516f0SMike Frysinger                                  257 + (end - out) : 257 - (out - end));
330*e89516f0SMike Frysinger     state->hold = hold;
331*e89516f0SMike Frysinger     state->bits = bits;
332*e89516f0SMike Frysinger     return;
333*e89516f0SMike Frysinger }
334*e89516f0SMike Frysinger 
335*e89516f0SMike Frysinger /*
336*e89516f0SMike Frysinger    inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):
337*e89516f0SMike Frysinger    - Using bit fields for code structure
338*e89516f0SMike Frysinger    - Different op definition to avoid & for extra bits (do & for table bits)
339*e89516f0SMike Frysinger    - Three separate decoding do-loops for direct, window, and write == 0
340*e89516f0SMike Frysinger    - Special case for distance > 1 copies to do overlapped load and store copy
341*e89516f0SMike Frysinger    - Explicit branch predictions (based on measured branch probabilities)
342*e89516f0SMike Frysinger    - Deferring match copy and interspersed it with decoding subsequent codes
343*e89516f0SMike Frysinger    - Swapping literal/length else
344*e89516f0SMike Frysinger    - Swapping window/direct else
345*e89516f0SMike Frysinger    - Larger unrolled copy loops (three is about right)
346*e89516f0SMike Frysinger    - Moving len -= 3 statement into middle of loop
347*e89516f0SMike Frysinger  */
348*e89516f0SMike Frysinger 
349*e89516f0SMike Frysinger #endif /* !ASMINF */
350