1*221b1638SMasahiro Yamada /* inffast.c -- fast decoding 2*221b1638SMasahiro Yamada * Copyright (C) 1995-2017 Mark Adler 3*221b1638SMasahiro Yamada * For conditions of distribution and use, see copyright notice in zlib.h 4*221b1638SMasahiro Yamada */ 5*221b1638SMasahiro Yamada 6*221b1638SMasahiro Yamada #include "zutil.h" 7*221b1638SMasahiro Yamada #include "inftrees.h" 8*221b1638SMasahiro Yamada #include "inflate.h" 9*221b1638SMasahiro Yamada #include "inffast.h" 10*221b1638SMasahiro Yamada 11*221b1638SMasahiro Yamada #ifdef ASMINF 12*221b1638SMasahiro Yamada # pragma message("Assembler code may have bugs -- use at your own risk") 13*221b1638SMasahiro Yamada #else 14*221b1638SMasahiro Yamada 15*221b1638SMasahiro Yamada /* 16*221b1638SMasahiro Yamada Decode literal, length, and distance codes and write out the resulting 17*221b1638SMasahiro Yamada literal and match bytes until either not enough input or output is 18*221b1638SMasahiro Yamada available, an end-of-block is encountered, or a data error is encountered. 19*221b1638SMasahiro Yamada When large enough input and output buffers are supplied to inflate(), for 20*221b1638SMasahiro Yamada example, a 16K input buffer and a 64K output buffer, more than 95% of the 21*221b1638SMasahiro Yamada inflate execution time is spent in this routine. 22*221b1638SMasahiro Yamada 23*221b1638SMasahiro Yamada Entry assumptions: 24*221b1638SMasahiro Yamada 25*221b1638SMasahiro Yamada state->mode == LEN 26*221b1638SMasahiro Yamada strm->avail_in >= 6 27*221b1638SMasahiro Yamada strm->avail_out >= 258 28*221b1638SMasahiro Yamada start >= strm->avail_out 29*221b1638SMasahiro Yamada state->bits < 8 30*221b1638SMasahiro Yamada 31*221b1638SMasahiro Yamada On return, state->mode is one of: 32*221b1638SMasahiro Yamada 33*221b1638SMasahiro Yamada LEN -- ran out of enough output space or enough available input 34*221b1638SMasahiro Yamada TYPE -- reached end of block code, inflate() to interpret next block 35*221b1638SMasahiro Yamada BAD -- error in block data 36*221b1638SMasahiro Yamada 37*221b1638SMasahiro Yamada Notes: 38*221b1638SMasahiro Yamada 39*221b1638SMasahiro Yamada - The maximum input bits used by a length/distance pair is 15 bits for the 40*221b1638SMasahiro Yamada length code, 5 bits for the length extra, 15 bits for the distance code, 41*221b1638SMasahiro Yamada and 13 bits for the distance extra. This totals 48 bits, or six bytes. 42*221b1638SMasahiro Yamada Therefore if strm->avail_in >= 6, then there is enough input to avoid 43*221b1638SMasahiro Yamada checking for available input while decoding. 44*221b1638SMasahiro Yamada 45*221b1638SMasahiro Yamada - The maximum bytes that a single length/distance pair can output is 258 46*221b1638SMasahiro Yamada bytes, which is the maximum length that can be coded. inflate_fast() 47*221b1638SMasahiro Yamada requires strm->avail_out >= 258 for each loop to avoid checking for 48*221b1638SMasahiro Yamada output space. 49*221b1638SMasahiro Yamada */ 50*221b1638SMasahiro Yamada void ZLIB_INTERNAL inflate_fast(strm, start) 51*221b1638SMasahiro Yamada z_streamp strm; 52*221b1638SMasahiro Yamada unsigned start; /* inflate()'s starting value for strm->avail_out */ 53*221b1638SMasahiro Yamada { 54*221b1638SMasahiro Yamada struct inflate_state FAR *state; 55*221b1638SMasahiro Yamada z_const unsigned char FAR *in; /* local strm->next_in */ 56*221b1638SMasahiro Yamada z_const unsigned char FAR *last; /* have enough input while in < last */ 57*221b1638SMasahiro Yamada unsigned char FAR *out; /* local strm->next_out */ 58*221b1638SMasahiro Yamada unsigned char FAR *beg; /* inflate()'s initial strm->next_out */ 59*221b1638SMasahiro Yamada unsigned char FAR *end; /* while out < end, enough space available */ 60*221b1638SMasahiro Yamada #ifdef INFLATE_STRICT 61*221b1638SMasahiro Yamada unsigned dmax; /* maximum distance from zlib header */ 62*221b1638SMasahiro Yamada #endif 63*221b1638SMasahiro Yamada unsigned wsize; /* window size or zero if not using window */ 64*221b1638SMasahiro Yamada unsigned whave; /* valid bytes in the window */ 65*221b1638SMasahiro Yamada unsigned wnext; /* window write index */ 66*221b1638SMasahiro Yamada unsigned char FAR *window; /* allocated sliding window, if wsize != 0 */ 67*221b1638SMasahiro Yamada unsigned long hold; /* local strm->hold */ 68*221b1638SMasahiro Yamada unsigned bits; /* local strm->bits */ 69*221b1638SMasahiro Yamada code const FAR *lcode; /* local strm->lencode */ 70*221b1638SMasahiro Yamada code const FAR *dcode; /* local strm->distcode */ 71*221b1638SMasahiro Yamada unsigned lmask; /* mask for first level of length codes */ 72*221b1638SMasahiro Yamada unsigned dmask; /* mask for first level of distance codes */ 73*221b1638SMasahiro Yamada code here; /* retrieved table entry */ 74*221b1638SMasahiro Yamada unsigned op; /* code bits, operation, extra bits, or */ 75*221b1638SMasahiro Yamada /* window position, window bytes to copy */ 76*221b1638SMasahiro Yamada unsigned len; /* match length, unused bytes */ 77*221b1638SMasahiro Yamada unsigned dist; /* match distance */ 78*221b1638SMasahiro Yamada unsigned char FAR *from; /* where to copy match from */ 79*221b1638SMasahiro Yamada 80*221b1638SMasahiro Yamada /* copy state to local variables */ 81*221b1638SMasahiro Yamada state = (struct inflate_state FAR *)strm->state; 82*221b1638SMasahiro Yamada in = strm->next_in; 83*221b1638SMasahiro Yamada last = in + (strm->avail_in - 5); 84*221b1638SMasahiro Yamada out = strm->next_out; 85*221b1638SMasahiro Yamada beg = out - (start - strm->avail_out); 86*221b1638SMasahiro Yamada end = out + (strm->avail_out - 257); 87*221b1638SMasahiro Yamada #ifdef INFLATE_STRICT 88*221b1638SMasahiro Yamada dmax = state->dmax; 89*221b1638SMasahiro Yamada #endif 90*221b1638SMasahiro Yamada wsize = state->wsize; 91*221b1638SMasahiro Yamada whave = state->whave; 92*221b1638SMasahiro Yamada wnext = state->wnext; 93*221b1638SMasahiro Yamada window = state->window; 94*221b1638SMasahiro Yamada hold = state->hold; 95*221b1638SMasahiro Yamada bits = state->bits; 96*221b1638SMasahiro Yamada lcode = state->lencode; 97*221b1638SMasahiro Yamada dcode = state->distcode; 98*221b1638SMasahiro Yamada lmask = (1U << state->lenbits) - 1; 99*221b1638SMasahiro Yamada dmask = (1U << state->distbits) - 1; 100*221b1638SMasahiro Yamada 101*221b1638SMasahiro Yamada /* decode literals and length/distances until end-of-block or not enough 102*221b1638SMasahiro Yamada input data or output space */ 103*221b1638SMasahiro Yamada do { 104*221b1638SMasahiro Yamada if (bits < 15) { 105*221b1638SMasahiro Yamada hold += (unsigned long)(*in++) << bits; 106*221b1638SMasahiro Yamada bits += 8; 107*221b1638SMasahiro Yamada hold += (unsigned long)(*in++) << bits; 108*221b1638SMasahiro Yamada bits += 8; 109*221b1638SMasahiro Yamada } 110*221b1638SMasahiro Yamada here = lcode[hold & lmask]; 111*221b1638SMasahiro Yamada dolen: 112*221b1638SMasahiro Yamada op = (unsigned)(here.bits); 113*221b1638SMasahiro Yamada hold >>= op; 114*221b1638SMasahiro Yamada bits -= op; 115*221b1638SMasahiro Yamada op = (unsigned)(here.op); 116*221b1638SMasahiro Yamada if (op == 0) { /* literal */ 117*221b1638SMasahiro Yamada Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ? 118*221b1638SMasahiro Yamada "inflate: literal '%c'\n" : 119*221b1638SMasahiro Yamada "inflate: literal 0x%02x\n", here.val)); 120*221b1638SMasahiro Yamada *out++ = (unsigned char)(here.val); 121*221b1638SMasahiro Yamada } 122*221b1638SMasahiro Yamada else if (op & 16) { /* length base */ 123*221b1638SMasahiro Yamada len = (unsigned)(here.val); 124*221b1638SMasahiro Yamada op &= 15; /* number of extra bits */ 125*221b1638SMasahiro Yamada if (op) { 126*221b1638SMasahiro Yamada if (bits < op) { 127*221b1638SMasahiro Yamada hold += (unsigned long)(*in++) << bits; 128*221b1638SMasahiro Yamada bits += 8; 129*221b1638SMasahiro Yamada } 130*221b1638SMasahiro Yamada len += (unsigned)hold & ((1U << op) - 1); 131*221b1638SMasahiro Yamada hold >>= op; 132*221b1638SMasahiro Yamada bits -= op; 133*221b1638SMasahiro Yamada } 134*221b1638SMasahiro Yamada Tracevv((stderr, "inflate: length %u\n", len)); 135*221b1638SMasahiro Yamada if (bits < 15) { 136*221b1638SMasahiro Yamada hold += (unsigned long)(*in++) << bits; 137*221b1638SMasahiro Yamada bits += 8; 138*221b1638SMasahiro Yamada hold += (unsigned long)(*in++) << bits; 139*221b1638SMasahiro Yamada bits += 8; 140*221b1638SMasahiro Yamada } 141*221b1638SMasahiro Yamada here = dcode[hold & dmask]; 142*221b1638SMasahiro Yamada dodist: 143*221b1638SMasahiro Yamada op = (unsigned)(here.bits); 144*221b1638SMasahiro Yamada hold >>= op; 145*221b1638SMasahiro Yamada bits -= op; 146*221b1638SMasahiro Yamada op = (unsigned)(here.op); 147*221b1638SMasahiro Yamada if (op & 16) { /* distance base */ 148*221b1638SMasahiro Yamada dist = (unsigned)(here.val); 149*221b1638SMasahiro Yamada op &= 15; /* number of extra bits */ 150*221b1638SMasahiro Yamada if (bits < op) { 151*221b1638SMasahiro Yamada hold += (unsigned long)(*in++) << bits; 152*221b1638SMasahiro Yamada bits += 8; 153*221b1638SMasahiro Yamada if (bits < op) { 154*221b1638SMasahiro Yamada hold += (unsigned long)(*in++) << bits; 155*221b1638SMasahiro Yamada bits += 8; 156*221b1638SMasahiro Yamada } 157*221b1638SMasahiro Yamada } 158*221b1638SMasahiro Yamada dist += (unsigned)hold & ((1U << op) - 1); 159*221b1638SMasahiro Yamada #ifdef INFLATE_STRICT 160*221b1638SMasahiro Yamada if (dist > dmax) { 161*221b1638SMasahiro Yamada strm->msg = (char *)"invalid distance too far back"; 162*221b1638SMasahiro Yamada state->mode = BAD; 163*221b1638SMasahiro Yamada break; 164*221b1638SMasahiro Yamada } 165*221b1638SMasahiro Yamada #endif 166*221b1638SMasahiro Yamada hold >>= op; 167*221b1638SMasahiro Yamada bits -= op; 168*221b1638SMasahiro Yamada Tracevv((stderr, "inflate: distance %u\n", dist)); 169*221b1638SMasahiro Yamada op = (unsigned)(out - beg); /* max distance in output */ 170*221b1638SMasahiro Yamada if (dist > op) { /* see if copy from window */ 171*221b1638SMasahiro Yamada op = dist - op; /* distance back in window */ 172*221b1638SMasahiro Yamada if (op > whave) { 173*221b1638SMasahiro Yamada if (state->sane) { 174*221b1638SMasahiro Yamada strm->msg = 175*221b1638SMasahiro Yamada (char *)"invalid distance too far back"; 176*221b1638SMasahiro Yamada state->mode = BAD; 177*221b1638SMasahiro Yamada break; 178*221b1638SMasahiro Yamada } 179*221b1638SMasahiro Yamada #ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR 180*221b1638SMasahiro Yamada if (len <= op - whave) { 181*221b1638SMasahiro Yamada do { 182*221b1638SMasahiro Yamada *out++ = 0; 183*221b1638SMasahiro Yamada } while (--len); 184*221b1638SMasahiro Yamada continue; 185*221b1638SMasahiro Yamada } 186*221b1638SMasahiro Yamada len -= op - whave; 187*221b1638SMasahiro Yamada do { 188*221b1638SMasahiro Yamada *out++ = 0; 189*221b1638SMasahiro Yamada } while (--op > whave); 190*221b1638SMasahiro Yamada if (op == 0) { 191*221b1638SMasahiro Yamada from = out - dist; 192*221b1638SMasahiro Yamada do { 193*221b1638SMasahiro Yamada *out++ = *from++; 194*221b1638SMasahiro Yamada } while (--len); 195*221b1638SMasahiro Yamada continue; 196*221b1638SMasahiro Yamada } 197*221b1638SMasahiro Yamada #endif 198*221b1638SMasahiro Yamada } 199*221b1638SMasahiro Yamada from = window; 200*221b1638SMasahiro Yamada if (wnext == 0) { /* very common case */ 201*221b1638SMasahiro Yamada from += wsize - op; 202*221b1638SMasahiro Yamada if (op < len) { /* some from window */ 203*221b1638SMasahiro Yamada len -= op; 204*221b1638SMasahiro Yamada do { 205*221b1638SMasahiro Yamada *out++ = *from++; 206*221b1638SMasahiro Yamada } while (--op); 207*221b1638SMasahiro Yamada from = out - dist; /* rest from output */ 208*221b1638SMasahiro Yamada } 209*221b1638SMasahiro Yamada } 210*221b1638SMasahiro Yamada else if (wnext < op) { /* wrap around window */ 211*221b1638SMasahiro Yamada from += wsize + wnext - op; 212*221b1638SMasahiro Yamada op -= wnext; 213*221b1638SMasahiro Yamada if (op < len) { /* some from end of window */ 214*221b1638SMasahiro Yamada len -= op; 215*221b1638SMasahiro Yamada do { 216*221b1638SMasahiro Yamada *out++ = *from++; 217*221b1638SMasahiro Yamada } while (--op); 218*221b1638SMasahiro Yamada from = window; 219*221b1638SMasahiro Yamada if (wnext < len) { /* some from start of window */ 220*221b1638SMasahiro Yamada op = wnext; 221*221b1638SMasahiro Yamada len -= op; 222*221b1638SMasahiro Yamada do { 223*221b1638SMasahiro Yamada *out++ = *from++; 224*221b1638SMasahiro Yamada } while (--op); 225*221b1638SMasahiro Yamada from = out - dist; /* rest from output */ 226*221b1638SMasahiro Yamada } 227*221b1638SMasahiro Yamada } 228*221b1638SMasahiro Yamada } 229*221b1638SMasahiro Yamada else { /* contiguous in window */ 230*221b1638SMasahiro Yamada from += wnext - op; 231*221b1638SMasahiro Yamada if (op < len) { /* some from window */ 232*221b1638SMasahiro Yamada len -= op; 233*221b1638SMasahiro Yamada do { 234*221b1638SMasahiro Yamada *out++ = *from++; 235*221b1638SMasahiro Yamada } while (--op); 236*221b1638SMasahiro Yamada from = out - dist; /* rest from output */ 237*221b1638SMasahiro Yamada } 238*221b1638SMasahiro Yamada } 239*221b1638SMasahiro Yamada while (len > 2) { 240*221b1638SMasahiro Yamada *out++ = *from++; 241*221b1638SMasahiro Yamada *out++ = *from++; 242*221b1638SMasahiro Yamada *out++ = *from++; 243*221b1638SMasahiro Yamada len -= 3; 244*221b1638SMasahiro Yamada } 245*221b1638SMasahiro Yamada if (len) { 246*221b1638SMasahiro Yamada *out++ = *from++; 247*221b1638SMasahiro Yamada if (len > 1) 248*221b1638SMasahiro Yamada *out++ = *from++; 249*221b1638SMasahiro Yamada } 250*221b1638SMasahiro Yamada } 251*221b1638SMasahiro Yamada else { 252*221b1638SMasahiro Yamada from = out - dist; /* copy direct from output */ 253*221b1638SMasahiro Yamada do { /* minimum length is three */ 254*221b1638SMasahiro Yamada *out++ = *from++; 255*221b1638SMasahiro Yamada *out++ = *from++; 256*221b1638SMasahiro Yamada *out++ = *from++; 257*221b1638SMasahiro Yamada len -= 3; 258*221b1638SMasahiro Yamada } while (len > 2); 259*221b1638SMasahiro Yamada if (len) { 260*221b1638SMasahiro Yamada *out++ = *from++; 261*221b1638SMasahiro Yamada if (len > 1) 262*221b1638SMasahiro Yamada *out++ = *from++; 263*221b1638SMasahiro Yamada } 264*221b1638SMasahiro Yamada } 265*221b1638SMasahiro Yamada } 266*221b1638SMasahiro Yamada else if ((op & 64) == 0) { /* 2nd level distance code */ 267*221b1638SMasahiro Yamada here = dcode[here.val + (hold & ((1U << op) - 1))]; 268*221b1638SMasahiro Yamada goto dodist; 269*221b1638SMasahiro Yamada } 270*221b1638SMasahiro Yamada else { 271*221b1638SMasahiro Yamada strm->msg = (char *)"invalid distance code"; 272*221b1638SMasahiro Yamada state->mode = BAD; 273*221b1638SMasahiro Yamada break; 274*221b1638SMasahiro Yamada } 275*221b1638SMasahiro Yamada } 276*221b1638SMasahiro Yamada else if ((op & 64) == 0) { /* 2nd level length code */ 277*221b1638SMasahiro Yamada here = lcode[here.val + (hold & ((1U << op) - 1))]; 278*221b1638SMasahiro Yamada goto dolen; 279*221b1638SMasahiro Yamada } 280*221b1638SMasahiro Yamada else if (op & 32) { /* end-of-block */ 281*221b1638SMasahiro Yamada Tracevv((stderr, "inflate: end of block\n")); 282*221b1638SMasahiro Yamada state->mode = TYPE; 283*221b1638SMasahiro Yamada break; 284*221b1638SMasahiro Yamada } 285*221b1638SMasahiro Yamada else { 286*221b1638SMasahiro Yamada strm->msg = (char *)"invalid literal/length code"; 287*221b1638SMasahiro Yamada state->mode = BAD; 288*221b1638SMasahiro Yamada break; 289*221b1638SMasahiro Yamada } 290*221b1638SMasahiro Yamada } while (in < last && out < end); 291*221b1638SMasahiro Yamada 292*221b1638SMasahiro Yamada /* return unused bytes (on entry, bits < 8, so in won't go too far back) */ 293*221b1638SMasahiro Yamada len = bits >> 3; 294*221b1638SMasahiro Yamada in -= len; 295*221b1638SMasahiro Yamada bits -= len << 3; 296*221b1638SMasahiro Yamada hold &= (1U << bits) - 1; 297*221b1638SMasahiro Yamada 298*221b1638SMasahiro Yamada /* update state and return */ 299*221b1638SMasahiro Yamada strm->next_in = in; 300*221b1638SMasahiro Yamada strm->next_out = out; 301*221b1638SMasahiro Yamada strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last)); 302*221b1638SMasahiro Yamada strm->avail_out = (unsigned)(out < end ? 303*221b1638SMasahiro Yamada 257 + (end - out) : 257 - (out - end)); 304*221b1638SMasahiro Yamada state->hold = hold; 305*221b1638SMasahiro Yamada state->bits = bits; 306*221b1638SMasahiro Yamada return; 307*221b1638SMasahiro Yamada } 308*221b1638SMasahiro Yamada 309*221b1638SMasahiro Yamada /* 310*221b1638SMasahiro Yamada inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe): 311*221b1638SMasahiro Yamada - Using bit fields for code structure 312*221b1638SMasahiro Yamada - Different op definition to avoid & for extra bits (do & for table bits) 313*221b1638SMasahiro Yamada - Three separate decoding do-loops for direct, window, and wnext == 0 314*221b1638SMasahiro Yamada - Special case for distance > 1 copies to do overlapped load and store copy 315*221b1638SMasahiro Yamada - Explicit branch predictions (based on measured branch probabilities) 316*221b1638SMasahiro Yamada - Deferring match copy and interspersed it with decoding subsequent codes 317*221b1638SMasahiro Yamada - Swapping literal/length else 318*221b1638SMasahiro Yamada - Swapping window/direct else 319*221b1638SMasahiro Yamada - Larger unrolled copy loops (three is about right) 320*221b1638SMasahiro Yamada - Moving len -= 3 statement into middle of loop 321*221b1638SMasahiro Yamada */ 322*221b1638SMasahiro Yamada 323*221b1638SMasahiro Yamada #endif /* !ASMINF */ 324