xref: /OK3568_Linux_fs/kernel/lib/zstd/huf_decompress.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun /*
2*4882a593Smuzhiyun  * Huffman decoder, part of New Generation Entropy library
3*4882a593Smuzhiyun  * Copyright (C) 2013-2016, Yann Collet.
4*4882a593Smuzhiyun  *
5*4882a593Smuzhiyun  * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6*4882a593Smuzhiyun  *
7*4882a593Smuzhiyun  * Redistribution and use in source and binary forms, with or without
8*4882a593Smuzhiyun  * modification, are permitted provided that the following conditions are
9*4882a593Smuzhiyun  * met:
10*4882a593Smuzhiyun  *
11*4882a593Smuzhiyun  *   * Redistributions of source code must retain the above copyright
12*4882a593Smuzhiyun  * notice, this list of conditions and the following disclaimer.
13*4882a593Smuzhiyun  *   * Redistributions in binary form must reproduce the above
14*4882a593Smuzhiyun  * copyright notice, this list of conditions and the following disclaimer
15*4882a593Smuzhiyun  * in the documentation and/or other materials provided with the
16*4882a593Smuzhiyun  * distribution.
17*4882a593Smuzhiyun  *
18*4882a593Smuzhiyun  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19*4882a593Smuzhiyun  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20*4882a593Smuzhiyun  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21*4882a593Smuzhiyun  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22*4882a593Smuzhiyun  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23*4882a593Smuzhiyun  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24*4882a593Smuzhiyun  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25*4882a593Smuzhiyun  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26*4882a593Smuzhiyun  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27*4882a593Smuzhiyun  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28*4882a593Smuzhiyun  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29*4882a593Smuzhiyun  *
30*4882a593Smuzhiyun  * This program is free software; you can redistribute it and/or modify it under
31*4882a593Smuzhiyun  * the terms of the GNU General Public License version 2 as published by the
32*4882a593Smuzhiyun  * Free Software Foundation. This program is dual-licensed; you may select
33*4882a593Smuzhiyun  * either version 2 of the GNU General Public License ("GPL") or BSD license
34*4882a593Smuzhiyun  * ("BSD").
35*4882a593Smuzhiyun  *
36*4882a593Smuzhiyun  * You can contact the author at :
37*4882a593Smuzhiyun  * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
38*4882a593Smuzhiyun  */
39*4882a593Smuzhiyun 
40*4882a593Smuzhiyun /* **************************************************************
41*4882a593Smuzhiyun *  Compiler specifics
42*4882a593Smuzhiyun ****************************************************************/
/* Declares a function as file-local and always inlined (applied to HUF_decodeStreamX2() below). */
#define FORCE_INLINE static __always_inline
44*4882a593Smuzhiyun 
45*4882a593Smuzhiyun /* **************************************************************
46*4882a593Smuzhiyun *  Dependencies
47*4882a593Smuzhiyun ****************************************************************/
48*4882a593Smuzhiyun #include "bitstream.h" /* BIT_* */
49*4882a593Smuzhiyun #include "fse.h"       /* header compression */
50*4882a593Smuzhiyun #include "huf.h"
51*4882a593Smuzhiyun #include <linux/compiler.h>
52*4882a593Smuzhiyun #include <linux/kernel.h>
53*4882a593Smuzhiyun #include <linux/string.h> /* memcpy, memset */
54*4882a593Smuzhiyun 
55*4882a593Smuzhiyun /* **************************************************************
56*4882a593Smuzhiyun *  Error Management
57*4882a593Smuzhiyun ****************************************************************/
/*
 * Compile-time assertion : when `c` is false the enumerator initializer
 * becomes 1 / 0, which is not a valid constant expression, so the build
 * stops. The expansion is a braced block, hence statement-level use only.
 */
#define HUF_STATIC_ASSERT(c)                                   \
	{                                                      \
		enum { HUF_static_assert = 1 / (int)(!!(c)) }; \
	} /* use only *after* variable declarations */
62*4882a593Smuzhiyun 
63*4882a593Smuzhiyun /*-***************************/
64*4882a593Smuzhiyun /*  generic DTableDesc       */
65*4882a593Smuzhiyun /*-***************************/
66*4882a593Smuzhiyun 
/* Descriptor copied from / to the start of a HUF_DTable (see HUF_getDTableDesc()) */
typedef struct {
	BYTE maxTableLog; /* upper bound the table readers compare against the tableLog found in the header */
	BYTE tableType;   /* written as 0 by HUF_readDTableX2_wksp(); the X2 entry points reject any other value */
	BYTE tableLog;    /* tableLog recorded by the table reader; used as lookup width (dtLog) when decoding */
	BYTE reserved;
} DTableDesc;
73*4882a593Smuzhiyun 
HUF_getDTableDesc(const HUF_DTable * table)74*4882a593Smuzhiyun static DTableDesc HUF_getDTableDesc(const HUF_DTable *table)
75*4882a593Smuzhiyun {
76*4882a593Smuzhiyun 	DTableDesc dtd;
77*4882a593Smuzhiyun 	memcpy(&dtd, table, sizeof(dtd));
78*4882a593Smuzhiyun 	return dtd;
79*4882a593Smuzhiyun }
80*4882a593Smuzhiyun 
81*4882a593Smuzhiyun /*-***************************/
82*4882a593Smuzhiyun /*  single-symbol decoding   */
83*4882a593Smuzhiyun /*-***************************/
84*4882a593Smuzhiyun 
typedef struct {
	BYTE byte;   /* decoded symbol value */
	BYTE nbBits; /* number of bits to skip in the bitstream once this symbol is emitted */
} HUF_DEltX2; /* single-symbol decoding */
89*4882a593Smuzhiyun 
/* HUF_readDTableX2_wksp() :
 * Reads the Huffman header found at `src` and builds the single-symbol decoding table in `DTable`.
 * `workspace` is carved into a rank array and a weight array; whatever is left is handed to HUF_readStats_wksp().
 * @return : the value returned by HUF_readStats_wksp() (header size) on success,
 *           or an error code (tableLog_tooLarge when the workspace or the DTable is too small). */
size_t HUF_readDTableX2_wksp(HUF_DTable *DTable, const void *src, size_t srcSize, void *workspace, size_t workspaceSize)
{
	U32 tableLog = 0;
	U32 nbSymbols = 0;
	size_t hdrSize;
	void *const cells = DTable + 1; /* decoding cells follow the descriptor cell */
	HUF_DEltX2 *const dt = (HUF_DEltX2 *)cells;

	/* workspace layout, counted in U32 units : [rankVal][huffWeight][remainder] */
	U32 *const wksp32 = (U32 *)workspace;
	size_t const rankValSize32 = HUF_TABLELOG_ABSOLUTEMAX + 1;
	size_t const weightsSize32 = ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
	size_t const carvedBytes = (rankValSize32 + weightsSize32) << 2;
	U32 *const rankVal = wksp32;
	BYTE *const huffWeight = (BYTE *)(wksp32 + rankValSize32);

	if (carvedBytes > workspaceSize)
		return ERROR(tableLog_tooLarge);
	workspace = wksp32 + rankValSize32 + weightsSize32;
	workspaceSize -= carvedBytes;

	HUF_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
	/* huffWeight is intentionally left uncleared before being filled by HUF_readStats_wksp() */

	hdrSize = HUF_readStats_wksp(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize, workspace, workspaceSize);
	if (HUF_isError(hdrSize))
		return hdrSize;

	/* Table header */
	{
		DTableDesc desc = HUF_getDTableDesc(DTable);

		if (tableLog > (U32)(desc.maxTableLog + 1))
			return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */
		desc.tableType = 0;
		desc.tableLog = (BYTE)tableLog;
		memcpy(DTable, &desc, sizeof(desc));
	}

	/* Turn per-rank symbol counts into the first table index of each rank */
	{
		U32 rank;
		U32 runningStart = 0;

		for (rank = 1; rank <= tableLog; rank++) {
			U32 const rankStart = runningStart;

			runningStart += (rankVal[rank] << (rank - 1));
			rankVal[rank] = rankStart;
		}
	}

	/* fill DTable : a symbol of weight w occupies (1 << w) >> 1 consecutive cells */
	{
		U32 sym;

		for (sym = 0; sym < nbSymbols; sym++) {
			U32 const w = huffWeight[sym];
			U32 const span = (1 << w) >> 1;
			U32 const first = rankVal[w];
			U32 const last = first + span;
			U32 pos;
			HUF_DEltX2 cell;

			cell.byte = (BYTE)sym;
			cell.nbBits = (BYTE)(tableLog + 1 - w);
			for (pos = first; pos < last; pos++)
				dt[pos] = cell;
			rankVal[w] = last;
		}
	}

	return hdrSize;
}
157*4882a593Smuzhiyun 
HUF_decodeSymbolX2(BIT_DStream_t * Dstream,const HUF_DEltX2 * dt,const U32 dtLog)158*4882a593Smuzhiyun static BYTE HUF_decodeSymbolX2(BIT_DStream_t *Dstream, const HUF_DEltX2 *dt, const U32 dtLog)
159*4882a593Smuzhiyun {
160*4882a593Smuzhiyun 	size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
161*4882a593Smuzhiyun 	BYTE const c = dt[val].byte;
162*4882a593Smuzhiyun 	BIT_skipBits(Dstream, dt[val].nbBits);
163*4882a593Smuzhiyun 	return c;
164*4882a593Smuzhiyun }
165*4882a593Smuzhiyun 
/*
 * Symbol-decoding helpers. All three expect `dt` and `dtLog` to be in scope
 * at the point of use.
 *   _0 : always decodes one symbol into *ptr and advances ptr.
 *   _1 : same, but only when ZSTD_64bits() or HUF_TABLELOG_MAX <= 12.
 *   _2 : same, but only when ZSTD_64bits().
 * _1 and _2 are wrapped in do { } while (0) so that each behaves as a single
 * statement : with a bare `if`, an `else` written after the macro at the call
 * site would silently attach to the macro's hidden `if`.
 */
#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) *(ptr)++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog)

#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr)                 \
	do {                                                   \
		if (ZSTD_64bits() || (HUF_TABLELOG_MAX <= 12)) \
			HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr); \
	} while (0)

#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr)                 \
	do {                                                   \
		if (ZSTD_64bits())                             \
			HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr); \
	} while (0)
175*4882a593Smuzhiyun 
HUF_decodeStreamX2(BYTE * p,BIT_DStream_t * const bitDPtr,BYTE * const pEnd,const HUF_DEltX2 * const dt,const U32 dtLog)176*4882a593Smuzhiyun FORCE_INLINE size_t HUF_decodeStreamX2(BYTE *p, BIT_DStream_t *const bitDPtr, BYTE *const pEnd, const HUF_DEltX2 *const dt, const U32 dtLog)
177*4882a593Smuzhiyun {
178*4882a593Smuzhiyun 	BYTE *const pStart = p;
179*4882a593Smuzhiyun 
180*4882a593Smuzhiyun 	/* up to 4 symbols at a time */
181*4882a593Smuzhiyun 	while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd - 4)) {
182*4882a593Smuzhiyun 		HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
183*4882a593Smuzhiyun 		HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
184*4882a593Smuzhiyun 		HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
185*4882a593Smuzhiyun 		HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
186*4882a593Smuzhiyun 	}
187*4882a593Smuzhiyun 
188*4882a593Smuzhiyun 	/* closer to the end */
189*4882a593Smuzhiyun 	while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd))
190*4882a593Smuzhiyun 		HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
191*4882a593Smuzhiyun 
192*4882a593Smuzhiyun 	/* no more data to retrieve from bitstream, hence no need to reload */
193*4882a593Smuzhiyun 	while (p < pEnd)
194*4882a593Smuzhiyun 		HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
195*4882a593Smuzhiyun 
196*4882a593Smuzhiyun 	return pEnd - pStart;
197*4882a593Smuzhiyun }
198*4882a593Smuzhiyun 
/* HUF_decompress1X2_usingDTable_internal() :
 * Decodes a single Huffman stream of `cSrcSize` bytes into `dst` using the table already built in `DTable`.
 * No check is made here on the table type; the public wrappers do it.
 * @return : `dstSize` on success, the BIT_initDStream() error if the stream cannot be opened,
 *           or corruption_detected when the bitstream is not exactly consumed. */
static size_t HUF_decompress1X2_usingDTable_internal(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
{
	BYTE *const ostart = (BYTE *)dst;
	BYTE *const oend = ostart + dstSize;
	const void *const cells = DTable + 1; /* decoding cells follow the descriptor cell */
	const HUF_DEltX2 *const dt = (const HUF_DEltX2 *)cells;
	U32 const dtLog = HUF_getDTableDesc(DTable).tableLog;
	BIT_DStream_t bitD;
	size_t const initResult = BIT_initDStream(&bitD, cSrc, cSrcSize);

	if (HUF_isError(initResult))
		return initResult;

	HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtLog);

	/* the stream must end exactly where the output does */
	if (!BIT_endOfDStream(&bitD))
		return ERROR(corruption_detected);

	return dstSize;
}
223*4882a593Smuzhiyun 
/* HUF_decompress1X2_usingDTable() :
 * Single-stream decoding with a pre-built table.
 * Fails with GENERIC when the descriptor's tableType is not 0, i.e. not the value
 * written by HUF_readDTableX2_wksp(). */
size_t HUF_decompress1X2_usingDTable(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
{
	if (HUF_getDTableDesc(DTable).tableType != 0)
		return ERROR(GENERIC);

	return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
}
231*4882a593Smuzhiyun 
/* HUF_decompress1X2_DCtx_wksp() :
 * Builds the single-symbol table from the header at the start of `cSrc`, then decodes
 * the single stream that follows it into `dst`.
 * @return : result of the decoding step, the table-reading error,
 *           or srcSize_wrong when the header leaves no payload byte. */
size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable *DCtx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
{
	size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, workspace, workspaceSize);

	if (HUF_isError(hSize))
		return hSize;
	if (hSize >= cSrcSize)
		return ERROR(srcSize_wrong);

	/* payload starts right after the table header */
	return HUF_decompress1X2_usingDTable_internal(dst, dstSize, (const BYTE *)cSrc + hSize, cSrcSize - hSize, DCtx);
}
246*4882a593Smuzhiyun 
/* HUF_decompress4X2_usingDTable_internal() :
 * Decodes four Huffman streams with the single-symbol table held in `DTable`.
 * Input, as read below : three little-endian 16-bit lengths (streams 1 to 3), then the four
 * streams back to back; stream 4 takes whatever remains of `cSrcSize`.
 * Output : `dst` is cut into segments of (dstSize + 3) / 4 bytes, the last one getting the rest.
 * @return : `dstSize` on success, a BIT_initDStream() error, or corruption_detected. */
static size_t HUF_decompress4X2_usingDTable_internal(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
{
	/* Check */
	if (cSrcSize < 10)
		return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
	/* The three leading segments must fit inside dst. They do not for dstSize 1, 2 and 5 :
	 * without this check streams 1-3 would be decoded past the end of the output buffer. */
	if (3 * ((dstSize + 3) / 4) > dstSize)
		return ERROR(corruption_detected);

	{
		const BYTE *const istart = (const BYTE *)cSrc;
		BYTE *const ostart = (BYTE *)dst;
		BYTE *const oend = ostart + dstSize;
		const void *const dtPtr = DTable + 1;
		const HUF_DEltX2 *const dt = (const HUF_DEltX2 *)dtPtr;

		/* Init */
		BIT_DStream_t bitD1;
		BIT_DStream_t bitD2;
		BIT_DStream_t bitD3;
		BIT_DStream_t bitD4;
		size_t const length1 = ZSTD_readLE16(istart);
		size_t const length2 = ZSTD_readLE16(istart + 2);
		size_t const length3 = ZSTD_readLE16(istart + 4);
		size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
		const BYTE *const istart1 = istart + 6; /* jumpTable */
		const BYTE *const istart2 = istart1 + length1;
		const BYTE *const istart3 = istart2 + length2;
		const BYTE *const istart4 = istart3 + length3;
		const size_t segmentSize = (dstSize + 3) / 4;
		BYTE *const opStart2 = ostart + segmentSize;
		BYTE *const opStart3 = opStart2 + segmentSize;
		BYTE *const opStart4 = opStart3 + segmentSize; /* <= oend, guaranteed by the check above */
		BYTE *op1 = ostart;
		BYTE *op2 = opStart2;
		BYTE *op3 = opStart3;
		BYTE *op4 = opStart4;
		U32 endSignal;
		DTableDesc const dtd = HUF_getDTableDesc(DTable);
		U32 const dtLog = dtd.tableLog;

		/* the unsigned subtraction wraps when the three declared lengths exceed the input */
		if (length4 > cSrcSize)
			return ERROR(corruption_detected); /* overflow */
		{
			size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1);
			if (HUF_isError(errorCode))
				return errorCode;
		}
		{
			size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2);
			if (HUF_isError(errorCode))
				return errorCode;
		}
		{
			size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3);
			if (HUF_isError(errorCode))
				return errorCode;
		}
		{
			size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4);
			if (HUF_isError(errorCode))
				return errorCode;
		}

		/* 16-32 symbols per loop (4-8 symbols per stream).
		 * Room left in the last segment is measured as (oend - op4) : forming `oend - 7`
		 * would point before the start of dst when dstSize < 7. */
		endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
		while ((endSignal == BIT_DStream_unfinished) && ((oend - op4) > 7)) {
			HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
			HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
			HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
			HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
			HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
			HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
			HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
			HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
			HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
			HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
			HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
			HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
			HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
			HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
			HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
			HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
			endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
		}

		/* check corruption */
		if (op1 > opStart2)
			return ERROR(corruption_detected);
		if (op2 > opStart3)
			return ERROR(corruption_detected);
		if (op3 > opStart4)
			return ERROR(corruption_detected);
		/* note : op4 supposed already verified within main loop */

		/* finish bitStreams one by one */
		HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
		HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
		HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
		HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);

		/* check : every stream must be exactly consumed */
		endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
		if (!endSignal)
			return ERROR(corruption_detected);

		/* decoded size */
		return dstSize;
	}
}
354*4882a593Smuzhiyun 
/* HUF_decompress4X2_usingDTable() :
 * Four-stream decoding with a pre-built table.
 * Fails with GENERIC when the descriptor's tableType is not 0, i.e. not the value
 * written by HUF_readDTableX2_wksp(). */
size_t HUF_decompress4X2_usingDTable(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
{
	if (HUF_getDTableDesc(DTable).tableType != 0)
		return ERROR(GENERIC);

	return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
}
362*4882a593Smuzhiyun 
/* HUF_decompress4X2_DCtx_wksp() :
 * Builds the single-symbol table from the header at the start of `cSrc`, then decodes
 * the four streams that follow it into `dst`.
 * @return : result of the decoding step, the table-reading error,
 *           or srcSize_wrong when the header leaves no payload byte. */
size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
{
	size_t const hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize, workspace, workspaceSize);

	if (HUF_isError(hSize))
		return hSize;
	if (hSize >= cSrcSize)
		return ERROR(srcSize_wrong);

	/* payload starts right after the table header */
	return HUF_decompress4X2_usingDTable_internal(dst, dstSize, (const BYTE *)cSrc + hSize, cSrcSize - hSize, dctx);
}
377*4882a593Smuzhiyun 
378*4882a593Smuzhiyun /* *************************/
379*4882a593Smuzhiyun /* double-symbols decoding */
380*4882a593Smuzhiyun /* *************************/
typedef struct {
	U16 sequence; /* written with ZSTD_writeLE16() : first symbol, plus (second symbol << 8) for two-symbol cells */
	BYTE nbBits;  /* bit count stored for the cell (both symbols together when length is 2) */
	BYTE length;  /* number of symbols held in `sequence` : the table builders store 1 or 2 */
} HUF_DEltX4; /* double-symbols decoding */
386*4882a593Smuzhiyun 
/* One entry of the symbol list handed to the X4 table builders */
typedef struct {
	BYTE symbol; /* symbol value */
	BYTE weight; /* Huffman weight of that symbol; its code length is nbBitsBaseline - weight */
} sortedSymbol_t;
391*4882a593Smuzhiyun 
392*4882a593Smuzhiyun /* HUF_fillDTableX4Level2() :
393*4882a593Smuzhiyun  * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
HUF_fillDTableX4Level2(HUF_DEltX4 * DTable,U32 sizeLog,const U32 consumed,const U32 * rankValOrigin,const int minWeight,const sortedSymbol_t * sortedSymbols,const U32 sortedListSize,U32 nbBitsBaseline,U16 baseSeq)394*4882a593Smuzhiyun static void HUF_fillDTableX4Level2(HUF_DEltX4 *DTable, U32 sizeLog, const U32 consumed, const U32 *rankValOrigin, const int minWeight,
395*4882a593Smuzhiyun 				   const sortedSymbol_t *sortedSymbols, const U32 sortedListSize, U32 nbBitsBaseline, U16 baseSeq)
396*4882a593Smuzhiyun {
397*4882a593Smuzhiyun 	HUF_DEltX4 DElt;
398*4882a593Smuzhiyun 	U32 rankVal[HUF_TABLELOG_MAX + 1];
399*4882a593Smuzhiyun 
400*4882a593Smuzhiyun 	/* get pre-calculated rankVal */
401*4882a593Smuzhiyun 	memcpy(rankVal, rankValOrigin, sizeof(rankVal));
402*4882a593Smuzhiyun 
403*4882a593Smuzhiyun 	/* fill skipped values */
404*4882a593Smuzhiyun 	if (minWeight > 1) {
405*4882a593Smuzhiyun 		U32 i, skipSize = rankVal[minWeight];
406*4882a593Smuzhiyun 		ZSTD_writeLE16(&(DElt.sequence), baseSeq);
407*4882a593Smuzhiyun 		DElt.nbBits = (BYTE)(consumed);
408*4882a593Smuzhiyun 		DElt.length = 1;
409*4882a593Smuzhiyun 		for (i = 0; i < skipSize; i++)
410*4882a593Smuzhiyun 			DTable[i] = DElt;
411*4882a593Smuzhiyun 	}
412*4882a593Smuzhiyun 
413*4882a593Smuzhiyun 	/* fill DTable */
414*4882a593Smuzhiyun 	{
415*4882a593Smuzhiyun 		U32 s;
416*4882a593Smuzhiyun 		for (s = 0; s < sortedListSize; s++) { /* note : sortedSymbols already skipped */
417*4882a593Smuzhiyun 			const U32 symbol = sortedSymbols[s].symbol;
418*4882a593Smuzhiyun 			const U32 weight = sortedSymbols[s].weight;
419*4882a593Smuzhiyun 			const U32 nbBits = nbBitsBaseline - weight;
420*4882a593Smuzhiyun 			const U32 length = 1 << (sizeLog - nbBits);
421*4882a593Smuzhiyun 			const U32 start = rankVal[weight];
422*4882a593Smuzhiyun 			U32 i = start;
423*4882a593Smuzhiyun 			const U32 end = start + length;
424*4882a593Smuzhiyun 
425*4882a593Smuzhiyun 			ZSTD_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
426*4882a593Smuzhiyun 			DElt.nbBits = (BYTE)(nbBits + consumed);
427*4882a593Smuzhiyun 			DElt.length = 2;
428*4882a593Smuzhiyun 			do {
429*4882a593Smuzhiyun 				DTable[i++] = DElt;
430*4882a593Smuzhiyun 			} while (i < end); /* since length >= 1 */
431*4882a593Smuzhiyun 
432*4882a593Smuzhiyun 			rankVal[weight] += length;
433*4882a593Smuzhiyun 		}
434*4882a593Smuzhiyun 	}
435*4882a593Smuzhiyun }
436*4882a593Smuzhiyun 
/* rankVal_t : HUF_TABLELOG_MAX rows; HUF_fillDTableX4() picks the row by the number of bits already consumed */
typedef U32 rankVal_t[HUF_TABLELOG_MAX][HUF_TABLELOG_MAX + 1];
/* rankValCol_t : one such row, indexed by weight */
typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
439*4882a593Smuzhiyun 
/* HUF_fillDTableX4() :
 * Fills the double-symbol table `DTable` (lookup width `targetLog`) from `sortedList`.
 * For each symbol, when the bits left after its code can hold at least the shortest code (minBits),
 * its cell run is delegated to HUF_fillDTableX4Level2() so a second symbol can be appended;
 * otherwise the run is filled with single-symbol cells.
 * `rankValOrigin` row 0 gives the starting cell of each weight; row `nbBits` is passed to level 2. */
static void HUF_fillDTableX4(HUF_DEltX4 *DTable, const U32 targetLog, const sortedSymbol_t *sortedList, const U32 sortedListSize, const U32 *rankStart,
			     rankVal_t rankValOrigin, const U32 maxWeight, const U32 nbBitsBaseline)
{
	U32 nextSlot[HUF_TABLELOG_MAX + 1]; /* private copy of row 0 : next free cell index per weight */
	const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */
	const U32 minBits = nbBitsBaseline - maxWeight;
	U32 s;

	memcpy(nextSlot, rankValOrigin, sizeof(nextSlot));

	/* fill DTable */
	for (s = 0; s < sortedListSize; s++) {
		const U16 symbol = sortedList[s].symbol;
		const U32 weight = sortedList[s].weight;
		const U32 nbBits = nbBitsBaseline - weight;
		const U32 bitsLeft = targetLog - nbBits;
		const U32 start = nextSlot[weight];
		const U32 span = 1 << bitsLeft;

		if (bitsLeft < minBits) {
			/* no room for a second symbol : plain single-symbol cells */
			U32 const end = start + span;
			HUF_DEltX4 cell;
			U32 u;

			ZSTD_writeLE16(&(cell.sequence), symbol);
			cell.nbBits = (BYTE)(nbBits);
			cell.length = 1;
			for (u = start; u < end; u++)
				DTable[u] = cell;
		} else {
			/* enough room for a second symbol */
			int minWeight = nbBits + scaleLog;
			U32 sortedRank;

			if (minWeight < 1)
				minWeight = 1;
			sortedRank = rankStart[minWeight];
			HUF_fillDTableX4Level2(DTable + start, bitsLeft, nbBits, rankValOrigin[nbBits], minWeight, sortedList + sortedRank,
					       sortedListSize - sortedRank, nbBitsBaseline, symbol);
		}
		nextSlot[weight] += span;
	}
}
481*4882a593Smuzhiyun 
HUF_readDTableX4_wksp(HUF_DTable * DTable,const void * src,size_t srcSize,void * workspace,size_t workspaceSize)482*4882a593Smuzhiyun size_t HUF_readDTableX4_wksp(HUF_DTable *DTable, const void *src, size_t srcSize, void *workspace, size_t workspaceSize)
483*4882a593Smuzhiyun {
484*4882a593Smuzhiyun 	U32 tableLog, maxW, sizeOfSort, nbSymbols;
485*4882a593Smuzhiyun 	DTableDesc dtd = HUF_getDTableDesc(DTable);
486*4882a593Smuzhiyun 	U32 const maxTableLog = dtd.maxTableLog;
487*4882a593Smuzhiyun 	size_t iSize;
488*4882a593Smuzhiyun 	void *dtPtr = DTable + 1; /* force compiler to avoid strict-aliasing */
489*4882a593Smuzhiyun 	HUF_DEltX4 *const dt = (HUF_DEltX4 *)dtPtr;
490*4882a593Smuzhiyun 	U32 *rankStart;
491*4882a593Smuzhiyun 
492*4882a593Smuzhiyun 	rankValCol_t *rankVal;
493*4882a593Smuzhiyun 	U32 *rankStats;
494*4882a593Smuzhiyun 	U32 *rankStart0;
495*4882a593Smuzhiyun 	sortedSymbol_t *sortedSymbol;
496*4882a593Smuzhiyun 	BYTE *weightList;
497*4882a593Smuzhiyun 	size_t spaceUsed32 = 0;
498*4882a593Smuzhiyun 
499*4882a593Smuzhiyun 	HUF_STATIC_ASSERT((sizeof(rankValCol_t) & 3) == 0);
500*4882a593Smuzhiyun 
501*4882a593Smuzhiyun 	rankVal = (rankValCol_t *)((U32 *)workspace + spaceUsed32);
502*4882a593Smuzhiyun 	spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2;
503*4882a593Smuzhiyun 	rankStats = (U32 *)workspace + spaceUsed32;
504*4882a593Smuzhiyun 	spaceUsed32 += HUF_TABLELOG_MAX + 1;
505*4882a593Smuzhiyun 	rankStart0 = (U32 *)workspace + spaceUsed32;
506*4882a593Smuzhiyun 	spaceUsed32 += HUF_TABLELOG_MAX + 2;
507*4882a593Smuzhiyun 	sortedSymbol = (sortedSymbol_t *)((U32 *)workspace + spaceUsed32);
508*4882a593Smuzhiyun 	spaceUsed32 += ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2;
509*4882a593Smuzhiyun 	weightList = (BYTE *)((U32 *)workspace + spaceUsed32);
510*4882a593Smuzhiyun 	spaceUsed32 += ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
511*4882a593Smuzhiyun 
512*4882a593Smuzhiyun 	if ((spaceUsed32 << 2) > workspaceSize)
513*4882a593Smuzhiyun 		return ERROR(tableLog_tooLarge);
514*4882a593Smuzhiyun 	workspace = (U32 *)workspace + spaceUsed32;
515*4882a593Smuzhiyun 	workspaceSize -= (spaceUsed32 << 2);
516*4882a593Smuzhiyun 
517*4882a593Smuzhiyun 	rankStart = rankStart0 + 1;
518*4882a593Smuzhiyun 	memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));
519*4882a593Smuzhiyun 
520*4882a593Smuzhiyun 	HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */
521*4882a593Smuzhiyun 	if (maxTableLog > HUF_TABLELOG_MAX)
522*4882a593Smuzhiyun 		return ERROR(tableLog_tooLarge);
523*4882a593Smuzhiyun 	/* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */
524*4882a593Smuzhiyun 
525*4882a593Smuzhiyun 	iSize = HUF_readStats_wksp(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize, workspace, workspaceSize);
526*4882a593Smuzhiyun 	if (HUF_isError(iSize))
527*4882a593Smuzhiyun 		return iSize;
528*4882a593Smuzhiyun 
529*4882a593Smuzhiyun 	/* check result */
530*4882a593Smuzhiyun 	if (tableLog > maxTableLog)
531*4882a593Smuzhiyun 		return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */
532*4882a593Smuzhiyun 
533*4882a593Smuzhiyun 	/* find maxWeight */
534*4882a593Smuzhiyun 	for (maxW = tableLog; rankStats[maxW] == 0; maxW--) {
535*4882a593Smuzhiyun 	} /* necessarily finds a solution before 0 */
536*4882a593Smuzhiyun 
537*4882a593Smuzhiyun 	/* Get start index of each weight */
538*4882a593Smuzhiyun 	{
539*4882a593Smuzhiyun 		U32 w, nextRankStart = 0;
540*4882a593Smuzhiyun 		for (w = 1; w < maxW + 1; w++) {
541*4882a593Smuzhiyun 			U32 curr = nextRankStart;
542*4882a593Smuzhiyun 			nextRankStart += rankStats[w];
543*4882a593Smuzhiyun 			rankStart[w] = curr;
544*4882a593Smuzhiyun 		}
545*4882a593Smuzhiyun 		rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/
546*4882a593Smuzhiyun 		sizeOfSort = nextRankStart;
547*4882a593Smuzhiyun 	}
548*4882a593Smuzhiyun 
549*4882a593Smuzhiyun 	/* sort symbols by weight */
550*4882a593Smuzhiyun 	{
551*4882a593Smuzhiyun 		U32 s;
552*4882a593Smuzhiyun 		for (s = 0; s < nbSymbols; s++) {
553*4882a593Smuzhiyun 			U32 const w = weightList[s];
554*4882a593Smuzhiyun 			U32 const r = rankStart[w]++;
555*4882a593Smuzhiyun 			sortedSymbol[r].symbol = (BYTE)s;
556*4882a593Smuzhiyun 			sortedSymbol[r].weight = (BYTE)w;
557*4882a593Smuzhiyun 		}
558*4882a593Smuzhiyun 		rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */
559*4882a593Smuzhiyun 	}
560*4882a593Smuzhiyun 
561*4882a593Smuzhiyun 	/* Build rankVal */
562*4882a593Smuzhiyun 	{
563*4882a593Smuzhiyun 		U32 *const rankVal0 = rankVal[0];
564*4882a593Smuzhiyun 		{
565*4882a593Smuzhiyun 			int const rescale = (maxTableLog - tableLog) - 1; /* tableLog <= maxTableLog */
566*4882a593Smuzhiyun 			U32 nextRankVal = 0;
567*4882a593Smuzhiyun 			U32 w;
568*4882a593Smuzhiyun 			for (w = 1; w < maxW + 1; w++) {
569*4882a593Smuzhiyun 				U32 curr = nextRankVal;
570*4882a593Smuzhiyun 				nextRankVal += rankStats[w] << (w + rescale);
571*4882a593Smuzhiyun 				rankVal0[w] = curr;
572*4882a593Smuzhiyun 			}
573*4882a593Smuzhiyun 		}
574*4882a593Smuzhiyun 		{
575*4882a593Smuzhiyun 			U32 const minBits = tableLog + 1 - maxW;
576*4882a593Smuzhiyun 			U32 consumed;
577*4882a593Smuzhiyun 			for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
578*4882a593Smuzhiyun 				U32 *const rankValPtr = rankVal[consumed];
579*4882a593Smuzhiyun 				U32 w;
580*4882a593Smuzhiyun 				for (w = 1; w < maxW + 1; w++) {
581*4882a593Smuzhiyun 					rankValPtr[w] = rankVal0[w] >> consumed;
582*4882a593Smuzhiyun 				}
583*4882a593Smuzhiyun 			}
584*4882a593Smuzhiyun 		}
585*4882a593Smuzhiyun 	}
586*4882a593Smuzhiyun 
587*4882a593Smuzhiyun 	HUF_fillDTableX4(dt, maxTableLog, sortedSymbol, sizeOfSort, rankStart0, rankVal, maxW, tableLog + 1);
588*4882a593Smuzhiyun 
589*4882a593Smuzhiyun 	dtd.tableLog = (BYTE)maxTableLog;
590*4882a593Smuzhiyun 	dtd.tableType = 1;
591*4882a593Smuzhiyun 	memcpy(DTable, &dtd, sizeof(dtd));
592*4882a593Smuzhiyun 	return iSize;
593*4882a593Smuzhiyun }
594*4882a593Smuzhiyun 
/*
 * Decode one table entry from the bitstream.
 *
 * Peeks dtLog bits to select an entry of dt, copies the first two bytes of
 * that entry to op (so op must have room for two bytes even when fewer are
 * kept), consumes the entry's nbBits from the stream and returns the entry's
 * length field, which callers add to their output pointer.
 */
static U32 HUF_decodeSymbolX4(void *op, BIT_DStream_t *DStream, const HUF_DEltX4 *dt, const U32 dtLog)
{
	size_t const idx = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
	const HUF_DEltX4 *const entry = dt + idx;

	memcpy(op, entry, 2);
	BIT_skipBits(DStream, entry->nbBits);
	return entry->length;
}
602*4882a593Smuzhiyun 
/*
 * Decode the final output byte of a stream.
 *
 * Only one byte of the selected entry is copied to op, and the function
 * always returns 1. When the entry's length is not 1, its nbBits is only
 * consumed if the bit container is not already exhausted, and bitsConsumed
 * is then capped at the container width.
 */
static U32 HUF_decodeLastSymbolX4(void *op, BIT_DStream_t *DStream, const HUF_DEltX4 *dt, const U32 dtLog)
{
	size_t const containerBits = sizeof(DStream->bitContainer) * 8;
	size_t const idx = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
	const HUF_DEltX4 *const entry = dt + idx;

	memcpy(op, entry, 1);

	if (entry->length == 1) {
		BIT_skipBits(DStream, entry->nbBits);
		return 1;
	}

	if (DStream->bitsConsumed >= containerBits)
		return 1;

	BIT_skipBits(DStream, entry->nbBits);
	/* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
	if (DStream->bitsConsumed > containerBits)
		DStream->bitsConsumed = containerBits;

	return 1;
}
619*4882a593Smuzhiyun 
/*
 * Decode-step helpers for the double-symbol (X4) decoder.
 *
 * Each one advances `ptr` by the value returned from HUF_decodeSymbolX4().
 * They rely on `dt` and `dtLog` being in scope at the expansion site.
 *   _0 : always decodes.
 *   _1 : decodes only if ZSTD_64bits() or HUF_TABLELOG_MAX <= 12.
 *   _2 : decodes only if ZSTD_64bits().
 *
 * The conditional variants are wrapped in do { } while (0) so that they
 * behave as a single statement and cannot capture a following `else`.
 */
#define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) ((ptr) += HUF_decodeSymbolX4((ptr), (DStreamPtr), dt, dtLog))

#define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr)                                       \
	do {                                                                         \
		if (ZSTD_64bits() || (HUF_TABLELOG_MAX <= 12))                       \
			(ptr) += HUF_decodeSymbolX4((ptr), (DStreamPtr), dt, dtLog); \
	} while (0)

#define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr)                                       \
	do {                                                                         \
		if (ZSTD_64bits())                                                   \
			(ptr) += HUF_decodeSymbolX4((ptr), (DStreamPtr), dt, dtLog); \
	} while (0)
629*4882a593Smuzhiyun 
HUF_decodeStreamX4(BYTE * p,BIT_DStream_t * bitDPtr,BYTE * const pEnd,const HUF_DEltX4 * const dt,const U32 dtLog)630*4882a593Smuzhiyun FORCE_INLINE size_t HUF_decodeStreamX4(BYTE *p, BIT_DStream_t *bitDPtr, BYTE *const pEnd, const HUF_DEltX4 *const dt, const U32 dtLog)
631*4882a593Smuzhiyun {
632*4882a593Smuzhiyun 	BYTE *const pStart = p;
633*4882a593Smuzhiyun 
634*4882a593Smuzhiyun 	/* up to 8 symbols at a time */
635*4882a593Smuzhiyun 	while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd - (sizeof(bitDPtr->bitContainer) - 1))) {
636*4882a593Smuzhiyun 		HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
637*4882a593Smuzhiyun 		HUF_DECODE_SYMBOLX4_1(p, bitDPtr);
638*4882a593Smuzhiyun 		HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
639*4882a593Smuzhiyun 		HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
640*4882a593Smuzhiyun 	}
641*4882a593Smuzhiyun 
642*4882a593Smuzhiyun 	/* closer to end : up to 2 symbols at a time */
643*4882a593Smuzhiyun 	while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd - 2))
644*4882a593Smuzhiyun 		HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
645*4882a593Smuzhiyun 
646*4882a593Smuzhiyun 	while (p <= pEnd - 2)
647*4882a593Smuzhiyun 		HUF_DECODE_SYMBOLX4_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
648*4882a593Smuzhiyun 
649*4882a593Smuzhiyun 	if (p < pEnd)
650*4882a593Smuzhiyun 		p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog);
651*4882a593Smuzhiyun 
652*4882a593Smuzhiyun 	return p - pStart;
653*4882a593Smuzhiyun }
654*4882a593Smuzhiyun 
HUF_decompress1X4_usingDTable_internal(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable)655*4882a593Smuzhiyun static size_t HUF_decompress1X4_usingDTable_internal(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
656*4882a593Smuzhiyun {
657*4882a593Smuzhiyun 	BIT_DStream_t bitD;
658*4882a593Smuzhiyun 
659*4882a593Smuzhiyun 	/* Init */
660*4882a593Smuzhiyun 	{
661*4882a593Smuzhiyun 		size_t const errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize);
662*4882a593Smuzhiyun 		if (HUF_isError(errorCode))
663*4882a593Smuzhiyun 			return errorCode;
664*4882a593Smuzhiyun 	}
665*4882a593Smuzhiyun 
666*4882a593Smuzhiyun 	/* decode */
667*4882a593Smuzhiyun 	{
668*4882a593Smuzhiyun 		BYTE *const ostart = (BYTE *)dst;
669*4882a593Smuzhiyun 		BYTE *const oend = ostart + dstSize;
670*4882a593Smuzhiyun 		const void *const dtPtr = DTable + 1; /* force compiler to not use strict-aliasing */
671*4882a593Smuzhiyun 		const HUF_DEltX4 *const dt = (const HUF_DEltX4 *)dtPtr;
672*4882a593Smuzhiyun 		DTableDesc const dtd = HUF_getDTableDesc(DTable);
673*4882a593Smuzhiyun 		HUF_decodeStreamX4(ostart, &bitD, oend, dt, dtd.tableLog);
674*4882a593Smuzhiyun 	}
675*4882a593Smuzhiyun 
676*4882a593Smuzhiyun 	/* check */
677*4882a593Smuzhiyun 	if (!BIT_endOfDStream(&bitD))
678*4882a593Smuzhiyun 		return ERROR(corruption_detected);
679*4882a593Smuzhiyun 
680*4882a593Smuzhiyun 	/* decoded size */
681*4882a593Smuzhiyun 	return dstSize;
682*4882a593Smuzhiyun }
683*4882a593Smuzhiyun 
HUF_decompress1X4_usingDTable(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable)684*4882a593Smuzhiyun size_t HUF_decompress1X4_usingDTable(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
685*4882a593Smuzhiyun {
686*4882a593Smuzhiyun 	DTableDesc dtd = HUF_getDTableDesc(DTable);
687*4882a593Smuzhiyun 	if (dtd.tableType != 1)
688*4882a593Smuzhiyun 		return ERROR(GENERIC);
689*4882a593Smuzhiyun 	return HUF_decompress1X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
690*4882a593Smuzhiyun }
691*4882a593Smuzhiyun 
/*
 * Build the X4 table from the start of cSrc, then decode the rest as a
 * single stream.
 *
 * The value returned by HUF_readDTableX4_wksp() is used as the number of
 * input bytes to skip; it must leave at least one byte of payload, otherwise
 * srcSize_wrong is returned.
 */
size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable *DCtx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
{
	size_t const headerSize = HUF_readDTableX4_wksp(DCtx, cSrc, cSrcSize, workspace, workspaceSize);

	if (HUF_isError(headerSize))
		return headerSize;
	if (headerSize >= cSrcSize)
		return ERROR(srcSize_wrong);

	return HUF_decompress1X4_usingDTable_internal(dst, dstSize, (const BYTE *)cSrc + headerSize, cSrcSize - headerSize, DCtx);
}
706*4882a593Smuzhiyun 
HUF_decompress4X4_usingDTable_internal(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable)707*4882a593Smuzhiyun static size_t HUF_decompress4X4_usingDTable_internal(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
708*4882a593Smuzhiyun {
709*4882a593Smuzhiyun 	if (cSrcSize < 10)
710*4882a593Smuzhiyun 		return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
711*4882a593Smuzhiyun 
712*4882a593Smuzhiyun 	{
713*4882a593Smuzhiyun 		const BYTE *const istart = (const BYTE *)cSrc;
714*4882a593Smuzhiyun 		BYTE *const ostart = (BYTE *)dst;
715*4882a593Smuzhiyun 		BYTE *const oend = ostart + dstSize;
716*4882a593Smuzhiyun 		const void *const dtPtr = DTable + 1;
717*4882a593Smuzhiyun 		const HUF_DEltX4 *const dt = (const HUF_DEltX4 *)dtPtr;
718*4882a593Smuzhiyun 
719*4882a593Smuzhiyun 		/* Init */
720*4882a593Smuzhiyun 		BIT_DStream_t bitD1;
721*4882a593Smuzhiyun 		BIT_DStream_t bitD2;
722*4882a593Smuzhiyun 		BIT_DStream_t bitD3;
723*4882a593Smuzhiyun 		BIT_DStream_t bitD4;
724*4882a593Smuzhiyun 		size_t const length1 = ZSTD_readLE16(istart);
725*4882a593Smuzhiyun 		size_t const length2 = ZSTD_readLE16(istart + 2);
726*4882a593Smuzhiyun 		size_t const length3 = ZSTD_readLE16(istart + 4);
727*4882a593Smuzhiyun 		size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
728*4882a593Smuzhiyun 		const BYTE *const istart1 = istart + 6; /* jumpTable */
729*4882a593Smuzhiyun 		const BYTE *const istart2 = istart1 + length1;
730*4882a593Smuzhiyun 		const BYTE *const istart3 = istart2 + length2;
731*4882a593Smuzhiyun 		const BYTE *const istart4 = istart3 + length3;
732*4882a593Smuzhiyun 		size_t const segmentSize = (dstSize + 3) / 4;
733*4882a593Smuzhiyun 		BYTE *const opStart2 = ostart + segmentSize;
734*4882a593Smuzhiyun 		BYTE *const opStart3 = opStart2 + segmentSize;
735*4882a593Smuzhiyun 		BYTE *const opStart4 = opStart3 + segmentSize;
736*4882a593Smuzhiyun 		BYTE *op1 = ostart;
737*4882a593Smuzhiyun 		BYTE *op2 = opStart2;
738*4882a593Smuzhiyun 		BYTE *op3 = opStart3;
739*4882a593Smuzhiyun 		BYTE *op4 = opStart4;
740*4882a593Smuzhiyun 		U32 endSignal;
741*4882a593Smuzhiyun 		DTableDesc const dtd = HUF_getDTableDesc(DTable);
742*4882a593Smuzhiyun 		U32 const dtLog = dtd.tableLog;
743*4882a593Smuzhiyun 
744*4882a593Smuzhiyun 		if (length4 > cSrcSize)
745*4882a593Smuzhiyun 			return ERROR(corruption_detected); /* overflow */
746*4882a593Smuzhiyun 		{
747*4882a593Smuzhiyun 			size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1);
748*4882a593Smuzhiyun 			if (HUF_isError(errorCode))
749*4882a593Smuzhiyun 				return errorCode;
750*4882a593Smuzhiyun 		}
751*4882a593Smuzhiyun 		{
752*4882a593Smuzhiyun 			size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2);
753*4882a593Smuzhiyun 			if (HUF_isError(errorCode))
754*4882a593Smuzhiyun 				return errorCode;
755*4882a593Smuzhiyun 		}
756*4882a593Smuzhiyun 		{
757*4882a593Smuzhiyun 			size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3);
758*4882a593Smuzhiyun 			if (HUF_isError(errorCode))
759*4882a593Smuzhiyun 				return errorCode;
760*4882a593Smuzhiyun 		}
761*4882a593Smuzhiyun 		{
762*4882a593Smuzhiyun 			size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4);
763*4882a593Smuzhiyun 			if (HUF_isError(errorCode))
764*4882a593Smuzhiyun 				return errorCode;
765*4882a593Smuzhiyun 		}
766*4882a593Smuzhiyun 
767*4882a593Smuzhiyun 		/* 16-32 symbols per loop (4-8 symbols per stream) */
768*4882a593Smuzhiyun 		endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
769*4882a593Smuzhiyun 		for (; (endSignal == BIT_DStream_unfinished) & (op4 < (oend - (sizeof(bitD4.bitContainer) - 1)));) {
770*4882a593Smuzhiyun 			HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
771*4882a593Smuzhiyun 			HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
772*4882a593Smuzhiyun 			HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
773*4882a593Smuzhiyun 			HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
774*4882a593Smuzhiyun 			HUF_DECODE_SYMBOLX4_1(op1, &bitD1);
775*4882a593Smuzhiyun 			HUF_DECODE_SYMBOLX4_1(op2, &bitD2);
776*4882a593Smuzhiyun 			HUF_DECODE_SYMBOLX4_1(op3, &bitD3);
777*4882a593Smuzhiyun 			HUF_DECODE_SYMBOLX4_1(op4, &bitD4);
778*4882a593Smuzhiyun 			HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
779*4882a593Smuzhiyun 			HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
780*4882a593Smuzhiyun 			HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
781*4882a593Smuzhiyun 			HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
782*4882a593Smuzhiyun 			HUF_DECODE_SYMBOLX4_0(op1, &bitD1);
783*4882a593Smuzhiyun 			HUF_DECODE_SYMBOLX4_0(op2, &bitD2);
784*4882a593Smuzhiyun 			HUF_DECODE_SYMBOLX4_0(op3, &bitD3);
785*4882a593Smuzhiyun 			HUF_DECODE_SYMBOLX4_0(op4, &bitD4);
786*4882a593Smuzhiyun 
787*4882a593Smuzhiyun 			endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
788*4882a593Smuzhiyun 		}
789*4882a593Smuzhiyun 
790*4882a593Smuzhiyun 		/* check corruption */
791*4882a593Smuzhiyun 		if (op1 > opStart2)
792*4882a593Smuzhiyun 			return ERROR(corruption_detected);
793*4882a593Smuzhiyun 		if (op2 > opStart3)
794*4882a593Smuzhiyun 			return ERROR(corruption_detected);
795*4882a593Smuzhiyun 		if (op3 > opStart4)
796*4882a593Smuzhiyun 			return ERROR(corruption_detected);
797*4882a593Smuzhiyun 		/* note : op4 already verified within main loop */
798*4882a593Smuzhiyun 
799*4882a593Smuzhiyun 		/* finish bitStreams one by one */
800*4882a593Smuzhiyun 		HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog);
801*4882a593Smuzhiyun 		HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog);
802*4882a593Smuzhiyun 		HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog);
803*4882a593Smuzhiyun 		HUF_decodeStreamX4(op4, &bitD4, oend, dt, dtLog);
804*4882a593Smuzhiyun 
805*4882a593Smuzhiyun 		/* check */
806*4882a593Smuzhiyun 		{
807*4882a593Smuzhiyun 			U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
808*4882a593Smuzhiyun 			if (!endCheck)
809*4882a593Smuzhiyun 				return ERROR(corruption_detected);
810*4882a593Smuzhiyun 		}
811*4882a593Smuzhiyun 
812*4882a593Smuzhiyun 		/* decoded size */
813*4882a593Smuzhiyun 		return dstSize;
814*4882a593Smuzhiyun 	}
815*4882a593Smuzhiyun }
816*4882a593Smuzhiyun 
HUF_decompress4X4_usingDTable(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable)817*4882a593Smuzhiyun size_t HUF_decompress4X4_usingDTable(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
818*4882a593Smuzhiyun {
819*4882a593Smuzhiyun 	DTableDesc dtd = HUF_getDTableDesc(DTable);
820*4882a593Smuzhiyun 	if (dtd.tableType != 1)
821*4882a593Smuzhiyun 		return ERROR(GENERIC);
822*4882a593Smuzhiyun 	return HUF_decompress4X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
823*4882a593Smuzhiyun }
824*4882a593Smuzhiyun 
/*
 * Build the X4 table from the start of cSrc, then decode the rest as four
 * streams.
 *
 * The value returned by HUF_readDTableX4_wksp() is used as the number of
 * input bytes to skip; it must leave at least one byte of payload, otherwise
 * srcSize_wrong is returned.
 */
size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
{
	size_t const headerSize = HUF_readDTableX4_wksp(dctx, cSrc, cSrcSize, workspace, workspaceSize);

	if (HUF_isError(headerSize))
		return headerSize;
	if (headerSize >= cSrcSize)
		return ERROR(srcSize_wrong);

	return HUF_decompress4X4_usingDTable_internal(dst, dstSize, (const BYTE *)cSrc + headerSize, cSrcSize - headerSize, dctx);
}
839*4882a593Smuzhiyun 
840*4882a593Smuzhiyun /* ********************************/
841*4882a593Smuzhiyun /* Generic decompression selector */
842*4882a593Smuzhiyun /* ********************************/
843*4882a593Smuzhiyun 
HUF_decompress1X_usingDTable(void * dst,size_t maxDstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable)844*4882a593Smuzhiyun size_t HUF_decompress1X_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
845*4882a593Smuzhiyun {
846*4882a593Smuzhiyun 	DTableDesc const dtd = HUF_getDTableDesc(DTable);
847*4882a593Smuzhiyun 	return dtd.tableType ? HUF_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable)
848*4882a593Smuzhiyun 			     : HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable);
849*4882a593Smuzhiyun }
850*4882a593Smuzhiyun 
HUF_decompress4X_usingDTable(void * dst,size_t maxDstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable)851*4882a593Smuzhiyun size_t HUF_decompress4X_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
852*4882a593Smuzhiyun {
853*4882a593Smuzhiyun 	DTableDesc const dtd = HUF_getDTableDesc(DTable);
854*4882a593Smuzhiyun 	return dtd.tableType ? HUF_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable)
855*4882a593Smuzhiyun 			     : HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable);
856*4882a593Smuzhiyun }
857*4882a593Smuzhiyun 
858*4882a593Smuzhiyun typedef struct {
859*4882a593Smuzhiyun 	U32 tableTime;
860*4882a593Smuzhiyun 	U32 decode256Time;
861*4882a593Smuzhiyun } algo_time_t;
862*4882a593Smuzhiyun static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] = {
863*4882a593Smuzhiyun     /* single, double, quad */
864*4882a593Smuzhiyun     {{0, 0}, {1, 1}, {2, 2}},		     /* Q==0 : impossible */
865*4882a593Smuzhiyun     {{0, 0}, {1, 1}, {2, 2}},		     /* Q==1 : impossible */
866*4882a593Smuzhiyun     {{38, 130}, {1313, 74}, {2151, 38}},     /* Q == 2 : 12-18% */
867*4882a593Smuzhiyun     {{448, 128}, {1353, 74}, {2238, 41}},    /* Q == 3 : 18-25% */
868*4882a593Smuzhiyun     {{556, 128}, {1353, 74}, {2238, 47}},    /* Q == 4 : 25-32% */
869*4882a593Smuzhiyun     {{714, 128}, {1418, 74}, {2436, 53}},    /* Q == 5 : 32-38% */
870*4882a593Smuzhiyun     {{883, 128}, {1437, 74}, {2464, 61}},    /* Q == 6 : 38-44% */
871*4882a593Smuzhiyun     {{897, 128}, {1515, 75}, {2622, 68}},    /* Q == 7 : 44-50% */
872*4882a593Smuzhiyun     {{926, 128}, {1613, 75}, {2730, 75}},    /* Q == 8 : 50-56% */
873*4882a593Smuzhiyun     {{947, 128}, {1729, 77}, {3359, 77}},    /* Q == 9 : 56-62% */
874*4882a593Smuzhiyun     {{1107, 128}, {2083, 81}, {4006, 84}},   /* Q ==10 : 62-69% */
875*4882a593Smuzhiyun     {{1177, 128}, {2379, 87}, {4785, 88}},   /* Q ==11 : 69-75% */
876*4882a593Smuzhiyun     {{1242, 128}, {2415, 93}, {5155, 84}},   /* Q ==12 : 75-81% */
877*4882a593Smuzhiyun     {{1349, 128}, {2644, 106}, {5260, 106}}, /* Q ==13 : 81-87% */
878*4882a593Smuzhiyun     {{1455, 128}, {2422, 124}, {4174, 124}}, /* Q ==14 : 87-93% */
879*4882a593Smuzhiyun     {{722, 128}, {1891, 145}, {1936, 146}},  /* Q ==15 : 93-99% */
880*4882a593Smuzhiyun };
881*4882a593Smuzhiyun 
882*4882a593Smuzhiyun /** HUF_selectDecoder() :
883*4882a593Smuzhiyun *   Tells which decoder is likely to decode faster,
884*4882a593Smuzhiyun *   based on a set of pre-determined metrics.
885*4882a593Smuzhiyun *   @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 .
886*4882a593Smuzhiyun *   Assumption : 0 < cSrcSize < dstSize <= 128 KB */
HUF_selectDecoder(size_t dstSize,size_t cSrcSize)887*4882a593Smuzhiyun U32 HUF_selectDecoder(size_t dstSize, size_t cSrcSize)
888*4882a593Smuzhiyun {
889*4882a593Smuzhiyun 	/* decoder timing evaluation */
890*4882a593Smuzhiyun 	U32 const Q = (U32)(cSrcSize * 16 / dstSize); /* Q < 16 since dstSize > cSrcSize */
891*4882a593Smuzhiyun 	U32 const D256 = (U32)(dstSize >> 8);
892*4882a593Smuzhiyun 	U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
893*4882a593Smuzhiyun 	U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
894*4882a593Smuzhiyun 	DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, for cache eviction */
895*4882a593Smuzhiyun 
896*4882a593Smuzhiyun 	return DTime1 < DTime0;
897*4882a593Smuzhiyun }
898*4882a593Smuzhiyun 
/* Pointer to a decompression routine taking (dst, dstSize, cSrc, cSrcSize) and returning size_t.
 * NOTE(review): none of the functions that follow in this file reference this type - confirm whether it is still needed. */
899*4882a593Smuzhiyun typedef size_t (*decompressionAlgo)(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize);
900*4882a593Smuzhiyun 
/*
 * Four-stream decompression with automatic decoder selection.
 *
 * Special cases handled before any Huffman decoding:
 *   dstSize == 0          -> dstSize_tooSmall
 *   cSrcSize >  dstSize   -> corruption_detected
 *   cSrcSize == dstSize   -> input is copied verbatim (not compressed)
 *   cSrcSize == 1         -> the single input byte is repeated dstSize times (RLE)
 * Otherwise HUF_selectDecoder() picks between the X4 and X2 decoders.
 */
size_t HUF_decompress4X_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
{
	/* validation checks */
	if (dstSize == 0)
		return ERROR(dstSize_tooSmall);

	if (cSrcSize >= dstSize) {
		if (cSrcSize > dstSize)
			return ERROR(corruption_detected); /* invalid */
		/* not compressed */
		memcpy(dst, cSrc, dstSize);
		return dstSize;
	}

	if (cSrcSize == 1) {
		/* RLE */
		memset(dst, *(const BYTE *)cSrc, dstSize);
		return dstSize;
	}

	if (HUF_selectDecoder(dstSize, cSrcSize))
		return HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize);

	return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize);
}
923*4882a593Smuzhiyun 
/*
 * Four-stream decompression with automatic decoder selection, without the
 * raw-copy and RLE shortcuts: dstSize == 0 yields dstSize_tooSmall, and any
 * cSrcSize that is <= 1 or >= dstSize yields corruption_detected.
 */
size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
{
	/* validation checks */
	if (dstSize == 0)
		return ERROR(dstSize_tooSmall);
	if ((cSrcSize <= 1) || (cSrcSize >= dstSize))
		return ERROR(corruption_detected); /* invalid */

	if (HUF_selectDecoder(dstSize, cSrcSize))
		return HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize);

	return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize);
}
938*4882a593Smuzhiyun 
/*
 * Single-stream decompression with automatic decoder selection.
 *
 * Special cases handled before any Huffman decoding:
 *   dstSize == 0          -> dstSize_tooSmall
 *   cSrcSize >  dstSize   -> corruption_detected
 *   cSrcSize == dstSize   -> input is copied verbatim (not compressed)
 *   cSrcSize == 1         -> the single input byte is repeated dstSize times (RLE)
 * Otherwise HUF_selectDecoder() picks between the X4 and X2 decoders.
 */
size_t HUF_decompress1X_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
{
	/* validation checks */
	if (dstSize == 0)
		return ERROR(dstSize_tooSmall);

	if (cSrcSize >= dstSize) {
		if (cSrcSize > dstSize)
			return ERROR(corruption_detected); /* invalid */
		/* not compressed */
		memcpy(dst, cSrc, dstSize);
		return dstSize;
	}

	if (cSrcSize == 1) {
		/* RLE */
		memset(dst, *(const BYTE *)cSrc, dstSize);
		return dstSize;
	}

	if (HUF_selectDecoder(dstSize, cSrcSize))
		return HUF_decompress1X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize);

	return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize);
}
961