xref: /OK3568_Linux_fs/kernel/lib/xz/xz_dec_bcj.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
/*
 * Branch/Call/Jump (BCJ) filter decoders
 *
 * Authors: Lasse Collin <lasse.collin@tukaani.org>
 *          Igor Pavlov <https://7-zip.org/>
 *
 * This file has been put into the public domain.
 * You can do whatever you want with this file.
 */

#include "xz_private.h"

/*
 * The rest of the file is inside this ifdef. It makes things a little more
 * convenient when building without support for any BCJ filters.
 */
#ifdef XZ_DEC_BCJ

19*4882a593Smuzhiyun struct xz_dec_bcj {
20*4882a593Smuzhiyun 	/* Type of the BCJ filter being used */
21*4882a593Smuzhiyun 	enum {
22*4882a593Smuzhiyun 		BCJ_X86 = 4,        /* x86 or x86-64 */
23*4882a593Smuzhiyun 		BCJ_POWERPC = 5,    /* Big endian only */
24*4882a593Smuzhiyun 		BCJ_IA64 = 6,       /* Big or little endian */
25*4882a593Smuzhiyun 		BCJ_ARM = 7,        /* Little endian only */
26*4882a593Smuzhiyun 		BCJ_ARMTHUMB = 8,   /* Little endian only */
27*4882a593Smuzhiyun 		BCJ_SPARC = 9       /* Big or little endian */
28*4882a593Smuzhiyun 	} type;
29*4882a593Smuzhiyun 
30*4882a593Smuzhiyun 	/*
31*4882a593Smuzhiyun 	 * Return value of the next filter in the chain. We need to preserve
32*4882a593Smuzhiyun 	 * this information across calls, because we must not call the next
33*4882a593Smuzhiyun 	 * filter anymore once it has returned XZ_STREAM_END.
34*4882a593Smuzhiyun 	 */
35*4882a593Smuzhiyun 	enum xz_ret ret;
36*4882a593Smuzhiyun 
37*4882a593Smuzhiyun 	/* True if we are operating in single-call mode. */
38*4882a593Smuzhiyun 	bool single_call;
39*4882a593Smuzhiyun 
40*4882a593Smuzhiyun 	/*
41*4882a593Smuzhiyun 	 * Absolute position relative to the beginning of the uncompressed
42*4882a593Smuzhiyun 	 * data (in a single .xz Block). We care only about the lowest 32
43*4882a593Smuzhiyun 	 * bits so this doesn't need to be uint64_t even with big files.
44*4882a593Smuzhiyun 	 */
45*4882a593Smuzhiyun 	uint32_t pos;
46*4882a593Smuzhiyun 
47*4882a593Smuzhiyun 	/* x86 filter state */
48*4882a593Smuzhiyun 	uint32_t x86_prev_mask;
49*4882a593Smuzhiyun 
50*4882a593Smuzhiyun 	/* Temporary space to hold the variables from struct xz_buf */
51*4882a593Smuzhiyun 	uint8_t *out;
52*4882a593Smuzhiyun 	size_t out_pos;
53*4882a593Smuzhiyun 	size_t out_size;
54*4882a593Smuzhiyun 
55*4882a593Smuzhiyun 	struct {
56*4882a593Smuzhiyun 		/* Amount of already filtered data in the beginning of buf */
57*4882a593Smuzhiyun 		size_t filtered;
58*4882a593Smuzhiyun 
59*4882a593Smuzhiyun 		/* Total amount of data currently stored in buf  */
60*4882a593Smuzhiyun 		size_t size;
61*4882a593Smuzhiyun 
62*4882a593Smuzhiyun 		/*
63*4882a593Smuzhiyun 		 * Buffer to hold a mix of filtered and unfiltered data. This
64*4882a593Smuzhiyun 		 * needs to be big enough to hold Alignment + 2 * Look-ahead:
65*4882a593Smuzhiyun 		 *
66*4882a593Smuzhiyun 		 * Type         Alignment   Look-ahead
67*4882a593Smuzhiyun 		 * x86              1           4
68*4882a593Smuzhiyun 		 * PowerPC          4           0
69*4882a593Smuzhiyun 		 * IA-64           16           0
70*4882a593Smuzhiyun 		 * ARM              4           0
71*4882a593Smuzhiyun 		 * ARM-Thumb        2           2
72*4882a593Smuzhiyun 		 * SPARC            4           0
73*4882a593Smuzhiyun 		 */
74*4882a593Smuzhiyun 		uint8_t buf[16];
75*4882a593Smuzhiyun 	} temp;
76*4882a593Smuzhiyun };

#ifdef XZ_DEC_X86
/*
 * This is used to test the most significant byte of a memory address
 * in an x86 instruction.
 *
 * Returns non-zero when b is 0x00 or 0xFF, and zero for every other value.
 */
static inline int bcj_x86_test_msbyte(uint8_t b)
{
	return b == 0x00 || b == 0xFF;
}

bcj_x86(struct xz_dec_bcj * s,uint8_t * buf,size_t size)88*4882a593Smuzhiyun static size_t bcj_x86(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
89*4882a593Smuzhiyun {
90*4882a593Smuzhiyun 	static const bool mask_to_allowed_status[8]
91*4882a593Smuzhiyun 		= { true, true, true, false, true, false, false, false };
92*4882a593Smuzhiyun 
93*4882a593Smuzhiyun 	static const uint8_t mask_to_bit_num[8] = { 0, 1, 2, 2, 3, 3, 3, 3 };
94*4882a593Smuzhiyun 
95*4882a593Smuzhiyun 	size_t i;
96*4882a593Smuzhiyun 	size_t prev_pos = (size_t)-1;
97*4882a593Smuzhiyun 	uint32_t prev_mask = s->x86_prev_mask;
98*4882a593Smuzhiyun 	uint32_t src;
99*4882a593Smuzhiyun 	uint32_t dest;
100*4882a593Smuzhiyun 	uint32_t j;
101*4882a593Smuzhiyun 	uint8_t b;
102*4882a593Smuzhiyun 
103*4882a593Smuzhiyun 	if (size <= 4)
104*4882a593Smuzhiyun 		return 0;
105*4882a593Smuzhiyun 
106*4882a593Smuzhiyun 	size -= 4;
107*4882a593Smuzhiyun 	for (i = 0; i < size; ++i) {
108*4882a593Smuzhiyun 		if ((buf[i] & 0xFE) != 0xE8)
109*4882a593Smuzhiyun 			continue;
110*4882a593Smuzhiyun 
111*4882a593Smuzhiyun 		prev_pos = i - prev_pos;
112*4882a593Smuzhiyun 		if (prev_pos > 3) {
113*4882a593Smuzhiyun 			prev_mask = 0;
114*4882a593Smuzhiyun 		} else {
115*4882a593Smuzhiyun 			prev_mask = (prev_mask << (prev_pos - 1)) & 7;
116*4882a593Smuzhiyun 			if (prev_mask != 0) {
117*4882a593Smuzhiyun 				b = buf[i + 4 - mask_to_bit_num[prev_mask]];
118*4882a593Smuzhiyun 				if (!mask_to_allowed_status[prev_mask]
119*4882a593Smuzhiyun 						|| bcj_x86_test_msbyte(b)) {
120*4882a593Smuzhiyun 					prev_pos = i;
121*4882a593Smuzhiyun 					prev_mask = (prev_mask << 1) | 1;
122*4882a593Smuzhiyun 					continue;
123*4882a593Smuzhiyun 				}
124*4882a593Smuzhiyun 			}
125*4882a593Smuzhiyun 		}
126*4882a593Smuzhiyun 
127*4882a593Smuzhiyun 		prev_pos = i;
128*4882a593Smuzhiyun 
129*4882a593Smuzhiyun 		if (bcj_x86_test_msbyte(buf[i + 4])) {
130*4882a593Smuzhiyun 			src = get_unaligned_le32(buf + i + 1);
131*4882a593Smuzhiyun 			while (true) {
132*4882a593Smuzhiyun 				dest = src - (s->pos + (uint32_t)i + 5);
133*4882a593Smuzhiyun 				if (prev_mask == 0)
134*4882a593Smuzhiyun 					break;
135*4882a593Smuzhiyun 
136*4882a593Smuzhiyun 				j = mask_to_bit_num[prev_mask] * 8;
137*4882a593Smuzhiyun 				b = (uint8_t)(dest >> (24 - j));
138*4882a593Smuzhiyun 				if (!bcj_x86_test_msbyte(b))
139*4882a593Smuzhiyun 					break;
140*4882a593Smuzhiyun 
141*4882a593Smuzhiyun 				src = dest ^ (((uint32_t)1 << (32 - j)) - 1);
142*4882a593Smuzhiyun 			}
143*4882a593Smuzhiyun 
144*4882a593Smuzhiyun 			dest &= 0x01FFFFFF;
145*4882a593Smuzhiyun 			dest |= (uint32_t)0 - (dest & 0x01000000);
146*4882a593Smuzhiyun 			put_unaligned_le32(dest, buf + i + 1);
147*4882a593Smuzhiyun 			i += 4;
148*4882a593Smuzhiyun 		} else {
149*4882a593Smuzhiyun 			prev_mask = (prev_mask << 1) | 1;
150*4882a593Smuzhiyun 		}
151*4882a593Smuzhiyun 	}
152*4882a593Smuzhiyun 
153*4882a593Smuzhiyun 	prev_pos = i - prev_pos;
154*4882a593Smuzhiyun 	s->x86_prev_mask = prev_pos > 3 ? 0 : prev_mask << (prev_pos - 1);
155*4882a593Smuzhiyun 	return i;
156*4882a593Smuzhiyun }
#endif

#ifdef XZ_DEC_POWERPC
bcj_powerpc(struct xz_dec_bcj * s,uint8_t * buf,size_t size)160*4882a593Smuzhiyun static size_t bcj_powerpc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
161*4882a593Smuzhiyun {
162*4882a593Smuzhiyun 	size_t i;
163*4882a593Smuzhiyun 	uint32_t instr;
164*4882a593Smuzhiyun 
165*4882a593Smuzhiyun 	for (i = 0; i + 4 <= size; i += 4) {
166*4882a593Smuzhiyun 		instr = get_unaligned_be32(buf + i);
167*4882a593Smuzhiyun 		if ((instr & 0xFC000003) == 0x48000001) {
168*4882a593Smuzhiyun 			instr &= 0x03FFFFFC;
169*4882a593Smuzhiyun 			instr -= s->pos + (uint32_t)i;
170*4882a593Smuzhiyun 			instr &= 0x03FFFFFC;
171*4882a593Smuzhiyun 			instr |= 0x48000001;
172*4882a593Smuzhiyun 			put_unaligned_be32(instr, buf + i);
173*4882a593Smuzhiyun 		}
174*4882a593Smuzhiyun 	}
175*4882a593Smuzhiyun 
176*4882a593Smuzhiyun 	return i;
177*4882a593Smuzhiyun }
#endif

#ifdef XZ_DEC_IA64
bcj_ia64(struct xz_dec_bcj * s,uint8_t * buf,size_t size)181*4882a593Smuzhiyun static size_t bcj_ia64(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
182*4882a593Smuzhiyun {
183*4882a593Smuzhiyun 	static const uint8_t branch_table[32] = {
184*4882a593Smuzhiyun 		0, 0, 0, 0, 0, 0, 0, 0,
185*4882a593Smuzhiyun 		0, 0, 0, 0, 0, 0, 0, 0,
186*4882a593Smuzhiyun 		4, 4, 6, 6, 0, 0, 7, 7,
187*4882a593Smuzhiyun 		4, 4, 0, 0, 4, 4, 0, 0
188*4882a593Smuzhiyun 	};
189*4882a593Smuzhiyun 
190*4882a593Smuzhiyun 	/*
191*4882a593Smuzhiyun 	 * The local variables take a little bit stack space, but it's less
192*4882a593Smuzhiyun 	 * than what LZMA2 decoder takes, so it doesn't make sense to reduce
193*4882a593Smuzhiyun 	 * stack usage here without doing that for the LZMA2 decoder too.
194*4882a593Smuzhiyun 	 */
195*4882a593Smuzhiyun 
196*4882a593Smuzhiyun 	/* Loop counters */
197*4882a593Smuzhiyun 	size_t i;
198*4882a593Smuzhiyun 	size_t j;
199*4882a593Smuzhiyun 
200*4882a593Smuzhiyun 	/* Instruction slot (0, 1, or 2) in the 128-bit instruction word */
201*4882a593Smuzhiyun 	uint32_t slot;
202*4882a593Smuzhiyun 
203*4882a593Smuzhiyun 	/* Bitwise offset of the instruction indicated by slot */
204*4882a593Smuzhiyun 	uint32_t bit_pos;
205*4882a593Smuzhiyun 
206*4882a593Smuzhiyun 	/* bit_pos split into byte and bit parts */
207*4882a593Smuzhiyun 	uint32_t byte_pos;
208*4882a593Smuzhiyun 	uint32_t bit_res;
209*4882a593Smuzhiyun 
210*4882a593Smuzhiyun 	/* Address part of an instruction */
211*4882a593Smuzhiyun 	uint32_t addr;
212*4882a593Smuzhiyun 
213*4882a593Smuzhiyun 	/* Mask used to detect which instructions to convert */
214*4882a593Smuzhiyun 	uint32_t mask;
215*4882a593Smuzhiyun 
216*4882a593Smuzhiyun 	/* 41-bit instruction stored somewhere in the lowest 48 bits */
217*4882a593Smuzhiyun 	uint64_t instr;
218*4882a593Smuzhiyun 
219*4882a593Smuzhiyun 	/* Instruction normalized with bit_res for easier manipulation */
220*4882a593Smuzhiyun 	uint64_t norm;
221*4882a593Smuzhiyun 
222*4882a593Smuzhiyun 	for (i = 0; i + 16 <= size; i += 16) {
223*4882a593Smuzhiyun 		mask = branch_table[buf[i] & 0x1F];
224*4882a593Smuzhiyun 		for (slot = 0, bit_pos = 5; slot < 3; ++slot, bit_pos += 41) {
225*4882a593Smuzhiyun 			if (((mask >> slot) & 1) == 0)
226*4882a593Smuzhiyun 				continue;
227*4882a593Smuzhiyun 
228*4882a593Smuzhiyun 			byte_pos = bit_pos >> 3;
229*4882a593Smuzhiyun 			bit_res = bit_pos & 7;
230*4882a593Smuzhiyun 			instr = 0;
231*4882a593Smuzhiyun 			for (j = 0; j < 6; ++j)
232*4882a593Smuzhiyun 				instr |= (uint64_t)(buf[i + j + byte_pos])
233*4882a593Smuzhiyun 						<< (8 * j);
234*4882a593Smuzhiyun 
235*4882a593Smuzhiyun 			norm = instr >> bit_res;
236*4882a593Smuzhiyun 
237*4882a593Smuzhiyun 			if (((norm >> 37) & 0x0F) == 0x05
238*4882a593Smuzhiyun 					&& ((norm >> 9) & 0x07) == 0) {
239*4882a593Smuzhiyun 				addr = (norm >> 13) & 0x0FFFFF;
240*4882a593Smuzhiyun 				addr |= ((uint32_t)(norm >> 36) & 1) << 20;
241*4882a593Smuzhiyun 				addr <<= 4;
242*4882a593Smuzhiyun 				addr -= s->pos + (uint32_t)i;
243*4882a593Smuzhiyun 				addr >>= 4;
244*4882a593Smuzhiyun 
245*4882a593Smuzhiyun 				norm &= ~((uint64_t)0x8FFFFF << 13);
246*4882a593Smuzhiyun 				norm |= (uint64_t)(addr & 0x0FFFFF) << 13;
247*4882a593Smuzhiyun 				norm |= (uint64_t)(addr & 0x100000)
248*4882a593Smuzhiyun 						<< (36 - 20);
249*4882a593Smuzhiyun 
250*4882a593Smuzhiyun 				instr &= (1 << bit_res) - 1;
251*4882a593Smuzhiyun 				instr |= norm << bit_res;
252*4882a593Smuzhiyun 
253*4882a593Smuzhiyun 				for (j = 0; j < 6; j++)
254*4882a593Smuzhiyun 					buf[i + j + byte_pos]
255*4882a593Smuzhiyun 						= (uint8_t)(instr >> (8 * j));
256*4882a593Smuzhiyun 			}
257*4882a593Smuzhiyun 		}
258*4882a593Smuzhiyun 	}
259*4882a593Smuzhiyun 
260*4882a593Smuzhiyun 	return i;
261*4882a593Smuzhiyun }
#endif

#ifdef XZ_DEC_ARM
bcj_arm(struct xz_dec_bcj * s,uint8_t * buf,size_t size)265*4882a593Smuzhiyun static size_t bcj_arm(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
266*4882a593Smuzhiyun {
267*4882a593Smuzhiyun 	size_t i;
268*4882a593Smuzhiyun 	uint32_t addr;
269*4882a593Smuzhiyun 
270*4882a593Smuzhiyun 	for (i = 0; i + 4 <= size; i += 4) {
271*4882a593Smuzhiyun 		if (buf[i + 3] == 0xEB) {
272*4882a593Smuzhiyun 			addr = (uint32_t)buf[i] | ((uint32_t)buf[i + 1] << 8)
273*4882a593Smuzhiyun 					| ((uint32_t)buf[i + 2] << 16);
274*4882a593Smuzhiyun 			addr <<= 2;
275*4882a593Smuzhiyun 			addr -= s->pos + (uint32_t)i + 8;
276*4882a593Smuzhiyun 			addr >>= 2;
277*4882a593Smuzhiyun 			buf[i] = (uint8_t)addr;
278*4882a593Smuzhiyun 			buf[i + 1] = (uint8_t)(addr >> 8);
279*4882a593Smuzhiyun 			buf[i + 2] = (uint8_t)(addr >> 16);
280*4882a593Smuzhiyun 		}
281*4882a593Smuzhiyun 	}
282*4882a593Smuzhiyun 
283*4882a593Smuzhiyun 	return i;
284*4882a593Smuzhiyun }
#endif

#ifdef XZ_DEC_ARMTHUMB
bcj_armthumb(struct xz_dec_bcj * s,uint8_t * buf,size_t size)288*4882a593Smuzhiyun static size_t bcj_armthumb(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
289*4882a593Smuzhiyun {
290*4882a593Smuzhiyun 	size_t i;
291*4882a593Smuzhiyun 	uint32_t addr;
292*4882a593Smuzhiyun 
293*4882a593Smuzhiyun 	for (i = 0; i + 4 <= size; i += 2) {
294*4882a593Smuzhiyun 		if ((buf[i + 1] & 0xF8) == 0xF0
295*4882a593Smuzhiyun 				&& (buf[i + 3] & 0xF8) == 0xF8) {
296*4882a593Smuzhiyun 			addr = (((uint32_t)buf[i + 1] & 0x07) << 19)
297*4882a593Smuzhiyun 					| ((uint32_t)buf[i] << 11)
298*4882a593Smuzhiyun 					| (((uint32_t)buf[i + 3] & 0x07) << 8)
299*4882a593Smuzhiyun 					| (uint32_t)buf[i + 2];
300*4882a593Smuzhiyun 			addr <<= 1;
301*4882a593Smuzhiyun 			addr -= s->pos + (uint32_t)i + 4;
302*4882a593Smuzhiyun 			addr >>= 1;
303*4882a593Smuzhiyun 			buf[i + 1] = (uint8_t)(0xF0 | ((addr >> 19) & 0x07));
304*4882a593Smuzhiyun 			buf[i] = (uint8_t)(addr >> 11);
305*4882a593Smuzhiyun 			buf[i + 3] = (uint8_t)(0xF8 | ((addr >> 8) & 0x07));
306*4882a593Smuzhiyun 			buf[i + 2] = (uint8_t)addr;
307*4882a593Smuzhiyun 			i += 2;
308*4882a593Smuzhiyun 		}
309*4882a593Smuzhiyun 	}
310*4882a593Smuzhiyun 
311*4882a593Smuzhiyun 	return i;
312*4882a593Smuzhiyun }
#endif

#ifdef XZ_DEC_SPARC
bcj_sparc(struct xz_dec_bcj * s,uint8_t * buf,size_t size)316*4882a593Smuzhiyun static size_t bcj_sparc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
317*4882a593Smuzhiyun {
318*4882a593Smuzhiyun 	size_t i;
319*4882a593Smuzhiyun 	uint32_t instr;
320*4882a593Smuzhiyun 
321*4882a593Smuzhiyun 	for (i = 0; i + 4 <= size; i += 4) {
322*4882a593Smuzhiyun 		instr = get_unaligned_be32(buf + i);
323*4882a593Smuzhiyun 		if ((instr >> 22) == 0x100 || (instr >> 22) == 0x1FF) {
324*4882a593Smuzhiyun 			instr <<= 2;
325*4882a593Smuzhiyun 			instr -= s->pos + (uint32_t)i;
326*4882a593Smuzhiyun 			instr >>= 2;
327*4882a593Smuzhiyun 			instr = ((uint32_t)0x40000000 - (instr & 0x400000))
328*4882a593Smuzhiyun 					| 0x40000000 | (instr & 0x3FFFFF);
329*4882a593Smuzhiyun 			put_unaligned_be32(instr, buf + i);
330*4882a593Smuzhiyun 		}
331*4882a593Smuzhiyun 	}
332*4882a593Smuzhiyun 
333*4882a593Smuzhiyun 	return i;
334*4882a593Smuzhiyun }
#endif

337*4882a593Smuzhiyun /*
338*4882a593Smuzhiyun  * Apply the selected BCJ filter. Update *pos and s->pos to match the amount
339*4882a593Smuzhiyun  * of data that got filtered.
340*4882a593Smuzhiyun  *
341*4882a593Smuzhiyun  * NOTE: This is implemented as a switch statement to avoid using function
342*4882a593Smuzhiyun  * pointers, which could be problematic in the kernel boot code, which must
343*4882a593Smuzhiyun  * avoid pointers to static data (at least on x86).
344*4882a593Smuzhiyun  */
bcj_apply(struct xz_dec_bcj * s,uint8_t * buf,size_t * pos,size_t size)345*4882a593Smuzhiyun static void bcj_apply(struct xz_dec_bcj *s,
346*4882a593Smuzhiyun 		      uint8_t *buf, size_t *pos, size_t size)
347*4882a593Smuzhiyun {
348*4882a593Smuzhiyun 	size_t filtered;
349*4882a593Smuzhiyun 
350*4882a593Smuzhiyun 	buf += *pos;
351*4882a593Smuzhiyun 	size -= *pos;
352*4882a593Smuzhiyun 
353*4882a593Smuzhiyun 	switch (s->type) {
354*4882a593Smuzhiyun #ifdef XZ_DEC_X86
355*4882a593Smuzhiyun 	case BCJ_X86:
356*4882a593Smuzhiyun 		filtered = bcj_x86(s, buf, size);
357*4882a593Smuzhiyun 		break;
358*4882a593Smuzhiyun #endif
359*4882a593Smuzhiyun #ifdef XZ_DEC_POWERPC
360*4882a593Smuzhiyun 	case BCJ_POWERPC:
361*4882a593Smuzhiyun 		filtered = bcj_powerpc(s, buf, size);
362*4882a593Smuzhiyun 		break;
363*4882a593Smuzhiyun #endif
364*4882a593Smuzhiyun #ifdef XZ_DEC_IA64
365*4882a593Smuzhiyun 	case BCJ_IA64:
366*4882a593Smuzhiyun 		filtered = bcj_ia64(s, buf, size);
367*4882a593Smuzhiyun 		break;
368*4882a593Smuzhiyun #endif
369*4882a593Smuzhiyun #ifdef XZ_DEC_ARM
370*4882a593Smuzhiyun 	case BCJ_ARM:
371*4882a593Smuzhiyun 		filtered = bcj_arm(s, buf, size);
372*4882a593Smuzhiyun 		break;
373*4882a593Smuzhiyun #endif
374*4882a593Smuzhiyun #ifdef XZ_DEC_ARMTHUMB
375*4882a593Smuzhiyun 	case BCJ_ARMTHUMB:
376*4882a593Smuzhiyun 		filtered = bcj_armthumb(s, buf, size);
377*4882a593Smuzhiyun 		break;
378*4882a593Smuzhiyun #endif
379*4882a593Smuzhiyun #ifdef XZ_DEC_SPARC
380*4882a593Smuzhiyun 	case BCJ_SPARC:
381*4882a593Smuzhiyun 		filtered = bcj_sparc(s, buf, size);
382*4882a593Smuzhiyun 		break;
383*4882a593Smuzhiyun #endif
384*4882a593Smuzhiyun 	default:
385*4882a593Smuzhiyun 		/* Never reached but silence compiler warnings. */
386*4882a593Smuzhiyun 		filtered = 0;
387*4882a593Smuzhiyun 		break;
388*4882a593Smuzhiyun 	}
389*4882a593Smuzhiyun 
390*4882a593Smuzhiyun 	*pos += filtered;
391*4882a593Smuzhiyun 	s->pos += filtered;
392*4882a593Smuzhiyun }
393*4882a593Smuzhiyun 
394*4882a593Smuzhiyun /*
395*4882a593Smuzhiyun  * Flush pending filtered data from temp to the output buffer.
396*4882a593Smuzhiyun  * Move the remaining mixture of possibly filtered and unfiltered
397*4882a593Smuzhiyun  * data to the beginning of temp.
398*4882a593Smuzhiyun  */
bcj_flush(struct xz_dec_bcj * s,struct xz_buf * b)399*4882a593Smuzhiyun static void bcj_flush(struct xz_dec_bcj *s, struct xz_buf *b)
400*4882a593Smuzhiyun {
401*4882a593Smuzhiyun 	size_t copy_size;
402*4882a593Smuzhiyun 
403*4882a593Smuzhiyun 	copy_size = min_t(size_t, s->temp.filtered, b->out_size - b->out_pos);
404*4882a593Smuzhiyun 	memcpy(b->out + b->out_pos, s->temp.buf, copy_size);
405*4882a593Smuzhiyun 	b->out_pos += copy_size;
406*4882a593Smuzhiyun 
407*4882a593Smuzhiyun 	s->temp.filtered -= copy_size;
408*4882a593Smuzhiyun 	s->temp.size -= copy_size;
409*4882a593Smuzhiyun 	memmove(s->temp.buf, s->temp.buf + copy_size, s->temp.size);
410*4882a593Smuzhiyun }
411*4882a593Smuzhiyun 
412*4882a593Smuzhiyun /*
413*4882a593Smuzhiyun  * The BCJ filter functions are primitive in sense that they process the
414*4882a593Smuzhiyun  * data in chunks of 1-16 bytes. To hide this issue, this function does
415*4882a593Smuzhiyun  * some buffering.
416*4882a593Smuzhiyun  */
xz_dec_bcj_run(struct xz_dec_bcj * s,struct xz_dec_lzma2 * lzma2,struct xz_buf * b)417*4882a593Smuzhiyun XZ_EXTERN enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s,
418*4882a593Smuzhiyun 				     struct xz_dec_lzma2 *lzma2,
419*4882a593Smuzhiyun 				     struct xz_buf *b)
420*4882a593Smuzhiyun {
421*4882a593Smuzhiyun 	size_t out_start;
422*4882a593Smuzhiyun 
423*4882a593Smuzhiyun 	/*
424*4882a593Smuzhiyun 	 * Flush pending already filtered data to the output buffer. Return
425*4882a593Smuzhiyun 	 * immediatelly if we couldn't flush everything, or if the next
426*4882a593Smuzhiyun 	 * filter in the chain had already returned XZ_STREAM_END.
427*4882a593Smuzhiyun 	 */
428*4882a593Smuzhiyun 	if (s->temp.filtered > 0) {
429*4882a593Smuzhiyun 		bcj_flush(s, b);
430*4882a593Smuzhiyun 		if (s->temp.filtered > 0)
431*4882a593Smuzhiyun 			return XZ_OK;
432*4882a593Smuzhiyun 
433*4882a593Smuzhiyun 		if (s->ret == XZ_STREAM_END)
434*4882a593Smuzhiyun 			return XZ_STREAM_END;
435*4882a593Smuzhiyun 	}
436*4882a593Smuzhiyun 
437*4882a593Smuzhiyun 	/*
438*4882a593Smuzhiyun 	 * If we have more output space than what is currently pending in
439*4882a593Smuzhiyun 	 * temp, copy the unfiltered data from temp to the output buffer
440*4882a593Smuzhiyun 	 * and try to fill the output buffer by decoding more data from the
441*4882a593Smuzhiyun 	 * next filter in the chain. Apply the BCJ filter on the new data
442*4882a593Smuzhiyun 	 * in the output buffer. If everything cannot be filtered, copy it
443*4882a593Smuzhiyun 	 * to temp and rewind the output buffer position accordingly.
444*4882a593Smuzhiyun 	 *
445*4882a593Smuzhiyun 	 * This needs to be always run when temp.size == 0 to handle a special
446*4882a593Smuzhiyun 	 * case where the output buffer is full and the next filter has no
447*4882a593Smuzhiyun 	 * more output coming but hasn't returned XZ_STREAM_END yet.
448*4882a593Smuzhiyun 	 */
449*4882a593Smuzhiyun 	if (s->temp.size < b->out_size - b->out_pos || s->temp.size == 0) {
450*4882a593Smuzhiyun 		out_start = b->out_pos;
451*4882a593Smuzhiyun 		memcpy(b->out + b->out_pos, s->temp.buf, s->temp.size);
452*4882a593Smuzhiyun 		b->out_pos += s->temp.size;
453*4882a593Smuzhiyun 
454*4882a593Smuzhiyun 		s->ret = xz_dec_lzma2_run(lzma2, b);
455*4882a593Smuzhiyun 		if (s->ret != XZ_STREAM_END
456*4882a593Smuzhiyun 				&& (s->ret != XZ_OK || s->single_call))
457*4882a593Smuzhiyun 			return s->ret;
458*4882a593Smuzhiyun 
459*4882a593Smuzhiyun 		bcj_apply(s, b->out, &out_start, b->out_pos);
460*4882a593Smuzhiyun 
461*4882a593Smuzhiyun 		/*
462*4882a593Smuzhiyun 		 * As an exception, if the next filter returned XZ_STREAM_END,
463*4882a593Smuzhiyun 		 * we can do that too, since the last few bytes that remain
464*4882a593Smuzhiyun 		 * unfiltered are meant to remain unfiltered.
465*4882a593Smuzhiyun 		 */
466*4882a593Smuzhiyun 		if (s->ret == XZ_STREAM_END)
467*4882a593Smuzhiyun 			return XZ_STREAM_END;
468*4882a593Smuzhiyun 
469*4882a593Smuzhiyun 		s->temp.size = b->out_pos - out_start;
470*4882a593Smuzhiyun 		b->out_pos -= s->temp.size;
471*4882a593Smuzhiyun 		memcpy(s->temp.buf, b->out + b->out_pos, s->temp.size);
472*4882a593Smuzhiyun 
473*4882a593Smuzhiyun 		/*
474*4882a593Smuzhiyun 		 * If there wasn't enough input to the next filter to fill
475*4882a593Smuzhiyun 		 * the output buffer with unfiltered data, there's no point
476*4882a593Smuzhiyun 		 * to try decoding more data to temp.
477*4882a593Smuzhiyun 		 */
478*4882a593Smuzhiyun 		if (b->out_pos + s->temp.size < b->out_size)
479*4882a593Smuzhiyun 			return XZ_OK;
480*4882a593Smuzhiyun 	}
481*4882a593Smuzhiyun 
482*4882a593Smuzhiyun 	/*
483*4882a593Smuzhiyun 	 * We have unfiltered data in temp. If the output buffer isn't full
484*4882a593Smuzhiyun 	 * yet, try to fill the temp buffer by decoding more data from the
485*4882a593Smuzhiyun 	 * next filter. Apply the BCJ filter on temp. Then we hopefully can
486*4882a593Smuzhiyun 	 * fill the actual output buffer by copying filtered data from temp.
487*4882a593Smuzhiyun 	 * A mix of filtered and unfiltered data may be left in temp; it will
488*4882a593Smuzhiyun 	 * be taken care on the next call to this function.
489*4882a593Smuzhiyun 	 */
490*4882a593Smuzhiyun 	if (b->out_pos < b->out_size) {
491*4882a593Smuzhiyun 		/* Make b->out{,_pos,_size} temporarily point to s->temp. */
492*4882a593Smuzhiyun 		s->out = b->out;
493*4882a593Smuzhiyun 		s->out_pos = b->out_pos;
494*4882a593Smuzhiyun 		s->out_size = b->out_size;
495*4882a593Smuzhiyun 		b->out = s->temp.buf;
496*4882a593Smuzhiyun 		b->out_pos = s->temp.size;
497*4882a593Smuzhiyun 		b->out_size = sizeof(s->temp.buf);
498*4882a593Smuzhiyun 
499*4882a593Smuzhiyun 		s->ret = xz_dec_lzma2_run(lzma2, b);
500*4882a593Smuzhiyun 
501*4882a593Smuzhiyun 		s->temp.size = b->out_pos;
502*4882a593Smuzhiyun 		b->out = s->out;
503*4882a593Smuzhiyun 		b->out_pos = s->out_pos;
504*4882a593Smuzhiyun 		b->out_size = s->out_size;
505*4882a593Smuzhiyun 
506*4882a593Smuzhiyun 		if (s->ret != XZ_OK && s->ret != XZ_STREAM_END)
507*4882a593Smuzhiyun 			return s->ret;
508*4882a593Smuzhiyun 
509*4882a593Smuzhiyun 		bcj_apply(s, s->temp.buf, &s->temp.filtered, s->temp.size);
510*4882a593Smuzhiyun 
511*4882a593Smuzhiyun 		/*
512*4882a593Smuzhiyun 		 * If the next filter returned XZ_STREAM_END, we mark that
513*4882a593Smuzhiyun 		 * everything is filtered, since the last unfiltered bytes
514*4882a593Smuzhiyun 		 * of the stream are meant to be left as is.
515*4882a593Smuzhiyun 		 */
516*4882a593Smuzhiyun 		if (s->ret == XZ_STREAM_END)
517*4882a593Smuzhiyun 			s->temp.filtered = s->temp.size;
518*4882a593Smuzhiyun 
519*4882a593Smuzhiyun 		bcj_flush(s, b);
520*4882a593Smuzhiyun 		if (s->temp.filtered > 0)
521*4882a593Smuzhiyun 			return XZ_OK;
522*4882a593Smuzhiyun 	}
523*4882a593Smuzhiyun 
524*4882a593Smuzhiyun 	return s->ret;
525*4882a593Smuzhiyun }
526*4882a593Smuzhiyun 
xz_dec_bcj_create(bool single_call)527*4882a593Smuzhiyun XZ_EXTERN struct xz_dec_bcj *xz_dec_bcj_create(bool single_call)
528*4882a593Smuzhiyun {
529*4882a593Smuzhiyun 	struct xz_dec_bcj *s = kmalloc(sizeof(*s), GFP_KERNEL);
530*4882a593Smuzhiyun 	if (s != NULL)
531*4882a593Smuzhiyun 		s->single_call = single_call;
532*4882a593Smuzhiyun 
533*4882a593Smuzhiyun 	return s;
534*4882a593Smuzhiyun }
535*4882a593Smuzhiyun 
xz_dec_bcj_reset(struct xz_dec_bcj * s,uint8_t id)536*4882a593Smuzhiyun XZ_EXTERN enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id)
537*4882a593Smuzhiyun {
538*4882a593Smuzhiyun 	switch (id) {
539*4882a593Smuzhiyun #ifdef XZ_DEC_X86
540*4882a593Smuzhiyun 	case BCJ_X86:
541*4882a593Smuzhiyun #endif
542*4882a593Smuzhiyun #ifdef XZ_DEC_POWERPC
543*4882a593Smuzhiyun 	case BCJ_POWERPC:
544*4882a593Smuzhiyun #endif
545*4882a593Smuzhiyun #ifdef XZ_DEC_IA64
546*4882a593Smuzhiyun 	case BCJ_IA64:
547*4882a593Smuzhiyun #endif
548*4882a593Smuzhiyun #ifdef XZ_DEC_ARM
549*4882a593Smuzhiyun 	case BCJ_ARM:
550*4882a593Smuzhiyun #endif
551*4882a593Smuzhiyun #ifdef XZ_DEC_ARMTHUMB
552*4882a593Smuzhiyun 	case BCJ_ARMTHUMB:
553*4882a593Smuzhiyun #endif
554*4882a593Smuzhiyun #ifdef XZ_DEC_SPARC
555*4882a593Smuzhiyun 	case BCJ_SPARC:
556*4882a593Smuzhiyun #endif
557*4882a593Smuzhiyun 		break;
558*4882a593Smuzhiyun 
559*4882a593Smuzhiyun 	default:
560*4882a593Smuzhiyun 		/* Unsupported Filter ID */
561*4882a593Smuzhiyun 		return XZ_OPTIONS_ERROR;
562*4882a593Smuzhiyun 	}
563*4882a593Smuzhiyun 
564*4882a593Smuzhiyun 	s->type = id;
565*4882a593Smuzhiyun 	s->ret = XZ_OK;
566*4882a593Smuzhiyun 	s->pos = 0;
567*4882a593Smuzhiyun 	s->x86_prev_mask = 0;
568*4882a593Smuzhiyun 	s->temp.filtered = 0;
569*4882a593Smuzhiyun 	s->temp.size = 0;
570*4882a593Smuzhiyun 
571*4882a593Smuzhiyun 	return XZ_OK;
572*4882a593Smuzhiyun }

#endif