// SPDX-License-Identifier: GPL-2.0-or-later
/* Linux driver for Philips webcam
   Decompression for chipset version 2 and 3
   (C) 2004-2006 Luc Saillard (luc@saillard.org)

   NOTE: this version of pwc is an unofficial (modified) release of pwc & pwcx
   driver and thus may have bugs that are not present in the original version.
   Please send bug reports and support requests to <luc@saillard.org>.
   The decompression routines have been implemented by reverse-engineering the
   Nemosoft binary pwcx module. Caveat emptor.


*/
14*4882a593Smuzhiyun
15*4882a593Smuzhiyun #include "pwc-timon.h"
16*4882a593Smuzhiyun #include "pwc-kiara.h"
17*4882a593Smuzhiyun #include "pwc-dec23.h"
18*4882a593Smuzhiyun
19*4882a593Smuzhiyun #include <linux/string.h>
20*4882a593Smuzhiyun #include <linux/slab.h>
21*4882a593Smuzhiyun
/*
 * Build-time tuning knobs.
 *
 * USE_LOOKUP_TABLE_TO_CLAMP
 *   0: clamp with plain C comparisons: { a < 0 ? 0 : (a > 255 ? 255 : a) }
 *   1: clamp through a lookup table (faster on CPUs with a big cache, e.g. Intel)
 *
 * UNROLL_LOOP_FOR_COPY
 *   0: copy the decoded blocks with loops (smaller code, slightly slower)
 *   1: copy the decoded blocks with unrolled code (gcc produces faster code,
 *      perhaps only on Intel-class processors). The unrolled code indexes the
 *      clamp table directly, so enabling this forces
 *      USE_LOOKUP_TABLE_TO_CLAMP to 1.
 */
#define USE_LOOKUP_TABLE_TO_CLAMP 1
#define UNROLL_LOOP_FOR_COPY 1

#if UNROLL_LOOP_FOR_COPY
# undef USE_LOOKUP_TABLE_TO_CLAMP
# define USE_LOOKUP_TABLE_TO_CLAMP 1
#endif
40*4882a593Smuzhiyun
build_subblock_pattern(struct pwc_dec23_private * pdec)41*4882a593Smuzhiyun static void build_subblock_pattern(struct pwc_dec23_private *pdec)
42*4882a593Smuzhiyun {
43*4882a593Smuzhiyun static const unsigned int initial_values[12] = {
44*4882a593Smuzhiyun -0x526500, -0x221200, 0x221200, 0x526500,
45*4882a593Smuzhiyun -0x3de200, 0x3de200,
46*4882a593Smuzhiyun -0x6db480, -0x2d5d00, 0x2d5d00, 0x6db480,
47*4882a593Smuzhiyun -0x12c200, 0x12c200
48*4882a593Smuzhiyun
49*4882a593Smuzhiyun };
50*4882a593Smuzhiyun static const unsigned int values_derivated[12] = {
51*4882a593Smuzhiyun 0xa4ca, 0x4424, -0x4424, -0xa4ca,
52*4882a593Smuzhiyun 0x7bc4, -0x7bc4,
53*4882a593Smuzhiyun 0xdb69, 0x5aba, -0x5aba, -0xdb69,
54*4882a593Smuzhiyun 0x2584, -0x2584
55*4882a593Smuzhiyun };
56*4882a593Smuzhiyun unsigned int temp_values[12];
57*4882a593Smuzhiyun int i, j;
58*4882a593Smuzhiyun
59*4882a593Smuzhiyun memcpy(temp_values, initial_values, sizeof(initial_values));
60*4882a593Smuzhiyun for (i = 0; i < 256; i++) {
61*4882a593Smuzhiyun for (j = 0; j < 12; j++) {
62*4882a593Smuzhiyun pdec->table_subblock[i][j] = temp_values[j];
63*4882a593Smuzhiyun temp_values[j] += values_derivated[j];
64*4882a593Smuzhiyun }
65*4882a593Smuzhiyun }
66*4882a593Smuzhiyun }
67*4882a593Smuzhiyun
build_bit_powermask_table(struct pwc_dec23_private * pdec)68*4882a593Smuzhiyun static void build_bit_powermask_table(struct pwc_dec23_private *pdec)
69*4882a593Smuzhiyun {
70*4882a593Smuzhiyun unsigned char *p;
71*4882a593Smuzhiyun unsigned int bit, byte, mask, val;
72*4882a593Smuzhiyun unsigned int bitpower = 1;
73*4882a593Smuzhiyun
74*4882a593Smuzhiyun for (bit = 0; bit < 8; bit++) {
75*4882a593Smuzhiyun mask = bitpower - 1;
76*4882a593Smuzhiyun p = pdec->table_bitpowermask[bit];
77*4882a593Smuzhiyun for (byte = 0; byte < 256; byte++) {
78*4882a593Smuzhiyun val = (byte & mask);
79*4882a593Smuzhiyun if (byte & bitpower)
80*4882a593Smuzhiyun val = -val;
81*4882a593Smuzhiyun *p++ = val;
82*4882a593Smuzhiyun }
83*4882a593Smuzhiyun bitpower<<=1;
84*4882a593Smuzhiyun }
85*4882a593Smuzhiyun }
86*4882a593Smuzhiyun
87*4882a593Smuzhiyun
/*
 * Expand one ROM table into the two per-compression-mode decode tables.
 *
 * @romtable: 16 compression modes x 8 words; each word packs six 3-bit
 *            fields at bit offsets 15, 12, 9, 6, 3 and 0.
 * @p0004:    output, 16 modes x (8 words x 128 bytes).
 * @p8004:    output, 16 modes x (8 words x 16 slots x 2 bytes).
 *
 * For word j and slot k, `bit` is 1 for slot 0 and otherwise the 3-bit field
 * selected by the slot number. p8004 receives the pair (8 or j - bit, bit),
 * and p0004 receives the eight values m * (1 << bit) + 0x80 for
 * m = 1, 2, 3, 4, -1, -2, -3, -4, spaced 16 bytes apart. All stores truncate
 * to unsigned char.
 */
static void build_table_color(const unsigned int romtable[16][8],
			      unsigned char p0004[16][1024],
			      unsigned char p8004[16][256])
{
	/* Right shift selecting the ROM field used by slot k (entry 0 unused) */
	static const unsigned char field_shift[16] = {
		0, 15, 15, 12, 12, 12, 9, 9, 9, 9, 6, 6, 6, 3, 3, 0
	};
	/* Multipliers applied to (1 << bit), one per 16-byte stripe of p0004 */
	static const int scale[8] = { 1, 2, 3, 4, -1, -2, -3, -4 };
	int mode, j, k, m;

	/* We have 16 compression tables */
	for (mode = 0; mode < 16; mode++) {
		unsigned char *out0 = p0004[mode];
		unsigned char *out8 = p8004[mode];

		for (j = 0; j < 8; j++) {
			const unsigned int word = romtable[mode][j];

			for (k = 0; k < 16; k++) {
				int bit, pw;

				if (k == 0)
					bit = 1;
				else
					bit = (word >> field_shift[k]) & 7;

				out8[0] = (k == 0) ? 8 : j - bit;
				out8[1] = bit;
				out8 += 2;

				pw = 1 << bit;
				for (m = 0; m < 8; m++)
					out0[k + 0x10 * m] = (scale[m] * pw) + 0x80;
			}
			out0 += 128;
		}
	}
}
138*4882a593Smuzhiyun
139*4882a593Smuzhiyun /*
140*4882a593Smuzhiyun *
141*4882a593Smuzhiyun */
fill_table_dc00_d800(struct pwc_dec23_private * pdec)142*4882a593Smuzhiyun static void fill_table_dc00_d800(struct pwc_dec23_private *pdec)
143*4882a593Smuzhiyun {
144*4882a593Smuzhiyun #define SCALEBITS 15
145*4882a593Smuzhiyun #define ONE_HALF (1UL << (SCALEBITS - 1))
146*4882a593Smuzhiyun int i;
147*4882a593Smuzhiyun unsigned int offset1 = ONE_HALF;
148*4882a593Smuzhiyun unsigned int offset2 = 0x0000;
149*4882a593Smuzhiyun
150*4882a593Smuzhiyun for (i=0; i<256; i++) {
151*4882a593Smuzhiyun pdec->table_dc00[i] = offset1 & ~(ONE_HALF);
152*4882a593Smuzhiyun pdec->table_d800[i] = offset2;
153*4882a593Smuzhiyun
154*4882a593Smuzhiyun offset1 += 0x7bc4;
155*4882a593Smuzhiyun offset2 += 0x7bc4;
156*4882a593Smuzhiyun }
157*4882a593Smuzhiyun }
158*4882a593Smuzhiyun
/*
 * To decode the stream:
 *   if look_bits(2) == 0:       # op == 2 in the lookup table
 *      skip_bits(2)
 *      end of the stream
 *   elif look_bits(3) == 7:     # op == 1 in the lookup table
 *      skip_bits(3)
 *      yyyy = get_bits(4)
 *      xxxx = get_bits(8)
 *   else:                       # op == 0 in the lookup table
 *      skip_bits(x)
 *
 * To speed up processing, the next 6 bits of the stream are used as an index
 * into a lookup table of 64 four-byte records:
 *
 *   struct {
 *     unsigned char op;      // operation to execute
 *     unsigned char bits;    // number of bits consumed by the operation
 *     unsigned char offset1; // added to the running offset (kept modulo 16)
 *     unsigned char offset2; // added to the index into table_0004
 *   }
 *
 * How the table is built:
 *   op == 2 when (i % 4) == 0
 *   op == 1 when (i % 8) == 7
 *   op == 0 otherwise
 */
static const unsigned char hash_table_ops[64*4] = {
	/* op    bits  off1  off2        index */
	0x02, 0x00, 0x00, 0x00,	/*  0 */
	0x00, 0x03, 0x01, 0x00,	/*  1 */
	0x00, 0x04, 0x01, 0x10,	/*  2 */
	0x00, 0x06, 0x01, 0x30,	/*  3 */
	0x02, 0x00, 0x00, 0x00,	/*  4 */
	0x00, 0x03, 0x01, 0x40,	/*  5 */
	0x00, 0x05, 0x01, 0x20,	/*  6 */
	0x01, 0x00, 0x00, 0x00,	/*  7 */
	0x02, 0x00, 0x00, 0x00,	/*  8 */
	0x00, 0x03, 0x01, 0x00,	/*  9 */
	0x00, 0x04, 0x01, 0x50,	/* 10 */
	0x00, 0x05, 0x02, 0x00,	/* 11 */
	0x02, 0x00, 0x00, 0x00,	/* 12 */
	0x00, 0x03, 0x01, 0x40,	/* 13 */
	0x00, 0x05, 0x03, 0x00,	/* 14 */
	0x01, 0x00, 0x00, 0x00,	/* 15 */
	0x02, 0x00, 0x00, 0x00,	/* 16 */
	0x00, 0x03, 0x01, 0x00,	/* 17 */
	0x00, 0x04, 0x01, 0x10,	/* 18 */
	0x00, 0x06, 0x02, 0x10,	/* 19 */
	0x02, 0x00, 0x00, 0x00,	/* 20 */
	0x00, 0x03, 0x01, 0x40,	/* 21 */
	0x00, 0x05, 0x01, 0x60,	/* 22 */
	0x01, 0x00, 0x00, 0x00,	/* 23 */
	0x02, 0x00, 0x00, 0x00,	/* 24 */
	0x00, 0x03, 0x01, 0x00,	/* 25 */
	0x00, 0x04, 0x01, 0x50,	/* 26 */
	0x00, 0x05, 0x02, 0x40,	/* 27 */
	0x02, 0x00, 0x00, 0x00,	/* 28 */
	0x00, 0x03, 0x01, 0x40,	/* 29 */
	0x00, 0x05, 0x03, 0x40,	/* 30 */
	0x01, 0x00, 0x00, 0x00,	/* 31 */
	0x02, 0x00, 0x00, 0x00,	/* 32 */
	0x00, 0x03, 0x01, 0x00,	/* 33 */
	0x00, 0x04, 0x01, 0x10,	/* 34 */
	0x00, 0x06, 0x01, 0x70,	/* 35 */
	0x02, 0x00, 0x00, 0x00,	/* 36 */
	0x00, 0x03, 0x01, 0x40,	/* 37 */
	0x00, 0x05, 0x01, 0x20,	/* 38 */
	0x01, 0x00, 0x00, 0x00,	/* 39 */
	0x02, 0x00, 0x00, 0x00,	/* 40 */
	0x00, 0x03, 0x01, 0x00,	/* 41 */
	0x00, 0x04, 0x01, 0x50,	/* 42 */
	0x00, 0x05, 0x02, 0x00,	/* 43 */
	0x02, 0x00, 0x00, 0x00,	/* 44 */
	0x00, 0x03, 0x01, 0x40,	/* 45 */
	0x00, 0x05, 0x03, 0x00,	/* 46 */
	0x01, 0x00, 0x00, 0x00,	/* 47 */
	0x02, 0x00, 0x00, 0x00,	/* 48 */
	0x00, 0x03, 0x01, 0x00,	/* 49 */
	0x00, 0x04, 0x01, 0x10,	/* 50 */
	0x00, 0x06, 0x02, 0x50,	/* 51 */
	0x02, 0x00, 0x00, 0x00,	/* 52 */
	0x00, 0x03, 0x01, 0x40,	/* 53 */
	0x00, 0x05, 0x01, 0x60,	/* 54 */
	0x01, 0x00, 0x00, 0x00,	/* 55 */
	0x02, 0x00, 0x00, 0x00,	/* 56 */
	0x00, 0x03, 0x01, 0x00,	/* 57 */
	0x00, 0x04, 0x01, 0x50,	/* 58 */
	0x00, 0x05, 0x02, 0x40,	/* 59 */
	0x02, 0x00, 0x00, 0x00,	/* 60 */
	0x00, 0x03, 0x01, 0x40,	/* 61 */
	0x00, 0x05, 0x03, 0x40,	/* 62 */
	0x01, 0x00, 0x00, 0x00	/* 63 */
};
252*4882a593Smuzhiyun
/*
 * MulIdx[offset][pixel]: for each of the 16 pattern offsets, the column
 * (0..11) of a table_subblock row that is added to each of the 16 pixels of a
 * 4x4 block. Each row below is laid out as that 4x4 block.
 */
static const unsigned int MulIdx[16][16] = {
	{ 0,  0,  0,  0,    0,  0,  0,  0,    0,  0,  0,  0,    0,  0,  0,  0 },
	{ 0,  1,  2,  3,    0,  1,  2,  3,    0,  1,  2,  3,    0,  1,  2,  3 },
	{ 0,  0,  0,  0,    1,  1,  1,  1,    2,  2,  2,  2,    3,  3,  3,  3 },
	{ 4,  4,  4,  4,    5,  5,  5,  5,    5,  5,  5,  5,    4,  4,  4,  4 },
	{ 6,  7,  8,  9,    7, 10, 11,  8,    8, 11, 10,  7,    9,  8,  7,  6 },
	{ 4,  5,  5,  4,    4,  5,  5,  4,    4,  5,  5,  4,    4,  5,  5,  4 },
	{ 1,  3,  0,  2,    1,  3,  0,  2,    1,  3,  0,  2,    1,  3,  0,  2 },
	{ 0,  3,  3,  0,    1,  2,  2,  1,    2,  1,  1,  2,    3,  0,  0,  3 },
	{ 0,  1,  2,  3,    3,  2,  1,  0,    3,  2,  1,  0,    0,  1,  2,  3 },
	{ 1,  1,  1,  1,    3,  3,  3,  3,    0,  0,  0,  0,    2,  2,  2,  2 },
	{ 7, 10, 11,  8,    9,  8,  7,  6,    6,  7,  8,  9,    8, 11, 10,  7 },
	{ 4,  5,  5,  4,    5,  4,  4,  5,    5,  4,  4,  5,    4,  5,  5,  4 },
	{ 7,  9,  6,  8,   10,  8,  7, 11,   11,  7,  8, 10,    8,  6,  9,  7 },
	{ 1,  3,  0,  2,    2,  0,  3,  1,    2,  0,  3,  1,    1,  3,  0,  2 },
	{ 1,  2,  2,  1,    3,  0,  0,  3,    0,  3,  3,  0,    2,  1,  1,  2 },
	{10,  8,  7, 11,    8,  6,  9,  7,    7,  9,  6,  8,   11,  7,  8, 10 }
};
274*4882a593Smuzhiyun
/*
 * CLAMP(x): saturate x to the range [0, 255].
 *
 * Lookup-table variant: pwc_crop_table holds MAX_OUTER_CROP_VALUE zeros, then
 * the identity 0..255, then MAX_OUTER_CROP_VALUE entries of 255 (it is filled
 * when the decoder is initialised). The lookup is only valid for
 * -MAX_OUTER_CROP_VALUE <= x < 256 + MAX_OUTER_CROP_VALUE.
 *
 * Comparison variant: evaluates x more than once, so do not pass expressions
 * with side effects that must happen exactly once. Every use of the argument
 * is parenthesized so that low-precedence expressions expand correctly.
 */
#if USE_LOOKUP_TABLE_TO_CLAMP
#define MAX_OUTER_CROP_VALUE	(512)
static unsigned char pwc_crop_table[256 + 2*MAX_OUTER_CROP_VALUE];
#define CLAMP(x) (pwc_crop_table[MAX_OUTER_CROP_VALUE+(x)])
#else
#define CLAMP(x) ((x) > 255 ? 255 : ((x) < 0 ? 0 : (x)))
#endif
282*4882a593Smuzhiyun
283*4882a593Smuzhiyun
284*4882a593Smuzhiyun /* If the type or the command change, we rebuild the lookup table */
pwc_dec23_init(struct pwc_device * pdev,const unsigned char * cmd)285*4882a593Smuzhiyun void pwc_dec23_init(struct pwc_device *pdev, const unsigned char *cmd)
286*4882a593Smuzhiyun {
287*4882a593Smuzhiyun int flags, version, shift, i;
288*4882a593Smuzhiyun struct pwc_dec23_private *pdec = &pdev->dec23;
289*4882a593Smuzhiyun
290*4882a593Smuzhiyun mutex_init(&pdec->lock);
291*4882a593Smuzhiyun
292*4882a593Smuzhiyun if (pdec->last_cmd_valid && pdec->last_cmd == cmd[2])
293*4882a593Smuzhiyun return;
294*4882a593Smuzhiyun
295*4882a593Smuzhiyun if (DEVICE_USE_CODEC3(pdev->type)) {
296*4882a593Smuzhiyun flags = cmd[2] & 0x18;
297*4882a593Smuzhiyun if (flags == 8)
298*4882a593Smuzhiyun pdec->nbits = 7; /* More bits, mean more bits to encode the stream, but better quality */
299*4882a593Smuzhiyun else if (flags == 0x10)
300*4882a593Smuzhiyun pdec->nbits = 8;
301*4882a593Smuzhiyun else
302*4882a593Smuzhiyun pdec->nbits = 6;
303*4882a593Smuzhiyun
304*4882a593Smuzhiyun version = cmd[2] >> 5;
305*4882a593Smuzhiyun build_table_color(KiaraRomTable[version][0], pdec->table_0004_pass1, pdec->table_8004_pass1);
306*4882a593Smuzhiyun build_table_color(KiaraRomTable[version][1], pdec->table_0004_pass2, pdec->table_8004_pass2);
307*4882a593Smuzhiyun
308*4882a593Smuzhiyun } else {
309*4882a593Smuzhiyun
310*4882a593Smuzhiyun flags = cmd[2] & 6;
311*4882a593Smuzhiyun if (flags == 2)
312*4882a593Smuzhiyun pdec->nbits = 7;
313*4882a593Smuzhiyun else if (flags == 4)
314*4882a593Smuzhiyun pdec->nbits = 8;
315*4882a593Smuzhiyun else
316*4882a593Smuzhiyun pdec->nbits = 6;
317*4882a593Smuzhiyun
318*4882a593Smuzhiyun version = cmd[2] >> 3;
319*4882a593Smuzhiyun build_table_color(TimonRomTable[version][0], pdec->table_0004_pass1, pdec->table_8004_pass1);
320*4882a593Smuzhiyun build_table_color(TimonRomTable[version][1], pdec->table_0004_pass2, pdec->table_8004_pass2);
321*4882a593Smuzhiyun }
322*4882a593Smuzhiyun
323*4882a593Smuzhiyun /* Information can be coded on a variable number of bits but never less than 8 */
324*4882a593Smuzhiyun shift = 8 - pdec->nbits;
325*4882a593Smuzhiyun pdec->scalebits = SCALEBITS - shift;
326*4882a593Smuzhiyun pdec->nbitsmask = 0xFF >> shift;
327*4882a593Smuzhiyun
328*4882a593Smuzhiyun fill_table_dc00_d800(pdec);
329*4882a593Smuzhiyun build_subblock_pattern(pdec);
330*4882a593Smuzhiyun build_bit_powermask_table(pdec);
331*4882a593Smuzhiyun
332*4882a593Smuzhiyun #if USE_LOOKUP_TABLE_TO_CLAMP
333*4882a593Smuzhiyun /* Build the static table to clamp value [0-255] */
334*4882a593Smuzhiyun for (i=0;i<MAX_OUTER_CROP_VALUE;i++)
335*4882a593Smuzhiyun pwc_crop_table[i] = 0;
336*4882a593Smuzhiyun for (i=0; i<256; i++)
337*4882a593Smuzhiyun pwc_crop_table[MAX_OUTER_CROP_VALUE+i] = i;
338*4882a593Smuzhiyun for (i=0; i<MAX_OUTER_CROP_VALUE; i++)
339*4882a593Smuzhiyun pwc_crop_table[MAX_OUTER_CROP_VALUE+256+i] = 255;
340*4882a593Smuzhiyun #endif
341*4882a593Smuzhiyun
342*4882a593Smuzhiyun pdec->last_cmd = cmd[2];
343*4882a593Smuzhiyun pdec->last_cmd_valid = 1;
344*4882a593Smuzhiyun }
345*4882a593Smuzhiyun
/*
 * Copy a decoded 4x4 block into the Y plane.
 *
 * @src:            16 values in row-major order (4 rows of 4).
 * @dst:            top-left destination pixel of the block.
 * @bytes_per_line: distance in bytes between two destination rows.
 * @scalebits:      right shift applied to each value before clamping to
 *                  [0, 255].
 */
static void copy_image_block_Y(const int *src, unsigned char *dst, unsigned int bytes_per_line, unsigned int scalebits)
{
#if UNROLL_LOOP_FOR_COPY
	const unsigned char *crop = pwc_crop_table + MAX_OUTER_CROP_VALUE;
	unsigned char *row0 = dst;
	unsigned char *row1 = dst + bytes_per_line;
	unsigned char *row2 = dst + bytes_per_line*2;
	unsigned char *row3 = dst + bytes_per_line*3;

	row0[0] = crop[src[0] >> scalebits];
	row0[1] = crop[src[1] >> scalebits];
	row0[2] = crop[src[2] >> scalebits];
	row0[3] = crop[src[3] >> scalebits];

	row1[0] = crop[src[4] >> scalebits];
	row1[1] = crop[src[5] >> scalebits];
	row1[2] = crop[src[6] >> scalebits];
	row1[3] = crop[src[7] >> scalebits];

	row2[0] = crop[src[8] >> scalebits];
	row2[1] = crop[src[9] >> scalebits];
	row2[2] = crop[src[10] >> scalebits];
	row2[3] = crop[src[11] >> scalebits];

	row3[0] = crop[src[12] >> scalebits];
	row3[1] = crop[src[13] >> scalebits];
	row3[2] = crop[src[14] >> scalebits];
	row3[3] = crop[src[15] >> scalebits];
#else
	int row, col;

	for (row = 0; row < 4; row++) {
		unsigned char *out = dst + bytes_per_line*row;

		for (col = 0; col < 4; col++)
			out[col] = CLAMP((src[row*4 + col]) >> scalebits);
	}
#endif
}
398*4882a593Smuzhiyun
/*
 * Copy a decoded 4x4 block into a chroma plane as two rows of 8 pixels.
 *
 * The first output row interleaves source rows 0 and 1
 * (src[0], src[4], src[1], src[5], ...); the second output row interleaves
 * source rows 3 and 2 (src[12], src[8], src[13], src[9], ...).
 *
 * @src:            16 values in row-major order (4 rows of 4).
 * @dst:            first destination pixel.
 * @bytes_per_line: distance in bytes between the two destination rows.
 * @scalebits:      right shift applied to each value before clamping to
 *                  [0, 255].
 */
static void copy_image_block_CrCb(const int *src, unsigned char *dst, unsigned int bytes_per_line, unsigned int scalebits)
{
#if UNROLL_LOOP_FOR_COPY
	const unsigned char *crop = pwc_crop_table + MAX_OUTER_CROP_VALUE;
	unsigned char *top = dst;
	unsigned char *bottom = dst + bytes_per_line;

	top[0] = crop[src[0] >> scalebits];
	top[1] = crop[src[4] >> scalebits];
	top[2] = crop[src[1] >> scalebits];
	top[3] = crop[src[5] >> scalebits];
	top[4] = crop[src[2] >> scalebits];
	top[5] = crop[src[6] >> scalebits];
	top[6] = crop[src[3] >> scalebits];
	top[7] = crop[src[7] >> scalebits];

	bottom[0] = crop[src[12] >> scalebits];
	bottom[1] = crop[src[8] >> scalebits];
	bottom[2] = crop[src[13] >> scalebits];
	bottom[3] = crop[src[9] >> scalebits];
	bottom[4] = crop[src[14] >> scalebits];
	bottom[5] = crop[src[10] >> scalebits];
	bottom[6] = crop[src[15] >> scalebits];
	bottom[7] = crop[src[11] >> scalebits];
#else
	unsigned char *top = dst;
	unsigned char *bottom = dst + bytes_per_line;
	int col;

	for (col = 0; col < 4; col++) {
		top[2*col]     = CLAMP((src[col]) >> scalebits);
		top[2*col + 1] = CLAMP((src[4 + col]) >> scalebits);
	}
	for (col = 0; col < 4; col++) {
		bottom[2*col]     = CLAMP((src[12 + col]) >> scalebits);
		bottom[2*col + 1] = CLAMP((src[8 + col]) >> scalebits);
	}
#endif
}
447*4882a593Smuzhiyun
/*
 * To manage the stream, we keep bits in a reservoir register
 * (pdec->reservoir), filled from pdec->stream one byte at a time, least
 * significant bits first; pdec->nbits_in_reservoir counts the valid bits.
 *
 * fill_nbits(pdec, n):          read bytes until the reservoir holds at
 *                               least n bits.
 * skip_nbits(pdec, n):          discard n bits from the reservoir.
 * get_nbits(pdec, n, result):   fill the reservoir, store the first n bits
 *                               in result and discard them.
 * __get_nbits(pdec, n, result): faster version of get_nbits() that assumes
 *                               the reservoir already contains at least n
 *                               bits; the returned bits are discarded.
 * look_nbits(pdec, n):          value of the first n bits, without
 *                               discarding them.
 *
 * The statement macros expand to a single do { } while (0) statement with no
 * trailing semicolon, so they can be used safely in unbraced if/else bodies;
 * the caller supplies the semicolon. Arguments may be evaluated several
 * times.
 */
#define fill_nbits(pdec, nbits_wanted) do { \
	while ((pdec)->nbits_in_reservoir < (nbits_wanted)) { \
		(pdec)->reservoir |= (*((pdec)->stream)++) << ((pdec)->nbits_in_reservoir); \
		(pdec)->nbits_in_reservoir += 8; \
	} \
} while (0)

#define skip_nbits(pdec, nbits_to_skip) do { \
	(pdec)->reservoir >>= (nbits_to_skip); \
	(pdec)->nbits_in_reservoir -= (nbits_to_skip); \
} while (0)

#define get_nbits(pdec, nbits_wanted, result) do { \
	fill_nbits(pdec, nbits_wanted); \
	(result) = ((pdec)->reservoir) & ((1U << (nbits_wanted)) - 1); \
	skip_nbits(pdec, nbits_wanted); \
} while (0)

#define __get_nbits(pdec, nbits_wanted, result) do { \
	(result) = ((pdec)->reservoir) & ((1U << (nbits_wanted)) - 1); \
	skip_nbits(pdec, nbits_wanted); \
} while (0)

#define look_nbits(pdec, nbits_wanted) \
	(((pdec)->reservoir) & ((1U << (nbits_wanted)) - 1))
483*4882a593Smuzhiyun
484*4882a593Smuzhiyun /*
485*4882a593Smuzhiyun * Decode a 4x4 pixel block
486*4882a593Smuzhiyun */
decode_block(struct pwc_dec23_private * pdec,const unsigned char * ptable0004,const unsigned char * ptable8004)487*4882a593Smuzhiyun static void decode_block(struct pwc_dec23_private *pdec,
488*4882a593Smuzhiyun const unsigned char *ptable0004,
489*4882a593Smuzhiyun const unsigned char *ptable8004)
490*4882a593Smuzhiyun {
491*4882a593Smuzhiyun unsigned int primary_color;
492*4882a593Smuzhiyun unsigned int channel_v, offset1, op;
493*4882a593Smuzhiyun int i;
494*4882a593Smuzhiyun
495*4882a593Smuzhiyun fill_nbits(pdec, 16);
496*4882a593Smuzhiyun __get_nbits(pdec, pdec->nbits, primary_color);
497*4882a593Smuzhiyun
498*4882a593Smuzhiyun if (look_nbits(pdec,2) == 0) {
499*4882a593Smuzhiyun skip_nbits(pdec, 2);
500*4882a593Smuzhiyun /* Very simple, the color is the same for all pixels of the square */
501*4882a593Smuzhiyun for (i = 0; i < 16; i++)
502*4882a593Smuzhiyun pdec->temp_colors[i] = pdec->table_dc00[primary_color];
503*4882a593Smuzhiyun
504*4882a593Smuzhiyun return;
505*4882a593Smuzhiyun }
506*4882a593Smuzhiyun
507*4882a593Smuzhiyun /* This block is encoded with small pattern */
508*4882a593Smuzhiyun for (i = 0; i < 16; i++)
509*4882a593Smuzhiyun pdec->temp_colors[i] = pdec->table_d800[primary_color];
510*4882a593Smuzhiyun
511*4882a593Smuzhiyun __get_nbits(pdec, 3, channel_v);
512*4882a593Smuzhiyun channel_v = ((channel_v & 1) << 2) | (channel_v & 2) | ((channel_v & 4) >> 2);
513*4882a593Smuzhiyun
514*4882a593Smuzhiyun ptable0004 += (channel_v * 128);
515*4882a593Smuzhiyun ptable8004 += (channel_v * 32);
516*4882a593Smuzhiyun
517*4882a593Smuzhiyun offset1 = 0;
518*4882a593Smuzhiyun do
519*4882a593Smuzhiyun {
520*4882a593Smuzhiyun unsigned int htable_idx, rows = 0;
521*4882a593Smuzhiyun const unsigned int *block;
522*4882a593Smuzhiyun
523*4882a593Smuzhiyun /* [ zzzz y x x ]
524*4882a593Smuzhiyun * xx == 00 :=> end of the block def, remove the two bits from the stream
525*4882a593Smuzhiyun * yxx == 111
526*4882a593Smuzhiyun * yxx == any other value
527*4882a593Smuzhiyun *
528*4882a593Smuzhiyun */
529*4882a593Smuzhiyun fill_nbits(pdec, 16);
530*4882a593Smuzhiyun htable_idx = look_nbits(pdec, 6);
531*4882a593Smuzhiyun op = hash_table_ops[htable_idx * 4];
532*4882a593Smuzhiyun
533*4882a593Smuzhiyun if (op == 2) {
534*4882a593Smuzhiyun skip_nbits(pdec, 2);
535*4882a593Smuzhiyun
536*4882a593Smuzhiyun } else if (op == 1) {
537*4882a593Smuzhiyun /* 15bits [ xxxx xxxx yyyy 111 ]
538*4882a593Smuzhiyun * yyy => offset in the table8004
539*4882a593Smuzhiyun * xxx => offset in the tabled004 (tree)
540*4882a593Smuzhiyun */
541*4882a593Smuzhiyun unsigned int mask, shift;
542*4882a593Smuzhiyun unsigned int nbits, col1;
543*4882a593Smuzhiyun unsigned int yyyy;
544*4882a593Smuzhiyun
545*4882a593Smuzhiyun skip_nbits(pdec, 3);
546*4882a593Smuzhiyun /* offset1 += yyyy */
547*4882a593Smuzhiyun __get_nbits(pdec, 4, yyyy);
548*4882a593Smuzhiyun offset1 += 1 + yyyy;
549*4882a593Smuzhiyun offset1 &= 0x0F;
550*4882a593Smuzhiyun nbits = ptable8004[offset1 * 2];
551*4882a593Smuzhiyun
552*4882a593Smuzhiyun /* col1 = xxxx xxxx */
553*4882a593Smuzhiyun __get_nbits(pdec, nbits+1, col1);
554*4882a593Smuzhiyun
555*4882a593Smuzhiyun /* Bit mask table */
556*4882a593Smuzhiyun mask = pdec->table_bitpowermask[nbits][col1];
557*4882a593Smuzhiyun shift = ptable8004[offset1 * 2 + 1];
558*4882a593Smuzhiyun rows = ((mask << shift) + 0x80) & 0xFF;
559*4882a593Smuzhiyun
560*4882a593Smuzhiyun block = pdec->table_subblock[rows];
561*4882a593Smuzhiyun for (i = 0; i < 16; i++)
562*4882a593Smuzhiyun pdec->temp_colors[i] += block[MulIdx[offset1][i]];
563*4882a593Smuzhiyun
564*4882a593Smuzhiyun } else {
565*4882a593Smuzhiyun /* op == 0
566*4882a593Smuzhiyun * offset1 is coded on 3 bits
567*4882a593Smuzhiyun */
568*4882a593Smuzhiyun unsigned int shift;
569*4882a593Smuzhiyun
570*4882a593Smuzhiyun offset1 += hash_table_ops [htable_idx * 4 + 2];
571*4882a593Smuzhiyun offset1 &= 0x0F;
572*4882a593Smuzhiyun
573*4882a593Smuzhiyun rows = ptable0004[offset1 + hash_table_ops [htable_idx * 4 + 3]];
574*4882a593Smuzhiyun block = pdec->table_subblock[rows];
575*4882a593Smuzhiyun for (i = 0; i < 16; i++)
576*4882a593Smuzhiyun pdec->temp_colors[i] += block[MulIdx[offset1][i]];
577*4882a593Smuzhiyun
578*4882a593Smuzhiyun shift = hash_table_ops[htable_idx * 4 + 1];
579*4882a593Smuzhiyun skip_nbits(pdec, shift);
580*4882a593Smuzhiyun }
581*4882a593Smuzhiyun
582*4882a593Smuzhiyun } while (op != 2);
583*4882a593Smuzhiyun
584*4882a593Smuzhiyun }
585*4882a593Smuzhiyun
DecompressBand23(struct pwc_dec23_private * pdec,const unsigned char * rawyuv,unsigned char * planar_y,unsigned char * planar_u,unsigned char * planar_v,unsigned int compressed_image_width,unsigned int real_image_width)586*4882a593Smuzhiyun static void DecompressBand23(struct pwc_dec23_private *pdec,
587*4882a593Smuzhiyun const unsigned char *rawyuv,
588*4882a593Smuzhiyun unsigned char *planar_y,
589*4882a593Smuzhiyun unsigned char *planar_u,
590*4882a593Smuzhiyun unsigned char *planar_v,
591*4882a593Smuzhiyun unsigned int compressed_image_width,
592*4882a593Smuzhiyun unsigned int real_image_width)
593*4882a593Smuzhiyun {
594*4882a593Smuzhiyun int compression_index, nblocks;
595*4882a593Smuzhiyun const unsigned char *ptable0004;
596*4882a593Smuzhiyun const unsigned char *ptable8004;
597*4882a593Smuzhiyun
598*4882a593Smuzhiyun pdec->reservoir = 0;
599*4882a593Smuzhiyun pdec->nbits_in_reservoir = 0;
600*4882a593Smuzhiyun pdec->stream = rawyuv + 1; /* The first byte of the stream is skipped */
601*4882a593Smuzhiyun
602*4882a593Smuzhiyun get_nbits(pdec, 4, compression_index);
603*4882a593Smuzhiyun
604*4882a593Smuzhiyun /* pass 1: uncompress Y component */
605*4882a593Smuzhiyun nblocks = compressed_image_width / 4;
606*4882a593Smuzhiyun
607*4882a593Smuzhiyun ptable0004 = pdec->table_0004_pass1[compression_index];
608*4882a593Smuzhiyun ptable8004 = pdec->table_8004_pass1[compression_index];
609*4882a593Smuzhiyun
610*4882a593Smuzhiyun /* Each block decode a square of 4x4 */
611*4882a593Smuzhiyun while (nblocks) {
612*4882a593Smuzhiyun decode_block(pdec, ptable0004, ptable8004);
613*4882a593Smuzhiyun copy_image_block_Y(pdec->temp_colors, planar_y, real_image_width, pdec->scalebits);
614*4882a593Smuzhiyun planar_y += 4;
615*4882a593Smuzhiyun nblocks--;
616*4882a593Smuzhiyun }
617*4882a593Smuzhiyun
618*4882a593Smuzhiyun /* pass 2: uncompress UV component */
619*4882a593Smuzhiyun nblocks = compressed_image_width / 8;
620*4882a593Smuzhiyun
621*4882a593Smuzhiyun ptable0004 = pdec->table_0004_pass2[compression_index];
622*4882a593Smuzhiyun ptable8004 = pdec->table_8004_pass2[compression_index];
623*4882a593Smuzhiyun
624*4882a593Smuzhiyun /* Each block decode a square of 4x4 */
625*4882a593Smuzhiyun while (nblocks) {
626*4882a593Smuzhiyun decode_block(pdec, ptable0004, ptable8004);
627*4882a593Smuzhiyun copy_image_block_CrCb(pdec->temp_colors, planar_u, real_image_width/2, pdec->scalebits);
628*4882a593Smuzhiyun
629*4882a593Smuzhiyun decode_block(pdec, ptable0004, ptable8004);
630*4882a593Smuzhiyun copy_image_block_CrCb(pdec->temp_colors, planar_v, real_image_width/2, pdec->scalebits);
631*4882a593Smuzhiyun
632*4882a593Smuzhiyun planar_v += 8;
633*4882a593Smuzhiyun planar_u += 8;
634*4882a593Smuzhiyun nblocks -= 2;
635*4882a593Smuzhiyun }
636*4882a593Smuzhiyun
637*4882a593Smuzhiyun }
638*4882a593Smuzhiyun
639*4882a593Smuzhiyun /**
640*4882a593Smuzhiyun * Uncompress a pwc23 buffer.
641*4882a593Smuzhiyun * @pdev: pointer to pwc device's internal struct
642*4882a593Smuzhiyun * @src: raw data
643*4882a593Smuzhiyun * @dst: image output
644*4882a593Smuzhiyun */
pwc_dec23_decompress(struct pwc_device * pdev,const void * src,void * dst)645*4882a593Smuzhiyun void pwc_dec23_decompress(struct pwc_device *pdev,
646*4882a593Smuzhiyun const void *src,
647*4882a593Smuzhiyun void *dst)
648*4882a593Smuzhiyun {
649*4882a593Smuzhiyun int bandlines_left, bytes_per_block;
650*4882a593Smuzhiyun struct pwc_dec23_private *pdec = &pdev->dec23;
651*4882a593Smuzhiyun
652*4882a593Smuzhiyun /* YUV420P image format */
653*4882a593Smuzhiyun unsigned char *pout_planar_y;
654*4882a593Smuzhiyun unsigned char *pout_planar_u;
655*4882a593Smuzhiyun unsigned char *pout_planar_v;
656*4882a593Smuzhiyun unsigned int plane_size;
657*4882a593Smuzhiyun
658*4882a593Smuzhiyun mutex_lock(&pdec->lock);
659*4882a593Smuzhiyun
660*4882a593Smuzhiyun bandlines_left = pdev->height / 4;
661*4882a593Smuzhiyun bytes_per_block = pdev->width * 4;
662*4882a593Smuzhiyun plane_size = pdev->height * pdev->width;
663*4882a593Smuzhiyun
664*4882a593Smuzhiyun pout_planar_y = dst;
665*4882a593Smuzhiyun pout_planar_u = dst + plane_size;
666*4882a593Smuzhiyun pout_planar_v = dst + plane_size + plane_size / 4;
667*4882a593Smuzhiyun
668*4882a593Smuzhiyun while (bandlines_left--) {
669*4882a593Smuzhiyun DecompressBand23(pdec, src,
670*4882a593Smuzhiyun pout_planar_y, pout_planar_u, pout_planar_v,
671*4882a593Smuzhiyun pdev->width, pdev->width);
672*4882a593Smuzhiyun src += pdev->vbandlength;
673*4882a593Smuzhiyun pout_planar_y += bytes_per_block;
674*4882a593Smuzhiyun pout_planar_u += pdev->width;
675*4882a593Smuzhiyun pout_planar_v += pdev->width;
676*4882a593Smuzhiyun }
677*4882a593Smuzhiyun mutex_unlock(&pdec->lock);
678*4882a593Smuzhiyun }
679