xref: /OK3568_Linux_fs/kernel/lib/842/842_compress.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-or-later
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun  * 842 Software Compression
4*4882a593Smuzhiyun  *
5*4882a593Smuzhiyun  * Copyright (C) 2015 Dan Streetman, IBM Corp
6*4882a593Smuzhiyun  *
7*4882a593Smuzhiyun  * See 842.h for details of the 842 compressed format.
8*4882a593Smuzhiyun  */
9*4882a593Smuzhiyun 
10*4882a593Smuzhiyun #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
11*4882a593Smuzhiyun #define MODULE_NAME "842_compress"
12*4882a593Smuzhiyun 
13*4882a593Smuzhiyun #include <linux/hashtable.h>
14*4882a593Smuzhiyun 
15*4882a593Smuzhiyun #include "842.h"
16*4882a593Smuzhiyun #include "842_debugfs.h"
17*4882a593Smuzhiyun 
18*4882a593Smuzhiyun #define SW842_HASHTABLE8_BITS	(10)
19*4882a593Smuzhiyun #define SW842_HASHTABLE4_BITS	(11)
20*4882a593Smuzhiyun #define SW842_HASHTABLE2_BITS	(10)
21*4882a593Smuzhiyun 
22*4882a593Smuzhiyun /* By default, we allow compressing input buffers of any length, but we must
23*4882a593Smuzhiyun  * use the non-standard "short data" template so the decompressor can correctly
24*4882a593Smuzhiyun  * reproduce the uncompressed data buffer at the right length.  However the
25*4882a593Smuzhiyun  * hardware 842 compressor will not recognize the "short data" template, and
26*4882a593Smuzhiyun  * will fail to decompress any compressed buffer containing it (I have no idea
27*4882a593Smuzhiyun  * why anyone would want to use software to compress and hardware to decompress
28*4882a593Smuzhiyun  * but that's beside the point).  This parameter forces the compression
29*4882a593Smuzhiyun  * function to simply reject any input buffer that isn't a multiple of 8 bytes
30*4882a593Smuzhiyun  * long, instead of using the "short data" template, so that all compressed
31*4882a593Smuzhiyun  * buffers produced by this function will be decompressable by the 842 hardware
32*4882a593Smuzhiyun  * decompressor.  Unless you have a specific need for that, leave this disabled
33*4882a593Smuzhiyun  * so that any length buffer can be compressed.
34*4882a593Smuzhiyun  */
35*4882a593Smuzhiyun static bool sw842_strict;
36*4882a593Smuzhiyun module_param_named(strict, sw842_strict, bool, 0644);
37*4882a593Smuzhiyun 
/* Table of the 25 standard 842 compression templates.
 *
 * Each row lists the four sub-ops making up one 8-byte block
 * (I2/I4/I8 = back-reference index, D2/D4/D8 = literal data,
 * N0 = no-op filler), followed by the 5-bit template opcode in
 * column 5.  Rows are sorted by total encoded size in bits (the
 * trailing comment), so a linear first-match scan picks the most
 * compact applicable template; the last row (D8, all-literal) is
 * the fallback that always applies.
 */
static u8 comp_ops[OPS_MAX][5] = { /* params size in bits */
	{ I8, N0, N0, N0, 0x19 }, /* 8 */
	{ I4, I4, N0, N0, 0x18 }, /* 18 */
	{ I4, I2, I2, N0, 0x17 }, /* 25 */
	{ I2, I2, I4, N0, 0x13 }, /* 25 */
	{ I2, I2, I2, I2, 0x12 }, /* 32 */
	{ I4, I2, D2, N0, 0x16 }, /* 33 */
	{ I4, D2, I2, N0, 0x15 }, /* 33 */
	{ I2, D2, I4, N0, 0x0e }, /* 33 */
	{ D2, I2, I4, N0, 0x09 }, /* 33 */
	{ I2, I2, I2, D2, 0x11 }, /* 40 */
	{ I2, I2, D2, I2, 0x10 }, /* 40 */
	{ I2, D2, I2, I2, 0x0d }, /* 40 */
	{ D2, I2, I2, I2, 0x08 }, /* 40 */
	{ I4, D4, N0, N0, 0x14 }, /* 41 */
	{ D4, I4, N0, N0, 0x04 }, /* 41 */
	{ I2, I2, D4, N0, 0x0f }, /* 48 */
	{ I2, D2, I2, D2, 0x0c }, /* 48 */
	{ I2, D4, I2, N0, 0x0b }, /* 48 */
	{ D2, I2, I2, D2, 0x07 }, /* 48 */
	{ D2, I2, D2, I2, 0x06 }, /* 48 */
	{ D4, I2, I2, N0, 0x03 }, /* 48 */
	{ I2, D2, D4, N0, 0x0a }, /* 56 */
	{ D2, I2, D4, N0, 0x05 }, /* 56 */
	{ D4, I2, D2, N0, 0x02 }, /* 56 */
	{ D4, D2, I2, N0, 0x01 }, /* 56 */
	{ D8, N0, N0, N0, 0x00 }, /* 64 */
};
66*4882a593Smuzhiyun 
/* Hashtable entry for a previously seen 8-byte value; index is the
 * fixed ring-buffer slot number emitted as the I8 template param.
 */
struct sw842_hlist_node8 {
	struct hlist_node node;
	u64 data;	/* last 8-byte value stored in this slot */
	u8 index;	/* slot number, set once at init */
};

/* As above, for 4-byte values (I4 params). */
struct sw842_hlist_node4 {
	struct hlist_node node;
	u32 data;	/* last 4-byte value stored in this slot */
	u16 index;	/* slot number; needs 16 bits since I4_BITS > 8 */
};

/* As above, for 2-byte values (I2 params). */
struct sw842_hlist_node2 {
	struct hlist_node node;
	u16 data;	/* last 2-byte value stored in this slot */
	u8 index;	/* slot number, set once at init */
};
84*4882a593Smuzhiyun 
/* index lookup results: the value was searched for but not present */
#define INDEX_NOT_FOUND		(-1)
/* the value has not been searched for yet (lazy lookup sentinel) */
#define INDEX_NOT_CHECKED	(-2)

/* Per-call compression state; lives in the caller-provided wmem
 * buffer (must be at least SW842_MEM_COMPRESS bytes).
 */
struct sw842_param {
	u8 *in;		/* current input position */
	u8 *instart;	/* start of input, for computing offsets */
	u64 ilen;	/* input bytes remaining */
	u8 *out;	/* current output position */
	u64 olen;	/* output bytes remaining */
	u8 bit;		/* bits already used in the byte at *out (0-7) */
	/* the current 8-byte input block, viewed as 1x8, 2x4 and 4x2 */
	u64 data8[1];
	u32 data4[2];
	u16 data2[4];
	/* lazy hashtable-lookup results for each dataN chunk;
	 * INDEX_NOT_CHECKED / INDEX_NOT_FOUND / a valid index >= 0
	 */
	int index8[1];
	int index4[2];
	int index2[4];
	DECLARE_HASHTABLE(htable8, SW842_HASHTABLE8_BITS);
	DECLARE_HASHTABLE(htable4, SW842_HASHTABLE4_BITS);
	DECLARE_HASHTABLE(htable2, SW842_HASHTABLE2_BITS);
	/* ring buffers of recently seen values, one slot per index */
	struct sw842_hlist_node8 node8[1 << I8_BITS];
	struct sw842_hlist_node4 node4[1 << I4_BITS];
	struct sw842_hlist_node2 node2[1 << I2_BITS];
};
108*4882a593Smuzhiyun 
/* Read a b-bit big-endian value from the input at byte offset o. */
#define get_input_data(p, o, b)						\
	be##b##_to_cpu(get_unaligned((__be##b *)((p)->in + (o))))

/* Empty the b-byte hashtable and assign every ring-buffer node its
 * fixed slot index; nodes are (re)linked into the table later by
 * replace_hash().
 */
#define init_hashtable_nodes(p, b)	do {			\
	int _i;							\
	hash_init((p)->htable##b);				\
	for (_i = 0; _i < ARRAY_SIZE((p)->node##b); _i++) {	\
		(p)->node##b[_i].index = _i;			\
		(p)->node##b[_i].data = 0;			\
		INIT_HLIST_NODE(&(p)->node##b[_i].node);	\
	}							\
} while (0)

/* Search htable<b> for data<b>[n]; store the result in index<b>[n]
 * and evaluate to true iff a matching entry was found.
 */
#define find_index(p, b, n)	({					\
	struct sw842_hlist_node##b *_n;					\
	p->index##b[n] = INDEX_NOT_FOUND;				\
	hash_for_each_possible(p->htable##b, _n, node, p->data##b[n]) {	\
		if (p->data##b[n] == _n->data) {			\
			p->index##b[n] = _n->index;			\
			break;						\
		}							\
	}								\
	p->index##b[n] >= 0;						\
})

/* Lazy wrapper around find_index(): only do the hashtable walk the
 * first time a given chunk is queried for the current block.
 */
#define check_index(p, b, n)			\
	((p)->index##b[n] == INDEX_NOT_CHECKED	\
	 ? find_index(p, b, n)			\
	 : (p)->index##b[n] >= 0)

/* Recycle ring-buffer slot (i)+(d): unhash its old value and rehash
 * it under the current data<b>[d] chunk.
 */
#define replace_hash(p, b, i, d)	do {				\
	struct sw842_hlist_node##b *_n = &(p)->node##b[(i)+(d)];	\
	hash_del(&_n->node);						\
	_n->data = (p)->data##b[d];					\
	pr_debug("add hash index%x %x pos %x data %lx\n", b,		\
		 (unsigned int)_n->index,				\
		 (unsigned int)((p)->in - (p)->instart),		\
		 (unsigned long)_n->data);				\
	hash_add((p)->htable##b, &_n->node, _n->data);			\
} while (0)
149*4882a593Smuzhiyun 
/* bmask[b] masks off (keeps) the b already-written high bits of a
 * partially filled output byte.
 */
static u8 bmask[8] = { 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe };

static int add_bits(struct sw842_param *p, u64 d, u8 n);
153*4882a593Smuzhiyun 
/* Emit @n bits of @d as two writes: first the upper (n - s) bits,
 * then the low @s bits.  Used by add_bits() when a single write would
 * span more output bytes than are available or representable.
 */
static int __split_add_bits(struct sw842_param *p, u64 d, u8 n, u8 s)
{
	int err;

	if (s >= n)
		return -EINVAL;

	/* order matters: the high part must land in the stream first */
	err = add_bits(p, d >> s, n - s);
	if (err)
		return err;

	return add_bits(p, d & GENMASK_ULL(s - 1, 0), s);
}
166*4882a593Smuzhiyun 
/* Append the low @n bits of @d to the big-endian output bitstream,
 * advancing p->out/p->olen/p->bit.  Returns 0, -EINVAL if n > 64, or
 * -ENOSPC if the output buffer is exhausted.
 */
static int add_bits(struct sw842_param *p, u64 d, u8 n)
{
	/* b: bits already used in the current output byte;
	 * bits: total bits occupied in the affected bytes after writing;
	 * s: left-shift needed to align d to the byte boundary
	 */
	int b = p->bit, bits = b + n, s = round_up(bits, 8) - bits;
	u64 o;
	u8 *out = p->out;

	pr_debug("add %u bits %lx\n", (unsigned char)n, (unsigned long)d);

	if (n > 64)
		return -EINVAL;

	/* split this up if writing to > 8 bytes (i.e. n == 64 && p->bit > 0),
	 * or if we're at the end of the output buffer and would write past end
	 */
	if (bits > 64)
		return __split_add_bits(p, d, n, 32);
	else if (p->olen < 8 && bits > 32 && bits <= 56)
		return __split_add_bits(p, d, n, 16);
	else if (p->olen < 4 && bits > 16 && bits <= 24)
		return __split_add_bits(p, d, n, 8);

	if (DIV_ROUND_UP(bits, 8) > p->olen)
		return -ENOSPC;

	/* o: the bits already written into the current partial byte */
	o = *out & bmask[b];
	d <<= s;

	/* write back o | d in the smallest big-endian width that covers
	 * all 'bits' bits; o is shifted to the top of that width
	 */
	if (bits <= 8)
		*out = o | d;
	else if (bits <= 16)
		put_unaligned(cpu_to_be16(o << 8 | d), (__be16 *)out);
	else if (bits <= 24)
		put_unaligned(cpu_to_be32(o << 24 | d << 8), (__be32 *)out);
	else if (bits <= 32)
		put_unaligned(cpu_to_be32(o << 24 | d), (__be32 *)out);
	else if (bits <= 40)
		put_unaligned(cpu_to_be64(o << 56 | d << 24), (__be64 *)out);
	else if (bits <= 48)
		put_unaligned(cpu_to_be64(o << 56 | d << 16), (__be64 *)out);
	else if (bits <= 56)
		put_unaligned(cpu_to_be64(o << 56 | d << 8), (__be64 *)out);
	else
		put_unaligned(cpu_to_be64(o << 56 | d), (__be64 *)out);

	p->bit += n;

	/* consume whole output bytes; keep the leftover bit count */
	if (p->bit > 7) {
		p->out += p->bit / 8;
		p->olen -= p->bit / 8;
		p->bit %= 8;
	}

	return 0;
}
221*4882a593Smuzhiyun 
/* Encode the current 8-byte block with template number @c: emit the
 * template opcode, then each sub-op's parameter (a hashtable index or
 * literal input data).  The caller must have filled p->dataN and
 * p->indexN for this block.
 *
 * Returns 0 on success, -EINVAL for an out-of-range or malformed
 * template, or an error propagated from add_bits() (e.g. -ENOSPC).
 */
static int add_template(struct sw842_param *p, u8 c)
{
	int ret, i, b = 0;
	u8 *t;
	bool inv = false;

	/* validate the template number BEFORE indexing comp_ops with it;
	 * computing comp_ops[c] for c >= OPS_MAX is out of bounds
	 */
	if (c >= OPS_MAX)
		return -EINVAL;

	t = comp_ops[c];

	pr_debug("template %x\n", t[4]);

	ret = add_bits(p, t[4], OP_BITS);
	if (ret)
		return ret;

	/* b tracks how many of the block's 8 bytes are covered so far */
	for (i = 0; i < 4; i++) {
		pr_debug("op %x\n", t[i]);

		switch (t[i] & OP_AMOUNT) {
		case OP_AMOUNT_8:
			/* an 8-byte op must be the template's only op */
			if (b)
				inv = true;
			else if (t[i] & OP_ACTION_INDEX)
				ret = add_bits(p, p->index8[0], I8_BITS);
			else if (t[i] & OP_ACTION_DATA)
				ret = add_bits(p, p->data8[0], 64);
			else
				inv = true;
			break;
		case OP_AMOUNT_4:
			/* 4-byte literal at offset 2 straddles the data4
			 * halves, so reread it straight from the input
			 */
			if (b == 2 && t[i] & OP_ACTION_DATA)
				ret = add_bits(p, get_input_data(p, 2, 32), 32);
			else if (b != 0 && b != 4)
				inv = true;
			else if (t[i] & OP_ACTION_INDEX)
				ret = add_bits(p, p->index4[b >> 2], I4_BITS);
			else if (t[i] & OP_ACTION_DATA)
				ret = add_bits(p, p->data4[b >> 2], 32);
			else
				inv = true;
			break;
		case OP_AMOUNT_2:
			if (b != 0 && b != 2 && b != 4 && b != 6)
				inv = true;
			if (t[i] & OP_ACTION_INDEX)
				ret = add_bits(p, p->index2[b >> 1], I2_BITS);
			else if (t[i] & OP_ACTION_DATA)
				ret = add_bits(p, p->data2[b >> 1], 16);
			else
				inv = true;
			break;
		case OP_AMOUNT_0:
			/* no-op filler: only valid once all 8 bytes are
			 * already covered
			 */
			inv = (b != 8) || !(t[i] & OP_ACTION_NOOP);
			break;
		default:
			inv = true;
			break;
		}

		if (ret)
			return ret;

		if (inv) {
			pr_err("Invalid templ %x op %d : %x %x %x %x\n",
			       c, i, t[0], t[1], t[2], t[3]);
			return -EINVAL;
		}

		b += t[i] & OP_AMOUNT;
	}

	if (b != 8) {
		pr_err("Invalid template %x len %x : %x %x %x %x\n",
		       c, b, t[0], t[1], t[2], t[3]);
		return -EINVAL;
	}

	if (sw842_template_counts)
		atomic_inc(&template_count[t[4]]);

	return 0;
}
304*4882a593Smuzhiyun 
/* Emit an OP_REPEAT template saying "the previous 8-byte block occurs
 * @r more times".  @r must be 1..REPEAT_BITS_MAX+1; the on-wire count
 * is 0-based.
 */
static int add_repeat_template(struct sw842_param *p, u8 r)
{
	int err;

	/* repeat param is 0-based */
	if (r == 0)
		return -EINVAL;
	r--;
	if (r > REPEAT_BITS_MAX)
		return -EINVAL;

	err = add_bits(p, OP_REPEAT, OP_BITS);
	if (!err)
		err = add_bits(p, r, REPEAT_BITS);
	if (err)
		return err;

	if (sw842_template_counts)
		atomic_inc(&template_repeat_count);

	return 0;
}
326*4882a593Smuzhiyun 
/* Emit the non-standard OP_SHORT_DATA template carrying the final
 * @b (1..SHORT_DATA_BITS_MAX) leftover input bytes verbatim.
 */
static int add_short_data_template(struct sw842_param *p, u8 b)
{
	int err, i;

	if (b == 0 || b > SHORT_DATA_BITS_MAX)
		return -EINVAL;

	err = add_bits(p, OP_SHORT_DATA, OP_BITS);
	if (!err)
		err = add_bits(p, b, SHORT_DATA_BITS);
	if (err)
		return err;

	/* the raw bytes follow the length field, one at a time */
	for (i = 0; i < b; i++) {
		err = add_bits(p, p->in[i], 8);
		if (err)
			return err;
	}

	if (sw842_template_counts)
		atomic_inc(&template_short_data_count);

	return 0;
}
353*4882a593Smuzhiyun 
add_zeros_template(struct sw842_param * p)354*4882a593Smuzhiyun static int add_zeros_template(struct sw842_param *p)
355*4882a593Smuzhiyun {
356*4882a593Smuzhiyun 	int ret = add_bits(p, OP_ZEROS, OP_BITS);
357*4882a593Smuzhiyun 
358*4882a593Smuzhiyun 	if (ret)
359*4882a593Smuzhiyun 		return ret;
360*4882a593Smuzhiyun 
361*4882a593Smuzhiyun 	if (sw842_template_counts)
362*4882a593Smuzhiyun 		atomic_inc(&template_zeros_count);
363*4882a593Smuzhiyun 
364*4882a593Smuzhiyun 	return 0;
365*4882a593Smuzhiyun }
366*4882a593Smuzhiyun 
add_end_template(struct sw842_param * p)367*4882a593Smuzhiyun static int add_end_template(struct sw842_param *p)
368*4882a593Smuzhiyun {
369*4882a593Smuzhiyun 	int ret = add_bits(p, OP_END, OP_BITS);
370*4882a593Smuzhiyun 
371*4882a593Smuzhiyun 	if (ret)
372*4882a593Smuzhiyun 		return ret;
373*4882a593Smuzhiyun 
374*4882a593Smuzhiyun 	if (sw842_template_counts)
375*4882a593Smuzhiyun 		atomic_inc(&template_end_count);
376*4882a593Smuzhiyun 
377*4882a593Smuzhiyun 	return 0;
378*4882a593Smuzhiyun }
379*4882a593Smuzhiyun 
check_template(struct sw842_param * p,u8 c)380*4882a593Smuzhiyun static bool check_template(struct sw842_param *p, u8 c)
381*4882a593Smuzhiyun {
382*4882a593Smuzhiyun 	u8 *t = comp_ops[c];
383*4882a593Smuzhiyun 	int i, match, b = 0;
384*4882a593Smuzhiyun 
385*4882a593Smuzhiyun 	if (c >= OPS_MAX)
386*4882a593Smuzhiyun 		return false;
387*4882a593Smuzhiyun 
388*4882a593Smuzhiyun 	for (i = 0; i < 4; i++) {
389*4882a593Smuzhiyun 		if (t[i] & OP_ACTION_INDEX) {
390*4882a593Smuzhiyun 			if (t[i] & OP_AMOUNT_2)
391*4882a593Smuzhiyun 				match = check_index(p, 2, b >> 1);
392*4882a593Smuzhiyun 			else if (t[i] & OP_AMOUNT_4)
393*4882a593Smuzhiyun 				match = check_index(p, 4, b >> 2);
394*4882a593Smuzhiyun 			else if (t[i] & OP_AMOUNT_8)
395*4882a593Smuzhiyun 				match = check_index(p, 8, 0);
396*4882a593Smuzhiyun 			else
397*4882a593Smuzhiyun 				return false;
398*4882a593Smuzhiyun 			if (!match)
399*4882a593Smuzhiyun 				return false;
400*4882a593Smuzhiyun 		}
401*4882a593Smuzhiyun 
402*4882a593Smuzhiyun 		b += t[i] & OP_AMOUNT;
403*4882a593Smuzhiyun 	}
404*4882a593Smuzhiyun 
405*4882a593Smuzhiyun 	return true;
406*4882a593Smuzhiyun }
407*4882a593Smuzhiyun 
get_next_data(struct sw842_param * p)408*4882a593Smuzhiyun static void get_next_data(struct sw842_param *p)
409*4882a593Smuzhiyun {
410*4882a593Smuzhiyun 	p->data8[0] = get_input_data(p, 0, 64);
411*4882a593Smuzhiyun 	p->data4[0] = get_input_data(p, 0, 32);
412*4882a593Smuzhiyun 	p->data4[1] = get_input_data(p, 4, 32);
413*4882a593Smuzhiyun 	p->data2[0] = get_input_data(p, 0, 16);
414*4882a593Smuzhiyun 	p->data2[1] = get_input_data(p, 2, 16);
415*4882a593Smuzhiyun 	p->data2[2] = get_input_data(p, 4, 16);
416*4882a593Smuzhiyun 	p->data2[3] = get_input_data(p, 6, 16);
417*4882a593Smuzhiyun }
418*4882a593Smuzhiyun 
419*4882a593Smuzhiyun /* update the hashtable entries.
420*4882a593Smuzhiyun  * only call this after finding/adding the current template
421*4882a593Smuzhiyun  * the dataN fields for the current 8 byte block must be already updated
422*4882a593Smuzhiyun  */
update_hashtables(struct sw842_param * p)423*4882a593Smuzhiyun static void update_hashtables(struct sw842_param *p)
424*4882a593Smuzhiyun {
425*4882a593Smuzhiyun 	u64 pos = p->in - p->instart;
426*4882a593Smuzhiyun 	u64 n8 = (pos >> 3) % (1 << I8_BITS);
427*4882a593Smuzhiyun 	u64 n4 = (pos >> 2) % (1 << I4_BITS);
428*4882a593Smuzhiyun 	u64 n2 = (pos >> 1) % (1 << I2_BITS);
429*4882a593Smuzhiyun 
430*4882a593Smuzhiyun 	replace_hash(p, 8, n8, 0);
431*4882a593Smuzhiyun 	replace_hash(p, 4, n4, 0);
432*4882a593Smuzhiyun 	replace_hash(p, 4, n4, 1);
433*4882a593Smuzhiyun 	replace_hash(p, 2, n2, 0);
434*4882a593Smuzhiyun 	replace_hash(p, 2, n2, 1);
435*4882a593Smuzhiyun 	replace_hash(p, 2, n2, 2);
436*4882a593Smuzhiyun 	replace_hash(p, 2, n2, 3);
437*4882a593Smuzhiyun }
438*4882a593Smuzhiyun 
439*4882a593Smuzhiyun /* find the next template to use, and add it
440*4882a593Smuzhiyun  * the p->dataN fields must already be set for the current 8 byte block
441*4882a593Smuzhiyun  */
process_next(struct sw842_param * p)442*4882a593Smuzhiyun static int process_next(struct sw842_param *p)
443*4882a593Smuzhiyun {
444*4882a593Smuzhiyun 	int ret, i;
445*4882a593Smuzhiyun 
446*4882a593Smuzhiyun 	p->index8[0] = INDEX_NOT_CHECKED;
447*4882a593Smuzhiyun 	p->index4[0] = INDEX_NOT_CHECKED;
448*4882a593Smuzhiyun 	p->index4[1] = INDEX_NOT_CHECKED;
449*4882a593Smuzhiyun 	p->index2[0] = INDEX_NOT_CHECKED;
450*4882a593Smuzhiyun 	p->index2[1] = INDEX_NOT_CHECKED;
451*4882a593Smuzhiyun 	p->index2[2] = INDEX_NOT_CHECKED;
452*4882a593Smuzhiyun 	p->index2[3] = INDEX_NOT_CHECKED;
453*4882a593Smuzhiyun 
454*4882a593Smuzhiyun 	/* check up to OPS_MAX - 1; last op is our fallback */
455*4882a593Smuzhiyun 	for (i = 0; i < OPS_MAX - 1; i++) {
456*4882a593Smuzhiyun 		if (check_template(p, i))
457*4882a593Smuzhiyun 			break;
458*4882a593Smuzhiyun 	}
459*4882a593Smuzhiyun 
460*4882a593Smuzhiyun 	ret = add_template(p, i);
461*4882a593Smuzhiyun 	if (ret)
462*4882a593Smuzhiyun 		return ret;
463*4882a593Smuzhiyun 
464*4882a593Smuzhiyun 	return 0;
465*4882a593Smuzhiyun }
466*4882a593Smuzhiyun 
467*4882a593Smuzhiyun /**
468*4882a593Smuzhiyun  * sw842_compress
469*4882a593Smuzhiyun  *
470*4882a593Smuzhiyun  * Compress the uncompressed buffer of length @ilen at @in to the output buffer
471*4882a593Smuzhiyun  * @out, using no more than @olen bytes, using the 842 compression format.
472*4882a593Smuzhiyun  *
473*4882a593Smuzhiyun  * Returns: 0 on success, error on failure.  The @olen parameter
474*4882a593Smuzhiyun  * will contain the number of output bytes written on success, or
475*4882a593Smuzhiyun  * 0 on error.
476*4882a593Smuzhiyun  */
/**
 * sw842_compress
 *
 * Compress the uncompressed buffer of length @ilen at @in to the output buffer
 * @out, using no more than @olen bytes, using the 842 compression format.
 *
 * @wmem must be a workspace of at least SW842_MEM_COMPRESS bytes.
 *
 * Returns: 0 on success, error on failure.  The @olen parameter
 * will contain the number of output bytes written on success, or
 * 0 on error.
 */
int sw842_compress(const u8 *in, unsigned int ilen,
		   u8 *out, unsigned int *olen, void *wmem)
{
	struct sw842_param *p = (struct sw842_param *)wmem;
	int ret;
	u64 last, next, pad, total;
	u8 repeat_count = 0;
	u32 crc;

	BUILD_BUG_ON(sizeof(*p) > SW842_MEM_COMPRESS);

	init_hashtable_nodes(p, 8);
	init_hashtable_nodes(p, 4);
	init_hashtable_nodes(p, 2);

	p->in = (u8 *)in;
	p->instart = p->in;
	p->ilen = ilen;
	p->out = out;
	p->olen = *olen;
	p->bit = 0;

	total = p->olen;

	*olen = 0;

	/* if using strict mode, we can only compress a multiple of 8 */
	if (sw842_strict && (ilen % 8)) {
		pr_err("Using strict mode, can't compress len %d\n", ilen);
		return -EINVAL;
	}

	/* let's compress at least 8 bytes, mkay? */
	if (unlikely(ilen < 8))
		goto skip_comp;

	/* make initial 'last' different so we don't match the first time */
	last = ~get_unaligned((u64 *)p->in);

	while (p->ilen > 7) {
		next = get_unaligned((u64 *)p->in);

		/* must get the next data, as we need to update the hashtable
		 * entries with the new data every time
		 */
		get_next_data(p);

		/* we don't care about endianness in last or next;
		 * we're just comparing 8 bytes to another 8 bytes,
		 * they're both the same endianness
		 */
		if (next == last) {
			/* repeat count bits are 0-based, so we stop at +1 */
			if (++repeat_count <= REPEAT_BITS_MAX)
				goto repeat;
		}
		if (repeat_count) {
			ret = add_repeat_template(p, repeat_count);
			repeat_count = 0;
			/* don't lose an error (e.g. -ENOSPC) on the
			 * max-repeat path below
			 */
			if (ret)
				return ret;
			if (next == last) /* reached max repeat bits */
				goto repeat;
		}

		if (next == 0)
			ret = add_zeros_template(p);
		else
			ret = process_next(p);

		if (ret)
			return ret;

repeat:
		last = next;
		update_hashtables(p);
		p->in += 8;
		p->ilen -= 8;
	}

	/* flush any repeat run still pending at end of input */
	if (repeat_count) {
		ret = add_repeat_template(p, repeat_count);
		if (ret)
			return ret;
	}

skip_comp:
	/* trailing 1-7 bytes go out via the "short data" template */
	if (p->ilen > 0) {
		ret = add_short_data_template(p, p->ilen);
		if (ret)
			return ret;

		p->in += p->ilen;
		p->ilen = 0;
	}

	ret = add_end_template(p);
	if (ret)
		return ret;

	/*
	 * crc(0:31) is appended to target data starting with the next
	 * bit after End of stream template.
	 * nx842 calculates CRC for data in big-endian format. So doing
	 * same here so that sw842 decompression can be used for both
	 * compressed data.
	 */
	crc = crc32_be(0, in, ilen);
	ret = add_bits(p, crc, CRC_BITS);
	if (ret)
		return ret;

	/* a partially used final byte still counts as consumed */
	if (p->bit) {
		p->out++;
		p->olen--;
		p->bit = 0;
	}

	/* pad compressed length to multiple of 8 */
	pad = (8 - ((total - p->olen) % 8)) % 8;
	if (pad) {
		if (pad > p->olen) /* we were so close! */
			return -ENOSPC;
		memset(p->out, 0, pad);
		p->out += pad;
		p->olen -= pad;
	}

	if (unlikely((total - p->olen) > UINT_MAX))
		return -ENOSPC;

	*olen = total - p->olen;

	return 0;
}
EXPORT_SYMBOL_GPL(sw842_compress);
610*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(sw842_compress);
611*4882a593Smuzhiyun 
sw842_init(void)612*4882a593Smuzhiyun static int __init sw842_init(void)
613*4882a593Smuzhiyun {
614*4882a593Smuzhiyun 	if (sw842_template_counts)
615*4882a593Smuzhiyun 		sw842_debugfs_create();
616*4882a593Smuzhiyun 
617*4882a593Smuzhiyun 	return 0;
618*4882a593Smuzhiyun }
619*4882a593Smuzhiyun module_init(sw842_init);
620*4882a593Smuzhiyun 
sw842_exit(void)621*4882a593Smuzhiyun static void __exit sw842_exit(void)
622*4882a593Smuzhiyun {
623*4882a593Smuzhiyun 	if (sw842_template_counts)
624*4882a593Smuzhiyun 		sw842_debugfs_remove();
625*4882a593Smuzhiyun }
626*4882a593Smuzhiyun module_exit(sw842_exit);
627*4882a593Smuzhiyun 
628*4882a593Smuzhiyun MODULE_LICENSE("GPL");
629*4882a593Smuzhiyun MODULE_DESCRIPTION("Software 842 Compressor");
630*4882a593Smuzhiyun MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");
631