xref: /OK3568_Linux_fs/kernel/drivers/crypto/nx/nx-842.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-or-later
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun  * Cryptographic API for the NX-842 hardware compression.
4*4882a593Smuzhiyun  *
5*4882a593Smuzhiyun  * Copyright (C) IBM Corporation, 2011-2015
6*4882a593Smuzhiyun  *
7*4882a593Smuzhiyun  * Designer of the Power data compression engine:
8*4882a593Smuzhiyun  *   Bulent Abali <abali@us.ibm.com>
9*4882a593Smuzhiyun  *
10*4882a593Smuzhiyun  * Original Authors: Robert Jennings <rcj@linux.vnet.ibm.com>
11*4882a593Smuzhiyun  *                   Seth Jennings <sjenning@linux.vnet.ibm.com>
12*4882a593Smuzhiyun  *
13*4882a593Smuzhiyun  * Rewrite: Dan Streetman <ddstreet@ieee.org>
14*4882a593Smuzhiyun  *
15*4882a593Smuzhiyun  * This is an interface to the NX-842 compression hardware in PowerPC
 16*4882a593Smuzhiyun  * processors.  Most of the complexity of this driver is due to the fact that
17*4882a593Smuzhiyun  * the NX-842 compression hardware requires the input and output data buffers
18*4882a593Smuzhiyun  * to be specifically aligned, to be a specific multiple in length, and within
19*4882a593Smuzhiyun  * specific minimum and maximum lengths.  Those restrictions, provided by the
20*4882a593Smuzhiyun  * nx-842 driver via nx842_constraints, mean this driver must use bounce
21*4882a593Smuzhiyun  * buffers and headers to correct misaligned in or out buffers, and to split
22*4882a593Smuzhiyun  * input buffers that are too large.
23*4882a593Smuzhiyun  *
24*4882a593Smuzhiyun  * This driver will fall back to software decompression if the hardware
25*4882a593Smuzhiyun  * decompression fails, so this driver's decompression should never fail as
26*4882a593Smuzhiyun  * long as the provided compressed buffer is valid.  Any compressed buffer
27*4882a593Smuzhiyun  * created by this driver will have a header (except ones where the input
28*4882a593Smuzhiyun  * perfectly matches the constraints); so users of this driver cannot simply
29*4882a593Smuzhiyun  * pass a compressed buffer created by this driver over to the 842 software
30*4882a593Smuzhiyun  * decompression library.  Instead, users must use this driver to decompress;
31*4882a593Smuzhiyun  * if the hardware fails or is unavailable, the compressed buffer will be
32*4882a593Smuzhiyun  * parsed and the header removed, and the raw 842 buffer(s) passed to the 842
33*4882a593Smuzhiyun  * software decompression library.
34*4882a593Smuzhiyun  *
35*4882a593Smuzhiyun  * This does not fall back to software compression, however, since the caller
36*4882a593Smuzhiyun  * of this function is specifically requesting hardware compression; if the
37*4882a593Smuzhiyun  * hardware compression fails, the caller can fall back to software
38*4882a593Smuzhiyun  * compression, and the raw 842 compressed buffer that the software compressor
39*4882a593Smuzhiyun  * creates can be passed to this driver for hardware decompression; any
40*4882a593Smuzhiyun  * buffer without our specific header magic is assumed to be a raw 842 buffer
41*4882a593Smuzhiyun  * and passed directly to the hardware.  Note that the software compression
42*4882a593Smuzhiyun  * library will produce a compressed buffer that is incompatible with the
43*4882a593Smuzhiyun  * hardware decompressor if the original input buffer length is not a multiple
44*4882a593Smuzhiyun  * of 8; if such a compressed buffer is passed to this driver for
45*4882a593Smuzhiyun  * decompression, the hardware will reject it and this driver will then pass
46*4882a593Smuzhiyun  * it over to the software library for decompression.
47*4882a593Smuzhiyun  */
48*4882a593Smuzhiyun 
49*4882a593Smuzhiyun #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
50*4882a593Smuzhiyun 
51*4882a593Smuzhiyun #include <linux/vmalloc.h>
52*4882a593Smuzhiyun #include <linux/sw842.h>
53*4882a593Smuzhiyun #include <linux/spinlock.h>
54*4882a593Smuzhiyun 
55*4882a593Smuzhiyun #include "nx-842.h"
56*4882a593Smuzhiyun 
57*4882a593Smuzhiyun /* The first 5 bits of this magic are 0x1f, which is an invalid 842 5-bit
58*4882a593Smuzhiyun  * template (see lib/842/842.h), so this magic number will never appear at
59*4882a593Smuzhiyun  * the start of a raw 842 compressed buffer.  That is important, as any buffer
60*4882a593Smuzhiyun  * passed to us without this magic is assumed to be a raw 842 compressed
61*4882a593Smuzhiyun  * buffer, and passed directly to the hardware to decompress.
62*4882a593Smuzhiyun  */
63*4882a593Smuzhiyun #define NX842_CRYPTO_MAGIC	(0xf842)
64*4882a593Smuzhiyun #define NX842_CRYPTO_HEADER_SIZE(g)				\
65*4882a593Smuzhiyun 	(sizeof(struct nx842_crypto_header) +			\
66*4882a593Smuzhiyun 	 sizeof(struct nx842_crypto_header_group) * (g))
67*4882a593Smuzhiyun #define NX842_CRYPTO_HEADER_MAX_SIZE				\
68*4882a593Smuzhiyun 	NX842_CRYPTO_HEADER_SIZE(NX842_CRYPTO_GROUP_MAX)
69*4882a593Smuzhiyun 
70*4882a593Smuzhiyun /* bounce buffer size */
71*4882a593Smuzhiyun #define BOUNCE_BUFFER_ORDER	(2)
72*4882a593Smuzhiyun #define BOUNCE_BUFFER_SIZE					\
73*4882a593Smuzhiyun 	((unsigned int)(PAGE_SIZE << BOUNCE_BUFFER_ORDER))
74*4882a593Smuzhiyun 
75*4882a593Smuzhiyun /* try longer on comp because we can fallback to sw decomp if hw is busy */
76*4882a593Smuzhiyun #define COMP_BUSY_TIMEOUT	(250) /* ms */
77*4882a593Smuzhiyun #define DECOMP_BUSY_TIMEOUT	(50) /* ms */
78*4882a593Smuzhiyun 
/* Cursor state for a multi-group (de)compression pass over one request.
 * update_param() advances it after each group is processed.
 */
struct nx842_crypto_param {
	u8 *in;			/* current position in the input buffer */
	unsigned int iremain;	/* input bytes not yet consumed */
	u8 *out;		/* current position in the output buffer */
	unsigned int oremain;	/* output bytes still available */
	unsigned int ototal;	/* total output bytes produced so far */
};
86*4882a593Smuzhiyun 
update_param(struct nx842_crypto_param * p,unsigned int slen,unsigned int dlen)87*4882a593Smuzhiyun static int update_param(struct nx842_crypto_param *p,
88*4882a593Smuzhiyun 			unsigned int slen, unsigned int dlen)
89*4882a593Smuzhiyun {
90*4882a593Smuzhiyun 	if (p->iremain < slen)
91*4882a593Smuzhiyun 		return -EOVERFLOW;
92*4882a593Smuzhiyun 	if (p->oremain < dlen)
93*4882a593Smuzhiyun 		return -ENOSPC;
94*4882a593Smuzhiyun 
95*4882a593Smuzhiyun 	p->in += slen;
96*4882a593Smuzhiyun 	p->iremain -= slen;
97*4882a593Smuzhiyun 	p->out += dlen;
98*4882a593Smuzhiyun 	p->oremain -= dlen;
99*4882a593Smuzhiyun 	p->ototal += dlen;
100*4882a593Smuzhiyun 
101*4882a593Smuzhiyun 	return 0;
102*4882a593Smuzhiyun }
103*4882a593Smuzhiyun 
nx842_crypto_init(struct crypto_tfm * tfm,struct nx842_driver * driver)104*4882a593Smuzhiyun int nx842_crypto_init(struct crypto_tfm *tfm, struct nx842_driver *driver)
105*4882a593Smuzhiyun {
106*4882a593Smuzhiyun 	struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
107*4882a593Smuzhiyun 
108*4882a593Smuzhiyun 	spin_lock_init(&ctx->lock);
109*4882a593Smuzhiyun 	ctx->driver = driver;
110*4882a593Smuzhiyun 	ctx->wmem = kmalloc(driver->workmem_size, GFP_KERNEL);
111*4882a593Smuzhiyun 	ctx->sbounce = (u8 *)__get_free_pages(GFP_KERNEL, BOUNCE_BUFFER_ORDER);
112*4882a593Smuzhiyun 	ctx->dbounce = (u8 *)__get_free_pages(GFP_KERNEL, BOUNCE_BUFFER_ORDER);
113*4882a593Smuzhiyun 	if (!ctx->wmem || !ctx->sbounce || !ctx->dbounce) {
114*4882a593Smuzhiyun 		kfree(ctx->wmem);
115*4882a593Smuzhiyun 		free_page((unsigned long)ctx->sbounce);
116*4882a593Smuzhiyun 		free_page((unsigned long)ctx->dbounce);
117*4882a593Smuzhiyun 		return -ENOMEM;
118*4882a593Smuzhiyun 	}
119*4882a593Smuzhiyun 
120*4882a593Smuzhiyun 	return 0;
121*4882a593Smuzhiyun }
122*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(nx842_crypto_init);
123*4882a593Smuzhiyun 
nx842_crypto_exit(struct crypto_tfm * tfm)124*4882a593Smuzhiyun void nx842_crypto_exit(struct crypto_tfm *tfm)
125*4882a593Smuzhiyun {
126*4882a593Smuzhiyun 	struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
127*4882a593Smuzhiyun 
128*4882a593Smuzhiyun 	kfree(ctx->wmem);
129*4882a593Smuzhiyun 	free_page((unsigned long)ctx->sbounce);
130*4882a593Smuzhiyun 	free_page((unsigned long)ctx->dbounce);
131*4882a593Smuzhiyun }
132*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(nx842_crypto_exit);
133*4882a593Smuzhiyun 
check_constraints(struct nx842_constraints * c)134*4882a593Smuzhiyun static void check_constraints(struct nx842_constraints *c)
135*4882a593Smuzhiyun {
136*4882a593Smuzhiyun 	/* limit maximum, to always have enough bounce buffer to decompress */
137*4882a593Smuzhiyun 	if (c->maximum > BOUNCE_BUFFER_SIZE)
138*4882a593Smuzhiyun 		c->maximum = BOUNCE_BUFFER_SIZE;
139*4882a593Smuzhiyun }
140*4882a593Smuzhiyun 
/*
 * Copy the finished header into the front of the output buffer.  The
 * first group's padding field records how many bytes compress() reserved
 * before the compressed data, so the header must fit inside that gap.
 *
 * Return: 0 on success, -EINVAL if compress() did not reserve enough room.
 */
static int nx842_crypto_add_header(struct nx842_crypto_header *hdr, u8 *buf)
{
	int hdr_len = NX842_CRYPTO_HEADER_SIZE(hdr->groups);

	/* compress should have added space for header */
	if (hdr_len > be16_to_cpu(hdr->group[0].padding)) {
		pr_err("Internal error: no space for header\n");
		return -EINVAL;
	}

	memcpy(buf, hdr, hdr_len);

	print_hex_dump_debug("header ", DUMP_PREFIX_OFFSET, 16, 1, buf,
			     hdr_len, 0);

	return 0;
}
157*4882a593Smuzhiyun 
/*
 * Compress one group of input via the hardware driver.
 *
 * @ctx:     per-tfm context (driver ops, work memory, bounce buffers)
 * @p:       request cursor; advanced on success via update_param()
 * @g:       header group entry filled in with padding and lengths
 * @c:       hardware constraints, already capped by check_constraints()
 * @ignore:  out-param; set to the number of pad bytes appended to the
 *           input so the decompressor can discard them
 * @hdrsize: bytes reserved at the start of the output for the header
 *           (nonzero only for the first group when a header is needed)
 *
 * Uses the sbounce/dbounce buffers when the caller's buffers violate the
 * alignment/multiple/min/max constraints, and retries on -EBUSY until
 * COMP_BUSY_TIMEOUT expires.
 *
 * Return: 0 on success, or a negative errno from the driver/bookkeeping.
 */
static int compress(struct nx842_crypto_ctx *ctx,
		    struct nx842_crypto_param *p,
		    struct nx842_crypto_header_group *g,
		    struct nx842_constraints *c,
		    u16 *ignore,
		    unsigned int hdrsize)
{
	unsigned int slen = p->iremain, dlen = p->oremain, tmplen;
	unsigned int adj_slen = slen;
	u8 *src = p->in, *dst = p->out;
	int ret, dskip = 0;
	ktime_t timeout;

	if (p->iremain == 0)
		return -EOVERFLOW;

	if (p->oremain == 0 || hdrsize + c->minimum > dlen)
		return -ENOSPC;

	/* Adjust the input length up to satisfy multiple/minimum, and cap
	 * it at the hardware maximum (a later group handles the rest).
	 */
	if (slen % c->multiple)
		adj_slen = round_up(slen, c->multiple);
	if (slen < c->minimum)
		adj_slen = c->minimum;
	if (slen > c->maximum)
		adj_slen = slen = c->maximum;
	/* If padding is needed or the source is misaligned, stage the
	 * input through the zero-padded source bounce buffer.
	 */
	if (adj_slen > slen || (u64)src % c->alignment) {
		adj_slen = min(adj_slen, BOUNCE_BUFFER_SIZE);
		slen = min(slen, BOUNCE_BUFFER_SIZE);
		if (adj_slen > slen)
			memset(ctx->sbounce + slen, 0, adj_slen - slen);
		memcpy(ctx->sbounce, src, slen);
		src = ctx->sbounce;
		slen = adj_slen;
		pr_debug("using comp sbounce buffer, len %x\n", slen);
	}

	/* leave room at the front of the output for the header */
	dst += hdrsize;
	dlen -= hdrsize;

	/* dskip counts output bytes skipped for alignment; it is recorded
	 * (plus hdrsize) in g->padding for the decompressor.
	 */
	if ((u64)dst % c->alignment) {
		dskip = (int)(PTR_ALIGN(dst, c->alignment) - dst);
		dst += dskip;
		dlen -= dskip;
	}
	if (dlen % c->multiple)
		dlen = round_down(dlen, c->multiple);
	if (dlen < c->minimum) {
/* fall back to the aligned destination bounce buffer; also the retry
 * target when the hardware reports -ENOSPC on the direct buffer
 */
nospc:
		dst = ctx->dbounce;
		dlen = min(p->oremain, BOUNCE_BUFFER_SIZE);
		dlen = round_down(dlen, c->multiple);
		dskip = 0;
		pr_debug("using comp dbounce buffer, len %x\n", dlen);
	}
	if (dlen > c->maximum)
		dlen = c->maximum;

	tmplen = dlen;
	timeout = ktime_add_ms(ktime_get(), COMP_BUSY_TIMEOUT);
	do {
		dlen = tmplen; /* reset dlen, if we're retrying */
		ret = ctx->driver->compress(src, slen, dst, &dlen, ctx->wmem);
		/* possibly we should reduce the slen here, instead of
		 * retrying with the dbounce buffer?
		 */
		if (ret == -ENOSPC && dst != ctx->dbounce)
			goto nospc;
	} while (ret == -EBUSY && ktime_before(ktime_get(), timeout));
	if (ret)
		return ret;

	/* total bytes the decompressor must skip before the group data */
	dskip += hdrsize;

	if (dst == ctx->dbounce)
		memcpy(p->out + dskip, dst, dlen);

	g->padding = cpu_to_be16(dskip);
	g->compressed_length = cpu_to_be32(dlen);
	g->uncompressed_length = cpu_to_be32(slen);

	/* slen was padded beyond the real input; report the pad bytes so
	 * decompression can drop them
	 */
	if (p->iremain < slen) {
		*ignore = slen - p->iremain;
		slen = p->iremain;
	}

	pr_debug("compress slen %x ignore %x dlen %x padding %x\n",
		 slen, *ignore, dlen, dskip);

	return update_param(p, slen, dskip + dlen);
}
248*4882a593Smuzhiyun 
/*
 * Compress @src into @dst using the NX-842 hardware, splitting the input
 * into up to NX842_CRYPTO_GROUP_MAX constraint-sized groups and, unless
 * the caller's buffers already satisfy every hardware constraint,
 * prepending an nx842_crypto_header describing the groups.
 *
 * On success *@dlen is updated to the total number of bytes written.
 *
 * Return: 0 on success, -ENOSPC if the output buffer or group count is
 * exhausted, or another negative errno from the hardware driver.
 */
int nx842_crypto_compress(struct crypto_tfm *tfm,
			  const u8 *src, unsigned int slen,
			  u8 *dst, unsigned int *dlen)
{
	struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
	struct nx842_crypto_header *hdr = &ctx->header;
	struct nx842_crypto_param p;
	struct nx842_constraints c = *ctx->driver->constraints;
	unsigned int groups, hdrsize, h;
	/* init ret: a zero-length input skips the group loop entirely, and
	 * ret would otherwise be read uninitialized after the loop
	 */
	int ret = 0, n;
	bool add_header;
	u16 ignore = 0;

	check_constraints(&c);

	p.in = (u8 *)src;
	p.iremain = slen;
	p.out = dst;
	p.oremain = *dlen;
	p.ototal = 0;

	*dlen = 0;

	/* worst-case group count for this input, to size the header */
	groups = min_t(unsigned int, NX842_CRYPTO_GROUP_MAX,
		       DIV_ROUND_UP(p.iremain, c.maximum));
	hdrsize = NX842_CRYPTO_HEADER_SIZE(groups);

	spin_lock_bh(&ctx->lock);

	/* skip adding header if the buffers meet all constraints */
	add_header = (p.iremain % c.multiple	||
		      p.iremain < c.minimum	||
		      p.iremain > c.maximum	||
		      (u64)p.in % c.alignment	||
		      p.oremain % c.multiple	||
		      p.oremain < c.minimum	||
		      p.oremain > c.maximum	||
		      (u64)p.out % c.alignment);

	hdr->magic = cpu_to_be16(NX842_CRYPTO_MAGIC);
	hdr->groups = 0;
	hdr->ignore = 0;

	while (p.iremain > 0) {
		n = hdr->groups++;
		ret = -ENOSPC;
		if (hdr->groups > NX842_CRYPTO_GROUP_MAX)
			goto unlock;

		/* header goes before first group */
		h = !n && add_header ? hdrsize : 0;

		if (ignore)
			pr_warn("internal error, ignore is set %x\n", ignore);

		ret = compress(ctx, &p, &hdr->group[n], &c, &ignore, h);
		if (ret)
			goto unlock;
	}

	if (!add_header && hdr->groups > 1) {
		pr_err("Internal error: No header but multiple groups\n");
		ret = -EINVAL;
		goto unlock;
	}

	/* ignore indicates the input stream needed to be padded */
	hdr->ignore = cpu_to_be16(ignore);
	if (ignore)
		pr_debug("marked %d bytes as ignore\n", ignore);

	if (add_header)
		ret = nx842_crypto_add_header(hdr, dst);
	if (ret)
		goto unlock;

	*dlen = p.ototal;

	pr_debug("compress total slen %x dlen %x\n", slen, *dlen);

unlock:
	spin_unlock_bh(&ctx->lock);
	return ret;
}
EXPORT_SYMBOL_GPL(nx842_crypto_compress);
334*4882a593Smuzhiyun 
/*
 * Decompress one group, trying the hardware first and falling back to
 * the software 842 library if the hardware fails or rejects the buffer.
 *
 * @ctx:    per-tfm context (driver ops, work memory, bounce buffers)
 * @p:      request cursor; advanced on success via update_param()
 * @g:      header group entry giving padding and compressed/uncompressed
 *          lengths for this group
 * @c:      hardware constraints, already capped by check_constraints()
 * @ignore: trailing pad bytes to drop from the decompressed output
 *          (nonzero only for the last group)
 *
 * Return: 0 on success, or a negative errno if both hardware and
 * software decompression fail or the cursor bookkeeping overflows.
 */
static int decompress(struct nx842_crypto_ctx *ctx,
		      struct nx842_crypto_param *p,
		      struct nx842_crypto_header_group *g,
		      struct nx842_constraints *c,
		      u16 ignore)
{
	unsigned int slen = be32_to_cpu(g->compressed_length);
	unsigned int required_len = be32_to_cpu(g->uncompressed_length);
	unsigned int dlen = p->oremain, tmplen;
	unsigned int adj_slen = slen;
	u8 *src = p->in, *dst = p->out;
	u16 padding = be16_to_cpu(g->padding);
	int ret, spadding = 0;
	ktime_t timeout;

	if (!slen || !required_len)
		return -EINVAL;

	if (p->iremain <= 0 || padding + slen > p->iremain)
		return -EOVERFLOW;

	if (p->oremain <= 0 || required_len - ignore > p->oremain)
		return -ENOSPC;

	/* skip the header/alignment padding recorded by compress() */
	src += padding;

	/* adjust the input length up to satisfy multiple/minimum; inputs
	 * over the hardware maximum go straight to the software library
	 */
	if (slen % c->multiple)
		adj_slen = round_up(slen, c->multiple);
	if (slen < c->minimum)
		adj_slen = c->minimum;
	if (slen > c->maximum)
		goto usesw;
	if (slen < adj_slen || (u64)src % c->alignment) {
		/* we can append padding bytes because the 842 format defines
		 * an "end" template (see lib/842/842_decompress.c) and will
		 * ignore any bytes following it.
		 */
		if (slen < adj_slen)
			memset(ctx->sbounce + slen, 0, adj_slen - slen);
		memcpy(ctx->sbounce, src, slen);
		src = ctx->sbounce;
		spadding = adj_slen - slen;
		slen = adj_slen;
		pr_debug("using decomp sbounce buffer, len %x\n", slen);
	}

	/* stage the output through the bounce buffer if the destination is
	 * too small (ignore bytes) or misaligned
	 */
	if (dlen % c->multiple)
		dlen = round_down(dlen, c->multiple);
	if (dlen < required_len || (u64)dst % c->alignment) {
		dst = ctx->dbounce;
		dlen = min(required_len, BOUNCE_BUFFER_SIZE);
		pr_debug("using decomp dbounce buffer, len %x\n", dlen);
	}
	if (dlen < c->minimum)
		goto usesw;
	if (dlen > c->maximum)
		dlen = c->maximum;

	/* retry on -EBUSY until the (short) decompression timeout expires */
	tmplen = dlen;
	timeout = ktime_add_ms(ktime_get(), DECOMP_BUSY_TIMEOUT);
	do {
		dlen = tmplen; /* reset dlen, if we're retrying */
		ret = ctx->driver->decompress(src, slen, dst, &dlen, ctx->wmem);
	} while (ret == -EBUSY && ktime_before(ktime_get(), timeout));
	if (ret) {
usesw:
		/* reset everything, sw doesn't have constraints */
		src = p->in + padding;
		slen = be32_to_cpu(g->compressed_length);
		spadding = 0;
		dst = p->out;
		dlen = p->oremain;
		if (dlen < required_len) { /* have ignore bytes */
			dst = ctx->dbounce;
			dlen = BOUNCE_BUFFER_SIZE;
		}
		pr_info_ratelimited("using software 842 decompression\n");
		ret = sw842_decompress(src, slen, dst, &dlen);
	}
	if (ret)
		return ret;

	/* don't count the pad bytes we appended as consumed input */
	slen -= spadding;

	dlen -= ignore;
	if (ignore)
		pr_debug("ignoring last %x bytes\n", ignore);

	if (dst == ctx->dbounce)
		memcpy(p->out, dst, dlen);

	pr_debug("decompress slen %x padding %x dlen %x ignore %x\n",
		 slen, padding, dlen, ignore);

	return update_param(p, slen + padding, dlen);
}
431*4882a593Smuzhiyun 
/*
 * Decompress @src into @dst.  If the buffer starts with our header magic
 * the header is parsed and each group is decompressed in turn; otherwise
 * the whole buffer is treated as a raw 842 stream and handed to
 * decompress() as a single synthetic group.
 *
 * On success *@dlen is updated to the total number of bytes produced.
 *
 * Return: 0 on success, -EINVAL for a malformed header, -EOVERFLOW if the
 * input is shorter than its header claims, or a negative errno from
 * decompress().
 */
int nx842_crypto_decompress(struct crypto_tfm *tfm,
			    const u8 *src, unsigned int slen,
			    u8 *dst, unsigned int *dlen)
{
	struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
	struct nx842_crypto_header *hdr;
	struct nx842_crypto_param p;
	struct nx842_constraints c = *ctx->driver->constraints;
	int n, ret, hdr_len;
	u16 ignore = 0;

	check_constraints(&c);

	p.in = (u8 *)src;
	p.iremain = slen;
	p.out = dst;
	p.oremain = *dlen;
	p.ototal = 0;

	*dlen = 0;

	/* NOTE(review): hdr->magic is read below before slen is checked
	 * against the header size; a buffer shorter than 2 bytes would be
	 * over-read here — confirm callers guarantee a minimum length.
	 */
	hdr = (struct nx842_crypto_header *)src;

	spin_lock_bh(&ctx->lock);

	/* If it doesn't start with our header magic number, assume it's a raw
	 * 842 compressed buffer and pass it directly to the hardware driver
	 */
	if (be16_to_cpu(hdr->magic) != NX842_CRYPTO_MAGIC) {
		/* synthetic single group covering the whole buffer */
		struct nx842_crypto_header_group g = {
			.padding =		0,
			.compressed_length =	cpu_to_be32(p.iremain),
			.uncompressed_length =	cpu_to_be32(p.oremain),
		};

		ret = decompress(ctx, &p, &g, &c, 0);
		if (ret)
			goto unlock;

		goto success;
	}

	if (!hdr->groups) {
		pr_err("header has no groups\n");
		ret = -EINVAL;
		goto unlock;
	}
	if (hdr->groups > NX842_CRYPTO_GROUP_MAX) {
		pr_err("header has too many groups %x, max %x\n",
		       hdr->groups, NX842_CRYPTO_GROUP_MAX);
		ret = -EINVAL;
		goto unlock;
	}

	hdr_len = NX842_CRYPTO_HEADER_SIZE(hdr->groups);
	if (hdr_len > slen) {
		ret = -EOVERFLOW;
		goto unlock;
	}

	/* copy the header out of the input so group entries stay stable
	 * while decompress() advances p.in past them
	 */
	memcpy(&ctx->header, src, hdr_len);
	hdr = &ctx->header;

	for (n = 0; n < hdr->groups; n++) {
		/* ignore applies to last group */
		if (n + 1 == hdr->groups)
			ignore = be16_to_cpu(hdr->ignore);

		ret = decompress(ctx, &p, &hdr->group[n], &c, ignore);
		if (ret)
			goto unlock;
	}

success:
	*dlen = p.ototal;

	pr_debug("decompress total slen %x dlen %x\n", slen, *dlen);

	ret = 0;

unlock:
	spin_unlock_bh(&ctx->lock);

	return ret;
}
EXPORT_SYMBOL_GPL(nx842_crypto_decompress);
518*4882a593Smuzhiyun 
519*4882a593Smuzhiyun MODULE_LICENSE("GPL");
520*4882a593Smuzhiyun MODULE_DESCRIPTION("IBM PowerPC Nest (NX) 842 Hardware Compression Driver");
521*4882a593Smuzhiyun MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");
522