xref: /OK3568_Linux_fs/kernel/drivers/crypto/padlock-sha.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-or-later
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun  * Cryptographic API.
4*4882a593Smuzhiyun  *
5*4882a593Smuzhiyun  * Support for VIA PadLock hardware crypto engine.
6*4882a593Smuzhiyun  *
7*4882a593Smuzhiyun  * Copyright (c) 2006  Michal Ludvig <michal@logix.cz>
8*4882a593Smuzhiyun  */
9*4882a593Smuzhiyun 
10*4882a593Smuzhiyun #include <crypto/internal/hash.h>
11*4882a593Smuzhiyun #include <crypto/padlock.h>
12*4882a593Smuzhiyun #include <crypto/sha.h>
13*4882a593Smuzhiyun #include <linux/err.h>
14*4882a593Smuzhiyun #include <linux/module.h>
15*4882a593Smuzhiyun #include <linux/init.h>
16*4882a593Smuzhiyun #include <linux/errno.h>
17*4882a593Smuzhiyun #include <linux/interrupt.h>
18*4882a593Smuzhiyun #include <linux/kernel.h>
19*4882a593Smuzhiyun #include <linux/scatterlist.h>
20*4882a593Smuzhiyun #include <asm/cpu_device_id.h>
21*4882a593Smuzhiyun #include <asm/fpu/api.h>
22*4882a593Smuzhiyun 
23*4882a593Smuzhiyun struct padlock_sha_desc {
24*4882a593Smuzhiyun 	struct shash_desc fallback;
25*4882a593Smuzhiyun };
26*4882a593Smuzhiyun 
/*
 * Per-transform context of the fallback-backed algorithms: the fallback
 * transform allocated in padlock_init_tfm() and freed in padlock_exit_tfm().
 */
struct padlock_sha_ctx {
	struct crypto_shash	*fallback;	/* fallback transform */
};
30*4882a593Smuzhiyun 
padlock_sha_init(struct shash_desc * desc)31*4882a593Smuzhiyun static int padlock_sha_init(struct shash_desc *desc)
32*4882a593Smuzhiyun {
33*4882a593Smuzhiyun 	struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
34*4882a593Smuzhiyun 	struct padlock_sha_ctx *ctx = crypto_shash_ctx(desc->tfm);
35*4882a593Smuzhiyun 
36*4882a593Smuzhiyun 	dctx->fallback.tfm = ctx->fallback;
37*4882a593Smuzhiyun 	return crypto_shash_init(&dctx->fallback);
38*4882a593Smuzhiyun }
39*4882a593Smuzhiyun 
padlock_sha_update(struct shash_desc * desc,const u8 * data,unsigned int length)40*4882a593Smuzhiyun static int padlock_sha_update(struct shash_desc *desc,
41*4882a593Smuzhiyun 			      const u8 *data, unsigned int length)
42*4882a593Smuzhiyun {
43*4882a593Smuzhiyun 	struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
44*4882a593Smuzhiyun 
45*4882a593Smuzhiyun 	return crypto_shash_update(&dctx->fallback, data, length);
46*4882a593Smuzhiyun }
47*4882a593Smuzhiyun 
padlock_sha_export(struct shash_desc * desc,void * out)48*4882a593Smuzhiyun static int padlock_sha_export(struct shash_desc *desc, void *out)
49*4882a593Smuzhiyun {
50*4882a593Smuzhiyun 	struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
51*4882a593Smuzhiyun 
52*4882a593Smuzhiyun 	return crypto_shash_export(&dctx->fallback, out);
53*4882a593Smuzhiyun }
54*4882a593Smuzhiyun 
padlock_sha_import(struct shash_desc * desc,const void * in)55*4882a593Smuzhiyun static int padlock_sha_import(struct shash_desc *desc, const void *in)
56*4882a593Smuzhiyun {
57*4882a593Smuzhiyun 	struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
58*4882a593Smuzhiyun 	struct padlock_sha_ctx *ctx = crypto_shash_ctx(desc->tfm);
59*4882a593Smuzhiyun 
60*4882a593Smuzhiyun 	dctx->fallback.tfm = ctx->fallback;
61*4882a593Smuzhiyun 	return crypto_shash_import(&dctx->fallback, in);
62*4882a593Smuzhiyun }
63*4882a593Smuzhiyun 
/*
 * Copy @count 32-bit words from @src to @dst, byte-swapping each word on
 * the way (swab32).  Words are processed in ascending order.
 */
static inline void padlock_output_block(uint32_t *src,
					uint32_t *dst, size_t count)
{
	size_t i;

	for (i = 0; i < count; i++)
		dst[i] = swab32(src[i]);
}
70*4882a593Smuzhiyun 
/*
 * Hash the last @count bytes at @in and write the SHA-1 digest to @out.
 *
 * The running state is taken from the fallback descriptor.  Input is first
 * brought to a block boundary (either by pushing bytes through the fallback
 * or by completing the exported partial block), then the rest is handed to
 * the "rep xsha1" instruction.
 *
 * Returns 0 on success or the error reported by the fallback.
 */
static int padlock_sha1_finup(struct shash_desc *desc, const u8 *in,
			      unsigned int count, u8 *out)
{
	/*
	 * The engine cannot store directly to *out as it may be unaligned.
	 * Do not shrink this buffer below 128 bytes: the PadLock microcode
	 * needs it that big.
	 */
	char scratch[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
		((aligned(STACK_ALIGN)));
	char *result = PTR_ALIGN(&scratch[0], PADLOCK_ALIGNMENT);
	struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
	struct shash_desc *fb = &dctx->fallback;
	struct sha1_state state;
	unsigned int buffered;
	unsigned int room;
	int err;

	err = crypto_shash_export(fb, &state);
	if (err)
		return err;

	/*
	 * The total length is passed to the instruction as an unsigned long;
	 * if it does not fit, let the fallback finish the hash.
	 */
	if (state.count + count > ULONG_MAX)
		return crypto_shash_finup(fb, in, count, out);

	/*
	 * Bytes occupying the current block.  A count that is a multiple of
	 * the block size (including zero) yields a full block, i.e. room == 0.
	 */
	buffered = ((state.count - 1) & (SHA1_BLOCK_SIZE - 1)) + 1;
	room = SHA1_BLOCK_SIZE - buffered;

	if (room && count > room) {
		/* Fill the partial block via the fallback, then re-export. */
		err = crypto_shash_update(fb, in, room);
		if (!err)
			err = crypto_shash_export(fb, &state);
		if (err)
			return err;
		in += room;
		count -= room;
	} else if (room) {
		/*
		 * Everything fits in the partial block: append the input to
		 * the exported buffer, hash from there, and round the byte
		 * count down to the start of that block.
		 */
		memcpy(state.buffer + buffered, in, count);
		in = state.buffer;
		count += buffered;
		state.count &= ~(SHA1_BLOCK_SIZE - 1);
	}

	memcpy(result, &state.state, SHA1_DIGEST_SIZE);

	asm volatile (".byte 0xf3,0x0f,0xa6,0xc8" /* rep xsha1 */
		      : \
		      : "c"((unsigned long)state.count + count), \
			"a"((unsigned long)state.count), \
			"S"(in), "D"(result));

	/* Byte-swap the five state words into the caller's buffer. */
	padlock_output_block((uint32_t *)result, (uint32_t *)out, 5);

	return err;
}
124*4882a593Smuzhiyun 
/*
 * Finish a SHA-1 hash without further input: delegates to
 * padlock_sha1_finup() with a zero-length input.  The small local array
 * only provides a valid pointer for the @in argument.
 */
static int padlock_sha1_final(struct shash_desc *desc, u8 *out)
{
	u8 dummy[4];

	return padlock_sha1_finup(desc, dummy, 0, out);
}
131*4882a593Smuzhiyun 
/*
 * Hash the last @count bytes at @in and write the SHA-256 digest to @out.
 *
 * The running state is taken from the fallback descriptor.  Input is first
 * brought to a block boundary (either by pushing bytes through the fallback
 * or by completing the exported partial block), then the rest is handed to
 * the "rep xsha256" instruction.
 *
 * Returns 0 on success or the error reported by the fallback.
 */
static int padlock_sha256_finup(struct shash_desc *desc, const u8 *in,
				unsigned int count, u8 *out)
{
	/*
	 * The engine cannot store directly to *out as it may be unaligned.
	 * Do not shrink this buffer below 128 bytes: the PadLock microcode
	 * needs it that big.
	 */
	char buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
		((aligned(STACK_ALIGN)));
	char *result = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
	struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
	struct sha256_state state;
	unsigned int space;
	unsigned int leftover;
	int err;

	err = crypto_shash_export(&dctx->fallback, &state);
	if (err)
		goto out;

	/*
	 * The total length is passed to the instruction as an unsigned long;
	 * if it does not fit, let the fallback finish the hash.
	 */
	if (state.count + count > ULONG_MAX)
		return crypto_shash_finup(&dctx->fallback, in, count, out);

	/*
	 * Bytes occupying the current block.  A count that is a multiple of
	 * the block size (including zero) yields a full block, i.e. space == 0.
	 */
	leftover = ((state.count - 1) & (SHA256_BLOCK_SIZE - 1)) + 1;
	space = SHA256_BLOCK_SIZE - leftover;
	if (space) {
		if (count > space) {
			/* Fill the partial block via the fallback, re-export. */
			err = crypto_shash_update(&dctx->fallback, in, space) ?:
			      crypto_shash_export(&dctx->fallback, &state);
			if (err)
				goto out;
			count -= space;
			in += space;
		} else {
			/*
			 * Everything fits in the partial block: append the
			 * input to the exported buffer, hash from there, and
			 * round the byte count down to the start of that
			 * SHA-256 block.
			 */
			memcpy(state.buf + leftover, in, count);
			in = state.buf;
			count += leftover;
			state.count &= ~(SHA256_BLOCK_SIZE - 1);
		}
	}

	memcpy(result, &state.state, SHA256_DIGEST_SIZE);

	asm volatile (".byte 0xf3,0x0f,0xa6,0xd0" /* rep xsha256 */
		      : \
		      : "c"((unsigned long)state.count + count), \
			"a"((unsigned long)state.count), \
			"S"(in), "D"(result));

	/* Byte-swap the eight state words into the caller's buffer. */
	padlock_output_block((uint32_t *)result, (uint32_t *)out, 8);

out:
	return err;
}
185*4882a593Smuzhiyun 
/*
 * Finish a SHA-256 hash without further input: delegates to
 * padlock_sha256_finup() with a zero-length input.  The small local array
 * only provides a valid pointer for the @in argument.
 */
static int padlock_sha256_final(struct shash_desc *desc, u8 *out)
{
	u8 dummy[4];

	return padlock_sha256_finup(desc, dummy, 0, out);
}
192*4882a593Smuzhiyun 
padlock_init_tfm(struct crypto_shash * hash)193*4882a593Smuzhiyun static int padlock_init_tfm(struct crypto_shash *hash)
194*4882a593Smuzhiyun {
195*4882a593Smuzhiyun 	const char *fallback_driver_name = crypto_shash_alg_name(hash);
196*4882a593Smuzhiyun 	struct padlock_sha_ctx *ctx = crypto_shash_ctx(hash);
197*4882a593Smuzhiyun 	struct crypto_shash *fallback_tfm;
198*4882a593Smuzhiyun 
199*4882a593Smuzhiyun 	/* Allocate a fallback and abort if it failed. */
200*4882a593Smuzhiyun 	fallback_tfm = crypto_alloc_shash(fallback_driver_name, 0,
201*4882a593Smuzhiyun 					  CRYPTO_ALG_NEED_FALLBACK);
202*4882a593Smuzhiyun 	if (IS_ERR(fallback_tfm)) {
203*4882a593Smuzhiyun 		printk(KERN_WARNING PFX "Fallback driver '%s' could not be loaded!\n",
204*4882a593Smuzhiyun 		       fallback_driver_name);
205*4882a593Smuzhiyun 		return PTR_ERR(fallback_tfm);
206*4882a593Smuzhiyun 	}
207*4882a593Smuzhiyun 
208*4882a593Smuzhiyun 	ctx->fallback = fallback_tfm;
209*4882a593Smuzhiyun 	hash->descsize += crypto_shash_descsize(fallback_tfm);
210*4882a593Smuzhiyun 	return 0;
211*4882a593Smuzhiyun }
212*4882a593Smuzhiyun 
padlock_exit_tfm(struct crypto_shash * hash)213*4882a593Smuzhiyun static void padlock_exit_tfm(struct crypto_shash *hash)
214*4882a593Smuzhiyun {
215*4882a593Smuzhiyun 	struct padlock_sha_ctx *ctx = crypto_shash_ctx(hash);
216*4882a593Smuzhiyun 
217*4882a593Smuzhiyun 	crypto_free_shash(ctx->fallback);
218*4882a593Smuzhiyun }
219*4882a593Smuzhiyun 
220*4882a593Smuzhiyun static struct shash_alg sha1_alg = {
221*4882a593Smuzhiyun 	.digestsize	=	SHA1_DIGEST_SIZE,
222*4882a593Smuzhiyun 	.init   	= 	padlock_sha_init,
223*4882a593Smuzhiyun 	.update 	=	padlock_sha_update,
224*4882a593Smuzhiyun 	.finup  	=	padlock_sha1_finup,
225*4882a593Smuzhiyun 	.final  	=	padlock_sha1_final,
226*4882a593Smuzhiyun 	.export		=	padlock_sha_export,
227*4882a593Smuzhiyun 	.import		=	padlock_sha_import,
228*4882a593Smuzhiyun 	.init_tfm	=	padlock_init_tfm,
229*4882a593Smuzhiyun 	.exit_tfm	=	padlock_exit_tfm,
230*4882a593Smuzhiyun 	.descsize	=	sizeof(struct padlock_sha_desc),
231*4882a593Smuzhiyun 	.statesize	=	sizeof(struct sha1_state),
232*4882a593Smuzhiyun 	.base		=	{
233*4882a593Smuzhiyun 		.cra_name		=	"sha1",
234*4882a593Smuzhiyun 		.cra_driver_name	=	"sha1-padlock",
235*4882a593Smuzhiyun 		.cra_priority		=	PADLOCK_CRA_PRIORITY,
236*4882a593Smuzhiyun 		.cra_flags		=	CRYPTO_ALG_NEED_FALLBACK,
237*4882a593Smuzhiyun 		.cra_blocksize		=	SHA1_BLOCK_SIZE,
238*4882a593Smuzhiyun 		.cra_ctxsize		=	sizeof(struct padlock_sha_ctx),
239*4882a593Smuzhiyun 		.cra_module		=	THIS_MODULE,
240*4882a593Smuzhiyun 	}
241*4882a593Smuzhiyun };
242*4882a593Smuzhiyun 
243*4882a593Smuzhiyun static struct shash_alg sha256_alg = {
244*4882a593Smuzhiyun 	.digestsize	=	SHA256_DIGEST_SIZE,
245*4882a593Smuzhiyun 	.init   	= 	padlock_sha_init,
246*4882a593Smuzhiyun 	.update 	=	padlock_sha_update,
247*4882a593Smuzhiyun 	.finup  	=	padlock_sha256_finup,
248*4882a593Smuzhiyun 	.final  	=	padlock_sha256_final,
249*4882a593Smuzhiyun 	.export		=	padlock_sha_export,
250*4882a593Smuzhiyun 	.import		=	padlock_sha_import,
251*4882a593Smuzhiyun 	.init_tfm	=	padlock_init_tfm,
252*4882a593Smuzhiyun 	.exit_tfm	=	padlock_exit_tfm,
253*4882a593Smuzhiyun 	.descsize	=	sizeof(struct padlock_sha_desc),
254*4882a593Smuzhiyun 	.statesize	=	sizeof(struct sha256_state),
255*4882a593Smuzhiyun 	.base		=	{
256*4882a593Smuzhiyun 		.cra_name		=	"sha256",
257*4882a593Smuzhiyun 		.cra_driver_name	=	"sha256-padlock",
258*4882a593Smuzhiyun 		.cra_priority		=	PADLOCK_CRA_PRIORITY,
259*4882a593Smuzhiyun 		.cra_flags		=	CRYPTO_ALG_NEED_FALLBACK,
260*4882a593Smuzhiyun 		.cra_blocksize		=	SHA256_BLOCK_SIZE,
261*4882a593Smuzhiyun 		.cra_ctxsize		=	sizeof(struct padlock_sha_ctx),
262*4882a593Smuzhiyun 		.cra_module		=	THIS_MODULE,
263*4882a593Smuzhiyun 	}
264*4882a593Smuzhiyun };
265*4882a593Smuzhiyun 
/*
 * Two additional shash_alg instances for the hardware-implemented
 * multi-part hashing supported by the VIA Nano processor.
 */
padlock_sha1_init_nano(struct shash_desc * desc)268*4882a593Smuzhiyun static int padlock_sha1_init_nano(struct shash_desc *desc)
269*4882a593Smuzhiyun {
270*4882a593Smuzhiyun 	struct sha1_state *sctx = shash_desc_ctx(desc);
271*4882a593Smuzhiyun 
272*4882a593Smuzhiyun 	*sctx = (struct sha1_state){
273*4882a593Smuzhiyun 		.state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
274*4882a593Smuzhiyun 	};
275*4882a593Smuzhiyun 
276*4882a593Smuzhiyun 	return 0;
277*4882a593Smuzhiyun }
278*4882a593Smuzhiyun 
padlock_sha1_update_nano(struct shash_desc * desc,const u8 * data,unsigned int len)279*4882a593Smuzhiyun static int padlock_sha1_update_nano(struct shash_desc *desc,
280*4882a593Smuzhiyun 			const u8 *data,	unsigned int len)
281*4882a593Smuzhiyun {
282*4882a593Smuzhiyun 	struct sha1_state *sctx = shash_desc_ctx(desc);
283*4882a593Smuzhiyun 	unsigned int partial, done;
284*4882a593Smuzhiyun 	const u8 *src;
285*4882a593Smuzhiyun 	/*The PHE require the out buffer must 128 bytes and 16-bytes aligned*/
286*4882a593Smuzhiyun 	u8 buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
287*4882a593Smuzhiyun 		((aligned(STACK_ALIGN)));
288*4882a593Smuzhiyun 	u8 *dst = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
289*4882a593Smuzhiyun 
290*4882a593Smuzhiyun 	partial = sctx->count & 0x3f;
291*4882a593Smuzhiyun 	sctx->count += len;
292*4882a593Smuzhiyun 	done = 0;
293*4882a593Smuzhiyun 	src = data;
294*4882a593Smuzhiyun 	memcpy(dst, (u8 *)(sctx->state), SHA1_DIGEST_SIZE);
295*4882a593Smuzhiyun 
296*4882a593Smuzhiyun 	if ((partial + len) >= SHA1_BLOCK_SIZE) {
297*4882a593Smuzhiyun 
298*4882a593Smuzhiyun 		/* Append the bytes in state's buffer to a block to handle */
299*4882a593Smuzhiyun 		if (partial) {
300*4882a593Smuzhiyun 			done = -partial;
301*4882a593Smuzhiyun 			memcpy(sctx->buffer + partial, data,
302*4882a593Smuzhiyun 				done + SHA1_BLOCK_SIZE);
303*4882a593Smuzhiyun 			src = sctx->buffer;
304*4882a593Smuzhiyun 			asm volatile (".byte 0xf3,0x0f,0xa6,0xc8"
305*4882a593Smuzhiyun 			: "+S"(src), "+D"(dst) \
306*4882a593Smuzhiyun 			: "a"((long)-1), "c"((unsigned long)1));
307*4882a593Smuzhiyun 			done += SHA1_BLOCK_SIZE;
308*4882a593Smuzhiyun 			src = data + done;
309*4882a593Smuzhiyun 		}
310*4882a593Smuzhiyun 
311*4882a593Smuzhiyun 		/* Process the left bytes from the input data */
312*4882a593Smuzhiyun 		if (len - done >= SHA1_BLOCK_SIZE) {
313*4882a593Smuzhiyun 			asm volatile (".byte 0xf3,0x0f,0xa6,0xc8"
314*4882a593Smuzhiyun 			: "+S"(src), "+D"(dst)
315*4882a593Smuzhiyun 			: "a"((long)-1),
316*4882a593Smuzhiyun 			"c"((unsigned long)((len - done) / SHA1_BLOCK_SIZE)));
317*4882a593Smuzhiyun 			done += ((len - done) - (len - done) % SHA1_BLOCK_SIZE);
318*4882a593Smuzhiyun 			src = data + done;
319*4882a593Smuzhiyun 		}
320*4882a593Smuzhiyun 		partial = 0;
321*4882a593Smuzhiyun 	}
322*4882a593Smuzhiyun 	memcpy((u8 *)(sctx->state), dst, SHA1_DIGEST_SIZE);
323*4882a593Smuzhiyun 	memcpy(sctx->buffer + partial, src, len - done);
324*4882a593Smuzhiyun 
325*4882a593Smuzhiyun 	return 0;
326*4882a593Smuzhiyun }
327*4882a593Smuzhiyun 
/*
 * Finish a SHA-1 hash (Nano variant): append the 0x80/zero padding up to
 * 56 mod 64, append the big-endian bit length, then write the five
 * byte-swapped state words to @out.  Always returns 0.
 */
static int padlock_sha1_final_nano(struct shash_desc *desc, u8 *out)
{
	static const u8 padding[64] = { 0x80, };
	struct sha1_state *state = (struct sha1_state *)shash_desc_ctx(desc);
	/* Both values must be taken before the updates below change count. */
	__be64 bits = cpu_to_be64(state->count << 3);
	unsigned int partial = state->count & 0x3f;
	unsigned int padlen;

	/* Pad out to 56 mod 64 */
	if (partial < 56)
		padlen = 56 - partial;
	else
		padlen = (64+56) - partial;
	padlock_sha1_update_nano(desc, padding, padlen);

	/* Append length field bytes */
	padlock_sha1_update_nano(desc, (const u8 *)&bits, sizeof(bits));

	/* Swap to output */
	padlock_output_block((uint32_t *)(state->state), (uint32_t *)out, 5);

	return 0;
}
350*4882a593Smuzhiyun 
padlock_sha256_init_nano(struct shash_desc * desc)351*4882a593Smuzhiyun static int padlock_sha256_init_nano(struct shash_desc *desc)
352*4882a593Smuzhiyun {
353*4882a593Smuzhiyun 	struct sha256_state *sctx = shash_desc_ctx(desc);
354*4882a593Smuzhiyun 
355*4882a593Smuzhiyun 	*sctx = (struct sha256_state){
356*4882a593Smuzhiyun 		.state = { SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3, \
357*4882a593Smuzhiyun 				SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7},
358*4882a593Smuzhiyun 	};
359*4882a593Smuzhiyun 
360*4882a593Smuzhiyun 	return 0;
361*4882a593Smuzhiyun }
362*4882a593Smuzhiyun 
padlock_sha256_update_nano(struct shash_desc * desc,const u8 * data,unsigned int len)363*4882a593Smuzhiyun static int padlock_sha256_update_nano(struct shash_desc *desc, const u8 *data,
364*4882a593Smuzhiyun 			  unsigned int len)
365*4882a593Smuzhiyun {
366*4882a593Smuzhiyun 	struct sha256_state *sctx = shash_desc_ctx(desc);
367*4882a593Smuzhiyun 	unsigned int partial, done;
368*4882a593Smuzhiyun 	const u8 *src;
369*4882a593Smuzhiyun 	/*The PHE require the out buffer must 128 bytes and 16-bytes aligned*/
370*4882a593Smuzhiyun 	u8 buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
371*4882a593Smuzhiyun 		((aligned(STACK_ALIGN)));
372*4882a593Smuzhiyun 	u8 *dst = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
373*4882a593Smuzhiyun 
374*4882a593Smuzhiyun 	partial = sctx->count & 0x3f;
375*4882a593Smuzhiyun 	sctx->count += len;
376*4882a593Smuzhiyun 	done = 0;
377*4882a593Smuzhiyun 	src = data;
378*4882a593Smuzhiyun 	memcpy(dst, (u8 *)(sctx->state), SHA256_DIGEST_SIZE);
379*4882a593Smuzhiyun 
380*4882a593Smuzhiyun 	if ((partial + len) >= SHA256_BLOCK_SIZE) {
381*4882a593Smuzhiyun 
382*4882a593Smuzhiyun 		/* Append the bytes in state's buffer to a block to handle */
383*4882a593Smuzhiyun 		if (partial) {
384*4882a593Smuzhiyun 			done = -partial;
385*4882a593Smuzhiyun 			memcpy(sctx->buf + partial, data,
386*4882a593Smuzhiyun 				done + SHA256_BLOCK_SIZE);
387*4882a593Smuzhiyun 			src = sctx->buf;
388*4882a593Smuzhiyun 			asm volatile (".byte 0xf3,0x0f,0xa6,0xd0"
389*4882a593Smuzhiyun 			: "+S"(src), "+D"(dst)
390*4882a593Smuzhiyun 			: "a"((long)-1), "c"((unsigned long)1));
391*4882a593Smuzhiyun 			done += SHA256_BLOCK_SIZE;
392*4882a593Smuzhiyun 			src = data + done;
393*4882a593Smuzhiyun 		}
394*4882a593Smuzhiyun 
395*4882a593Smuzhiyun 		/* Process the left bytes from input data*/
396*4882a593Smuzhiyun 		if (len - done >= SHA256_BLOCK_SIZE) {
397*4882a593Smuzhiyun 			asm volatile (".byte 0xf3,0x0f,0xa6,0xd0"
398*4882a593Smuzhiyun 			: "+S"(src), "+D"(dst)
399*4882a593Smuzhiyun 			: "a"((long)-1),
400*4882a593Smuzhiyun 			"c"((unsigned long)((len - done) / 64)));
401*4882a593Smuzhiyun 			done += ((len - done) - (len - done) % 64);
402*4882a593Smuzhiyun 			src = data + done;
403*4882a593Smuzhiyun 		}
404*4882a593Smuzhiyun 		partial = 0;
405*4882a593Smuzhiyun 	}
406*4882a593Smuzhiyun 	memcpy((u8 *)(sctx->state), dst, SHA256_DIGEST_SIZE);
407*4882a593Smuzhiyun 	memcpy(sctx->buf + partial, src, len - done);
408*4882a593Smuzhiyun 
409*4882a593Smuzhiyun 	return 0;
410*4882a593Smuzhiyun }
411*4882a593Smuzhiyun 
/*
 * Finish a SHA-256 hash (Nano variant): append the 0x80/zero padding up
 * to 56 mod 64, append the big-endian bit length, then write the eight
 * byte-swapped state words to @out.  Always returns 0.
 */
static int padlock_sha256_final_nano(struct shash_desc *desc, u8 *out)
{
	static const u8 padding[64] = { 0x80, };
	struct sha256_state *state =
		(struct sha256_state *)shash_desc_ctx(desc);
	/* Both values must be taken before the updates below change count. */
	__be64 bits = cpu_to_be64(state->count << 3);
	unsigned int partial = state->count & 0x3f;
	unsigned int padlen;

	/* Pad out to 56 mod 64 */
	if (partial < 56)
		padlen = 56 - partial;
	else
		padlen = (64+56) - partial;
	padlock_sha256_update_nano(desc, padding, padlen);

	/* Append length field bytes */
	padlock_sha256_update_nano(desc, (const u8 *)&bits, sizeof(bits));

	/* Swap to output */
	padlock_output_block((uint32_t *)(state->state), (uint32_t *)out, 8);

	return 0;
}
435*4882a593Smuzhiyun 
padlock_sha_export_nano(struct shash_desc * desc,void * out)436*4882a593Smuzhiyun static int padlock_sha_export_nano(struct shash_desc *desc,
437*4882a593Smuzhiyun 				void *out)
438*4882a593Smuzhiyun {
439*4882a593Smuzhiyun 	int statesize = crypto_shash_statesize(desc->tfm);
440*4882a593Smuzhiyun 	void *sctx = shash_desc_ctx(desc);
441*4882a593Smuzhiyun 
442*4882a593Smuzhiyun 	memcpy(out, sctx, statesize);
443*4882a593Smuzhiyun 	return 0;
444*4882a593Smuzhiyun }
445*4882a593Smuzhiyun 
padlock_sha_import_nano(struct shash_desc * desc,const void * in)446*4882a593Smuzhiyun static int padlock_sha_import_nano(struct shash_desc *desc,
447*4882a593Smuzhiyun 				const void *in)
448*4882a593Smuzhiyun {
449*4882a593Smuzhiyun 	int statesize = crypto_shash_statesize(desc->tfm);
450*4882a593Smuzhiyun 	void *sctx = shash_desc_ctx(desc);
451*4882a593Smuzhiyun 
452*4882a593Smuzhiyun 	memcpy(sctx, in, statesize);
453*4882a593Smuzhiyun 	return 0;
454*4882a593Smuzhiyun }
455*4882a593Smuzhiyun 
456*4882a593Smuzhiyun static struct shash_alg sha1_alg_nano = {
457*4882a593Smuzhiyun 	.digestsize	=	SHA1_DIGEST_SIZE,
458*4882a593Smuzhiyun 	.init		=	padlock_sha1_init_nano,
459*4882a593Smuzhiyun 	.update		=	padlock_sha1_update_nano,
460*4882a593Smuzhiyun 	.final		=	padlock_sha1_final_nano,
461*4882a593Smuzhiyun 	.export		=	padlock_sha_export_nano,
462*4882a593Smuzhiyun 	.import		=	padlock_sha_import_nano,
463*4882a593Smuzhiyun 	.descsize	=	sizeof(struct sha1_state),
464*4882a593Smuzhiyun 	.statesize	=	sizeof(struct sha1_state),
465*4882a593Smuzhiyun 	.base		=	{
466*4882a593Smuzhiyun 		.cra_name		=	"sha1",
467*4882a593Smuzhiyun 		.cra_driver_name	=	"sha1-padlock-nano",
468*4882a593Smuzhiyun 		.cra_priority		=	PADLOCK_CRA_PRIORITY,
469*4882a593Smuzhiyun 		.cra_blocksize		=	SHA1_BLOCK_SIZE,
470*4882a593Smuzhiyun 		.cra_module		=	THIS_MODULE,
471*4882a593Smuzhiyun 	}
472*4882a593Smuzhiyun };
473*4882a593Smuzhiyun 
474*4882a593Smuzhiyun static struct shash_alg sha256_alg_nano = {
475*4882a593Smuzhiyun 	.digestsize	=	SHA256_DIGEST_SIZE,
476*4882a593Smuzhiyun 	.init		=	padlock_sha256_init_nano,
477*4882a593Smuzhiyun 	.update		=	padlock_sha256_update_nano,
478*4882a593Smuzhiyun 	.final		=	padlock_sha256_final_nano,
479*4882a593Smuzhiyun 	.export		=	padlock_sha_export_nano,
480*4882a593Smuzhiyun 	.import		=	padlock_sha_import_nano,
481*4882a593Smuzhiyun 	.descsize	=	sizeof(struct sha256_state),
482*4882a593Smuzhiyun 	.statesize	=	sizeof(struct sha256_state),
483*4882a593Smuzhiyun 	.base		=	{
484*4882a593Smuzhiyun 		.cra_name		=	"sha256",
485*4882a593Smuzhiyun 		.cra_driver_name	=	"sha256-padlock-nano",
486*4882a593Smuzhiyun 		.cra_priority		=	PADLOCK_CRA_PRIORITY,
487*4882a593Smuzhiyun 		.cra_blocksize		=	SHA256_BLOCK_SIZE,
488*4882a593Smuzhiyun 		.cra_module		=	THIS_MODULE,
489*4882a593Smuzhiyun 	}
490*4882a593Smuzhiyun };
491*4882a593Smuzhiyun 
492*4882a593Smuzhiyun static const struct x86_cpu_id padlock_sha_ids[] = {
493*4882a593Smuzhiyun 	X86_MATCH_FEATURE(X86_FEATURE_PHE, NULL),
494*4882a593Smuzhiyun 	{}
495*4882a593Smuzhiyun };
496*4882a593Smuzhiyun MODULE_DEVICE_TABLE(x86cpu, padlock_sha_ids);
497*4882a593Smuzhiyun 
padlock_init(void)498*4882a593Smuzhiyun static int __init padlock_init(void)
499*4882a593Smuzhiyun {
500*4882a593Smuzhiyun 	int rc = -ENODEV;
501*4882a593Smuzhiyun 	struct cpuinfo_x86 *c = &cpu_data(0);
502*4882a593Smuzhiyun 	struct shash_alg *sha1;
503*4882a593Smuzhiyun 	struct shash_alg *sha256;
504*4882a593Smuzhiyun 
505*4882a593Smuzhiyun 	if (!x86_match_cpu(padlock_sha_ids) || !boot_cpu_has(X86_FEATURE_PHE_EN))
506*4882a593Smuzhiyun 		return -ENODEV;
507*4882a593Smuzhiyun 
508*4882a593Smuzhiyun 	/* Register the newly added algorithm module if on *
509*4882a593Smuzhiyun 	* VIA Nano processor, or else just do as before */
510*4882a593Smuzhiyun 	if (c->x86_model < 0x0f) {
511*4882a593Smuzhiyun 		sha1 = &sha1_alg;
512*4882a593Smuzhiyun 		sha256 = &sha256_alg;
513*4882a593Smuzhiyun 	} else {
514*4882a593Smuzhiyun 		sha1 = &sha1_alg_nano;
515*4882a593Smuzhiyun 		sha256 = &sha256_alg_nano;
516*4882a593Smuzhiyun 	}
517*4882a593Smuzhiyun 
518*4882a593Smuzhiyun 	rc = crypto_register_shash(sha1);
519*4882a593Smuzhiyun 	if (rc)
520*4882a593Smuzhiyun 		goto out;
521*4882a593Smuzhiyun 
522*4882a593Smuzhiyun 	rc = crypto_register_shash(sha256);
523*4882a593Smuzhiyun 	if (rc)
524*4882a593Smuzhiyun 		goto out_unreg1;
525*4882a593Smuzhiyun 
526*4882a593Smuzhiyun 	printk(KERN_NOTICE PFX "Using VIA PadLock ACE for SHA1/SHA256 algorithms.\n");
527*4882a593Smuzhiyun 
528*4882a593Smuzhiyun 	return 0;
529*4882a593Smuzhiyun 
530*4882a593Smuzhiyun out_unreg1:
531*4882a593Smuzhiyun 	crypto_unregister_shash(sha1);
532*4882a593Smuzhiyun 
533*4882a593Smuzhiyun out:
534*4882a593Smuzhiyun 	printk(KERN_ERR PFX "VIA PadLock SHA1/SHA256 initialization failed.\n");
535*4882a593Smuzhiyun 	return rc;
536*4882a593Smuzhiyun }
537*4882a593Smuzhiyun 
padlock_fini(void)538*4882a593Smuzhiyun static void __exit padlock_fini(void)
539*4882a593Smuzhiyun {
540*4882a593Smuzhiyun 	struct cpuinfo_x86 *c = &cpu_data(0);
541*4882a593Smuzhiyun 
542*4882a593Smuzhiyun 	if (c->x86_model >= 0x0f) {
543*4882a593Smuzhiyun 		crypto_unregister_shash(&sha1_alg_nano);
544*4882a593Smuzhiyun 		crypto_unregister_shash(&sha256_alg_nano);
545*4882a593Smuzhiyun 	} else {
546*4882a593Smuzhiyun 		crypto_unregister_shash(&sha1_alg);
547*4882a593Smuzhiyun 		crypto_unregister_shash(&sha256_alg);
548*4882a593Smuzhiyun 	}
549*4882a593Smuzhiyun }
550*4882a593Smuzhiyun 
551*4882a593Smuzhiyun module_init(padlock_init);
552*4882a593Smuzhiyun module_exit(padlock_fini);
553*4882a593Smuzhiyun 
554*4882a593Smuzhiyun MODULE_DESCRIPTION("VIA PadLock SHA1/SHA256 algorithms support.");
555*4882a593Smuzhiyun MODULE_LICENSE("GPL");
556*4882a593Smuzhiyun MODULE_AUTHOR("Michal Ludvig");
557*4882a593Smuzhiyun 
558*4882a593Smuzhiyun MODULE_ALIAS_CRYPTO("sha1-all");
559*4882a593Smuzhiyun MODULE_ALIAS_CRYPTO("sha256-all");
560*4882a593Smuzhiyun MODULE_ALIAS_CRYPTO("sha1-padlock");
561*4882a593Smuzhiyun MODULE_ALIAS_CRYPTO("sha256-padlock");
562