1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-or-later
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun * Cryptographic API.
4*4882a593Smuzhiyun *
5*4882a593Smuzhiyun * Support for VIA PadLock hardware crypto engine.
6*4882a593Smuzhiyun *
7*4882a593Smuzhiyun * Copyright (c) 2006 Michal Ludvig <michal@logix.cz>
8*4882a593Smuzhiyun */
9*4882a593Smuzhiyun
10*4882a593Smuzhiyun #include <crypto/internal/hash.h>
11*4882a593Smuzhiyun #include <crypto/padlock.h>
12*4882a593Smuzhiyun #include <crypto/sha.h>
13*4882a593Smuzhiyun #include <linux/err.h>
14*4882a593Smuzhiyun #include <linux/module.h>
15*4882a593Smuzhiyun #include <linux/init.h>
16*4882a593Smuzhiyun #include <linux/errno.h>
17*4882a593Smuzhiyun #include <linux/interrupt.h>
18*4882a593Smuzhiyun #include <linux/kernel.h>
19*4882a593Smuzhiyun #include <linux/scatterlist.h>
20*4882a593Smuzhiyun #include <asm/cpu_device_id.h>
21*4882a593Smuzhiyun #include <asm/fpu/api.h>
22*4882a593Smuzhiyun
23*4882a593Smuzhiyun struct padlock_sha_desc {
24*4882a593Smuzhiyun struct shash_desc fallback;
25*4882a593Smuzhiyun };
26*4882a593Smuzhiyun
/*
 * Per-transform context of the fallback-assisted PadLock hashes.
 */
struct padlock_sha_ctx {
	/* Fallback transform, allocated in init_tfm and freed in exit_tfm. */
	struct crypto_shash *fallback;
};
30*4882a593Smuzhiyun
padlock_sha_init(struct shash_desc * desc)31*4882a593Smuzhiyun static int padlock_sha_init(struct shash_desc *desc)
32*4882a593Smuzhiyun {
33*4882a593Smuzhiyun struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
34*4882a593Smuzhiyun struct padlock_sha_ctx *ctx = crypto_shash_ctx(desc->tfm);
35*4882a593Smuzhiyun
36*4882a593Smuzhiyun dctx->fallback.tfm = ctx->fallback;
37*4882a593Smuzhiyun return crypto_shash_init(&dctx->fallback);
38*4882a593Smuzhiyun }
39*4882a593Smuzhiyun
padlock_sha_update(struct shash_desc * desc,const u8 * data,unsigned int length)40*4882a593Smuzhiyun static int padlock_sha_update(struct shash_desc *desc,
41*4882a593Smuzhiyun const u8 *data, unsigned int length)
42*4882a593Smuzhiyun {
43*4882a593Smuzhiyun struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
44*4882a593Smuzhiyun
45*4882a593Smuzhiyun return crypto_shash_update(&dctx->fallback, data, length);
46*4882a593Smuzhiyun }
47*4882a593Smuzhiyun
padlock_sha_export(struct shash_desc * desc,void * out)48*4882a593Smuzhiyun static int padlock_sha_export(struct shash_desc *desc, void *out)
49*4882a593Smuzhiyun {
50*4882a593Smuzhiyun struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
51*4882a593Smuzhiyun
52*4882a593Smuzhiyun return crypto_shash_export(&dctx->fallback, out);
53*4882a593Smuzhiyun }
54*4882a593Smuzhiyun
padlock_sha_import(struct shash_desc * desc,const void * in)55*4882a593Smuzhiyun static int padlock_sha_import(struct shash_desc *desc, const void *in)
56*4882a593Smuzhiyun {
57*4882a593Smuzhiyun struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
58*4882a593Smuzhiyun struct padlock_sha_ctx *ctx = crypto_shash_ctx(desc->tfm);
59*4882a593Smuzhiyun
60*4882a593Smuzhiyun dctx->fallback.tfm = ctx->fallback;
61*4882a593Smuzhiyun return crypto_shash_import(&dctx->fallback, in);
62*4882a593Smuzhiyun }
63*4882a593Smuzhiyun
/*
 * Copy @count 32-bit words from @src to @dst, byte-swapping each word
 * with swab32() on the way.
 */
static inline void padlock_output_block(uint32_t *src,
					uint32_t *dst, size_t count)
{
	size_t i;

	for (i = 0; i < count; i++)
		dst[i] = swab32(src[i]);
}
70*4882a593Smuzhiyun
/*
 * Finish a SHA-1 hash using the PadLock "rep xsha1" instruction.
 *
 * The running state is exported from the fallback, bytes still buffered
 * there are combined with @in, and the instruction is then given the
 * chaining value plus the remaining input.  No padding is appended here;
 * presumably the instruction applies the final padding itself -- confirm
 * against the PadLock documentation.
 *
 * @desc:  hash descriptor holding the fallback descriptor
 * @in:    trailing input data
 * @count: number of bytes at @in
 * @out:   receives the SHA1_DIGEST_SIZE byte digest
 *
 * Returns 0 on success or the error reported by the fallback.
 */
static int padlock_sha1_finup(struct shash_desc *desc, const u8 *in,
			      unsigned int count, u8 *out)
{
	/* We can't store directly to *out as it may be unaligned. */
	/* BTW Don't reduce the buffer size below 128 Bytes!
	 *     PadLock microcode needs it that big. */
	char buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
		((aligned(STACK_ALIGN)));
	char *result = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
	struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
	struct sha1_state state;
	unsigned int space;
	unsigned int leftover;
	int err;

	err = crypto_shash_export(&dctx->fallback, &state);
	if (err)
		return err;

	/* The byte counts are passed as unsigned long; fall back if too big. */
	if (state.count + count > ULONG_MAX)
		return crypto_shash_finup(&dctx->fallback, in, count, out);

	/*
	 * leftover is state.count modulo the block size, except that an exact
	 * multiple (including 0) yields a full block so that space becomes 0.
	 */
	leftover = ((state.count - 1) & (SHA1_BLOCK_SIZE - 1)) + 1;
	space = SHA1_BLOCK_SIZE - leftover;
	if (space) {
		if (count > space) {
			/* Complete the buffered block in the fallback. */
			err = crypto_shash_update(&dctx->fallback, in, space) ?:
			      crypto_shash_export(&dctx->fallback, &state);
			if (err)
				return err;
			count -= space;
			in += space;
		} else {
			/*
			 * Everything fits in one block: append @in to the
			 * buffered bytes and hash from the state buffer,
			 * with the count rounded down to the block start.
			 */
			memcpy(state.buffer + leftover, in, count);
			in = state.buffer;
			count += leftover;
			state.count &= ~(SHA1_BLOCK_SIZE - 1);
		}
	}

	memcpy(result, &state.state, SHA1_DIGEST_SIZE);

	/*
	 * ECX = state.count + count, EAX = state.count, ESI = input,
	 * EDI = chaining value / result buffer.  The instruction reads the
	 * data at @in and rewrites the buffer at result, neither of which is
	 * a memory operand, hence the "memory" clobber: it keeps the compiler
	 * from caching or reordering those accesses around the asm.
	 */
	asm volatile (".byte 0xf3,0x0f,0xa6,0xc8" /* rep xsha1 */
		      :
		      : "c"((unsigned long)state.count + count),
			"a"((unsigned long)state.count),
			"S"(in), "D"(result)
		      : "memory");

	padlock_output_block((uint32_t *)result, (uint32_t *)out, 5);

	return 0;
}
124*4882a593Smuzhiyun
/*
 * Finalise a SHA-1 hash with no further input by delegating to
 * padlock_sha1_finup() with a zero length.
 */
static int padlock_sha1_final(struct shash_desc *desc, u8 *out)
{
	/* Placeholder input pointer; the length passed along is 0. */
	u8 dummy[4];

	return padlock_sha1_finup(desc, dummy, 0, out);
}
131*4882a593Smuzhiyun
/*
 * Finish a SHA-256 hash using the PadLock "rep xsha256" instruction.
 *
 * The running state is exported from the fallback, bytes still buffered
 * there are combined with @in, and the instruction is then given the
 * chaining value plus the remaining input.  No padding is appended here;
 * presumably the instruction applies the final padding itself -- confirm
 * against the PadLock documentation.
 *
 * @desc:  hash descriptor holding the fallback descriptor
 * @in:    trailing input data
 * @count: number of bytes at @in
 * @out:   receives the SHA256_DIGEST_SIZE byte digest
 *
 * Returns 0 on success or the error reported by the fallback.
 */
static int padlock_sha256_finup(struct shash_desc *desc, const u8 *in,
				unsigned int count, u8 *out)
{
	/* We can't store directly to *out as it may be unaligned. */
	/* BTW Don't reduce the buffer size below 128 Bytes!
	 *     PadLock microcode needs it that big. */
	char buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
		((aligned(STACK_ALIGN)));
	char *result = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
	struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
	struct sha256_state state;
	unsigned int space;
	unsigned int leftover;
	int err;

	err = crypto_shash_export(&dctx->fallback, &state);
	if (err)
		return err;

	/* The byte counts are passed as unsigned long; fall back if too big. */
	if (state.count + count > ULONG_MAX)
		return crypto_shash_finup(&dctx->fallback, in, count, out);

	/*
	 * leftover is state.count modulo the block size, except that an exact
	 * multiple (including 0) yields a full block so that space becomes 0.
	 */
	leftover = ((state.count - 1) & (SHA256_BLOCK_SIZE - 1)) + 1;
	space = SHA256_BLOCK_SIZE - leftover;
	if (space) {
		if (count > space) {
			/* Complete the buffered block in the fallback. */
			err = crypto_shash_update(&dctx->fallback, in, space) ?:
			      crypto_shash_export(&dctx->fallback, &state);
			if (err)
				return err;
			count -= space;
			in += space;
		} else {
			/*
			 * Everything fits in one block: append @in to the
			 * buffered bytes and hash from the state buffer,
			 * with the count rounded down to the block start.
			 */
			memcpy(state.buf + leftover, in, count);
			in = state.buf;
			count += leftover;
			state.count &= ~(SHA256_BLOCK_SIZE - 1);
		}
	}

	memcpy(result, &state.state, SHA256_DIGEST_SIZE);

	/*
	 * ECX = state.count + count, EAX = state.count, ESI = input,
	 * EDI = chaining value / result buffer.  The instruction reads the
	 * data at @in and rewrites the buffer at result, neither of which is
	 * a memory operand, hence the "memory" clobber: it keeps the compiler
	 * from caching or reordering those accesses around the asm.
	 */
	asm volatile (".byte 0xf3,0x0f,0xa6,0xd0" /* rep xsha256 */
		      :
		      : "c"((unsigned long)state.count + count),
			"a"((unsigned long)state.count),
			"S"(in), "D"(result)
		      : "memory");

	padlock_output_block((uint32_t *)result, (uint32_t *)out, 8);

	return 0;
}
185*4882a593Smuzhiyun
/*
 * Finalise a SHA-256 hash with no further input by delegating to
 * padlock_sha256_finup() with a zero length.
 */
static int padlock_sha256_final(struct shash_desc *desc, u8 *out)
{
	/* Placeholder input pointer; the length passed along is 0. */
	u8 dummy[4];

	return padlock_sha256_finup(desc, dummy, 0, out);
}
192*4882a593Smuzhiyun
padlock_init_tfm(struct crypto_shash * hash)193*4882a593Smuzhiyun static int padlock_init_tfm(struct crypto_shash *hash)
194*4882a593Smuzhiyun {
195*4882a593Smuzhiyun const char *fallback_driver_name = crypto_shash_alg_name(hash);
196*4882a593Smuzhiyun struct padlock_sha_ctx *ctx = crypto_shash_ctx(hash);
197*4882a593Smuzhiyun struct crypto_shash *fallback_tfm;
198*4882a593Smuzhiyun
199*4882a593Smuzhiyun /* Allocate a fallback and abort if it failed. */
200*4882a593Smuzhiyun fallback_tfm = crypto_alloc_shash(fallback_driver_name, 0,
201*4882a593Smuzhiyun CRYPTO_ALG_NEED_FALLBACK);
202*4882a593Smuzhiyun if (IS_ERR(fallback_tfm)) {
203*4882a593Smuzhiyun printk(KERN_WARNING PFX "Fallback driver '%s' could not be loaded!\n",
204*4882a593Smuzhiyun fallback_driver_name);
205*4882a593Smuzhiyun return PTR_ERR(fallback_tfm);
206*4882a593Smuzhiyun }
207*4882a593Smuzhiyun
208*4882a593Smuzhiyun ctx->fallback = fallback_tfm;
209*4882a593Smuzhiyun hash->descsize += crypto_shash_descsize(fallback_tfm);
210*4882a593Smuzhiyun return 0;
211*4882a593Smuzhiyun }
212*4882a593Smuzhiyun
padlock_exit_tfm(struct crypto_shash * hash)213*4882a593Smuzhiyun static void padlock_exit_tfm(struct crypto_shash *hash)
214*4882a593Smuzhiyun {
215*4882a593Smuzhiyun struct padlock_sha_ctx *ctx = crypto_shash_ctx(hash);
216*4882a593Smuzhiyun
217*4882a593Smuzhiyun crypto_free_shash(ctx->fallback);
218*4882a593Smuzhiyun }
219*4882a593Smuzhiyun
220*4882a593Smuzhiyun static struct shash_alg sha1_alg = {
221*4882a593Smuzhiyun .digestsize = SHA1_DIGEST_SIZE,
222*4882a593Smuzhiyun .init = padlock_sha_init,
223*4882a593Smuzhiyun .update = padlock_sha_update,
224*4882a593Smuzhiyun .finup = padlock_sha1_finup,
225*4882a593Smuzhiyun .final = padlock_sha1_final,
226*4882a593Smuzhiyun .export = padlock_sha_export,
227*4882a593Smuzhiyun .import = padlock_sha_import,
228*4882a593Smuzhiyun .init_tfm = padlock_init_tfm,
229*4882a593Smuzhiyun .exit_tfm = padlock_exit_tfm,
230*4882a593Smuzhiyun .descsize = sizeof(struct padlock_sha_desc),
231*4882a593Smuzhiyun .statesize = sizeof(struct sha1_state),
232*4882a593Smuzhiyun .base = {
233*4882a593Smuzhiyun .cra_name = "sha1",
234*4882a593Smuzhiyun .cra_driver_name = "sha1-padlock",
235*4882a593Smuzhiyun .cra_priority = PADLOCK_CRA_PRIORITY,
236*4882a593Smuzhiyun .cra_flags = CRYPTO_ALG_NEED_FALLBACK,
237*4882a593Smuzhiyun .cra_blocksize = SHA1_BLOCK_SIZE,
238*4882a593Smuzhiyun .cra_ctxsize = sizeof(struct padlock_sha_ctx),
239*4882a593Smuzhiyun .cra_module = THIS_MODULE,
240*4882a593Smuzhiyun }
241*4882a593Smuzhiyun };
242*4882a593Smuzhiyun
243*4882a593Smuzhiyun static struct shash_alg sha256_alg = {
244*4882a593Smuzhiyun .digestsize = SHA256_DIGEST_SIZE,
245*4882a593Smuzhiyun .init = padlock_sha_init,
246*4882a593Smuzhiyun .update = padlock_sha_update,
247*4882a593Smuzhiyun .finup = padlock_sha256_finup,
248*4882a593Smuzhiyun .final = padlock_sha256_final,
249*4882a593Smuzhiyun .export = padlock_sha_export,
250*4882a593Smuzhiyun .import = padlock_sha_import,
251*4882a593Smuzhiyun .init_tfm = padlock_init_tfm,
252*4882a593Smuzhiyun .exit_tfm = padlock_exit_tfm,
253*4882a593Smuzhiyun .descsize = sizeof(struct padlock_sha_desc),
254*4882a593Smuzhiyun .statesize = sizeof(struct sha256_state),
255*4882a593Smuzhiyun .base = {
256*4882a593Smuzhiyun .cra_name = "sha256",
257*4882a593Smuzhiyun .cra_driver_name = "sha256-padlock",
258*4882a593Smuzhiyun .cra_priority = PADLOCK_CRA_PRIORITY,
259*4882a593Smuzhiyun .cra_flags = CRYPTO_ALG_NEED_FALLBACK,
260*4882a593Smuzhiyun .cra_blocksize = SHA256_BLOCK_SIZE,
261*4882a593Smuzhiyun .cra_ctxsize = sizeof(struct padlock_sha_ctx),
262*4882a593Smuzhiyun .cra_module = THIS_MODULE,
263*4882a593Smuzhiyun }
264*4882a593Smuzhiyun };
265*4882a593Smuzhiyun
/*
 * The following two shash_alg instances use the hardware-implemented
 * multi-part hashing supported by the VIA Nano processor.
 */
padlock_sha1_init_nano(struct shash_desc * desc)268*4882a593Smuzhiyun static int padlock_sha1_init_nano(struct shash_desc *desc)
269*4882a593Smuzhiyun {
270*4882a593Smuzhiyun struct sha1_state *sctx = shash_desc_ctx(desc);
271*4882a593Smuzhiyun
272*4882a593Smuzhiyun *sctx = (struct sha1_state){
273*4882a593Smuzhiyun .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
274*4882a593Smuzhiyun };
275*4882a593Smuzhiyun
276*4882a593Smuzhiyun return 0;
277*4882a593Smuzhiyun }
278*4882a593Smuzhiyun
padlock_sha1_update_nano(struct shash_desc * desc,const u8 * data,unsigned int len)279*4882a593Smuzhiyun static int padlock_sha1_update_nano(struct shash_desc *desc,
280*4882a593Smuzhiyun const u8 *data, unsigned int len)
281*4882a593Smuzhiyun {
282*4882a593Smuzhiyun struct sha1_state *sctx = shash_desc_ctx(desc);
283*4882a593Smuzhiyun unsigned int partial, done;
284*4882a593Smuzhiyun const u8 *src;
285*4882a593Smuzhiyun /*The PHE require the out buffer must 128 bytes and 16-bytes aligned*/
286*4882a593Smuzhiyun u8 buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
287*4882a593Smuzhiyun ((aligned(STACK_ALIGN)));
288*4882a593Smuzhiyun u8 *dst = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
289*4882a593Smuzhiyun
290*4882a593Smuzhiyun partial = sctx->count & 0x3f;
291*4882a593Smuzhiyun sctx->count += len;
292*4882a593Smuzhiyun done = 0;
293*4882a593Smuzhiyun src = data;
294*4882a593Smuzhiyun memcpy(dst, (u8 *)(sctx->state), SHA1_DIGEST_SIZE);
295*4882a593Smuzhiyun
296*4882a593Smuzhiyun if ((partial + len) >= SHA1_BLOCK_SIZE) {
297*4882a593Smuzhiyun
298*4882a593Smuzhiyun /* Append the bytes in state's buffer to a block to handle */
299*4882a593Smuzhiyun if (partial) {
300*4882a593Smuzhiyun done = -partial;
301*4882a593Smuzhiyun memcpy(sctx->buffer + partial, data,
302*4882a593Smuzhiyun done + SHA1_BLOCK_SIZE);
303*4882a593Smuzhiyun src = sctx->buffer;
304*4882a593Smuzhiyun asm volatile (".byte 0xf3,0x0f,0xa6,0xc8"
305*4882a593Smuzhiyun : "+S"(src), "+D"(dst) \
306*4882a593Smuzhiyun : "a"((long)-1), "c"((unsigned long)1));
307*4882a593Smuzhiyun done += SHA1_BLOCK_SIZE;
308*4882a593Smuzhiyun src = data + done;
309*4882a593Smuzhiyun }
310*4882a593Smuzhiyun
311*4882a593Smuzhiyun /* Process the left bytes from the input data */
312*4882a593Smuzhiyun if (len - done >= SHA1_BLOCK_SIZE) {
313*4882a593Smuzhiyun asm volatile (".byte 0xf3,0x0f,0xa6,0xc8"
314*4882a593Smuzhiyun : "+S"(src), "+D"(dst)
315*4882a593Smuzhiyun : "a"((long)-1),
316*4882a593Smuzhiyun "c"((unsigned long)((len - done) / SHA1_BLOCK_SIZE)));
317*4882a593Smuzhiyun done += ((len - done) - (len - done) % SHA1_BLOCK_SIZE);
318*4882a593Smuzhiyun src = data + done;
319*4882a593Smuzhiyun }
320*4882a593Smuzhiyun partial = 0;
321*4882a593Smuzhiyun }
322*4882a593Smuzhiyun memcpy((u8 *)(sctx->state), dst, SHA1_DIGEST_SIZE);
323*4882a593Smuzhiyun memcpy(sctx->buffer + partial, src, len - done);
324*4882a593Smuzhiyun
325*4882a593Smuzhiyun return 0;
326*4882a593Smuzhiyun }
327*4882a593Smuzhiyun
/*
 * Finalise the SHA-1 hash: pad the message to 56 bytes modulo 64, append
 * the big-endian 64-bit bit length and write the byte-swapped state words
 * to @out.
 *
 * Always returns 0.
 */
static int padlock_sha1_final_nano(struct shash_desc *desc, u8 *out)
{
	static const u8 padding[64] = { 0x80, };
	struct sha1_state *sctx = shash_desc_ctx(desc);
	/* Capture the length before the padding updates change count. */
	__be64 bit_len = cpu_to_be64(sctx->count << 3);
	unsigned int used = sctx->count & 0x3f;
	unsigned int pad_bytes;

	/* Pad out to 56 mod 64 */
	if (used < 56)
		pad_bytes = 56 - used;
	else
		pad_bytes = (64 + 56) - used;
	padlock_sha1_update_nano(desc, padding, pad_bytes);

	/* Length field */
	padlock_sha1_update_nano(desc, (const u8 *)&bit_len, sizeof(bit_len));

	/* Byte-swap the five state words into the caller's buffer */
	padlock_output_block((uint32_t *)(sctx->state), (uint32_t *)out, 5);

	return 0;
}
350*4882a593Smuzhiyun
padlock_sha256_init_nano(struct shash_desc * desc)351*4882a593Smuzhiyun static int padlock_sha256_init_nano(struct shash_desc *desc)
352*4882a593Smuzhiyun {
353*4882a593Smuzhiyun struct sha256_state *sctx = shash_desc_ctx(desc);
354*4882a593Smuzhiyun
355*4882a593Smuzhiyun *sctx = (struct sha256_state){
356*4882a593Smuzhiyun .state = { SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3, \
357*4882a593Smuzhiyun SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7},
358*4882a593Smuzhiyun };
359*4882a593Smuzhiyun
360*4882a593Smuzhiyun return 0;
361*4882a593Smuzhiyun }
362*4882a593Smuzhiyun
padlock_sha256_update_nano(struct shash_desc * desc,const u8 * data,unsigned int len)363*4882a593Smuzhiyun static int padlock_sha256_update_nano(struct shash_desc *desc, const u8 *data,
364*4882a593Smuzhiyun unsigned int len)
365*4882a593Smuzhiyun {
366*4882a593Smuzhiyun struct sha256_state *sctx = shash_desc_ctx(desc);
367*4882a593Smuzhiyun unsigned int partial, done;
368*4882a593Smuzhiyun const u8 *src;
369*4882a593Smuzhiyun /*The PHE require the out buffer must 128 bytes and 16-bytes aligned*/
370*4882a593Smuzhiyun u8 buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
371*4882a593Smuzhiyun ((aligned(STACK_ALIGN)));
372*4882a593Smuzhiyun u8 *dst = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
373*4882a593Smuzhiyun
374*4882a593Smuzhiyun partial = sctx->count & 0x3f;
375*4882a593Smuzhiyun sctx->count += len;
376*4882a593Smuzhiyun done = 0;
377*4882a593Smuzhiyun src = data;
378*4882a593Smuzhiyun memcpy(dst, (u8 *)(sctx->state), SHA256_DIGEST_SIZE);
379*4882a593Smuzhiyun
380*4882a593Smuzhiyun if ((partial + len) >= SHA256_BLOCK_SIZE) {
381*4882a593Smuzhiyun
382*4882a593Smuzhiyun /* Append the bytes in state's buffer to a block to handle */
383*4882a593Smuzhiyun if (partial) {
384*4882a593Smuzhiyun done = -partial;
385*4882a593Smuzhiyun memcpy(sctx->buf + partial, data,
386*4882a593Smuzhiyun done + SHA256_BLOCK_SIZE);
387*4882a593Smuzhiyun src = sctx->buf;
388*4882a593Smuzhiyun asm volatile (".byte 0xf3,0x0f,0xa6,0xd0"
389*4882a593Smuzhiyun : "+S"(src), "+D"(dst)
390*4882a593Smuzhiyun : "a"((long)-1), "c"((unsigned long)1));
391*4882a593Smuzhiyun done += SHA256_BLOCK_SIZE;
392*4882a593Smuzhiyun src = data + done;
393*4882a593Smuzhiyun }
394*4882a593Smuzhiyun
395*4882a593Smuzhiyun /* Process the left bytes from input data*/
396*4882a593Smuzhiyun if (len - done >= SHA256_BLOCK_SIZE) {
397*4882a593Smuzhiyun asm volatile (".byte 0xf3,0x0f,0xa6,0xd0"
398*4882a593Smuzhiyun : "+S"(src), "+D"(dst)
399*4882a593Smuzhiyun : "a"((long)-1),
400*4882a593Smuzhiyun "c"((unsigned long)((len - done) / 64)));
401*4882a593Smuzhiyun done += ((len - done) - (len - done) % 64);
402*4882a593Smuzhiyun src = data + done;
403*4882a593Smuzhiyun }
404*4882a593Smuzhiyun partial = 0;
405*4882a593Smuzhiyun }
406*4882a593Smuzhiyun memcpy((u8 *)(sctx->state), dst, SHA256_DIGEST_SIZE);
407*4882a593Smuzhiyun memcpy(sctx->buf + partial, src, len - done);
408*4882a593Smuzhiyun
409*4882a593Smuzhiyun return 0;
410*4882a593Smuzhiyun }
411*4882a593Smuzhiyun
/*
 * Finalise the SHA-256 hash: pad the message to 56 bytes modulo 64,
 * append the big-endian 64-bit bit length and write the byte-swapped
 * state words to @out.
 *
 * Always returns 0.
 */
static int padlock_sha256_final_nano(struct shash_desc *desc, u8 *out)
{
	static const u8 padding[64] = { 0x80, };
	struct sha256_state *sctx = shash_desc_ctx(desc);
	/* Capture the length before the padding updates change count. */
	__be64 bit_len = cpu_to_be64(sctx->count << 3);
	unsigned int used = sctx->count & 0x3f;
	unsigned int pad_bytes;

	/* Pad out to 56 mod 64 */
	if (used < 56)
		pad_bytes = 56 - used;
	else
		pad_bytes = (64 + 56) - used;
	padlock_sha256_update_nano(desc, padding, pad_bytes);

	/* Length field */
	padlock_sha256_update_nano(desc, (const u8 *)&bit_len, sizeof(bit_len));

	/* Byte-swap the eight state words into the caller's buffer */
	padlock_output_block((uint32_t *)(sctx->state), (uint32_t *)out, 8);

	return 0;
}
435*4882a593Smuzhiyun
padlock_sha_export_nano(struct shash_desc * desc,void * out)436*4882a593Smuzhiyun static int padlock_sha_export_nano(struct shash_desc *desc,
437*4882a593Smuzhiyun void *out)
438*4882a593Smuzhiyun {
439*4882a593Smuzhiyun int statesize = crypto_shash_statesize(desc->tfm);
440*4882a593Smuzhiyun void *sctx = shash_desc_ctx(desc);
441*4882a593Smuzhiyun
442*4882a593Smuzhiyun memcpy(out, sctx, statesize);
443*4882a593Smuzhiyun return 0;
444*4882a593Smuzhiyun }
445*4882a593Smuzhiyun
padlock_sha_import_nano(struct shash_desc * desc,const void * in)446*4882a593Smuzhiyun static int padlock_sha_import_nano(struct shash_desc *desc,
447*4882a593Smuzhiyun const void *in)
448*4882a593Smuzhiyun {
449*4882a593Smuzhiyun int statesize = crypto_shash_statesize(desc->tfm);
450*4882a593Smuzhiyun void *sctx = shash_desc_ctx(desc);
451*4882a593Smuzhiyun
452*4882a593Smuzhiyun memcpy(sctx, in, statesize);
453*4882a593Smuzhiyun return 0;
454*4882a593Smuzhiyun }
455*4882a593Smuzhiyun
456*4882a593Smuzhiyun static struct shash_alg sha1_alg_nano = {
457*4882a593Smuzhiyun .digestsize = SHA1_DIGEST_SIZE,
458*4882a593Smuzhiyun .init = padlock_sha1_init_nano,
459*4882a593Smuzhiyun .update = padlock_sha1_update_nano,
460*4882a593Smuzhiyun .final = padlock_sha1_final_nano,
461*4882a593Smuzhiyun .export = padlock_sha_export_nano,
462*4882a593Smuzhiyun .import = padlock_sha_import_nano,
463*4882a593Smuzhiyun .descsize = sizeof(struct sha1_state),
464*4882a593Smuzhiyun .statesize = sizeof(struct sha1_state),
465*4882a593Smuzhiyun .base = {
466*4882a593Smuzhiyun .cra_name = "sha1",
467*4882a593Smuzhiyun .cra_driver_name = "sha1-padlock-nano",
468*4882a593Smuzhiyun .cra_priority = PADLOCK_CRA_PRIORITY,
469*4882a593Smuzhiyun .cra_blocksize = SHA1_BLOCK_SIZE,
470*4882a593Smuzhiyun .cra_module = THIS_MODULE,
471*4882a593Smuzhiyun }
472*4882a593Smuzhiyun };
473*4882a593Smuzhiyun
474*4882a593Smuzhiyun static struct shash_alg sha256_alg_nano = {
475*4882a593Smuzhiyun .digestsize = SHA256_DIGEST_SIZE,
476*4882a593Smuzhiyun .init = padlock_sha256_init_nano,
477*4882a593Smuzhiyun .update = padlock_sha256_update_nano,
478*4882a593Smuzhiyun .final = padlock_sha256_final_nano,
479*4882a593Smuzhiyun .export = padlock_sha_export_nano,
480*4882a593Smuzhiyun .import = padlock_sha_import_nano,
481*4882a593Smuzhiyun .descsize = sizeof(struct sha256_state),
482*4882a593Smuzhiyun .statesize = sizeof(struct sha256_state),
483*4882a593Smuzhiyun .base = {
484*4882a593Smuzhiyun .cra_name = "sha256",
485*4882a593Smuzhiyun .cra_driver_name = "sha256-padlock-nano",
486*4882a593Smuzhiyun .cra_priority = PADLOCK_CRA_PRIORITY,
487*4882a593Smuzhiyun .cra_blocksize = SHA256_BLOCK_SIZE,
488*4882a593Smuzhiyun .cra_module = THIS_MODULE,
489*4882a593Smuzhiyun }
490*4882a593Smuzhiyun };
491*4882a593Smuzhiyun
492*4882a593Smuzhiyun static const struct x86_cpu_id padlock_sha_ids[] = {
493*4882a593Smuzhiyun X86_MATCH_FEATURE(X86_FEATURE_PHE, NULL),
494*4882a593Smuzhiyun {}
495*4882a593Smuzhiyun };
496*4882a593Smuzhiyun MODULE_DEVICE_TABLE(x86cpu, padlock_sha_ids);
497*4882a593Smuzhiyun
padlock_init(void)498*4882a593Smuzhiyun static int __init padlock_init(void)
499*4882a593Smuzhiyun {
500*4882a593Smuzhiyun int rc = -ENODEV;
501*4882a593Smuzhiyun struct cpuinfo_x86 *c = &cpu_data(0);
502*4882a593Smuzhiyun struct shash_alg *sha1;
503*4882a593Smuzhiyun struct shash_alg *sha256;
504*4882a593Smuzhiyun
505*4882a593Smuzhiyun if (!x86_match_cpu(padlock_sha_ids) || !boot_cpu_has(X86_FEATURE_PHE_EN))
506*4882a593Smuzhiyun return -ENODEV;
507*4882a593Smuzhiyun
508*4882a593Smuzhiyun /* Register the newly added algorithm module if on *
509*4882a593Smuzhiyun * VIA Nano processor, or else just do as before */
510*4882a593Smuzhiyun if (c->x86_model < 0x0f) {
511*4882a593Smuzhiyun sha1 = &sha1_alg;
512*4882a593Smuzhiyun sha256 = &sha256_alg;
513*4882a593Smuzhiyun } else {
514*4882a593Smuzhiyun sha1 = &sha1_alg_nano;
515*4882a593Smuzhiyun sha256 = &sha256_alg_nano;
516*4882a593Smuzhiyun }
517*4882a593Smuzhiyun
518*4882a593Smuzhiyun rc = crypto_register_shash(sha1);
519*4882a593Smuzhiyun if (rc)
520*4882a593Smuzhiyun goto out;
521*4882a593Smuzhiyun
522*4882a593Smuzhiyun rc = crypto_register_shash(sha256);
523*4882a593Smuzhiyun if (rc)
524*4882a593Smuzhiyun goto out_unreg1;
525*4882a593Smuzhiyun
526*4882a593Smuzhiyun printk(KERN_NOTICE PFX "Using VIA PadLock ACE for SHA1/SHA256 algorithms.\n");
527*4882a593Smuzhiyun
528*4882a593Smuzhiyun return 0;
529*4882a593Smuzhiyun
530*4882a593Smuzhiyun out_unreg1:
531*4882a593Smuzhiyun crypto_unregister_shash(sha1);
532*4882a593Smuzhiyun
533*4882a593Smuzhiyun out:
534*4882a593Smuzhiyun printk(KERN_ERR PFX "VIA PadLock SHA1/SHA256 initialization failed.\n");
535*4882a593Smuzhiyun return rc;
536*4882a593Smuzhiyun }
537*4882a593Smuzhiyun
padlock_fini(void)538*4882a593Smuzhiyun static void __exit padlock_fini(void)
539*4882a593Smuzhiyun {
540*4882a593Smuzhiyun struct cpuinfo_x86 *c = &cpu_data(0);
541*4882a593Smuzhiyun
542*4882a593Smuzhiyun if (c->x86_model >= 0x0f) {
543*4882a593Smuzhiyun crypto_unregister_shash(&sha1_alg_nano);
544*4882a593Smuzhiyun crypto_unregister_shash(&sha256_alg_nano);
545*4882a593Smuzhiyun } else {
546*4882a593Smuzhiyun crypto_unregister_shash(&sha1_alg);
547*4882a593Smuzhiyun crypto_unregister_shash(&sha256_alg);
548*4882a593Smuzhiyun }
549*4882a593Smuzhiyun }
550*4882a593Smuzhiyun
551*4882a593Smuzhiyun module_init(padlock_init);
552*4882a593Smuzhiyun module_exit(padlock_fini);
553*4882a593Smuzhiyun
554*4882a593Smuzhiyun MODULE_DESCRIPTION("VIA PadLock SHA1/SHA256 algorithms support.");
555*4882a593Smuzhiyun MODULE_LICENSE("GPL");
556*4882a593Smuzhiyun MODULE_AUTHOR("Michal Ludvig");
557*4882a593Smuzhiyun
558*4882a593Smuzhiyun MODULE_ALIAS_CRYPTO("sha1-all");
559*4882a593Smuzhiyun MODULE_ALIAS_CRYPTO("sha256-all");
560*4882a593Smuzhiyun MODULE_ALIAS_CRYPTO("sha1-padlock");
561*4882a593Smuzhiyun MODULE_ALIAS_CRYPTO("sha256-padlock");
562