// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Glue code for SHA-256 implementation for SPE instructions (PPC)
 *
 * Based on the generic implementation. The assembler module takes care
 * of the SPE registers so it can run from interrupt context.
 *
 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
 */
10*4882a593Smuzhiyun
11*4882a593Smuzhiyun #include <crypto/internal/hash.h>
12*4882a593Smuzhiyun #include <linux/init.h>
13*4882a593Smuzhiyun #include <linux/module.h>
14*4882a593Smuzhiyun #include <linux/mm.h>
15*4882a593Smuzhiyun #include <linux/types.h>
16*4882a593Smuzhiyun #include <crypto/sha.h>
17*4882a593Smuzhiyun #include <asm/byteorder.h>
18*4882a593Smuzhiyun #include <asm/switch_to.h>
19*4882a593Smuzhiyun #include <linux/hardirq.h>
20*4882a593Smuzhiyun
/*
 * MAX_BYTES is the upper bound on the number of input bytes hashed inside
 * a single preempt_disable()/preempt_enable() section.
 *
 * Rationale: SHA256 needs ~2,000 operations per 64 byte block. e500 cores
 * can issue two arithmetic instructions per clock cycle, using one 32/64 bit
 * unit (SU1) and one 32 bit unit (SU2). 1KB of input is therefore estimated
 * to cost at most 18,000 cycles, headroom for cache misses included. Even on
 * the low end model clocked at 667 MHz this keeps the critical time window
 * below 27us.
 */
#define MAX_BYTES 1024
32*4882a593Smuzhiyun
33*4882a593Smuzhiyun extern void ppc_spe_sha256_transform(u32 *state, const u8 *src, u32 blocks);
34*4882a593Smuzhiyun
/*
 * Open an SPE section: preemption is switched off first, then SPE use is
 * enabled for the kernel. Must be paired with spe_end().
 */
static void spe_begin(void)
{
	/* SPE is only switched on here; register saving happens later. */
	preempt_disable();
	enable_kernel_spe();
}
41*4882a593Smuzhiyun
/*
 * Close an SPE section opened by spe_begin(): SPE use is disabled again
 * before preemption is switched back on.
 */
static void spe_end(void)
{
	disable_kernel_spe();
	/* allow preemption again */
	preempt_enable();
}
48*4882a593Smuzhiyun
ppc_sha256_clear_context(struct sha256_state * sctx)49*4882a593Smuzhiyun static inline void ppc_sha256_clear_context(struct sha256_state *sctx)
50*4882a593Smuzhiyun {
51*4882a593Smuzhiyun int count = sizeof(struct sha256_state) >> 2;
52*4882a593Smuzhiyun u32 *ptr = (u32 *)sctx;
53*4882a593Smuzhiyun
54*4882a593Smuzhiyun /* make sure we can clear the fast way */
55*4882a593Smuzhiyun BUILD_BUG_ON(sizeof(struct sha256_state) % 4);
56*4882a593Smuzhiyun do { *ptr++ = 0; } while (--count);
57*4882a593Smuzhiyun }
58*4882a593Smuzhiyun
ppc_spe_sha256_init(struct shash_desc * desc)59*4882a593Smuzhiyun static int ppc_spe_sha256_init(struct shash_desc *desc)
60*4882a593Smuzhiyun {
61*4882a593Smuzhiyun struct sha256_state *sctx = shash_desc_ctx(desc);
62*4882a593Smuzhiyun
63*4882a593Smuzhiyun sctx->state[0] = SHA256_H0;
64*4882a593Smuzhiyun sctx->state[1] = SHA256_H1;
65*4882a593Smuzhiyun sctx->state[2] = SHA256_H2;
66*4882a593Smuzhiyun sctx->state[3] = SHA256_H3;
67*4882a593Smuzhiyun sctx->state[4] = SHA256_H4;
68*4882a593Smuzhiyun sctx->state[5] = SHA256_H5;
69*4882a593Smuzhiyun sctx->state[6] = SHA256_H6;
70*4882a593Smuzhiyun sctx->state[7] = SHA256_H7;
71*4882a593Smuzhiyun sctx->count = 0;
72*4882a593Smuzhiyun
73*4882a593Smuzhiyun return 0;
74*4882a593Smuzhiyun }
75*4882a593Smuzhiyun
ppc_spe_sha224_init(struct shash_desc * desc)76*4882a593Smuzhiyun static int ppc_spe_sha224_init(struct shash_desc *desc)
77*4882a593Smuzhiyun {
78*4882a593Smuzhiyun struct sha256_state *sctx = shash_desc_ctx(desc);
79*4882a593Smuzhiyun
80*4882a593Smuzhiyun sctx->state[0] = SHA224_H0;
81*4882a593Smuzhiyun sctx->state[1] = SHA224_H1;
82*4882a593Smuzhiyun sctx->state[2] = SHA224_H2;
83*4882a593Smuzhiyun sctx->state[3] = SHA224_H3;
84*4882a593Smuzhiyun sctx->state[4] = SHA224_H4;
85*4882a593Smuzhiyun sctx->state[5] = SHA224_H5;
86*4882a593Smuzhiyun sctx->state[6] = SHA224_H6;
87*4882a593Smuzhiyun sctx->state[7] = SHA224_H7;
88*4882a593Smuzhiyun sctx->count = 0;
89*4882a593Smuzhiyun
90*4882a593Smuzhiyun return 0;
91*4882a593Smuzhiyun }
92*4882a593Smuzhiyun
ppc_spe_sha256_update(struct shash_desc * desc,const u8 * data,unsigned int len)93*4882a593Smuzhiyun static int ppc_spe_sha256_update(struct shash_desc *desc, const u8 *data,
94*4882a593Smuzhiyun unsigned int len)
95*4882a593Smuzhiyun {
96*4882a593Smuzhiyun struct sha256_state *sctx = shash_desc_ctx(desc);
97*4882a593Smuzhiyun const unsigned int offset = sctx->count & 0x3f;
98*4882a593Smuzhiyun const unsigned int avail = 64 - offset;
99*4882a593Smuzhiyun unsigned int bytes;
100*4882a593Smuzhiyun const u8 *src = data;
101*4882a593Smuzhiyun
102*4882a593Smuzhiyun if (avail > len) {
103*4882a593Smuzhiyun sctx->count += len;
104*4882a593Smuzhiyun memcpy((char *)sctx->buf + offset, src, len);
105*4882a593Smuzhiyun return 0;
106*4882a593Smuzhiyun }
107*4882a593Smuzhiyun
108*4882a593Smuzhiyun sctx->count += len;
109*4882a593Smuzhiyun
110*4882a593Smuzhiyun if (offset) {
111*4882a593Smuzhiyun memcpy((char *)sctx->buf + offset, src, avail);
112*4882a593Smuzhiyun
113*4882a593Smuzhiyun spe_begin();
114*4882a593Smuzhiyun ppc_spe_sha256_transform(sctx->state, (const u8 *)sctx->buf, 1);
115*4882a593Smuzhiyun spe_end();
116*4882a593Smuzhiyun
117*4882a593Smuzhiyun len -= avail;
118*4882a593Smuzhiyun src += avail;
119*4882a593Smuzhiyun }
120*4882a593Smuzhiyun
121*4882a593Smuzhiyun while (len > 63) {
122*4882a593Smuzhiyun /* cut input data into smaller blocks */
123*4882a593Smuzhiyun bytes = (len > MAX_BYTES) ? MAX_BYTES : len;
124*4882a593Smuzhiyun bytes = bytes & ~0x3f;
125*4882a593Smuzhiyun
126*4882a593Smuzhiyun spe_begin();
127*4882a593Smuzhiyun ppc_spe_sha256_transform(sctx->state, src, bytes >> 6);
128*4882a593Smuzhiyun spe_end();
129*4882a593Smuzhiyun
130*4882a593Smuzhiyun src += bytes;
131*4882a593Smuzhiyun len -= bytes;
132*4882a593Smuzhiyun };
133*4882a593Smuzhiyun
134*4882a593Smuzhiyun memcpy((char *)sctx->buf, src, len);
135*4882a593Smuzhiyun return 0;
136*4882a593Smuzhiyun }
137*4882a593Smuzhiyun
/*
 * shash .final for SHA-256: pad the buffered data, hash the last block(s)
 * and write the eight state words to @out as big-endian 32 bit values.
 *
 * Padding is a single 0x80 byte, zero bytes up to offset 56 of the block
 * and the message length in bits as a big-endian 64 bit value in the last
 * eight bytes. If the 0x80 byte leaves no room for the length field, the
 * current block is zero-filled and hashed, and the length goes into an
 * additional block. The context is wiped before returning.
 *
 * Always returns 0.
 */
static int ppc_spe_sha256_final(struct shash_desc *desc, u8 *out)
{
	struct sha256_state *sctx = shash_desc_ctx(desc);
	const unsigned int used = sctx->count & 0x3f;
	char *block = (char *)sctx->buf;
	char *fill = block + used;
	__be64 *bitlen = (__be64 *)(block + 56);
	__be32 *digest = (__be32 *)out;
	unsigned int zeros;
	unsigned int i;

	/* mandatory padding marker */
	*fill++ = 0x80;

	spe_begin();

	if (used > 55) {
		/* Length field does not fit: flush this block first. */
		memset(fill, 0x00, 63 - used);
		ppc_spe_sha256_transform(sctx->state, sctx->buf, 1);
		fill = block;
		zeros = 56;
	} else {
		zeros = 55 - used;
	}

	memset(fill, 0, zeros);
	/* message length in bits */
	*bitlen = cpu_to_be64(sctx->count << 3);
	ppc_spe_sha256_transform(sctx->state, sctx->buf, 1);

	spe_end();

	for (i = 0; i < SHA256_DIGEST_SIZE >> 2; i++)
		digest[i] = cpu_to_be32(sctx->state[i]);

	ppc_sha256_clear_context(sctx);
	return 0;
}
177*4882a593Smuzhiyun
/*
 * shash .final for SHA-224: run the SHA-256 finalisation into a temporary
 * buffer and hand out only the first SHA224_DIGEST_SIZE bytes.
 *
 * The temporary holds the words exactly as ppc_spe_sha256_final() stores
 * them, i.e. already converted to big-endian, hence the __be32 type. The
 * temporary is wiped before returning because it contains the full,
 * untruncated digest.
 *
 * Always returns 0.
 */
static int ppc_spe_sha224_final(struct shash_desc *desc, u8 *out)
{
	__be32 D[SHA256_DIGEST_SIZE >> 2];
	__be32 *dst = (__be32 *)out;
	unsigned int i;

	ppc_spe_sha256_final(desc, (u8 *)D);

	/* avoid bytewise memcpy */
	for (i = 0; i < SHA224_DIGEST_SIZE >> 2; i++)
		dst[i] = D[i];

	/* clear sensitive data */
	memzero_explicit(D, sizeof(D));
	return 0;
}
198*4882a593Smuzhiyun
ppc_spe_sha256_export(struct shash_desc * desc,void * out)199*4882a593Smuzhiyun static int ppc_spe_sha256_export(struct shash_desc *desc, void *out)
200*4882a593Smuzhiyun {
201*4882a593Smuzhiyun struct sha256_state *sctx = shash_desc_ctx(desc);
202*4882a593Smuzhiyun
203*4882a593Smuzhiyun memcpy(out, sctx, sizeof(*sctx));
204*4882a593Smuzhiyun return 0;
205*4882a593Smuzhiyun }
206*4882a593Smuzhiyun
ppc_spe_sha256_import(struct shash_desc * desc,const void * in)207*4882a593Smuzhiyun static int ppc_spe_sha256_import(struct shash_desc *desc, const void *in)
208*4882a593Smuzhiyun {
209*4882a593Smuzhiyun struct sha256_state *sctx = shash_desc_ctx(desc);
210*4882a593Smuzhiyun
211*4882a593Smuzhiyun memcpy(sctx, in, sizeof(*sctx));
212*4882a593Smuzhiyun return 0;
213*4882a593Smuzhiyun }
214*4882a593Smuzhiyun
215*4882a593Smuzhiyun static struct shash_alg algs[2] = { {
216*4882a593Smuzhiyun .digestsize = SHA256_DIGEST_SIZE,
217*4882a593Smuzhiyun .init = ppc_spe_sha256_init,
218*4882a593Smuzhiyun .update = ppc_spe_sha256_update,
219*4882a593Smuzhiyun .final = ppc_spe_sha256_final,
220*4882a593Smuzhiyun .export = ppc_spe_sha256_export,
221*4882a593Smuzhiyun .import = ppc_spe_sha256_import,
222*4882a593Smuzhiyun .descsize = sizeof(struct sha256_state),
223*4882a593Smuzhiyun .statesize = sizeof(struct sha256_state),
224*4882a593Smuzhiyun .base = {
225*4882a593Smuzhiyun .cra_name = "sha256",
226*4882a593Smuzhiyun .cra_driver_name= "sha256-ppc-spe",
227*4882a593Smuzhiyun .cra_priority = 300,
228*4882a593Smuzhiyun .cra_blocksize = SHA256_BLOCK_SIZE,
229*4882a593Smuzhiyun .cra_module = THIS_MODULE,
230*4882a593Smuzhiyun }
231*4882a593Smuzhiyun }, {
232*4882a593Smuzhiyun .digestsize = SHA224_DIGEST_SIZE,
233*4882a593Smuzhiyun .init = ppc_spe_sha224_init,
234*4882a593Smuzhiyun .update = ppc_spe_sha256_update,
235*4882a593Smuzhiyun .final = ppc_spe_sha224_final,
236*4882a593Smuzhiyun .export = ppc_spe_sha256_export,
237*4882a593Smuzhiyun .import = ppc_spe_sha256_import,
238*4882a593Smuzhiyun .descsize = sizeof(struct sha256_state),
239*4882a593Smuzhiyun .statesize = sizeof(struct sha256_state),
240*4882a593Smuzhiyun .base = {
241*4882a593Smuzhiyun .cra_name = "sha224",
242*4882a593Smuzhiyun .cra_driver_name= "sha224-ppc-spe",
243*4882a593Smuzhiyun .cra_priority = 300,
244*4882a593Smuzhiyun .cra_blocksize = SHA224_BLOCK_SIZE,
245*4882a593Smuzhiyun .cra_module = THIS_MODULE,
246*4882a593Smuzhiyun }
247*4882a593Smuzhiyun } };
248*4882a593Smuzhiyun
ppc_spe_sha256_mod_init(void)249*4882a593Smuzhiyun static int __init ppc_spe_sha256_mod_init(void)
250*4882a593Smuzhiyun {
251*4882a593Smuzhiyun return crypto_register_shashes(algs, ARRAY_SIZE(algs));
252*4882a593Smuzhiyun }
253*4882a593Smuzhiyun
ppc_spe_sha256_mod_fini(void)254*4882a593Smuzhiyun static void __exit ppc_spe_sha256_mod_fini(void)
255*4882a593Smuzhiyun {
256*4882a593Smuzhiyun crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
257*4882a593Smuzhiyun }
258*4882a593Smuzhiyun
259*4882a593Smuzhiyun module_init(ppc_spe_sha256_mod_init);
260*4882a593Smuzhiyun module_exit(ppc_spe_sha256_mod_fini);
261*4882a593Smuzhiyun
262*4882a593Smuzhiyun MODULE_LICENSE("GPL");
263*4882a593Smuzhiyun MODULE_DESCRIPTION("SHA-224 and SHA-256 Secure Hash Algorithm, SPE optimized");
264*4882a593Smuzhiyun
265*4882a593Smuzhiyun MODULE_ALIAS_CRYPTO("sha224");
266*4882a593Smuzhiyun MODULE_ALIAS_CRYPTO("sha224-ppc-spe");
267*4882a593Smuzhiyun MODULE_ALIAS_CRYPTO("sha256");
268*4882a593Smuzhiyun MODULE_ALIAS_CRYPTO("sha256-ppc-spe");
269