1*4882a593Smuzhiyun /*
2*4882a593Smuzhiyun * Cryptographic API.
3*4882a593Smuzhiyun *
4*4882a593Smuzhiyun * Glue code for the SHA512 Secure Hash Algorithm assembler
5*4882a593Smuzhiyun * implementation using supplemental SSE3 / AVX / AVX2 instructions.
6*4882a593Smuzhiyun *
7*4882a593Smuzhiyun * This file is based on sha512_generic.c
8*4882a593Smuzhiyun *
9*4882a593Smuzhiyun * Copyright (C) 2013 Intel Corporation
10*4882a593Smuzhiyun * Author: Tim Chen <tim.c.chen@linux.intel.com>
11*4882a593Smuzhiyun *
12*4882a593Smuzhiyun * This program is free software; you can redistribute it and/or modify it
13*4882a593Smuzhiyun * under the terms of the GNU General Public License as published by the Free
14*4882a593Smuzhiyun * Software Foundation; either version 2 of the License, or (at your option)
15*4882a593Smuzhiyun * any later version.
16*4882a593Smuzhiyun *
17*4882a593Smuzhiyun * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18*4882a593Smuzhiyun * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19*4882a593Smuzhiyun * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20*4882a593Smuzhiyun * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
21*4882a593Smuzhiyun * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
22*4882a593Smuzhiyun * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
23*4882a593Smuzhiyun * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24*4882a593Smuzhiyun * SOFTWARE.
25*4882a593Smuzhiyun *
26*4882a593Smuzhiyun */
27*4882a593Smuzhiyun
28*4882a593Smuzhiyun #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
29*4882a593Smuzhiyun
30*4882a593Smuzhiyun #include <crypto/internal/hash.h>
31*4882a593Smuzhiyun #include <crypto/internal/simd.h>
32*4882a593Smuzhiyun #include <linux/init.h>
33*4882a593Smuzhiyun #include <linux/module.h>
34*4882a593Smuzhiyun #include <linux/mm.h>
35*4882a593Smuzhiyun #include <linux/string.h>
36*4882a593Smuzhiyun #include <linux/types.h>
37*4882a593Smuzhiyun #include <crypto/sha.h>
38*4882a593Smuzhiyun #include <crypto/sha512_base.h>
39*4882a593Smuzhiyun #include <asm/simd.h>
40*4882a593Smuzhiyun
41*4882a593Smuzhiyun asmlinkage void sha512_transform_ssse3(struct sha512_state *state,
42*4882a593Smuzhiyun const u8 *data, int blocks);
43*4882a593Smuzhiyun
/*
 * Common update path shared by the SSSE3, AVX and AVX2 variants.
 *
 * @desc:         hash descriptor whose context is a struct sha512_state
 * @data:         input bytes
 * @len:          number of input bytes
 * @sha512_xform: block transform to use on the SIMD path
 *
 * Defers to crypto_sha512_update() when SIMD cannot be used right now,
 * or when the partial-block remainder of count[0] plus @len stays below
 * one SHA512 block.  Otherwise the data is run through
 * sha512_base_do_update() between kernel_fpu_begin()/kernel_fpu_end().
 *
 * Returns 0 on the SIMD path, else the fallback's return value.
 */
static int sha512_update(struct shash_desc *desc, const u8 *data,
			 unsigned int len, sha512_block_fn *sha512_xform)
{
	struct sha512_state *sctx = shash_desc_ctx(desc);
	u64 partial;

	/*
	 * Make sure struct sha512_state begins directly with the SHA512
	 * 512-bit internal state, as this is what the asm functions expect.
	 */
	BUILD_BUG_ON(offsetof(struct sha512_state, state) != 0);

	if (!crypto_simd_usable())
		return crypto_sha512_update(desc, data, len);

	/* Kept as a 64-bit sum so a large @len cannot wrap the comparison. */
	partial = sctx->count[0] % SHA512_BLOCK_SIZE;
	if (partial + len < SHA512_BLOCK_SIZE)
		return crypto_sha512_update(desc, data, len);

	kernel_fpu_begin();
	sha512_base_do_update(desc, data, len, sha512_xform);
	kernel_fpu_end();

	return 0;
}
65*4882a593Smuzhiyun
/*
 * Common finup path shared by the SSSE3, AVX and AVX2 variants.
 *
 * @desc:         hash descriptor
 * @data:         trailing input bytes (not touched when @len is 0)
 * @len:          number of trailing input bytes
 * @out:          destination for the digest
 * @sha512_xform: block transform to use on the SIMD path
 *
 * When SIMD is usable, the optional trailing update and the finalize
 * step both run inside one kernel_fpu_begin()/kernel_fpu_end() section
 * and the result of sha512_base_finish() is returned.  Otherwise the
 * whole operation is delegated to crypto_sha512_finup().
 */
static int sha512_finup(struct shash_desc *desc, const u8 *data,
			unsigned int len, u8 *out,
			sha512_block_fn *sha512_xform)
{
	if (crypto_simd_usable()) {
		kernel_fpu_begin();
		if (len != 0)
			sha512_base_do_update(desc, data, len, sha512_xform);
		sha512_base_do_finalize(desc, sha512_xform);
		kernel_fpu_end();

		return sha512_base_finish(desc, out);
	}

	return crypto_sha512_finup(desc, data, len, out);
}
80*4882a593Smuzhiyun
sha512_ssse3_update(struct shash_desc * desc,const u8 * data,unsigned int len)81*4882a593Smuzhiyun static int sha512_ssse3_update(struct shash_desc *desc, const u8 *data,
82*4882a593Smuzhiyun unsigned int len)
83*4882a593Smuzhiyun {
84*4882a593Smuzhiyun return sha512_update(desc, data, len, sha512_transform_ssse3);
85*4882a593Smuzhiyun }
86*4882a593Smuzhiyun
/* .finup hook of the "-ssse3" algorithms: sha512_finup() with the SSSE3 transform. */
static int sha512_ssse3_finup(struct shash_desc *desc, const u8 *data,
			      unsigned int len, u8 *out)
{
	sha512_block_fn *xform = sha512_transform_ssse3;

	return sha512_finup(desc, data, len, out, xform);
}
92*4882a593Smuzhiyun
93*4882a593Smuzhiyun /* Add padding and return the message digest. */
sha512_ssse3_final(struct shash_desc * desc,u8 * out)94*4882a593Smuzhiyun static int sha512_ssse3_final(struct shash_desc *desc, u8 *out)
95*4882a593Smuzhiyun {
96*4882a593Smuzhiyun return sha512_ssse3_finup(desc, NULL, 0, out);
97*4882a593Smuzhiyun }
98*4882a593Smuzhiyun
99*4882a593Smuzhiyun static struct shash_alg sha512_ssse3_algs[] = { {
100*4882a593Smuzhiyun .digestsize = SHA512_DIGEST_SIZE,
101*4882a593Smuzhiyun .init = sha512_base_init,
102*4882a593Smuzhiyun .update = sha512_ssse3_update,
103*4882a593Smuzhiyun .final = sha512_ssse3_final,
104*4882a593Smuzhiyun .finup = sha512_ssse3_finup,
105*4882a593Smuzhiyun .descsize = sizeof(struct sha512_state),
106*4882a593Smuzhiyun .base = {
107*4882a593Smuzhiyun .cra_name = "sha512",
108*4882a593Smuzhiyun .cra_driver_name = "sha512-ssse3",
109*4882a593Smuzhiyun .cra_priority = 150,
110*4882a593Smuzhiyun .cra_blocksize = SHA512_BLOCK_SIZE,
111*4882a593Smuzhiyun .cra_module = THIS_MODULE,
112*4882a593Smuzhiyun }
113*4882a593Smuzhiyun }, {
114*4882a593Smuzhiyun .digestsize = SHA384_DIGEST_SIZE,
115*4882a593Smuzhiyun .init = sha384_base_init,
116*4882a593Smuzhiyun .update = sha512_ssse3_update,
117*4882a593Smuzhiyun .final = sha512_ssse3_final,
118*4882a593Smuzhiyun .finup = sha512_ssse3_finup,
119*4882a593Smuzhiyun .descsize = sizeof(struct sha512_state),
120*4882a593Smuzhiyun .base = {
121*4882a593Smuzhiyun .cra_name = "sha384",
122*4882a593Smuzhiyun .cra_driver_name = "sha384-ssse3",
123*4882a593Smuzhiyun .cra_priority = 150,
124*4882a593Smuzhiyun .cra_blocksize = SHA384_BLOCK_SIZE,
125*4882a593Smuzhiyun .cra_module = THIS_MODULE,
126*4882a593Smuzhiyun }
127*4882a593Smuzhiyun } };
128*4882a593Smuzhiyun
register_sha512_ssse3(void)129*4882a593Smuzhiyun static int register_sha512_ssse3(void)
130*4882a593Smuzhiyun {
131*4882a593Smuzhiyun if (boot_cpu_has(X86_FEATURE_SSSE3))
132*4882a593Smuzhiyun return crypto_register_shashes(sha512_ssse3_algs,
133*4882a593Smuzhiyun ARRAY_SIZE(sha512_ssse3_algs));
134*4882a593Smuzhiyun return 0;
135*4882a593Smuzhiyun }
136*4882a593Smuzhiyun
unregister_sha512_ssse3(void)137*4882a593Smuzhiyun static void unregister_sha512_ssse3(void)
138*4882a593Smuzhiyun {
139*4882a593Smuzhiyun if (boot_cpu_has(X86_FEATURE_SSSE3))
140*4882a593Smuzhiyun crypto_unregister_shashes(sha512_ssse3_algs,
141*4882a593Smuzhiyun ARRAY_SIZE(sha512_ssse3_algs));
142*4882a593Smuzhiyun }
143*4882a593Smuzhiyun
144*4882a593Smuzhiyun asmlinkage void sha512_transform_avx(struct sha512_state *state,
145*4882a593Smuzhiyun const u8 *data, int blocks);
avx_usable(void)146*4882a593Smuzhiyun static bool avx_usable(void)
147*4882a593Smuzhiyun {
148*4882a593Smuzhiyun if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
149*4882a593Smuzhiyun if (boot_cpu_has(X86_FEATURE_AVX))
150*4882a593Smuzhiyun pr_info("AVX detected but unusable.\n");
151*4882a593Smuzhiyun return false;
152*4882a593Smuzhiyun }
153*4882a593Smuzhiyun
154*4882a593Smuzhiyun return true;
155*4882a593Smuzhiyun }
156*4882a593Smuzhiyun
sha512_avx_update(struct shash_desc * desc,const u8 * data,unsigned int len)157*4882a593Smuzhiyun static int sha512_avx_update(struct shash_desc *desc, const u8 *data,
158*4882a593Smuzhiyun unsigned int len)
159*4882a593Smuzhiyun {
160*4882a593Smuzhiyun return sha512_update(desc, data, len, sha512_transform_avx);
161*4882a593Smuzhiyun }
162*4882a593Smuzhiyun
/* .finup hook of the "-avx" algorithms: sha512_finup() with the AVX transform. */
static int sha512_avx_finup(struct shash_desc *desc, const u8 *data,
			    unsigned int len, u8 *out)
{
	sha512_block_fn *xform = sha512_transform_avx;

	return sha512_finup(desc, data, len, out, xform);
}
168*4882a593Smuzhiyun
169*4882a593Smuzhiyun /* Add padding and return the message digest. */
sha512_avx_final(struct shash_desc * desc,u8 * out)170*4882a593Smuzhiyun static int sha512_avx_final(struct shash_desc *desc, u8 *out)
171*4882a593Smuzhiyun {
172*4882a593Smuzhiyun return sha512_avx_finup(desc, NULL, 0, out);
173*4882a593Smuzhiyun }
174*4882a593Smuzhiyun
175*4882a593Smuzhiyun static struct shash_alg sha512_avx_algs[] = { {
176*4882a593Smuzhiyun .digestsize = SHA512_DIGEST_SIZE,
177*4882a593Smuzhiyun .init = sha512_base_init,
178*4882a593Smuzhiyun .update = sha512_avx_update,
179*4882a593Smuzhiyun .final = sha512_avx_final,
180*4882a593Smuzhiyun .finup = sha512_avx_finup,
181*4882a593Smuzhiyun .descsize = sizeof(struct sha512_state),
182*4882a593Smuzhiyun .base = {
183*4882a593Smuzhiyun .cra_name = "sha512",
184*4882a593Smuzhiyun .cra_driver_name = "sha512-avx",
185*4882a593Smuzhiyun .cra_priority = 160,
186*4882a593Smuzhiyun .cra_blocksize = SHA512_BLOCK_SIZE,
187*4882a593Smuzhiyun .cra_module = THIS_MODULE,
188*4882a593Smuzhiyun }
189*4882a593Smuzhiyun }, {
190*4882a593Smuzhiyun .digestsize = SHA384_DIGEST_SIZE,
191*4882a593Smuzhiyun .init = sha384_base_init,
192*4882a593Smuzhiyun .update = sha512_avx_update,
193*4882a593Smuzhiyun .final = sha512_avx_final,
194*4882a593Smuzhiyun .finup = sha512_avx_finup,
195*4882a593Smuzhiyun .descsize = sizeof(struct sha512_state),
196*4882a593Smuzhiyun .base = {
197*4882a593Smuzhiyun .cra_name = "sha384",
198*4882a593Smuzhiyun .cra_driver_name = "sha384-avx",
199*4882a593Smuzhiyun .cra_priority = 160,
200*4882a593Smuzhiyun .cra_blocksize = SHA384_BLOCK_SIZE,
201*4882a593Smuzhiyun .cra_module = THIS_MODULE,
202*4882a593Smuzhiyun }
203*4882a593Smuzhiyun } };
204*4882a593Smuzhiyun
register_sha512_avx(void)205*4882a593Smuzhiyun static int register_sha512_avx(void)
206*4882a593Smuzhiyun {
207*4882a593Smuzhiyun if (avx_usable())
208*4882a593Smuzhiyun return crypto_register_shashes(sha512_avx_algs,
209*4882a593Smuzhiyun ARRAY_SIZE(sha512_avx_algs));
210*4882a593Smuzhiyun return 0;
211*4882a593Smuzhiyun }
212*4882a593Smuzhiyun
unregister_sha512_avx(void)213*4882a593Smuzhiyun static void unregister_sha512_avx(void)
214*4882a593Smuzhiyun {
215*4882a593Smuzhiyun if (avx_usable())
216*4882a593Smuzhiyun crypto_unregister_shashes(sha512_avx_algs,
217*4882a593Smuzhiyun ARRAY_SIZE(sha512_avx_algs));
218*4882a593Smuzhiyun }
219*4882a593Smuzhiyun
220*4882a593Smuzhiyun asmlinkage void sha512_transform_rorx(struct sha512_state *state,
221*4882a593Smuzhiyun const u8 *data, int blocks);
222*4882a593Smuzhiyun
sha512_avx2_update(struct shash_desc * desc,const u8 * data,unsigned int len)223*4882a593Smuzhiyun static int sha512_avx2_update(struct shash_desc *desc, const u8 *data,
224*4882a593Smuzhiyun unsigned int len)
225*4882a593Smuzhiyun {
226*4882a593Smuzhiyun return sha512_update(desc, data, len, sha512_transform_rorx);
227*4882a593Smuzhiyun }
228*4882a593Smuzhiyun
/* .finup hook of the "-avx2" algorithms: sha512_finup() with the rorx transform. */
static int sha512_avx2_finup(struct shash_desc *desc, const u8 *data,
			     unsigned int len, u8 *out)
{
	sha512_block_fn *xform = sha512_transform_rorx;

	return sha512_finup(desc, data, len, out, xform);
}
234*4882a593Smuzhiyun
235*4882a593Smuzhiyun /* Add padding and return the message digest. */
sha512_avx2_final(struct shash_desc * desc,u8 * out)236*4882a593Smuzhiyun static int sha512_avx2_final(struct shash_desc *desc, u8 *out)
237*4882a593Smuzhiyun {
238*4882a593Smuzhiyun return sha512_avx2_finup(desc, NULL, 0, out);
239*4882a593Smuzhiyun }
240*4882a593Smuzhiyun
241*4882a593Smuzhiyun static struct shash_alg sha512_avx2_algs[] = { {
242*4882a593Smuzhiyun .digestsize = SHA512_DIGEST_SIZE,
243*4882a593Smuzhiyun .init = sha512_base_init,
244*4882a593Smuzhiyun .update = sha512_avx2_update,
245*4882a593Smuzhiyun .final = sha512_avx2_final,
246*4882a593Smuzhiyun .finup = sha512_avx2_finup,
247*4882a593Smuzhiyun .descsize = sizeof(struct sha512_state),
248*4882a593Smuzhiyun .base = {
249*4882a593Smuzhiyun .cra_name = "sha512",
250*4882a593Smuzhiyun .cra_driver_name = "sha512-avx2",
251*4882a593Smuzhiyun .cra_priority = 170,
252*4882a593Smuzhiyun .cra_blocksize = SHA512_BLOCK_SIZE,
253*4882a593Smuzhiyun .cra_module = THIS_MODULE,
254*4882a593Smuzhiyun }
255*4882a593Smuzhiyun }, {
256*4882a593Smuzhiyun .digestsize = SHA384_DIGEST_SIZE,
257*4882a593Smuzhiyun .init = sha384_base_init,
258*4882a593Smuzhiyun .update = sha512_avx2_update,
259*4882a593Smuzhiyun .final = sha512_avx2_final,
260*4882a593Smuzhiyun .finup = sha512_avx2_finup,
261*4882a593Smuzhiyun .descsize = sizeof(struct sha512_state),
262*4882a593Smuzhiyun .base = {
263*4882a593Smuzhiyun .cra_name = "sha384",
264*4882a593Smuzhiyun .cra_driver_name = "sha384-avx2",
265*4882a593Smuzhiyun .cra_priority = 170,
266*4882a593Smuzhiyun .cra_blocksize = SHA384_BLOCK_SIZE,
267*4882a593Smuzhiyun .cra_module = THIS_MODULE,
268*4882a593Smuzhiyun }
269*4882a593Smuzhiyun } };
270*4882a593Smuzhiyun
avx2_usable(void)271*4882a593Smuzhiyun static bool avx2_usable(void)
272*4882a593Smuzhiyun {
273*4882a593Smuzhiyun if (avx_usable() && boot_cpu_has(X86_FEATURE_AVX2) &&
274*4882a593Smuzhiyun boot_cpu_has(X86_FEATURE_BMI2))
275*4882a593Smuzhiyun return true;
276*4882a593Smuzhiyun
277*4882a593Smuzhiyun return false;
278*4882a593Smuzhiyun }
279*4882a593Smuzhiyun
register_sha512_avx2(void)280*4882a593Smuzhiyun static int register_sha512_avx2(void)
281*4882a593Smuzhiyun {
282*4882a593Smuzhiyun if (avx2_usable())
283*4882a593Smuzhiyun return crypto_register_shashes(sha512_avx2_algs,
284*4882a593Smuzhiyun ARRAY_SIZE(sha512_avx2_algs));
285*4882a593Smuzhiyun return 0;
286*4882a593Smuzhiyun }
287*4882a593Smuzhiyun
unregister_sha512_avx2(void)288*4882a593Smuzhiyun static void unregister_sha512_avx2(void)
289*4882a593Smuzhiyun {
290*4882a593Smuzhiyun if (avx2_usable())
291*4882a593Smuzhiyun crypto_unregister_shashes(sha512_avx2_algs,
292*4882a593Smuzhiyun ARRAY_SIZE(sha512_avx2_algs));
293*4882a593Smuzhiyun }
294*4882a593Smuzhiyun
sha512_ssse3_mod_init(void)295*4882a593Smuzhiyun static int __init sha512_ssse3_mod_init(void)
296*4882a593Smuzhiyun {
297*4882a593Smuzhiyun
298*4882a593Smuzhiyun if (register_sha512_ssse3())
299*4882a593Smuzhiyun goto fail;
300*4882a593Smuzhiyun
301*4882a593Smuzhiyun if (register_sha512_avx()) {
302*4882a593Smuzhiyun unregister_sha512_ssse3();
303*4882a593Smuzhiyun goto fail;
304*4882a593Smuzhiyun }
305*4882a593Smuzhiyun
306*4882a593Smuzhiyun if (register_sha512_avx2()) {
307*4882a593Smuzhiyun unregister_sha512_avx();
308*4882a593Smuzhiyun unregister_sha512_ssse3();
309*4882a593Smuzhiyun goto fail;
310*4882a593Smuzhiyun }
311*4882a593Smuzhiyun
312*4882a593Smuzhiyun return 0;
313*4882a593Smuzhiyun fail:
314*4882a593Smuzhiyun return -ENODEV;
315*4882a593Smuzhiyun }
316*4882a593Smuzhiyun
sha512_ssse3_mod_fini(void)317*4882a593Smuzhiyun static void __exit sha512_ssse3_mod_fini(void)
318*4882a593Smuzhiyun {
319*4882a593Smuzhiyun unregister_sha512_avx2();
320*4882a593Smuzhiyun unregister_sha512_avx();
321*4882a593Smuzhiyun unregister_sha512_ssse3();
322*4882a593Smuzhiyun }
323*4882a593Smuzhiyun
324*4882a593Smuzhiyun module_init(sha512_ssse3_mod_init);
325*4882a593Smuzhiyun module_exit(sha512_ssse3_mod_fini);
326*4882a593Smuzhiyun
327*4882a593Smuzhiyun MODULE_LICENSE("GPL");
328*4882a593Smuzhiyun MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, Supplemental SSE3 accelerated");
329*4882a593Smuzhiyun
330*4882a593Smuzhiyun MODULE_ALIAS_CRYPTO("sha512");
331*4882a593Smuzhiyun MODULE_ALIAS_CRYPTO("sha512-ssse3");
332*4882a593Smuzhiyun MODULE_ALIAS_CRYPTO("sha512-avx");
333*4882a593Smuzhiyun MODULE_ALIAS_CRYPTO("sha512-avx2");
334*4882a593Smuzhiyun MODULE_ALIAS_CRYPTO("sha384");
335*4882a593Smuzhiyun MODULE_ALIAS_CRYPTO("sha384-ssse3");
336*4882a593Smuzhiyun MODULE_ALIAS_CRYPTO("sha384-avx");
337*4882a593Smuzhiyun MODULE_ALIAS_CRYPTO("sha384-avx2");
338