1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-or-later
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun * BLAKE2b digest algorithm, NEON accelerated
4*4882a593Smuzhiyun *
5*4882a593Smuzhiyun * Copyright 2020 Google LLC
6*4882a593Smuzhiyun */
7*4882a593Smuzhiyun
8*4882a593Smuzhiyun #include <crypto/internal/blake2b.h>
9*4882a593Smuzhiyun #include <crypto/internal/hash.h>
10*4882a593Smuzhiyun #include <crypto/internal/simd.h>
11*4882a593Smuzhiyun
12*4882a593Smuzhiyun #include <linux/module.h>
13*4882a593Smuzhiyun #include <linux/sizes.h>
14*4882a593Smuzhiyun
15*4882a593Smuzhiyun #include <asm/neon.h>
16*4882a593Smuzhiyun #include <asm/simd.h>
17*4882a593Smuzhiyun
18*4882a593Smuzhiyun asmlinkage void blake2b_compress_neon(struct blake2b_state *state,
19*4882a593Smuzhiyun const u8 *block, size_t nblocks, u32 inc);
20*4882a593Smuzhiyun
/*
 * Compression callback handed to the shared BLAKE2b update/final helpers.
 *
 * @state:   hash state to update
 * @block:   start of the input; advanced by BLAKE2B_BLOCK_SIZE per block
 * @nblocks: number of blocks to process
 * @inc:     passed through unchanged to the underlying compress routine
 *
 * When crypto_simd_usable() reports false, the whole request is forwarded to
 * blake2b_compress_generic(). Otherwise the input is fed to
 * blake2b_compress_neon() in chunks of at most SZ_4K bytes, with each chunk
 * wrapped in its own kernel_neon_begin()/kernel_neon_end() pair.
 */
static void blake2b_compress_arch(struct blake2b_state *state,
				  const u8 *block, size_t nblocks, u32 inc)
{
	/* Upper bound on the blocks handled per NEON section. */
	const size_t max_chunk = SZ_4K / BLAKE2B_BLOCK_SIZE;

	if (!crypto_simd_usable()) {
		blake2b_compress_generic(state, block, nblocks, inc);
		return;
	}

	/*
	 * NOTE(review): the chunking presumably bounds how long NEON stays
	 * claimed in one go - confirm against kernel_neon_begin() semantics.
	 * The NEON routine is always invoked at least once.
	 */
	for (;;) {
		size_t chunk = nblocks;

		if (chunk > max_chunk)
			chunk = max_chunk;

		kernel_neon_begin();
		blake2b_compress_neon(state, block, chunk, inc);
		kernel_neon_end();

		block += chunk * BLAKE2B_BLOCK_SIZE;
		nblocks -= chunk;
		if (!nblocks)
			break;
	}
}
41*4882a593Smuzhiyun
crypto_blake2b_update_neon(struct shash_desc * desc,const u8 * in,unsigned int inlen)42*4882a593Smuzhiyun static int crypto_blake2b_update_neon(struct shash_desc *desc,
43*4882a593Smuzhiyun const u8 *in, unsigned int inlen)
44*4882a593Smuzhiyun {
45*4882a593Smuzhiyun return crypto_blake2b_update(desc, in, inlen, blake2b_compress_arch);
46*4882a593Smuzhiyun }
47*4882a593Smuzhiyun
/*
 * shash .final handler: forwards to crypto_blake2b_final() with
 * blake2b_compress_arch() as the compression callback and returns that
 * helper's result.
 */
static int crypto_blake2b_final_neon(struct shash_desc *desc, u8 *out)
{
	int err;

	err = crypto_blake2b_final(desc, out, blake2b_compress_arch);

	return err;
}
52*4882a593Smuzhiyun
/*
 * Build one struct shash_alg initializer.
 *
 * @alg_name: value for .base.cra_name
 * @drv_name: value for .base.cra_driver_name
 * @hash_len: value for .digestsize
 *
 * Everything else (callbacks, priority, flags, block/context/descriptor
 * sizes) is the same for every entry generated with this macro.
 */
#define BLAKE2B_ALG(alg_name, drv_name, hash_len)			\
	{								\
		.init			= crypto_blake2b_init,		\
		.update			= crypto_blake2b_update_neon,	\
		.final			= crypto_blake2b_final_neon,	\
		.setkey			= crypto_blake2b_setkey,	\
		.digestsize		= hash_len,			\
		.descsize		= sizeof(struct blake2b_state),	\
		.base.cra_name		= alg_name,			\
		.base.cra_driver_name	= drv_name,			\
		.base.cra_priority	= 200,				\
		.base.cra_flags		= CRYPTO_ALG_OPTIONAL_KEY,	\
		.base.cra_blocksize	= BLAKE2B_BLOCK_SIZE,		\
		.base.cra_ctxsize	= sizeof(struct blake2b_tfm_ctx), \
		.base.cra_module	= THIS_MODULE,			\
	}
69*4882a593Smuzhiyun
70*4882a593Smuzhiyun static struct shash_alg blake2b_neon_algs[] = {
71*4882a593Smuzhiyun BLAKE2B_ALG("blake2b-160", "blake2b-160-neon", BLAKE2B_160_HASH_SIZE),
72*4882a593Smuzhiyun BLAKE2B_ALG("blake2b-256", "blake2b-256-neon", BLAKE2B_256_HASH_SIZE),
73*4882a593Smuzhiyun BLAKE2B_ALG("blake2b-384", "blake2b-384-neon", BLAKE2B_384_HASH_SIZE),
74*4882a593Smuzhiyun BLAKE2B_ALG("blake2b-512", "blake2b-512-neon", BLAKE2B_512_HASH_SIZE),
75*4882a593Smuzhiyun };
76*4882a593Smuzhiyun
blake2b_neon_mod_init(void)77*4882a593Smuzhiyun static int __init blake2b_neon_mod_init(void)
78*4882a593Smuzhiyun {
79*4882a593Smuzhiyun if (!(elf_hwcap & HWCAP_NEON))
80*4882a593Smuzhiyun return -ENODEV;
81*4882a593Smuzhiyun
82*4882a593Smuzhiyun return crypto_register_shashes(blake2b_neon_algs,
83*4882a593Smuzhiyun ARRAY_SIZE(blake2b_neon_algs));
84*4882a593Smuzhiyun }
85*4882a593Smuzhiyun
blake2b_neon_mod_exit(void)86*4882a593Smuzhiyun static void __exit blake2b_neon_mod_exit(void)
87*4882a593Smuzhiyun {
88*4882a593Smuzhiyun crypto_unregister_shashes(blake2b_neon_algs,
89*4882a593Smuzhiyun ARRAY_SIZE(blake2b_neon_algs));
90*4882a593Smuzhiyun }
91*4882a593Smuzhiyun
92*4882a593Smuzhiyun module_init(blake2b_neon_mod_init);
93*4882a593Smuzhiyun module_exit(blake2b_neon_mod_exit);
94*4882a593Smuzhiyun
95*4882a593Smuzhiyun MODULE_DESCRIPTION("BLAKE2b digest algorithm, NEON accelerated");
96*4882a593Smuzhiyun MODULE_LICENSE("GPL");
97*4882a593Smuzhiyun MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
98*4882a593Smuzhiyun MODULE_ALIAS_CRYPTO("blake2b-160");
99*4882a593Smuzhiyun MODULE_ALIAS_CRYPTO("blake2b-160-neon");
100*4882a593Smuzhiyun MODULE_ALIAS_CRYPTO("blake2b-256");
101*4882a593Smuzhiyun MODULE_ALIAS_CRYPTO("blake2b-256-neon");
102*4882a593Smuzhiyun MODULE_ALIAS_CRYPTO("blake2b-384");
103*4882a593Smuzhiyun MODULE_ALIAS_CRYPTO("blake2b-384-neon");
104*4882a593Smuzhiyun MODULE_ALIAS_CRYPTO("blake2b-512");
105*4882a593Smuzhiyun MODULE_ALIAS_CRYPTO("blake2b-512-neon");
106