xref: /OK3568_Linux_fs/kernel/arch/arm64/crypto/chacha-neon-glue.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
/*
 * ARM NEON and scalar accelerated ChaCha and XChaCha stream ciphers,
 * including ChaCha20 (RFC7539)
 *
 * Copyright (C) 2016 - 2017 Linaro, Ltd. <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Based on:
 * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
 *
 * Copyright (C) 2015 Martin Willi
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */
21*4882a593Smuzhiyun 
22*4882a593Smuzhiyun #include <crypto/algapi.h>
23*4882a593Smuzhiyun #include <crypto/internal/chacha.h>
24*4882a593Smuzhiyun #include <crypto/internal/simd.h>
25*4882a593Smuzhiyun #include <crypto/internal/skcipher.h>
26*4882a593Smuzhiyun #include <linux/jump_label.h>
27*4882a593Smuzhiyun #include <linux/kernel.h>
28*4882a593Smuzhiyun #include <linux/module.h>
29*4882a593Smuzhiyun 
30*4882a593Smuzhiyun #include <asm/hwcap.h>
31*4882a593Smuzhiyun #include <asm/neon.h>
32*4882a593Smuzhiyun #include <asm/simd.h>
33*4882a593Smuzhiyun 
/*
 * NEON routines defined outside this file (declared asmlinkage, so
 * presumably hand-written assembly -- confirm against the companion source).
 *
 * Usage within this file:
 *  - chacha_block_xor_neon() is only called on a CHACHA_BLOCK_SIZE bounce
 *    buffer and takes no length argument.
 *  - chacha_4block_xor_neon() is given more than one and at most five
 *    blocks' worth of data through its 'bytes' argument.
 *  - hchacha_block_neon() is called from hchacha_block_arch().
 * Every call chain in this file reaches them between kernel_neon_begin()
 * and kernel_neon_end().
 */
34*4882a593Smuzhiyun asmlinkage void chacha_block_xor_neon(u32 *state, u8 *dst, const u8 *src,
35*4882a593Smuzhiyun 				      int nrounds);
36*4882a593Smuzhiyun asmlinkage void chacha_4block_xor_neon(u32 *state, u8 *dst, const u8 *src,
37*4882a593Smuzhiyun 				       int nrounds, int bytes);
38*4882a593Smuzhiyun asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
39*4882a593Smuzhiyun 
/*
 * Static branch that starts out false; chacha_simd_mod_init() enables it
 * when the CPU reports the ASIMD feature.  Every NEON path in this file
 * tests it before touching the SIMD unit.
 */
40*4882a593Smuzhiyun static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
41*4882a593Smuzhiyun 
/*
 * chacha_doneon - run the NEON ChaCha routines over a buffer
 * @state:   ChaCha state words; word 12 is advanced by one for every block
 *           (or trailing partial block) that gets processed
 * @dst:     output buffer
 * @src:     input buffer
 * @bytes:   number of bytes to process; nothing is done when it is <= 0
 * @nrounds: round count, handed unchanged to the NEON routines
 *
 * This helper does not call kernel_neon_begin()/kernel_neon_end() itself;
 * each call site in this file brackets it with that pair.
 */
static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
			  int bytes, int nrounds)
{
	u8 bounce[CHACHA_BLOCK_SIZE];
	int chunk;

	for (; bytes > 0; bytes -= chunk, src += chunk, dst += chunk) {
		chunk = min(bytes, CHACHA_BLOCK_SIZE * 5);

		if (chunk > CHACHA_BLOCK_SIZE) {
			/* More than one block: the multi-block routine takes a length. */
			chacha_4block_xor_neon(state, dst, src, nrounds, chunk);
			state[12] += DIV_ROUND_UP(chunk, CHACHA_BLOCK_SIZE);
			continue;
		}

		/*
		 * At most one block is left.  chacha_block_xor_neon() has no
		 * length parameter, so stage the data in a block-sized buffer
		 * and copy back only the bytes that were asked for.
		 */
		memcpy(bounce, src, chunk);
		chacha_block_xor_neon(state, bounce, bounce, nrounds);
		memcpy(dst, bounce, chunk);
		state[12] += 1;
		return;
	}
}
64*4882a593Smuzhiyun 
/*
 * hchacha_block_arch - HChaCha block function, NEON-accelerated when possible
 * @state:   input state words
 * @stream:  output words
 * @nrounds: round count, forwarded to the implementation that is chosen
 *
 * Uses hchacha_block_neon() inside a kernel_neon_begin()/kernel_neon_end()
 * section when the have_neon key is enabled and crypto_simd_usable() says
 * SIMD may be used right now; otherwise falls back to
 * hchacha_block_generic().
 */
void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
{
	if (static_branch_likely(&have_neon) && crypto_simd_usable()) {
		kernel_neon_begin();
		hchacha_block_neon(state, stream, nrounds);
		kernel_neon_end();
		return;
	}

	hchacha_block_generic(state, stream, nrounds);
}
EXPORT_SYMBOL(hchacha_block_arch);
76*4882a593Smuzhiyun 
/*
 * chacha_init_arch - arch entry point for ChaCha state initialisation
 * @state: state words to fill in
 * @key:   key words
 * @iv:    IV bytes
 *
 * There is no arch-specific work here: the call is forwarded unchanged to
 * chacha_init_generic().
 */
void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
{
	chacha_init_generic(state, key, iv);
}
EXPORT_SYMBOL(chacha_init_arch);
82*4882a593Smuzhiyun 
/*
 * chacha_crypt_arch - XOR a buffer with the ChaCha keystream
 * @state:   ChaCha state words, updated as data is processed
 * @dst:     output buffer
 * @src:     input buffer
 * @bytes:   number of bytes to process
 * @nrounds: round count
 *
 * Requests of at most one block, or requests made while NEON is unavailable
 * or not usable in the current context, go to chacha_crypt_generic().
 * Everything else is fed to chacha_doneon() in pieces of at most SZ_4K
 * bytes, each inside its own kernel_neon_begin()/kernel_neon_end() section
 * (presumably to bound the time spent in any one such section).
 */
void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
		       int nrounds)
{
	unsigned int chunk;

	if (!static_branch_likely(&have_neon) || bytes <= CHACHA_BLOCK_SIZE ||
	    !crypto_simd_usable()) {
		chacha_crypt_generic(state, dst, src, bytes, nrounds);
		return;
	}

	/* bytes exceeds one block here, so the loop runs at least once. */
	for (; bytes; bytes -= chunk, src += chunk, dst += chunk) {
		chunk = min_t(unsigned int, bytes, SZ_4K);

		kernel_neon_begin();
		chacha_doneon(state, dst, src, chunk, nrounds);
		kernel_neon_end();
	}
}
EXPORT_SYMBOL(chacha_crypt_arch);
103*4882a593Smuzhiyun 
chacha_neon_stream_xor(struct skcipher_request * req,const struct chacha_ctx * ctx,const u8 * iv)104*4882a593Smuzhiyun static int chacha_neon_stream_xor(struct skcipher_request *req,
105*4882a593Smuzhiyun 				  const struct chacha_ctx *ctx, const u8 *iv)
106*4882a593Smuzhiyun {
107*4882a593Smuzhiyun 	struct skcipher_walk walk;
108*4882a593Smuzhiyun 	u32 state[16];
109*4882a593Smuzhiyun 	int err;
110*4882a593Smuzhiyun 
111*4882a593Smuzhiyun 	err = skcipher_walk_virt(&walk, req, false);
112*4882a593Smuzhiyun 
113*4882a593Smuzhiyun 	chacha_init_generic(state, ctx->key, iv);
114*4882a593Smuzhiyun 
115*4882a593Smuzhiyun 	while (walk.nbytes > 0) {
116*4882a593Smuzhiyun 		unsigned int nbytes = walk.nbytes;
117*4882a593Smuzhiyun 
118*4882a593Smuzhiyun 		if (nbytes < walk.total)
119*4882a593Smuzhiyun 			nbytes = rounddown(nbytes, walk.stride);
120*4882a593Smuzhiyun 
121*4882a593Smuzhiyun 		if (!static_branch_likely(&have_neon) ||
122*4882a593Smuzhiyun 		    !crypto_simd_usable()) {
123*4882a593Smuzhiyun 			chacha_crypt_generic(state, walk.dst.virt.addr,
124*4882a593Smuzhiyun 					     walk.src.virt.addr, nbytes,
125*4882a593Smuzhiyun 					     ctx->nrounds);
126*4882a593Smuzhiyun 		} else {
127*4882a593Smuzhiyun 			kernel_neon_begin();
128*4882a593Smuzhiyun 			chacha_doneon(state, walk.dst.virt.addr,
129*4882a593Smuzhiyun 				      walk.src.virt.addr, nbytes, ctx->nrounds);
130*4882a593Smuzhiyun 			kernel_neon_end();
131*4882a593Smuzhiyun 		}
132*4882a593Smuzhiyun 		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
133*4882a593Smuzhiyun 	}
134*4882a593Smuzhiyun 
135*4882a593Smuzhiyun 	return err;
136*4882a593Smuzhiyun }
137*4882a593Smuzhiyun 
chacha_neon(struct skcipher_request * req)138*4882a593Smuzhiyun static int chacha_neon(struct skcipher_request *req)
139*4882a593Smuzhiyun {
140*4882a593Smuzhiyun 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
141*4882a593Smuzhiyun 	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
142*4882a593Smuzhiyun 
143*4882a593Smuzhiyun 	return chacha_neon_stream_xor(req, ctx, req->iv);
144*4882a593Smuzhiyun }
145*4882a593Smuzhiyun 
xchacha_neon(struct skcipher_request * req)146*4882a593Smuzhiyun static int xchacha_neon(struct skcipher_request *req)
147*4882a593Smuzhiyun {
148*4882a593Smuzhiyun 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
149*4882a593Smuzhiyun 	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
150*4882a593Smuzhiyun 	struct chacha_ctx subctx;
151*4882a593Smuzhiyun 	u32 state[16];
152*4882a593Smuzhiyun 	u8 real_iv[16];
153*4882a593Smuzhiyun 
154*4882a593Smuzhiyun 	chacha_init_generic(state, ctx->key, req->iv);
155*4882a593Smuzhiyun 	hchacha_block_arch(state, subctx.key, ctx->nrounds);
156*4882a593Smuzhiyun 	subctx.nrounds = ctx->nrounds;
157*4882a593Smuzhiyun 
158*4882a593Smuzhiyun 	memcpy(&real_iv[0], req->iv + 24, 8);
159*4882a593Smuzhiyun 	memcpy(&real_iv[8], req->iv + 16, 8);
160*4882a593Smuzhiyun 	return chacha_neon_stream_xor(req, &subctx, real_iv);
161*4882a593Smuzhiyun }
162*4882a593Smuzhiyun 
163*4882a593Smuzhiyun static struct skcipher_alg algs[] = {
164*4882a593Smuzhiyun 	{
165*4882a593Smuzhiyun 		.base.cra_name		= "chacha20",
166*4882a593Smuzhiyun 		.base.cra_driver_name	= "chacha20-neon",
167*4882a593Smuzhiyun 		.base.cra_priority	= 300,
168*4882a593Smuzhiyun 		.base.cra_blocksize	= 1,
169*4882a593Smuzhiyun 		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
170*4882a593Smuzhiyun 		.base.cra_module	= THIS_MODULE,
171*4882a593Smuzhiyun 
172*4882a593Smuzhiyun 		.min_keysize		= CHACHA_KEY_SIZE,
173*4882a593Smuzhiyun 		.max_keysize		= CHACHA_KEY_SIZE,
174*4882a593Smuzhiyun 		.ivsize			= CHACHA_IV_SIZE,
175*4882a593Smuzhiyun 		.chunksize		= CHACHA_BLOCK_SIZE,
176*4882a593Smuzhiyun 		.walksize		= 5 * CHACHA_BLOCK_SIZE,
177*4882a593Smuzhiyun 		.setkey			= chacha20_setkey,
178*4882a593Smuzhiyun 		.encrypt		= chacha_neon,
179*4882a593Smuzhiyun 		.decrypt		= chacha_neon,
180*4882a593Smuzhiyun 	}, {
181*4882a593Smuzhiyun 		.base.cra_name		= "xchacha20",
182*4882a593Smuzhiyun 		.base.cra_driver_name	= "xchacha20-neon",
183*4882a593Smuzhiyun 		.base.cra_priority	= 300,
184*4882a593Smuzhiyun 		.base.cra_blocksize	= 1,
185*4882a593Smuzhiyun 		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
186*4882a593Smuzhiyun 		.base.cra_module	= THIS_MODULE,
187*4882a593Smuzhiyun 
188*4882a593Smuzhiyun 		.min_keysize		= CHACHA_KEY_SIZE,
189*4882a593Smuzhiyun 		.max_keysize		= CHACHA_KEY_SIZE,
190*4882a593Smuzhiyun 		.ivsize			= XCHACHA_IV_SIZE,
191*4882a593Smuzhiyun 		.chunksize		= CHACHA_BLOCK_SIZE,
192*4882a593Smuzhiyun 		.walksize		= 5 * CHACHA_BLOCK_SIZE,
193*4882a593Smuzhiyun 		.setkey			= chacha20_setkey,
194*4882a593Smuzhiyun 		.encrypt		= xchacha_neon,
195*4882a593Smuzhiyun 		.decrypt		= xchacha_neon,
196*4882a593Smuzhiyun 	}, {
197*4882a593Smuzhiyun 		.base.cra_name		= "xchacha12",
198*4882a593Smuzhiyun 		.base.cra_driver_name	= "xchacha12-neon",
199*4882a593Smuzhiyun 		.base.cra_priority	= 300,
200*4882a593Smuzhiyun 		.base.cra_blocksize	= 1,
201*4882a593Smuzhiyun 		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
202*4882a593Smuzhiyun 		.base.cra_module	= THIS_MODULE,
203*4882a593Smuzhiyun 
204*4882a593Smuzhiyun 		.min_keysize		= CHACHA_KEY_SIZE,
205*4882a593Smuzhiyun 		.max_keysize		= CHACHA_KEY_SIZE,
206*4882a593Smuzhiyun 		.ivsize			= XCHACHA_IV_SIZE,
207*4882a593Smuzhiyun 		.chunksize		= CHACHA_BLOCK_SIZE,
208*4882a593Smuzhiyun 		.walksize		= 5 * CHACHA_BLOCK_SIZE,
209*4882a593Smuzhiyun 		.setkey			= chacha12_setkey,
210*4882a593Smuzhiyun 		.encrypt		= xchacha_neon,
211*4882a593Smuzhiyun 		.decrypt		= xchacha_neon,
212*4882a593Smuzhiyun 	}
213*4882a593Smuzhiyun };
214*4882a593Smuzhiyun 
chacha_simd_mod_init(void)215*4882a593Smuzhiyun static int __init chacha_simd_mod_init(void)
216*4882a593Smuzhiyun {
217*4882a593Smuzhiyun 	if (!cpu_have_named_feature(ASIMD))
218*4882a593Smuzhiyun 		return 0;
219*4882a593Smuzhiyun 
220*4882a593Smuzhiyun 	static_branch_enable(&have_neon);
221*4882a593Smuzhiyun 
222*4882a593Smuzhiyun 	return IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER) ?
223*4882a593Smuzhiyun 		crypto_register_skciphers(algs, ARRAY_SIZE(algs)) : 0;
224*4882a593Smuzhiyun }
225*4882a593Smuzhiyun 
chacha_simd_mod_fini(void)226*4882a593Smuzhiyun static void __exit chacha_simd_mod_fini(void)
227*4882a593Smuzhiyun {
228*4882a593Smuzhiyun 	if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER) && cpu_have_named_feature(ASIMD))
229*4882a593Smuzhiyun 		crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
230*4882a593Smuzhiyun }
231*4882a593Smuzhiyun 
/* Module load and unload entry points. */
232*4882a593Smuzhiyun module_init(chacha_simd_mod_init);
233*4882a593Smuzhiyun module_exit(chacha_simd_mod_fini);
234*4882a593Smuzhiyun 
/*
 * Module metadata.  There is one crypto alias for each cra_name and each
 * cra_driver_name that appears in the algs[] table.
 */
235*4882a593Smuzhiyun MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (NEON accelerated)");
236*4882a593Smuzhiyun MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
237*4882a593Smuzhiyun MODULE_LICENSE("GPL v2");
238*4882a593Smuzhiyun MODULE_ALIAS_CRYPTO("chacha20");
239*4882a593Smuzhiyun MODULE_ALIAS_CRYPTO("chacha20-neon");
240*4882a593Smuzhiyun MODULE_ALIAS_CRYPTO("xchacha20");
241*4882a593Smuzhiyun MODULE_ALIAS_CRYPTO("xchacha20-neon");
242*4882a593Smuzhiyun MODULE_ALIAS_CRYPTO("xchacha12");
243*4882a593Smuzhiyun MODULE_ALIAS_CRYPTO("xchacha12-neon");
243*4882a593Smuzhiyun MODULE_ALIAS_CRYPTO("xchacha12-neon");
244