xref: /OK3568_Linux_fs/kernel/arch/arm/crypto/poly1305-glue.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun  * OpenSSL/Cryptogams accelerated Poly1305 transform for ARM
4*4882a593Smuzhiyun  *
5*4882a593Smuzhiyun  * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org>
6*4882a593Smuzhiyun  */
7*4882a593Smuzhiyun 
8*4882a593Smuzhiyun #include <asm/hwcap.h>
9*4882a593Smuzhiyun #include <asm/neon.h>
10*4882a593Smuzhiyun #include <asm/simd.h>
11*4882a593Smuzhiyun #include <asm/unaligned.h>
12*4882a593Smuzhiyun #include <crypto/algapi.h>
13*4882a593Smuzhiyun #include <crypto/internal/hash.h>
14*4882a593Smuzhiyun #include <crypto/internal/poly1305.h>
15*4882a593Smuzhiyun #include <crypto/internal/simd.h>
16*4882a593Smuzhiyun #include <linux/cpufeature.h>
17*4882a593Smuzhiyun #include <linux/crypto.h>
18*4882a593Smuzhiyun #include <linux/jump_label.h>
19*4882a593Smuzhiyun #include <linux/module.h>
20*4882a593Smuzhiyun 
21*4882a593Smuzhiyun void poly1305_init_arm(void *state, const u8 *key);
22*4882a593Smuzhiyun void poly1305_blocks_arm(void *state, const u8 *src, u32 len, u32 hibit);
23*4882a593Smuzhiyun void poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit);
24*4882a593Smuzhiyun void poly1305_emit_arm(void *state, u8 *digest, const u32 *nonce);
25*4882a593Smuzhiyun 
poly1305_blocks_neon(void * state,const u8 * src,u32 len,u32 hibit)26*4882a593Smuzhiyun void __weak poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit)
27*4882a593Smuzhiyun {
28*4882a593Smuzhiyun }
29*4882a593Smuzhiyun 
30*4882a593Smuzhiyun static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
31*4882a593Smuzhiyun 
poly1305_init_arch(struct poly1305_desc_ctx * dctx,const u8 key[POLY1305_KEY_SIZE])32*4882a593Smuzhiyun void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE])
33*4882a593Smuzhiyun {
34*4882a593Smuzhiyun 	poly1305_init_arm(&dctx->h, key);
35*4882a593Smuzhiyun 	dctx->s[0] = get_unaligned_le32(key + 16);
36*4882a593Smuzhiyun 	dctx->s[1] = get_unaligned_le32(key + 20);
37*4882a593Smuzhiyun 	dctx->s[2] = get_unaligned_le32(key + 24);
38*4882a593Smuzhiyun 	dctx->s[3] = get_unaligned_le32(key + 28);
39*4882a593Smuzhiyun 	dctx->buflen = 0;
40*4882a593Smuzhiyun }
41*4882a593Smuzhiyun EXPORT_SYMBOL(poly1305_init_arch);
42*4882a593Smuzhiyun 
arm_poly1305_init(struct shash_desc * desc)43*4882a593Smuzhiyun static int arm_poly1305_init(struct shash_desc *desc)
44*4882a593Smuzhiyun {
45*4882a593Smuzhiyun 	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
46*4882a593Smuzhiyun 
47*4882a593Smuzhiyun 	dctx->buflen = 0;
48*4882a593Smuzhiyun 	dctx->rset = 0;
49*4882a593Smuzhiyun 	dctx->sset = false;
50*4882a593Smuzhiyun 
51*4882a593Smuzhiyun 	return 0;
52*4882a593Smuzhiyun }
53*4882a593Smuzhiyun 
/*
 * Feed whole blocks from @src into the accumulator, first peeling off any
 * key material that has not been seen yet.
 *
 * While dctx->sset is false the leading input is treated as key data:
 *   - if rset is still clear, the first block is passed to
 *     poly1305_init_arm() and rset is set;
 *   - if at least one more block is available, it is loaded as four
 *     little-endian words into dctx->s and sset is set.
 * If less than one block is left after that, nothing is hashed.
 *
 * The remaining length is rounded down to a multiple of
 * POLY1305_BLOCK_SIZE and passed, together with @hibit, to the NEON
 * routine when both have_neon and @do_neon allow it, and to the scalar
 * routine otherwise.
 *
 * Callers in this file only invoke this with @len >= POLY1305_BLOCK_SIZE.
 */
static void arm_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
				 u32 len, u32 hibit, bool do_neon)
{
	if (unlikely(!dctx->sset)) {
		if (!dctx->rset) {
			poly1305_init_arm(&dctx->h, src);
			dctx->rset = 1;
			src += POLY1305_BLOCK_SIZE;
			len -= POLY1305_BLOCK_SIZE;
		}

		if (len >= POLY1305_BLOCK_SIZE) {
			unsigned int i;

			for (i = 0; i < 4; i++)
				dctx->s[i] = get_unaligned_le32(src + 4 * i);
			dctx->sset = true;
			src += POLY1305_BLOCK_SIZE;
			len -= POLY1305_BLOCK_SIZE;
		}

		/* Only key material was supplied; no message block left. */
		if (len < POLY1305_BLOCK_SIZE)
			return;
	}

	/* Drop any trailing partial block; the caller buffers it. */
	len = round_down(len, POLY1305_BLOCK_SIZE);

	if (!static_branch_likely(&have_neon) || unlikely(!do_neon)) {
		poly1305_blocks_arm(&dctx->h, src, len, hibit);
		return;
	}

	poly1305_blocks_neon(&dctx->h, src, len, hibit);
}
84*4882a593Smuzhiyun 
/*
 * Common update path for both shash variants.
 *
 * Works in three stages:
 *   1. top up a partially filled dctx->buf and, once it holds a full block,
 *      process it (always through the non-NEON path);
 *   2. process all whole blocks directly from @src, honouring @do_neon;
 *   3. stash whatever is left (less than one block) in dctx->buf.
 */
static void arm_poly1305_do_update(struct poly1305_desc_ctx *dctx,
				    const u8 *src, u32 len, bool do_neon)
{
	if (unlikely(dctx->buflen)) {
		u32 take = min(len, POLY1305_BLOCK_SIZE - dctx->buflen);

		memcpy(dctx->buf + dctx->buflen, src, take);
		dctx->buflen += take;
		src += take;
		len -= take;

		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
			arm_poly1305_blocks(dctx, dctx->buf,
					    POLY1305_BLOCK_SIZE, 1, false);
			dctx->buflen = 0;
		}
	}

	if (likely(len >= POLY1305_BLOCK_SIZE)) {
		u32 whole = round_down(len, POLY1305_BLOCK_SIZE);

		/*
		 * The full length is passed on; arm_poly1305_blocks() itself
		 * ignores the trailing partial block.
		 */
		arm_poly1305_blocks(dctx, src, len, 1, do_neon);
		src += whole;
		len -= whole;
	}

	if (unlikely(len)) {
		memcpy(dctx->buf, src, len);
		dctx->buflen = len;
	}
}
114*4882a593Smuzhiyun 
arm_poly1305_update(struct shash_desc * desc,const u8 * src,unsigned int srclen)115*4882a593Smuzhiyun static int arm_poly1305_update(struct shash_desc *desc,
116*4882a593Smuzhiyun 			       const u8 *src, unsigned int srclen)
117*4882a593Smuzhiyun {
118*4882a593Smuzhiyun 	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
119*4882a593Smuzhiyun 
120*4882a593Smuzhiyun 	arm_poly1305_do_update(dctx, src, srclen, false);
121*4882a593Smuzhiyun 	return 0;
122*4882a593Smuzhiyun }
123*4882a593Smuzhiyun 
arm_poly1305_update_neon(struct shash_desc * desc,const u8 * src,unsigned int srclen)124*4882a593Smuzhiyun static int __maybe_unused arm_poly1305_update_neon(struct shash_desc *desc,
125*4882a593Smuzhiyun 						   const u8 *src,
126*4882a593Smuzhiyun 						   unsigned int srclen)
127*4882a593Smuzhiyun {
128*4882a593Smuzhiyun 	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
129*4882a593Smuzhiyun 	bool do_neon = crypto_simd_usable() && srclen > 128;
130*4882a593Smuzhiyun 
131*4882a593Smuzhiyun 	if (static_branch_likely(&have_neon) && do_neon)
132*4882a593Smuzhiyun 		kernel_neon_begin();
133*4882a593Smuzhiyun 	arm_poly1305_do_update(dctx, src, srclen, do_neon);
134*4882a593Smuzhiyun 	if (static_branch_likely(&have_neon) && do_neon)
135*4882a593Smuzhiyun 		kernel_neon_end();
136*4882a593Smuzhiyun 	return 0;
137*4882a593Smuzhiyun }
138*4882a593Smuzhiyun 
poly1305_update_arch(struct poly1305_desc_ctx * dctx,const u8 * src,unsigned int nbytes)139*4882a593Smuzhiyun void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
140*4882a593Smuzhiyun 			  unsigned int nbytes)
141*4882a593Smuzhiyun {
142*4882a593Smuzhiyun 	bool do_neon = IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
143*4882a593Smuzhiyun 		       crypto_simd_usable();
144*4882a593Smuzhiyun 
145*4882a593Smuzhiyun 	if (unlikely(dctx->buflen)) {
146*4882a593Smuzhiyun 		u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen);
147*4882a593Smuzhiyun 
148*4882a593Smuzhiyun 		memcpy(dctx->buf + dctx->buflen, src, bytes);
149*4882a593Smuzhiyun 		src += bytes;
150*4882a593Smuzhiyun 		nbytes -= bytes;
151*4882a593Smuzhiyun 		dctx->buflen += bytes;
152*4882a593Smuzhiyun 
153*4882a593Smuzhiyun 		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
154*4882a593Smuzhiyun 			poly1305_blocks_arm(&dctx->h, dctx->buf,
155*4882a593Smuzhiyun 					    POLY1305_BLOCK_SIZE, 1);
156*4882a593Smuzhiyun 			dctx->buflen = 0;
157*4882a593Smuzhiyun 		}
158*4882a593Smuzhiyun 	}
159*4882a593Smuzhiyun 
160*4882a593Smuzhiyun 	if (likely(nbytes >= POLY1305_BLOCK_SIZE)) {
161*4882a593Smuzhiyun 		unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);
162*4882a593Smuzhiyun 
163*4882a593Smuzhiyun 		if (static_branch_likely(&have_neon) && do_neon) {
164*4882a593Smuzhiyun 			do {
165*4882a593Smuzhiyun 				unsigned int todo = min_t(unsigned int, len, SZ_4K);
166*4882a593Smuzhiyun 
167*4882a593Smuzhiyun 				kernel_neon_begin();
168*4882a593Smuzhiyun 				poly1305_blocks_neon(&dctx->h, src, todo, 1);
169*4882a593Smuzhiyun 				kernel_neon_end();
170*4882a593Smuzhiyun 
171*4882a593Smuzhiyun 				len -= todo;
172*4882a593Smuzhiyun 				src += todo;
173*4882a593Smuzhiyun 			} while (len);
174*4882a593Smuzhiyun 		} else {
175*4882a593Smuzhiyun 			poly1305_blocks_arm(&dctx->h, src, len, 1);
176*4882a593Smuzhiyun 			src += len;
177*4882a593Smuzhiyun 		}
178*4882a593Smuzhiyun 		nbytes %= POLY1305_BLOCK_SIZE;
179*4882a593Smuzhiyun 	}
180*4882a593Smuzhiyun 
181*4882a593Smuzhiyun 	if (unlikely(nbytes)) {
182*4882a593Smuzhiyun 		dctx->buflen = nbytes;
183*4882a593Smuzhiyun 		memcpy(dctx->buf, src, nbytes);
184*4882a593Smuzhiyun 	}
185*4882a593Smuzhiyun }
186*4882a593Smuzhiyun EXPORT_SYMBOL(poly1305_update_arch);
187*4882a593Smuzhiyun 
/*
 * poly1305_final_arch() - finish the computation and write the digest.
 * @dctx: context holding the accumulated state
 * @dst:  output buffer for the digest
 *
 * If a partial block is still buffered, a single 0x01 byte is appended, the
 * rest of the block is zero-filled, and the block is processed by the
 * scalar routine with hibit == 0. The digest is then produced by
 * poly1305_emit_arm() using dctx->s as its nonce argument, and the entire
 * context is overwritten with zeroes.
 */
void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
{
	if (unlikely(dctx->buflen)) {
		unsigned int used = dctx->buflen;

		dctx->buf[used++] = 1;
		dctx->buflen = used;
		memset(dctx->buf + used, 0, POLY1305_BLOCK_SIZE - used);

		poly1305_blocks_arm(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
	}

	poly1305_emit_arm(&dctx->h, dst, dctx->s);

	/* Leave no key or state behind in the context. */
	*dctx = (struct poly1305_desc_ctx){};
}
EXPORT_SYMBOL(poly1305_final_arch);
201*4882a593Smuzhiyun 
/*
 * shash .final hook shared by both drivers.
 *
 * Returns -ENOKEY when dctx->sset was never set, i.e. the input never
 * supplied the second key block. Otherwise finalises via
 * poly1305_final_arch() and returns 0.
 */
static int arm_poly1305_final(struct shash_desc *desc, u8 *dst)
{
	struct poly1305_desc_ctx *ctx = shash_desc_ctx(desc);

	if (likely(ctx->sset)) {
		poly1305_final_arch(ctx, dst);
		return 0;
	}

	return -ENOKEY;
}
212*4882a593Smuzhiyun 
213*4882a593Smuzhiyun static struct shash_alg arm_poly1305_algs[] = {{
214*4882a593Smuzhiyun 	.init			= arm_poly1305_init,
215*4882a593Smuzhiyun 	.update			= arm_poly1305_update,
216*4882a593Smuzhiyun 	.final			= arm_poly1305_final,
217*4882a593Smuzhiyun 	.digestsize		= POLY1305_DIGEST_SIZE,
218*4882a593Smuzhiyun 	.descsize		= sizeof(struct poly1305_desc_ctx),
219*4882a593Smuzhiyun 
220*4882a593Smuzhiyun 	.base.cra_name		= "poly1305",
221*4882a593Smuzhiyun 	.base.cra_driver_name	= "poly1305-arm",
222*4882a593Smuzhiyun 	.base.cra_priority	= 150,
223*4882a593Smuzhiyun 	.base.cra_blocksize	= POLY1305_BLOCK_SIZE,
224*4882a593Smuzhiyun 	.base.cra_module	= THIS_MODULE,
225*4882a593Smuzhiyun #ifdef CONFIG_KERNEL_MODE_NEON
226*4882a593Smuzhiyun }, {
227*4882a593Smuzhiyun 	.init			= arm_poly1305_init,
228*4882a593Smuzhiyun 	.update			= arm_poly1305_update_neon,
229*4882a593Smuzhiyun 	.final			= arm_poly1305_final,
230*4882a593Smuzhiyun 	.digestsize		= POLY1305_DIGEST_SIZE,
231*4882a593Smuzhiyun 	.descsize		= sizeof(struct poly1305_desc_ctx),
232*4882a593Smuzhiyun 
233*4882a593Smuzhiyun 	.base.cra_name		= "poly1305",
234*4882a593Smuzhiyun 	.base.cra_driver_name	= "poly1305-neon",
235*4882a593Smuzhiyun 	.base.cra_priority	= 200,
236*4882a593Smuzhiyun 	.base.cra_blocksize	= POLY1305_BLOCK_SIZE,
237*4882a593Smuzhiyun 	.base.cra_module	= THIS_MODULE,
238*4882a593Smuzhiyun #endif
239*4882a593Smuzhiyun }};
240*4882a593Smuzhiyun 
arm_poly1305_mod_init(void)241*4882a593Smuzhiyun static int __init arm_poly1305_mod_init(void)
242*4882a593Smuzhiyun {
243*4882a593Smuzhiyun 	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
244*4882a593Smuzhiyun 	    (elf_hwcap & HWCAP_NEON))
245*4882a593Smuzhiyun 		static_branch_enable(&have_neon);
246*4882a593Smuzhiyun 	else if (IS_REACHABLE(CONFIG_CRYPTO_HASH))
247*4882a593Smuzhiyun 		/* register only the first entry */
248*4882a593Smuzhiyun 		return crypto_register_shash(&arm_poly1305_algs[0]);
249*4882a593Smuzhiyun 
250*4882a593Smuzhiyun 	return IS_REACHABLE(CONFIG_CRYPTO_HASH) ?
251*4882a593Smuzhiyun 		crypto_register_shashes(arm_poly1305_algs,
252*4882a593Smuzhiyun 					ARRAY_SIZE(arm_poly1305_algs)) : 0;
253*4882a593Smuzhiyun }
254*4882a593Smuzhiyun 
arm_poly1305_mod_exit(void)255*4882a593Smuzhiyun static void __exit arm_poly1305_mod_exit(void)
256*4882a593Smuzhiyun {
257*4882a593Smuzhiyun 	if (!IS_REACHABLE(CONFIG_CRYPTO_HASH))
258*4882a593Smuzhiyun 		return;
259*4882a593Smuzhiyun 	if (!static_branch_likely(&have_neon)) {
260*4882a593Smuzhiyun 		crypto_unregister_shash(&arm_poly1305_algs[0]);
261*4882a593Smuzhiyun 		return;
262*4882a593Smuzhiyun 	}
263*4882a593Smuzhiyun 	crypto_unregister_shashes(arm_poly1305_algs,
264*4882a593Smuzhiyun 				  ARRAY_SIZE(arm_poly1305_algs));
265*4882a593Smuzhiyun }
266*4882a593Smuzhiyun 
267*4882a593Smuzhiyun module_init(arm_poly1305_mod_init);
268*4882a593Smuzhiyun module_exit(arm_poly1305_mod_exit);
269*4882a593Smuzhiyun 
270*4882a593Smuzhiyun MODULE_LICENSE("GPL v2");
271*4882a593Smuzhiyun MODULE_ALIAS_CRYPTO("poly1305");
272*4882a593Smuzhiyun MODULE_ALIAS_CRYPTO("poly1305-arm");
273*4882a593Smuzhiyun MODULE_ALIAS_CRYPTO("poly1305-neon");
274