xref: /OK3568_Linux_fs/kernel/arch/x86/crypto/camellia_aesni_avx2_glue.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Glue Code for x86_64/AVX2/AES-NI assembler optimized version of Camellia
 *
 * Copyright © 2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
 */
7*4882a593Smuzhiyun 
8*4882a593Smuzhiyun #include <asm/crypto/camellia.h>
9*4882a593Smuzhiyun #include <asm/crypto/glue_helper.h>
10*4882a593Smuzhiyun #include <crypto/algapi.h>
11*4882a593Smuzhiyun #include <crypto/internal/simd.h>
12*4882a593Smuzhiyun #include <crypto/xts.h>
13*4882a593Smuzhiyun #include <linux/crypto.h>
14*4882a593Smuzhiyun #include <linux/err.h>
15*4882a593Smuzhiyun #include <linux/module.h>
16*4882a593Smuzhiyun #include <linux/types.h>
17*4882a593Smuzhiyun 
/*
 * Values used as .num_blocks for the 16-way and 32-way entries of the
 * dispatch tables in this file; the 16-way value is also used as
 * .fpu_blocks_limit in every table.
 */
#define CAMELLIA_AESNI_PARALLEL_BLOCKS		16
#define CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS	32
20*4882a593Smuzhiyun 
21*4882a593Smuzhiyun /* 32-way AVX2/AES-NI parallel cipher functions */
22*4882a593Smuzhiyun asmlinkage void camellia_ecb_enc_32way(const void *ctx, u8 *dst, const u8 *src);
23*4882a593Smuzhiyun asmlinkage void camellia_ecb_dec_32way(const void *ctx, u8 *dst, const u8 *src);
24*4882a593Smuzhiyun 
25*4882a593Smuzhiyun asmlinkage void camellia_cbc_dec_32way(const void *ctx, u8 *dst, const u8 *src);
26*4882a593Smuzhiyun asmlinkage void camellia_ctr_32way(const void *ctx, u8 *dst, const u8 *src,
27*4882a593Smuzhiyun 				   le128 *iv);
28*4882a593Smuzhiyun 
29*4882a593Smuzhiyun asmlinkage void camellia_xts_enc_32way(const void *ctx, u8 *dst, const u8 *src,
30*4882a593Smuzhiyun 				       le128 *iv);
31*4882a593Smuzhiyun asmlinkage void camellia_xts_dec_32way(const void *ctx, u8 *dst, const u8 *src,
32*4882a593Smuzhiyun 				       le128 *iv);
33*4882a593Smuzhiyun 
34*4882a593Smuzhiyun static const struct common_glue_ctx camellia_enc = {
35*4882a593Smuzhiyun 	.num_funcs = 4,
36*4882a593Smuzhiyun 	.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
37*4882a593Smuzhiyun 
38*4882a593Smuzhiyun 	.funcs = { {
39*4882a593Smuzhiyun 		.num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
40*4882a593Smuzhiyun 		.fn_u = { .ecb = camellia_ecb_enc_32way }
41*4882a593Smuzhiyun 	}, {
42*4882a593Smuzhiyun 		.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
43*4882a593Smuzhiyun 		.fn_u = { .ecb = camellia_ecb_enc_16way }
44*4882a593Smuzhiyun 	}, {
45*4882a593Smuzhiyun 		.num_blocks = 2,
46*4882a593Smuzhiyun 		.fn_u = { .ecb = camellia_enc_blk_2way }
47*4882a593Smuzhiyun 	}, {
48*4882a593Smuzhiyun 		.num_blocks = 1,
49*4882a593Smuzhiyun 		.fn_u = { .ecb = camellia_enc_blk }
50*4882a593Smuzhiyun 	} }
51*4882a593Smuzhiyun };
52*4882a593Smuzhiyun 
53*4882a593Smuzhiyun static const struct common_glue_ctx camellia_ctr = {
54*4882a593Smuzhiyun 	.num_funcs = 4,
55*4882a593Smuzhiyun 	.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
56*4882a593Smuzhiyun 
57*4882a593Smuzhiyun 	.funcs = { {
58*4882a593Smuzhiyun 		.num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
59*4882a593Smuzhiyun 		.fn_u = { .ctr = camellia_ctr_32way }
60*4882a593Smuzhiyun 	}, {
61*4882a593Smuzhiyun 		.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
62*4882a593Smuzhiyun 		.fn_u = { .ctr = camellia_ctr_16way }
63*4882a593Smuzhiyun 	}, {
64*4882a593Smuzhiyun 		.num_blocks = 2,
65*4882a593Smuzhiyun 		.fn_u = { .ctr = camellia_crypt_ctr_2way }
66*4882a593Smuzhiyun 	}, {
67*4882a593Smuzhiyun 		.num_blocks = 1,
68*4882a593Smuzhiyun 		.fn_u = { .ctr = camellia_crypt_ctr }
69*4882a593Smuzhiyun 	} }
70*4882a593Smuzhiyun };
71*4882a593Smuzhiyun 
72*4882a593Smuzhiyun static const struct common_glue_ctx camellia_enc_xts = {
73*4882a593Smuzhiyun 	.num_funcs = 3,
74*4882a593Smuzhiyun 	.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
75*4882a593Smuzhiyun 
76*4882a593Smuzhiyun 	.funcs = { {
77*4882a593Smuzhiyun 		.num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
78*4882a593Smuzhiyun 		.fn_u = { .xts = camellia_xts_enc_32way }
79*4882a593Smuzhiyun 	}, {
80*4882a593Smuzhiyun 		.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
81*4882a593Smuzhiyun 		.fn_u = { .xts = camellia_xts_enc_16way }
82*4882a593Smuzhiyun 	}, {
83*4882a593Smuzhiyun 		.num_blocks = 1,
84*4882a593Smuzhiyun 		.fn_u = { .xts = camellia_xts_enc }
85*4882a593Smuzhiyun 	} }
86*4882a593Smuzhiyun };
87*4882a593Smuzhiyun 
88*4882a593Smuzhiyun static const struct common_glue_ctx camellia_dec = {
89*4882a593Smuzhiyun 	.num_funcs = 4,
90*4882a593Smuzhiyun 	.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
91*4882a593Smuzhiyun 
92*4882a593Smuzhiyun 	.funcs = { {
93*4882a593Smuzhiyun 		.num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
94*4882a593Smuzhiyun 		.fn_u = { .ecb = camellia_ecb_dec_32way }
95*4882a593Smuzhiyun 	}, {
96*4882a593Smuzhiyun 		.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
97*4882a593Smuzhiyun 		.fn_u = { .ecb = camellia_ecb_dec_16way }
98*4882a593Smuzhiyun 	}, {
99*4882a593Smuzhiyun 		.num_blocks = 2,
100*4882a593Smuzhiyun 		.fn_u = { .ecb = camellia_dec_blk_2way }
101*4882a593Smuzhiyun 	}, {
102*4882a593Smuzhiyun 		.num_blocks = 1,
103*4882a593Smuzhiyun 		.fn_u = { .ecb = camellia_dec_blk }
104*4882a593Smuzhiyun 	} }
105*4882a593Smuzhiyun };
106*4882a593Smuzhiyun 
107*4882a593Smuzhiyun static const struct common_glue_ctx camellia_dec_cbc = {
108*4882a593Smuzhiyun 	.num_funcs = 4,
109*4882a593Smuzhiyun 	.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
110*4882a593Smuzhiyun 
111*4882a593Smuzhiyun 	.funcs = { {
112*4882a593Smuzhiyun 		.num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
113*4882a593Smuzhiyun 		.fn_u = { .cbc = camellia_cbc_dec_32way }
114*4882a593Smuzhiyun 	}, {
115*4882a593Smuzhiyun 		.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
116*4882a593Smuzhiyun 		.fn_u = { .cbc = camellia_cbc_dec_16way }
117*4882a593Smuzhiyun 	}, {
118*4882a593Smuzhiyun 		.num_blocks = 2,
119*4882a593Smuzhiyun 		.fn_u = { .cbc = camellia_decrypt_cbc_2way }
120*4882a593Smuzhiyun 	}, {
121*4882a593Smuzhiyun 		.num_blocks = 1,
122*4882a593Smuzhiyun 		.fn_u = { .cbc = camellia_dec_blk }
123*4882a593Smuzhiyun 	} }
124*4882a593Smuzhiyun };
125*4882a593Smuzhiyun 
126*4882a593Smuzhiyun static const struct common_glue_ctx camellia_dec_xts = {
127*4882a593Smuzhiyun 	.num_funcs = 3,
128*4882a593Smuzhiyun 	.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
129*4882a593Smuzhiyun 
130*4882a593Smuzhiyun 	.funcs = { {
131*4882a593Smuzhiyun 		.num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
132*4882a593Smuzhiyun 		.fn_u = { .xts = camellia_xts_dec_32way }
133*4882a593Smuzhiyun 	}, {
134*4882a593Smuzhiyun 		.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
135*4882a593Smuzhiyun 		.fn_u = { .xts = camellia_xts_dec_16way }
136*4882a593Smuzhiyun 	}, {
137*4882a593Smuzhiyun 		.num_blocks = 1,
138*4882a593Smuzhiyun 		.fn_u = { .xts = camellia_xts_dec }
139*4882a593Smuzhiyun 	} }
140*4882a593Smuzhiyun };
141*4882a593Smuzhiyun 
/*
 * skcipher .setkey callback for the ECB, CBC and CTR algorithms.
 *
 * Looks up the transform's context and hands it, together with the key,
 * to __camellia_setkey(); that helper's return value is passed through
 * unchanged.
 */
static int camellia_setkey(struct crypto_skcipher *tfm, const u8 *key,
			   unsigned int keylen)
{
	struct camellia_ctx *cctx = crypto_skcipher_ctx(tfm);

	return __camellia_setkey(cctx, key, keylen);
}
147*4882a593Smuzhiyun 
ecb_encrypt(struct skcipher_request * req)148*4882a593Smuzhiyun static int ecb_encrypt(struct skcipher_request *req)
149*4882a593Smuzhiyun {
150*4882a593Smuzhiyun 	return glue_ecb_req_128bit(&camellia_enc, req);
151*4882a593Smuzhiyun }
152*4882a593Smuzhiyun 
ecb_decrypt(struct skcipher_request * req)153*4882a593Smuzhiyun static int ecb_decrypt(struct skcipher_request *req)
154*4882a593Smuzhiyun {
155*4882a593Smuzhiyun 	return glue_ecb_req_128bit(&camellia_dec, req);
156*4882a593Smuzhiyun }
157*4882a593Smuzhiyun 
cbc_encrypt(struct skcipher_request * req)158*4882a593Smuzhiyun static int cbc_encrypt(struct skcipher_request *req)
159*4882a593Smuzhiyun {
160*4882a593Smuzhiyun 	return glue_cbc_encrypt_req_128bit(camellia_enc_blk, req);
161*4882a593Smuzhiyun }
162*4882a593Smuzhiyun 
cbc_decrypt(struct skcipher_request * req)163*4882a593Smuzhiyun static int cbc_decrypt(struct skcipher_request *req)
164*4882a593Smuzhiyun {
165*4882a593Smuzhiyun 	return glue_cbc_decrypt_req_128bit(&camellia_dec_cbc, req);
166*4882a593Smuzhiyun }
167*4882a593Smuzhiyun 
ctr_crypt(struct skcipher_request * req)168*4882a593Smuzhiyun static int ctr_crypt(struct skcipher_request *req)
169*4882a593Smuzhiyun {
170*4882a593Smuzhiyun 	return glue_ctr_req_128bit(&camellia_ctr, req);
171*4882a593Smuzhiyun }
172*4882a593Smuzhiyun 
xts_encrypt(struct skcipher_request * req)173*4882a593Smuzhiyun static int xts_encrypt(struct skcipher_request *req)
174*4882a593Smuzhiyun {
175*4882a593Smuzhiyun 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
176*4882a593Smuzhiyun 	struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
177*4882a593Smuzhiyun 
178*4882a593Smuzhiyun 	return glue_xts_req_128bit(&camellia_enc_xts, req, camellia_enc_blk,
179*4882a593Smuzhiyun 				   &ctx->tweak_ctx, &ctx->crypt_ctx, false);
180*4882a593Smuzhiyun }
181*4882a593Smuzhiyun 
xts_decrypt(struct skcipher_request * req)182*4882a593Smuzhiyun static int xts_decrypt(struct skcipher_request *req)
183*4882a593Smuzhiyun {
184*4882a593Smuzhiyun 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
185*4882a593Smuzhiyun 	struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
186*4882a593Smuzhiyun 
187*4882a593Smuzhiyun 	return glue_xts_req_128bit(&camellia_dec_xts, req, camellia_enc_blk,
188*4882a593Smuzhiyun 				   &ctx->tweak_ctx, &ctx->crypt_ctx, true);
189*4882a593Smuzhiyun }
190*4882a593Smuzhiyun 
191*4882a593Smuzhiyun static struct skcipher_alg camellia_algs[] = {
192*4882a593Smuzhiyun 	{
193*4882a593Smuzhiyun 		.base.cra_name		= "__ecb(camellia)",
194*4882a593Smuzhiyun 		.base.cra_driver_name	= "__ecb-camellia-aesni-avx2",
195*4882a593Smuzhiyun 		.base.cra_priority	= 500,
196*4882a593Smuzhiyun 		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
197*4882a593Smuzhiyun 		.base.cra_blocksize	= CAMELLIA_BLOCK_SIZE,
198*4882a593Smuzhiyun 		.base.cra_ctxsize	= sizeof(struct camellia_ctx),
199*4882a593Smuzhiyun 		.base.cra_module	= THIS_MODULE,
200*4882a593Smuzhiyun 		.min_keysize		= CAMELLIA_MIN_KEY_SIZE,
201*4882a593Smuzhiyun 		.max_keysize		= CAMELLIA_MAX_KEY_SIZE,
202*4882a593Smuzhiyun 		.setkey			= camellia_setkey,
203*4882a593Smuzhiyun 		.encrypt		= ecb_encrypt,
204*4882a593Smuzhiyun 		.decrypt		= ecb_decrypt,
205*4882a593Smuzhiyun 	}, {
206*4882a593Smuzhiyun 		.base.cra_name		= "__cbc(camellia)",
207*4882a593Smuzhiyun 		.base.cra_driver_name	= "__cbc-camellia-aesni-avx2",
208*4882a593Smuzhiyun 		.base.cra_priority	= 500,
209*4882a593Smuzhiyun 		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
210*4882a593Smuzhiyun 		.base.cra_blocksize	= CAMELLIA_BLOCK_SIZE,
211*4882a593Smuzhiyun 		.base.cra_ctxsize	= sizeof(struct camellia_ctx),
212*4882a593Smuzhiyun 		.base.cra_module	= THIS_MODULE,
213*4882a593Smuzhiyun 		.min_keysize		= CAMELLIA_MIN_KEY_SIZE,
214*4882a593Smuzhiyun 		.max_keysize		= CAMELLIA_MAX_KEY_SIZE,
215*4882a593Smuzhiyun 		.ivsize			= CAMELLIA_BLOCK_SIZE,
216*4882a593Smuzhiyun 		.setkey			= camellia_setkey,
217*4882a593Smuzhiyun 		.encrypt		= cbc_encrypt,
218*4882a593Smuzhiyun 		.decrypt		= cbc_decrypt,
219*4882a593Smuzhiyun 	}, {
220*4882a593Smuzhiyun 		.base.cra_name		= "__ctr(camellia)",
221*4882a593Smuzhiyun 		.base.cra_driver_name	= "__ctr-camellia-aesni-avx2",
222*4882a593Smuzhiyun 		.base.cra_priority	= 500,
223*4882a593Smuzhiyun 		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
224*4882a593Smuzhiyun 		.base.cra_blocksize	= 1,
225*4882a593Smuzhiyun 		.base.cra_ctxsize	= sizeof(struct camellia_ctx),
226*4882a593Smuzhiyun 		.base.cra_module	= THIS_MODULE,
227*4882a593Smuzhiyun 		.min_keysize		= CAMELLIA_MIN_KEY_SIZE,
228*4882a593Smuzhiyun 		.max_keysize		= CAMELLIA_MAX_KEY_SIZE,
229*4882a593Smuzhiyun 		.ivsize			= CAMELLIA_BLOCK_SIZE,
230*4882a593Smuzhiyun 		.chunksize		= CAMELLIA_BLOCK_SIZE,
231*4882a593Smuzhiyun 		.setkey			= camellia_setkey,
232*4882a593Smuzhiyun 		.encrypt		= ctr_crypt,
233*4882a593Smuzhiyun 		.decrypt		= ctr_crypt,
234*4882a593Smuzhiyun 	}, {
235*4882a593Smuzhiyun 		.base.cra_name		= "__xts(camellia)",
236*4882a593Smuzhiyun 		.base.cra_driver_name	= "__xts-camellia-aesni-avx2",
237*4882a593Smuzhiyun 		.base.cra_priority	= 500,
238*4882a593Smuzhiyun 		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
239*4882a593Smuzhiyun 		.base.cra_blocksize	= CAMELLIA_BLOCK_SIZE,
240*4882a593Smuzhiyun 		.base.cra_ctxsize	= sizeof(struct camellia_xts_ctx),
241*4882a593Smuzhiyun 		.base.cra_module	= THIS_MODULE,
242*4882a593Smuzhiyun 		.min_keysize		= 2 * CAMELLIA_MIN_KEY_SIZE,
243*4882a593Smuzhiyun 		.max_keysize		= 2 * CAMELLIA_MAX_KEY_SIZE,
244*4882a593Smuzhiyun 		.ivsize			= CAMELLIA_BLOCK_SIZE,
245*4882a593Smuzhiyun 		.setkey			= xts_camellia_setkey,
246*4882a593Smuzhiyun 		.encrypt		= xts_encrypt,
247*4882a593Smuzhiyun 		.decrypt		= xts_decrypt,
248*4882a593Smuzhiyun 	},
249*4882a593Smuzhiyun };
250*4882a593Smuzhiyun 
251*4882a593Smuzhiyun static struct simd_skcipher_alg *camellia_simd_algs[ARRAY_SIZE(camellia_algs)];
252*4882a593Smuzhiyun 
camellia_aesni_init(void)253*4882a593Smuzhiyun static int __init camellia_aesni_init(void)
254*4882a593Smuzhiyun {
255*4882a593Smuzhiyun 	const char *feature_name;
256*4882a593Smuzhiyun 
257*4882a593Smuzhiyun 	if (!boot_cpu_has(X86_FEATURE_AVX) ||
258*4882a593Smuzhiyun 	    !boot_cpu_has(X86_FEATURE_AVX2) ||
259*4882a593Smuzhiyun 	    !boot_cpu_has(X86_FEATURE_AES) ||
260*4882a593Smuzhiyun 	    !boot_cpu_has(X86_FEATURE_OSXSAVE)) {
261*4882a593Smuzhiyun 		pr_info("AVX2 or AES-NI instructions are not detected.\n");
262*4882a593Smuzhiyun 		return -ENODEV;
263*4882a593Smuzhiyun 	}
264*4882a593Smuzhiyun 
265*4882a593Smuzhiyun 	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
266*4882a593Smuzhiyun 				&feature_name)) {
267*4882a593Smuzhiyun 		pr_info("CPU feature '%s' is not supported.\n", feature_name);
268*4882a593Smuzhiyun 		return -ENODEV;
269*4882a593Smuzhiyun 	}
270*4882a593Smuzhiyun 
271*4882a593Smuzhiyun 	return simd_register_skciphers_compat(camellia_algs,
272*4882a593Smuzhiyun 					      ARRAY_SIZE(camellia_algs),
273*4882a593Smuzhiyun 					      camellia_simd_algs);
274*4882a593Smuzhiyun }
275*4882a593Smuzhiyun 
camellia_aesni_fini(void)276*4882a593Smuzhiyun static void __exit camellia_aesni_fini(void)
277*4882a593Smuzhiyun {
278*4882a593Smuzhiyun 	simd_unregister_skciphers(camellia_algs, ARRAY_SIZE(camellia_algs),
279*4882a593Smuzhiyun 				  camellia_simd_algs);
280*4882a593Smuzhiyun }
281*4882a593Smuzhiyun 
282*4882a593Smuzhiyun module_init(camellia_aesni_init);
283*4882a593Smuzhiyun module_exit(camellia_aesni_fini);
284*4882a593Smuzhiyun 
285*4882a593Smuzhiyun MODULE_LICENSE("GPL");
286*4882a593Smuzhiyun MODULE_DESCRIPTION("Camellia Cipher Algorithm, AES-NI/AVX2 optimized");
287*4882a593Smuzhiyun MODULE_ALIAS_CRYPTO("camellia");
288*4882a593Smuzhiyun MODULE_ALIAS_CRYPTO("camellia-asm");
289