xref: /OK3568_Linux_fs/kernel/arch/powerpc/crypto/crct10dif-vpmsum_glue.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-or-later
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun  * Calculate a CRC T10-DIF with vpmsum acceleration
4*4882a593Smuzhiyun  *
5*4882a593Smuzhiyun  * Copyright 2017, Daniel Axtens, IBM Corporation.
6*4882a593Smuzhiyun  * [based on crc32c-vpmsum_glue.c]
7*4882a593Smuzhiyun  */
8*4882a593Smuzhiyun 
9*4882a593Smuzhiyun #include <linux/crc-t10dif.h>
10*4882a593Smuzhiyun #include <crypto/internal/hash.h>
11*4882a593Smuzhiyun #include <crypto/internal/simd.h>
12*4882a593Smuzhiyun #include <linux/init.h>
13*4882a593Smuzhiyun #include <linux/module.h>
14*4882a593Smuzhiyun #include <linux/string.h>
15*4882a593Smuzhiyun #include <linux/kernel.h>
16*4882a593Smuzhiyun #include <linux/cpufeature.h>
17*4882a593Smuzhiyun #include <asm/simd.h>
18*4882a593Smuzhiyun #include <asm/switch_to.h>
19*4882a593Smuzhiyun 
20*4882a593Smuzhiyun #define VMX_ALIGN		16
21*4882a593Smuzhiyun #define VMX_ALIGN_MASK		(VMX_ALIGN-1)
22*4882a593Smuzhiyun 
23*4882a593Smuzhiyun #define VECTOR_BREAKPOINT	64
24*4882a593Smuzhiyun 
25*4882a593Smuzhiyun u32 __crct10dif_vpmsum(u32 crc, unsigned char const *p, size_t len);
26*4882a593Smuzhiyun 
/*
 * Continue a CRC T10-DIF computation over @len bytes starting at @p.
 *
 * @crci: CRC accumulated so far.
 * @p:    data to fold into the CRC.
 * @len:  number of bytes at @p.
 *
 * Buffers shorter than VECTOR_BREAKPOINT + VMX_ALIGN, or calls made when
 * crypto_simd_usable() returns false, are processed entirely by
 * crc_t10dif_generic(). Otherwise the buffer is handled in three pieces:
 * an unaligned head (generic), a VMX_ALIGN-aligned body whose length is a
 * multiple of VMX_ALIGN (vector routine), and the leftover tail (generic).
 *
 * Returns the updated 16-bit CRC.
 */
static u16 crct10dif_vpmsum(u16 crci, unsigned char const *p, size_t len)
{
	u32 crc = crci;
	unsigned long misalign;
	size_t head;
	size_t body;
	size_t tail;

	if (len < (VECTOR_BREAKPOINT + VMX_ALIGN) || !crypto_simd_usable())
		return crc_t10dif_generic(crc, p, len);

	/* Consume leading bytes until @p sits on a VMX_ALIGN boundary. */
	misalign = (unsigned long)p & VMX_ALIGN_MASK;
	if (misalign) {
		head = VMX_ALIGN - misalign;
		crc = crc_t10dif_generic(crc, p, head);
		p += head;
		len -= head;
	}

	/* Split what is left into whole VMX_ALIGN chunks and a remainder. */
	body = len & ~VMX_ALIGN_MASK;
	tail = len & VMX_ALIGN_MASK;

	if (body) {
		/* The vector routine takes and returns the CRC in the upper 16 bits. */
		crc <<= 16;
		preempt_disable();
		pagefault_disable();
		enable_kernel_altivec();
		crc = __crct10dif_vpmsum(crc, p, body);
		disable_kernel_altivec();
		pagefault_enable();
		preempt_enable();
		crc >>= 16;
	}

	if (tail)
		crc = crc_t10dif_generic(crc, p + body, tail);

	return crc & 0xffff;
}
63*4882a593Smuzhiyun 
crct10dif_vpmsum_init(struct shash_desc * desc)64*4882a593Smuzhiyun static int crct10dif_vpmsum_init(struct shash_desc *desc)
65*4882a593Smuzhiyun {
66*4882a593Smuzhiyun 	u16 *crc = shash_desc_ctx(desc);
67*4882a593Smuzhiyun 
68*4882a593Smuzhiyun 	*crc = 0;
69*4882a593Smuzhiyun 	return 0;
70*4882a593Smuzhiyun }
71*4882a593Smuzhiyun 
crct10dif_vpmsum_update(struct shash_desc * desc,const u8 * data,unsigned int length)72*4882a593Smuzhiyun static int crct10dif_vpmsum_update(struct shash_desc *desc, const u8 *data,
73*4882a593Smuzhiyun 			    unsigned int length)
74*4882a593Smuzhiyun {
75*4882a593Smuzhiyun 	u16 *crc = shash_desc_ctx(desc);
76*4882a593Smuzhiyun 
77*4882a593Smuzhiyun 	*crc = crct10dif_vpmsum(*crc, data, length);
78*4882a593Smuzhiyun 
79*4882a593Smuzhiyun 	return 0;
80*4882a593Smuzhiyun }
81*4882a593Smuzhiyun 
82*4882a593Smuzhiyun 
/*
 * shash .final callback: copy the accumulated CRC from the descriptor
 * context into @out.
 *
 * @desc: hash descriptor whose context holds the running u16 CRC.
 * @out:  destination for the digest; sizeof(u16) bytes are written in the
 *        CPU's native byte order.
 *
 * Always returns 0.
 */
static int crct10dif_vpmsum_final(struct shash_desc *desc, u8 *out)
{
	const u16 *crcp = shash_desc_ctx(desc);

	/*
	 * @out is a plain byte pointer and nothing in this file guarantees it
	 * is 2-byte aligned, so copy the digest instead of storing through a
	 * cast u16 pointer. The bytes written are the same as a direct store.
	 */
	memcpy(out, crcp, sizeof(*crcp));
	return 0;
}
90*4882a593Smuzhiyun 
91*4882a593Smuzhiyun static struct shash_alg alg = {
92*4882a593Smuzhiyun 	.init		= crct10dif_vpmsum_init,
93*4882a593Smuzhiyun 	.update		= crct10dif_vpmsum_update,
94*4882a593Smuzhiyun 	.final		= crct10dif_vpmsum_final,
95*4882a593Smuzhiyun 	.descsize	= CRC_T10DIF_DIGEST_SIZE,
96*4882a593Smuzhiyun 	.digestsize	= CRC_T10DIF_DIGEST_SIZE,
97*4882a593Smuzhiyun 	.base		= {
98*4882a593Smuzhiyun 		.cra_name		= "crct10dif",
99*4882a593Smuzhiyun 		.cra_driver_name	= "crct10dif-vpmsum",
100*4882a593Smuzhiyun 		.cra_priority		= 200,
101*4882a593Smuzhiyun 		.cra_blocksize		= CRC_T10DIF_BLOCK_SIZE,
102*4882a593Smuzhiyun 		.cra_module		= THIS_MODULE,
103*4882a593Smuzhiyun 	}
104*4882a593Smuzhiyun };
105*4882a593Smuzhiyun 
crct10dif_vpmsum_mod_init(void)106*4882a593Smuzhiyun static int __init crct10dif_vpmsum_mod_init(void)
107*4882a593Smuzhiyun {
108*4882a593Smuzhiyun 	if (!cpu_has_feature(CPU_FTR_ARCH_207S))
109*4882a593Smuzhiyun 		return -ENODEV;
110*4882a593Smuzhiyun 
111*4882a593Smuzhiyun 	return crypto_register_shash(&alg);
112*4882a593Smuzhiyun }
113*4882a593Smuzhiyun 
crct10dif_vpmsum_mod_fini(void)114*4882a593Smuzhiyun static void __exit crct10dif_vpmsum_mod_fini(void)
115*4882a593Smuzhiyun {
116*4882a593Smuzhiyun 	crypto_unregister_shash(&alg);
117*4882a593Smuzhiyun }
118*4882a593Smuzhiyun 
119*4882a593Smuzhiyun module_cpu_feature_match(PPC_MODULE_FEATURE_VEC_CRYPTO, crct10dif_vpmsum_mod_init);
120*4882a593Smuzhiyun module_exit(crct10dif_vpmsum_mod_fini);
121*4882a593Smuzhiyun 
122*4882a593Smuzhiyun MODULE_AUTHOR("Daniel Axtens <dja@axtens.net>");
123*4882a593Smuzhiyun MODULE_DESCRIPTION("CRCT10DIF using vector polynomial multiply-sum instructions");
124*4882a593Smuzhiyun MODULE_LICENSE("GPL");
125*4882a593Smuzhiyun MODULE_ALIAS_CRYPTO("crct10dif");
126*4882a593Smuzhiyun MODULE_ALIAS_CRYPTO("crct10dif-vpmsum");
127