1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-only
2*4882a593Smuzhiyun /*
 * Use the hardware-provided CRC32 instruction to accelerate CRC32C computation.
4*4882a593Smuzhiyun * CRC32C polynomial:0x1EDC6F41(BE)/0x82F63B78(LE)
5*4882a593Smuzhiyun * CRC32 is a new instruction in Intel SSE4.2, the reference can be found at:
6*4882a593Smuzhiyun * http://www.intel.com/products/processor/manuals/
7*4882a593Smuzhiyun * Intel(R) 64 and IA-32 Architectures Software Developer's Manual
8*4882a593Smuzhiyun * Volume 2A: Instruction Set Reference, A-M
9*4882a593Smuzhiyun *
10*4882a593Smuzhiyun * Copyright (C) 2008 Intel Corporation
11*4882a593Smuzhiyun * Authors: Austin Zhang <austin_zhang@linux.intel.com>
12*4882a593Smuzhiyun * Kent Liu <kent.liu@intel.com>
13*4882a593Smuzhiyun */
14*4882a593Smuzhiyun #include <linux/init.h>
15*4882a593Smuzhiyun #include <linux/module.h>
16*4882a593Smuzhiyun #include <linux/string.h>
17*4882a593Smuzhiyun #include <linux/kernel.h>
18*4882a593Smuzhiyun #include <crypto/internal/hash.h>
19*4882a593Smuzhiyun #include <crypto/internal/simd.h>
20*4882a593Smuzhiyun
21*4882a593Smuzhiyun #include <asm/cpufeatures.h>
22*4882a593Smuzhiyun #include <asm/cpu_device_id.h>
23*4882a593Smuzhiyun #include <asm/simd.h>
24*4882a593Smuzhiyun
/* Block size and digest size (in bytes) advertised by the shash algorithm. */
#define CHKSUM_BLOCK_SIZE	1
#define CHKSUM_DIGEST_SIZE	4

/* Number of bytes consumed by one word-sized CRC32 instruction. */
#define SCALE_F	sizeof(unsigned long)

#ifdef CONFIG_X86_64
/* 64-bit build: fold eight bytes per instruction. */
#define CRC32_INST "crc32q %1, %q0"

/*
 * use carryless multiply version of crc32c when buffer
 * size is >= 512 to account
 * for fpu state save/restore overhead.
 */
#define CRC32C_PCL_BREAKEVEN	512

/* Not defined in this file; presumably the PCLMULQDQ assembly routine. */
asmlinkage unsigned int crc_pcl(const u8 *buffer, int len,
				unsigned int crc_init);
#else
/* 32-bit build: fold four bytes per instruction. */
#define CRC32_INST "crc32l %1, %0"
#endif /* CONFIG_X86_64 */
47*4882a593Smuzhiyun
/*
 * Fold @length bytes starting at @data into @crc, one byte per crc32b
 * instruction, and return the updated value.
 */
static u32 crc32c_intel_le_hw_byte(u32 crc, unsigned char const *data, size_t length)
{
	for (; length; length--, data++)
		asm("crc32b %1, %0"
		    : "+r" (crc) : "rm" (*data));

	return crc;
}
58*4882a593Smuzhiyun
crc32c_intel_le_hw(u32 crc,unsigned char const * p,size_t len)59*4882a593Smuzhiyun static u32 __pure crc32c_intel_le_hw(u32 crc, unsigned char const *p, size_t len)
60*4882a593Smuzhiyun {
61*4882a593Smuzhiyun unsigned int iquotient = len / SCALE_F;
62*4882a593Smuzhiyun unsigned int iremainder = len % SCALE_F;
63*4882a593Smuzhiyun unsigned long *ptmp = (unsigned long *)p;
64*4882a593Smuzhiyun
65*4882a593Smuzhiyun while (iquotient--) {
66*4882a593Smuzhiyun asm(CRC32_INST
67*4882a593Smuzhiyun : "+r" (crc) : "rm" (*ptmp));
68*4882a593Smuzhiyun ptmp++;
69*4882a593Smuzhiyun }
70*4882a593Smuzhiyun
71*4882a593Smuzhiyun if (iremainder)
72*4882a593Smuzhiyun crc = crc32c_intel_le_hw_byte(crc, (unsigned char *)ptmp,
73*4882a593Smuzhiyun iremainder);
74*4882a593Smuzhiyun
75*4882a593Smuzhiyun return crc;
76*4882a593Smuzhiyun }
77*4882a593Smuzhiyun
78*4882a593Smuzhiyun /*
79*4882a593Smuzhiyun * Setting the seed allows arbitrary accumulators and flexible XOR policy
80*4882a593Smuzhiyun * If your algorithm starts with ~0, then XOR with ~0 before you set
81*4882a593Smuzhiyun * the seed.
82*4882a593Smuzhiyun */
crc32c_intel_setkey(struct crypto_shash * hash,const u8 * key,unsigned int keylen)83*4882a593Smuzhiyun static int crc32c_intel_setkey(struct crypto_shash *hash, const u8 *key,
84*4882a593Smuzhiyun unsigned int keylen)
85*4882a593Smuzhiyun {
86*4882a593Smuzhiyun u32 *mctx = crypto_shash_ctx(hash);
87*4882a593Smuzhiyun
88*4882a593Smuzhiyun if (keylen != sizeof(u32))
89*4882a593Smuzhiyun return -EINVAL;
90*4882a593Smuzhiyun *mctx = le32_to_cpup((__le32 *)key);
91*4882a593Smuzhiyun return 0;
92*4882a593Smuzhiyun }
93*4882a593Smuzhiyun
crc32c_intel_init(struct shash_desc * desc)94*4882a593Smuzhiyun static int crc32c_intel_init(struct shash_desc *desc)
95*4882a593Smuzhiyun {
96*4882a593Smuzhiyun u32 *mctx = crypto_shash_ctx(desc->tfm);
97*4882a593Smuzhiyun u32 *crcp = shash_desc_ctx(desc);
98*4882a593Smuzhiyun
99*4882a593Smuzhiyun *crcp = *mctx;
100*4882a593Smuzhiyun
101*4882a593Smuzhiyun return 0;
102*4882a593Smuzhiyun }
103*4882a593Smuzhiyun
crc32c_intel_update(struct shash_desc * desc,const u8 * data,unsigned int len)104*4882a593Smuzhiyun static int crc32c_intel_update(struct shash_desc *desc, const u8 *data,
105*4882a593Smuzhiyun unsigned int len)
106*4882a593Smuzhiyun {
107*4882a593Smuzhiyun u32 *crcp = shash_desc_ctx(desc);
108*4882a593Smuzhiyun
109*4882a593Smuzhiyun *crcp = crc32c_intel_le_hw(*crcp, data, len);
110*4882a593Smuzhiyun return 0;
111*4882a593Smuzhiyun }
112*4882a593Smuzhiyun
/*
 * Fold @len bytes of @data into the CRC read from @crcp (which is left
 * unmodified) and write the bitwise-inverted result to @out as a
 * little-endian 32-bit value.  Always returns 0.
 */
static int __crc32c_intel_finup(u32 *crcp, const u8 *data, unsigned int len,
				u8 *out)
{
	u32 crc = crc32c_intel_le_hw(*crcp, data, len);

	*(__le32 *)out = ~cpu_to_le32(crc);
	return 0;
}
119*4882a593Smuzhiyun
/*
 * Process the last chunk of data and emit the digest, starting from the
 * running CRC stored in the descriptor state.
 */
static int crc32c_intel_finup(struct shash_desc *desc, const u8 *data,
			      unsigned int len, u8 *out)
{
	u32 *state = shash_desc_ctx(desc);

	return __crc32c_intel_finup(state, data, len, out);
}
125*4882a593Smuzhiyun
/*
 * Emit the digest: the bitwise inverse of the running CRC, stored to @out
 * as a little-endian 32-bit value.  Always returns 0.
 */
static int crc32c_intel_final(struct shash_desc *desc, u8 *out)
{
	const u32 *state = shash_desc_ctx(desc);

	*(__le32 *)out = ~cpu_to_le32(*state);
	return 0;
}
133*4882a593Smuzhiyun
/*
 * One-shot digest: hash @data starting directly from the seed held in the
 * transform context, bypassing the descriptor state.
 */
static int crc32c_intel_digest(struct shash_desc *desc, const u8 *data,
			       unsigned int len, u8 *out)
{
	u32 *seed = crypto_shash_ctx(desc->tfm);

	return __crc32c_intel_finup(seed, data, len, out);
}
140*4882a593Smuzhiyun
crc32c_intel_cra_init(struct crypto_tfm * tfm)141*4882a593Smuzhiyun static int crc32c_intel_cra_init(struct crypto_tfm *tfm)
142*4882a593Smuzhiyun {
143*4882a593Smuzhiyun u32 *key = crypto_tfm_ctx(tfm);
144*4882a593Smuzhiyun
145*4882a593Smuzhiyun *key = ~0;
146*4882a593Smuzhiyun
147*4882a593Smuzhiyun return 0;
148*4882a593Smuzhiyun }
149*4882a593Smuzhiyun
150*4882a593Smuzhiyun #ifdef CONFIG_X86_64
crc32c_pcl_intel_update(struct shash_desc * desc,const u8 * data,unsigned int len)151*4882a593Smuzhiyun static int crc32c_pcl_intel_update(struct shash_desc *desc, const u8 *data,
152*4882a593Smuzhiyun unsigned int len)
153*4882a593Smuzhiyun {
154*4882a593Smuzhiyun u32 *crcp = shash_desc_ctx(desc);
155*4882a593Smuzhiyun
156*4882a593Smuzhiyun /*
157*4882a593Smuzhiyun * use faster PCL version if datasize is large enough to
158*4882a593Smuzhiyun * overcome kernel fpu state save/restore overhead
159*4882a593Smuzhiyun */
160*4882a593Smuzhiyun if (len >= CRC32C_PCL_BREAKEVEN && crypto_simd_usable()) {
161*4882a593Smuzhiyun kernel_fpu_begin();
162*4882a593Smuzhiyun *crcp = crc_pcl(data, len, *crcp);
163*4882a593Smuzhiyun kernel_fpu_end();
164*4882a593Smuzhiyun } else
165*4882a593Smuzhiyun *crcp = crc32c_intel_le_hw(*crcp, data, len);
166*4882a593Smuzhiyun return 0;
167*4882a593Smuzhiyun }
168*4882a593Smuzhiyun
/*
 * Fold @len bytes of @data into the CRC read from @crcp (left unmodified)
 * and write the bitwise-inverted result to @out as a little-endian 32-bit
 * value.  crc_pcl() is used when the buffer reaches CRC32C_PCL_BREAKEVEN
 * bytes and SIMD is usable; otherwise the plain CRC32-instruction path is
 * taken.  Always returns 0.
 */
static int __crc32c_pcl_intel_finup(u32 *crcp, const u8 *data, unsigned int len,
				    u8 *out)
{
	u32 crc;

	if (len >= CRC32C_PCL_BREAKEVEN && crypto_simd_usable()) {
		kernel_fpu_begin();
		crc = crc_pcl(data, len, *crcp);
		kernel_fpu_end();
	} else {
		crc = crc32c_intel_le_hw(*crcp, data, len);
	}

	*(__le32 *)out = ~cpu_to_le32(crc);
	return 0;
}
181*4882a593Smuzhiyun
/*
 * Process the last chunk of data and emit the digest, starting from the
 * running CRC stored in the descriptor state.
 */
static int crc32c_pcl_intel_finup(struct shash_desc *desc, const u8 *data,
				  unsigned int len, u8 *out)
{
	u32 *state = shash_desc_ctx(desc);

	return __crc32c_pcl_intel_finup(state, data, len, out);
}
187*4882a593Smuzhiyun
/*
 * One-shot digest: hash @data starting directly from the seed held in the
 * transform context, bypassing the descriptor state.
 */
static int crc32c_pcl_intel_digest(struct shash_desc *desc, const u8 *data,
				   unsigned int len, u8 *out)
{
	u32 *seed = crypto_shash_ctx(desc->tfm);

	return __crc32c_pcl_intel_finup(seed, data, len, out);
}
194*4882a593Smuzhiyun #endif /* CONFIG_X86_64 */
195*4882a593Smuzhiyun
196*4882a593Smuzhiyun static struct shash_alg alg = {
197*4882a593Smuzhiyun .setkey = crc32c_intel_setkey,
198*4882a593Smuzhiyun .init = crc32c_intel_init,
199*4882a593Smuzhiyun .update = crc32c_intel_update,
200*4882a593Smuzhiyun .final = crc32c_intel_final,
201*4882a593Smuzhiyun .finup = crc32c_intel_finup,
202*4882a593Smuzhiyun .digest = crc32c_intel_digest,
203*4882a593Smuzhiyun .descsize = sizeof(u32),
204*4882a593Smuzhiyun .digestsize = CHKSUM_DIGEST_SIZE,
205*4882a593Smuzhiyun .base = {
206*4882a593Smuzhiyun .cra_name = "crc32c",
207*4882a593Smuzhiyun .cra_driver_name = "crc32c-intel",
208*4882a593Smuzhiyun .cra_priority = 200,
209*4882a593Smuzhiyun .cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
210*4882a593Smuzhiyun .cra_blocksize = CHKSUM_BLOCK_SIZE,
211*4882a593Smuzhiyun .cra_ctxsize = sizeof(u32),
212*4882a593Smuzhiyun .cra_module = THIS_MODULE,
213*4882a593Smuzhiyun .cra_init = crc32c_intel_cra_init,
214*4882a593Smuzhiyun }
215*4882a593Smuzhiyun };
216*4882a593Smuzhiyun
217*4882a593Smuzhiyun static const struct x86_cpu_id crc32c_cpu_id[] = {
218*4882a593Smuzhiyun X86_MATCH_FEATURE(X86_FEATURE_XMM4_2, NULL),
219*4882a593Smuzhiyun {}
220*4882a593Smuzhiyun };
221*4882a593Smuzhiyun MODULE_DEVICE_TABLE(x86cpu, crc32c_cpu_id);
222*4882a593Smuzhiyun
crc32c_intel_mod_init(void)223*4882a593Smuzhiyun static int __init crc32c_intel_mod_init(void)
224*4882a593Smuzhiyun {
225*4882a593Smuzhiyun if (!x86_match_cpu(crc32c_cpu_id))
226*4882a593Smuzhiyun return -ENODEV;
227*4882a593Smuzhiyun #ifdef CONFIG_X86_64
228*4882a593Smuzhiyun if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) {
229*4882a593Smuzhiyun alg.update = crc32c_pcl_intel_update;
230*4882a593Smuzhiyun alg.finup = crc32c_pcl_intel_finup;
231*4882a593Smuzhiyun alg.digest = crc32c_pcl_intel_digest;
232*4882a593Smuzhiyun }
233*4882a593Smuzhiyun #endif
234*4882a593Smuzhiyun return crypto_register_shash(&alg);
235*4882a593Smuzhiyun }
236*4882a593Smuzhiyun
crc32c_intel_mod_fini(void)237*4882a593Smuzhiyun static void __exit crc32c_intel_mod_fini(void)
238*4882a593Smuzhiyun {
239*4882a593Smuzhiyun crypto_unregister_shash(&alg);
240*4882a593Smuzhiyun }
241*4882a593Smuzhiyun
242*4882a593Smuzhiyun module_init(crc32c_intel_mod_init);
243*4882a593Smuzhiyun module_exit(crc32c_intel_mod_fini);
244*4882a593Smuzhiyun
245*4882a593Smuzhiyun MODULE_AUTHOR("Austin Zhang <austin.zhang@intel.com>, Kent Liu <kent.liu@intel.com>");
246*4882a593Smuzhiyun MODULE_DESCRIPTION("CRC32c (Castagnoli) optimization using Intel Hardware.");
247*4882a593Smuzhiyun MODULE_LICENSE("GPL");
248*4882a593Smuzhiyun
249*4882a593Smuzhiyun MODULE_ALIAS_CRYPTO("crc32c");
250*4882a593Smuzhiyun MODULE_ALIAS_CRYPTO("crc32c-intel");
251