// SPDX-License-Identifier: GPL-2.0-only
/*
 * Accelerated CRC-T10DIF using arm64 NEON and Crypto Extensions instructions
 *
 * Copyright (C) 2016 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 */
7*4882a593Smuzhiyun
8*4882a593Smuzhiyun #include <linux/cpufeature.h>
9*4882a593Smuzhiyun #include <linux/crc-t10dif.h>
10*4882a593Smuzhiyun #include <linux/init.h>
11*4882a593Smuzhiyun #include <linux/kernel.h>
12*4882a593Smuzhiyun #include <linux/module.h>
13*4882a593Smuzhiyun #include <linux/string.h>
14*4882a593Smuzhiyun
15*4882a593Smuzhiyun #include <crypto/internal/hash.h>
16*4882a593Smuzhiyun #include <crypto/internal/simd.h>
17*4882a593Smuzhiyun
18*4882a593Smuzhiyun #include <asm/neon.h>
19*4882a593Smuzhiyun #include <asm/simd.h>
20*4882a593Smuzhiyun
21*4882a593Smuzhiyun #define CRC_T10DIF_PMULL_CHUNK_SIZE 16U
22*4882a593Smuzhiyun
23*4882a593Smuzhiyun asmlinkage u16 crc_t10dif_pmull_p8(u16 init_crc, const u8 *buf, size_t len);
24*4882a593Smuzhiyun asmlinkage u16 crc_t10dif_pmull_p64(u16 init_crc, const u8 *buf, size_t len);
25*4882a593Smuzhiyun
crct10dif_init(struct shash_desc * desc)26*4882a593Smuzhiyun static int crct10dif_init(struct shash_desc *desc)
27*4882a593Smuzhiyun {
28*4882a593Smuzhiyun u16 *crc = shash_desc_ctx(desc);
29*4882a593Smuzhiyun
30*4882a593Smuzhiyun *crc = 0;
31*4882a593Smuzhiyun return 0;
32*4882a593Smuzhiyun }
33*4882a593Smuzhiyun
crct10dif_update_pmull_p8(struct shash_desc * desc,const u8 * data,unsigned int length)34*4882a593Smuzhiyun static int crct10dif_update_pmull_p8(struct shash_desc *desc, const u8 *data,
35*4882a593Smuzhiyun unsigned int length)
36*4882a593Smuzhiyun {
37*4882a593Smuzhiyun u16 *crc = shash_desc_ctx(desc);
38*4882a593Smuzhiyun
39*4882a593Smuzhiyun if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE && crypto_simd_usable()) {
40*4882a593Smuzhiyun do {
41*4882a593Smuzhiyun unsigned int chunk = length;
42*4882a593Smuzhiyun
43*4882a593Smuzhiyun if (chunk > SZ_4K + CRC_T10DIF_PMULL_CHUNK_SIZE)
44*4882a593Smuzhiyun chunk = SZ_4K;
45*4882a593Smuzhiyun
46*4882a593Smuzhiyun kernel_neon_begin();
47*4882a593Smuzhiyun *crc = crc_t10dif_pmull_p8(*crc, data, chunk);
48*4882a593Smuzhiyun kernel_neon_end();
49*4882a593Smuzhiyun data += chunk;
50*4882a593Smuzhiyun length -= chunk;
51*4882a593Smuzhiyun } while (length);
52*4882a593Smuzhiyun } else {
53*4882a593Smuzhiyun *crc = crc_t10dif_generic(*crc, data, length);
54*4882a593Smuzhiyun }
55*4882a593Smuzhiyun
56*4882a593Smuzhiyun return 0;
57*4882a593Smuzhiyun }
58*4882a593Smuzhiyun
crct10dif_update_pmull_p64(struct shash_desc * desc,const u8 * data,unsigned int length)59*4882a593Smuzhiyun static int crct10dif_update_pmull_p64(struct shash_desc *desc, const u8 *data,
60*4882a593Smuzhiyun unsigned int length)
61*4882a593Smuzhiyun {
62*4882a593Smuzhiyun u16 *crc = shash_desc_ctx(desc);
63*4882a593Smuzhiyun
64*4882a593Smuzhiyun if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE && crypto_simd_usable()) {
65*4882a593Smuzhiyun do {
66*4882a593Smuzhiyun unsigned int chunk = length;
67*4882a593Smuzhiyun
68*4882a593Smuzhiyun if (chunk > SZ_4K + CRC_T10DIF_PMULL_CHUNK_SIZE)
69*4882a593Smuzhiyun chunk = SZ_4K;
70*4882a593Smuzhiyun
71*4882a593Smuzhiyun kernel_neon_begin();
72*4882a593Smuzhiyun *crc = crc_t10dif_pmull_p64(*crc, data, chunk);
73*4882a593Smuzhiyun kernel_neon_end();
74*4882a593Smuzhiyun data += chunk;
75*4882a593Smuzhiyun length -= chunk;
76*4882a593Smuzhiyun } while (length);
77*4882a593Smuzhiyun } else {
78*4882a593Smuzhiyun *crc = crc_t10dif_generic(*crc, data, length);
79*4882a593Smuzhiyun }
80*4882a593Smuzhiyun
81*4882a593Smuzhiyun return 0;
82*4882a593Smuzhiyun }
83*4882a593Smuzhiyun
/*
 * crct10dif_final() - shash .final hook.
 * @desc: hash descriptor; its context area holds the running u16 CRC.
 * @out:  destination buffer for the digest.
 *
 * Stores the running CRC into @out as a native-endian u16.
 *
 * Return: always 0.
 */
static int crct10dif_final(struct shash_desc *desc, u8 *out)
{
	const u16 *state = shash_desc_ctx(desc);
	u16 *digest = (u16 *)out;

	*digest = *state;

	return 0;
}
91*4882a593Smuzhiyun
92*4882a593Smuzhiyun static struct shash_alg crc_t10dif_alg[] = {{
93*4882a593Smuzhiyun .digestsize = CRC_T10DIF_DIGEST_SIZE,
94*4882a593Smuzhiyun .init = crct10dif_init,
95*4882a593Smuzhiyun .update = crct10dif_update_pmull_p8,
96*4882a593Smuzhiyun .final = crct10dif_final,
97*4882a593Smuzhiyun .descsize = CRC_T10DIF_DIGEST_SIZE,
98*4882a593Smuzhiyun
99*4882a593Smuzhiyun .base.cra_name = "crct10dif",
100*4882a593Smuzhiyun .base.cra_driver_name = "crct10dif-arm64-neon",
101*4882a593Smuzhiyun .base.cra_priority = 100,
102*4882a593Smuzhiyun .base.cra_blocksize = CRC_T10DIF_BLOCK_SIZE,
103*4882a593Smuzhiyun .base.cra_module = THIS_MODULE,
104*4882a593Smuzhiyun }, {
105*4882a593Smuzhiyun .digestsize = CRC_T10DIF_DIGEST_SIZE,
106*4882a593Smuzhiyun .init = crct10dif_init,
107*4882a593Smuzhiyun .update = crct10dif_update_pmull_p64,
108*4882a593Smuzhiyun .final = crct10dif_final,
109*4882a593Smuzhiyun .descsize = CRC_T10DIF_DIGEST_SIZE,
110*4882a593Smuzhiyun
111*4882a593Smuzhiyun .base.cra_name = "crct10dif",
112*4882a593Smuzhiyun .base.cra_driver_name = "crct10dif-arm64-ce",
113*4882a593Smuzhiyun .base.cra_priority = 200,
114*4882a593Smuzhiyun .base.cra_blocksize = CRC_T10DIF_BLOCK_SIZE,
115*4882a593Smuzhiyun .base.cra_module = THIS_MODULE,
116*4882a593Smuzhiyun }};
117*4882a593Smuzhiyun
crc_t10dif_mod_init(void)118*4882a593Smuzhiyun static int __init crc_t10dif_mod_init(void)
119*4882a593Smuzhiyun {
120*4882a593Smuzhiyun if (cpu_have_named_feature(PMULL))
121*4882a593Smuzhiyun return crypto_register_shashes(crc_t10dif_alg,
122*4882a593Smuzhiyun ARRAY_SIZE(crc_t10dif_alg));
123*4882a593Smuzhiyun else
124*4882a593Smuzhiyun /* only register the first array element */
125*4882a593Smuzhiyun return crypto_register_shash(crc_t10dif_alg);
126*4882a593Smuzhiyun }
127*4882a593Smuzhiyun
crc_t10dif_mod_exit(void)128*4882a593Smuzhiyun static void __exit crc_t10dif_mod_exit(void)
129*4882a593Smuzhiyun {
130*4882a593Smuzhiyun if (cpu_have_named_feature(PMULL))
131*4882a593Smuzhiyun crypto_unregister_shashes(crc_t10dif_alg,
132*4882a593Smuzhiyun ARRAY_SIZE(crc_t10dif_alg));
133*4882a593Smuzhiyun else
134*4882a593Smuzhiyun crypto_unregister_shash(crc_t10dif_alg);
135*4882a593Smuzhiyun }
136*4882a593Smuzhiyun
137*4882a593Smuzhiyun module_cpu_feature_match(ASIMD, crc_t10dif_mod_init);
138*4882a593Smuzhiyun module_exit(crc_t10dif_mod_exit);
139*4882a593Smuzhiyun
140*4882a593Smuzhiyun MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
141*4882a593Smuzhiyun MODULE_LICENSE("GPL v2");
142*4882a593Smuzhiyun MODULE_ALIAS_CRYPTO("crct10dif");
143*4882a593Smuzhiyun MODULE_ALIAS_CRYPTO("crct10dif-arm64-ce");
144