// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2016 Intel Corporation
 *
 * Author: Gayatri Kammela <gayatri.kammela@intel.com>
 * Author: Megha Dey <megha.dey@linux.intel.com>
 */
8*4882a593Smuzhiyun
9*4882a593Smuzhiyun #ifdef CONFIG_AS_AVX512
10*4882a593Smuzhiyun
11*4882a593Smuzhiyun #include <linux/raid/pq.h>
12*4882a593Smuzhiyun #include "x86.h"
13*4882a593Smuzhiyun
raid6_has_avx512(void)14*4882a593Smuzhiyun static int raid6_has_avx512(void)
15*4882a593Smuzhiyun {
16*4882a593Smuzhiyun return boot_cpu_has(X86_FEATURE_AVX2) &&
17*4882a593Smuzhiyun boot_cpu_has(X86_FEATURE_AVX) &&
18*4882a593Smuzhiyun boot_cpu_has(X86_FEATURE_AVX512F) &&
19*4882a593Smuzhiyun boot_cpu_has(X86_FEATURE_AVX512BW) &&
20*4882a593Smuzhiyun boot_cpu_has(X86_FEATURE_AVX512VL) &&
21*4882a593Smuzhiyun boot_cpu_has(X86_FEATURE_AVX512DQ);
22*4882a593Smuzhiyun }
23*4882a593Smuzhiyun
/*
 * Recover two failed data blocks from the surviving data, P and Q using
 * AVX-512 instructions.
 *
 * @disks: number of entries in @ptrs; ptrs[disks-2] is P, ptrs[disks-1] is Q
 * @bytes: bytes to process in each block.  The loop consumes 128 bytes per
 *         iteration on CONFIG_X86_64 and 64 bytes otherwise and only stops
 *         when the count reaches exactly zero, so @bytes must be a multiple
 *         of that step.
 * @faila: index of the first failed data block
 * @failb: index of the second failed data block; failb - faila is used as
 *         an index into raid6_gfexi (presumably callers pass faila < failb)
 * @ptrs:  block pointer table.  It is rewritten while the syndrome is
 *         regenerated and restored before the recovery loop runs.
 *
 * On return the block at ptrs[faila] holds the recovered A data and the
 * block at ptrs[failb] holds the recovered B data.
 *
 * All block accesses use vmovdqa64 (aligned moves) or memory operands of
 * 64-byte vector instructions.
 *
 * NOTE(review): the asm statements pass values to each other in fixed zmm
 * registers and store through operands that are listed as inputs, with no
 * clobber lists.  Keep the statements in their current order between
 * kernel_fpu_begin() and kernel_fpu_end().
 */
static void raid6_2data_recov_avx512(int disks, size_t bytes, int faila,
				     int failb, void **ptrs)
{
	u8 *p, *q, *dp, *dq;
	const u8 *pbmul;	/* P multiplier table for B data */
	const u8 *qmul;		/* Q multiplier table (for both) */
	const u8 x0f = 0x0f;	/* nibble mask, broadcast into zmm7 below */

	p = (u8 *)ptrs[disks-2];
	q = (u8 *)ptrs[disks-1];

	/*
	 * Compute syndrome with zero for the missing data pages
	 * Use the dead data pages as temporary storage for
	 * delta p and delta q
	 */

	dp = (u8 *)ptrs[faila];
	ptrs[faila] = (void *)raid6_empty_zero_page;
	ptrs[disks-2] = dp;
	dq = (u8 *)ptrs[failb];
	ptrs[failb] = (void *)raid6_empty_zero_page;
	ptrs[disks-1] = dq;

	raid6_call.gen_syndrome(disks, bytes, ptrs);

	/* Restore pointer table */
	ptrs[faila]   = dp;
	ptrs[failb]   = dq;
	ptrs[disks-2] = p;
	ptrs[disks-1] = q;

	/* Now, pick the proper data tables */
	pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]];
	qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
		raid6_gfexp[failb]]];

	kernel_fpu_begin();

	/* zmm7 = x0f[64]: the 0x0f mask replicated into every byte */
	asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f));

	while (bytes) {
#ifdef CONFIG_X86_64
		/* Two 64-byte vectors per iteration */
		asm volatile("vmovdqa64 %0, %%zmm1\n\t"
			     "vmovdqa64 %1, %%zmm9\n\t"
			     "vmovdqa64 %2, %%zmm0\n\t"
			     "vmovdqa64 %3, %%zmm8\n\t"
			     "vpxorq %4, %%zmm1, %%zmm1\n\t"
			     "vpxorq %5, %%zmm9, %%zmm9\n\t"
			     "vpxorq %6, %%zmm0, %%zmm0\n\t"
			     "vpxorq %7, %%zmm8, %%zmm8"
			     :
			     : "m" (q[0]), "m" (q[64]), "m" (p[0]),
			       "m" (p[64]), "m" (dq[0]), "m" (dq[64]),
			       "m" (dp[0]), "m" (dp[64]));

		/*
		 * 1 = dq[0]  ^ q[0]
		 * 9 = dq[64] ^ q[64]
		 * 0 = dp[0]  ^ p[0]
		 * 8 = dp[64] ^ p[64]
		 */

		/* Load the two 16-byte halves of the Q multiplier table */
		asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
			     "vbroadcasti64x2 %1, %%zmm5"
			     :
			     : "m" (qmul[0]), "m" (qmul[16]));

		/*
		 * Split each byte into its low and high nibble (shift by 4,
		 * mask with zmm7), look each nibble up with vpshufb and
		 * combine the two lookups with xor.
		 */
		asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t"
			     "vpsraw $4, %%zmm9, %%zmm12\n\t"
			     "vpandq %%zmm7, %%zmm1, %%zmm1\n\t"
			     "vpandq %%zmm7, %%zmm9, %%zmm9\n\t"
			     "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm12, %%zmm12\n\t"
			     "vpshufb %%zmm9, %%zmm4, %%zmm14\n\t"
			     "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t"
			     "vpshufb %%zmm12, %%zmm5, %%zmm15\n\t"
			     "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t"
			     "vpxorq %%zmm14, %%zmm15, %%zmm15\n\t"
			     "vpxorq %%zmm4, %%zmm5, %%zmm5"
			     :
			     : );

		/*
		 * 5 = qx[0]
		 * 15 = qx[64]
		 */

		/* Same nibble lookup for the P delta, using the pbmul table */
		asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
			     "vbroadcasti64x2 %1, %%zmm1\n\t"
			     "vpsraw $4, %%zmm0, %%zmm2\n\t"
			     "vpsraw $4, %%zmm8, %%zmm6\n\t"
			     "vpandq %%zmm7, %%zmm0, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm8, %%zmm14\n\t"
			     "vpandq %%zmm7, %%zmm2, %%zmm2\n\t"
			     "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
			     "vpshufb %%zmm14, %%zmm4, %%zmm12\n\t"
			     "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t"
			     "vpshufb %%zmm6, %%zmm1, %%zmm13\n\t"
			     "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm4, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm12, %%zmm13, %%zmm13"
			     :
			     : "m" (pbmul[0]), "m" (pbmul[16]));

		/*
		 * 1  = pbmul[px[0]]
		 * 13 = pbmul[px[64]]
		 */
		asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm15, %%zmm13, %%zmm13"
			     :
			     : );

		/*
		 * 1  = db = DQ
		 * 13 = db[64] = DQ[64]
		 */
		/* Store recovered B, then fold it into the P delta to get A */
		asm volatile("vmovdqa64 %%zmm1, %0\n\t"
			     "vmovdqa64 %%zmm13,%1\n\t"
			     "vpxorq %%zmm1, %%zmm0, %%zmm0\n\t"
			     "vpxorq %%zmm13, %%zmm8, %%zmm8"
			     :
			     : "m" (dq[0]), "m" (dq[64]));

		/* Store recovered A */
		asm volatile("vmovdqa64 %%zmm0, %0\n\t"
			     "vmovdqa64 %%zmm8, %1"
			     :
			     : "m" (dp[0]), "m" (dp[64]));

		bytes -= 128;
		p += 128;
		q += 128;
		dp += 128;
		dq += 128;
#else
		/* One 64-byte vector per iteration */
		asm volatile("vmovdqa64 %0, %%zmm1\n\t"
			     "vmovdqa64 %1, %%zmm0\n\t"
			     "vpxorq %2, %%zmm1, %%zmm1\n\t"
			     "vpxorq %3, %%zmm0, %%zmm0"
			     :
			     : "m" (*q), "m" (*p), "m"(*dq), "m" (*dp));

		/* 1 = dq ^ q;  0 = dp ^ p */

		/* Load the two 16-byte halves of the Q multiplier table */
		asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
			     "vbroadcasti64x2 %1, %%zmm5"
			     :
			     : "m" (qmul[0]), "m" (qmul[16]));

		/*
		 * The masking below leaves the nibbles of dq ^ q in:
		 * 1 = (dq ^ q) & 0x0f
		 * 3 = ((dq ^ q) >> 4) & 0x0f
		 */
		asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm1, %%zmm1\n\t"
			     "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
			     "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t"
			     "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t"
			     "vpxorq %%zmm4, %%zmm5, %%zmm5"
			     :
			     : );

		/* 5 = qx */

		/* Load the two 16-byte halves of the P multiplier table */
		asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
			     "vbroadcasti64x2 %1, %%zmm1"
			     :
			     : "m" (pbmul[0]), "m" (pbmul[16]));

		asm volatile("vpsraw $4, %%zmm0, %%zmm2\n\t"
			     "vpandq %%zmm7, %%zmm0, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm2, %%zmm2\n\t"
			     "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t"
			     "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm4, %%zmm1, %%zmm1"
			     :
			     : );

		/* 1 = pbmul[px] */
		asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t"
			     /* 1 = db = DQ */
			     "vmovdqa64 %%zmm1, %0\n\t"
			     :
			     : "m" (dq[0]));

		/* 0 = px ^ db: recovered A, stored to dp */
		asm volatile("vpxorq %%zmm1, %%zmm0, %%zmm0\n\t"
			     "vmovdqa64 %%zmm0, %0"
			     :
			     : "m" (dp[0]));

		bytes -= 64;
		p += 64;
		q += 64;
		dp += 64;
		dq += 64;
#endif
	}

	kernel_fpu_end();
}
226*4882a593Smuzhiyun
/*
 * Recover one failed data block together with P, from the surviving data
 * and Q, using AVX-512 instructions.
 *
 * @disks: number of entries in @ptrs; ptrs[disks-2] is P, ptrs[disks-1] is Q
 * @bytes: bytes to process in each block.  The loop consumes 128 bytes per
 *         iteration on CONFIG_X86_64 and 64 bytes otherwise and only stops
 *         when the count reaches exactly zero, so @bytes must be a multiple
 *         of that step.
 * @faila: index of the failed data block
 * @ptrs:  block pointer table.  It is rewritten while the syndrome is
 *         regenerated and restored before the recovery loop runs.
 *
 * On return the block at ptrs[faila] holds qmul[q ^ dq] (the recovered
 * data) and the P block has that value xor-ed into it.
 *
 * NOTE(review): the asm statements pass values to each other in fixed zmm
 * registers and store through operands that are listed as inputs, with no
 * clobber lists.  Keep the statements in their current order between
 * kernel_fpu_begin() and kernel_fpu_end().
 */
static void raid6_datap_recov_avx512(int disks, size_t bytes, int faila,
				     void **ptrs)
{
	u8 *p, *q, *dq;
	const u8 *qmul;		/* Q multiplier table */
	const u8 x0f = 0x0f;	/* nibble mask, broadcast into zmm7 below */

	p = (u8 *)ptrs[disks-2];
	q = (u8 *)ptrs[disks-1];

	/*
	 * Compute syndrome with zero for the missing data page
	 * Use the dead data page as temporary storage for delta q
	 */

	dq = (u8 *)ptrs[faila];
	ptrs[faila] = (void *)raid6_empty_zero_page;
	ptrs[disks-1] = dq;

	raid6_call.gen_syndrome(disks, bytes, ptrs);

	/* Restore pointer table */
	ptrs[faila]   = dq;
	ptrs[disks-1] = q;

	/* Now, pick the proper data tables */
	qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];

	kernel_fpu_begin();

	/* zmm7 = x0f[64]: the 0x0f mask replicated into every byte */
	asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f));

	while (bytes) {
#ifdef CONFIG_X86_64
		/* Two 64-byte vectors per iteration */
		asm volatile("vmovdqa64 %0, %%zmm3\n\t"
			     "vmovdqa64 %1, %%zmm8\n\t"
			     "vpxorq %2, %%zmm3, %%zmm3\n\t"
			     "vpxorq %3, %%zmm8, %%zmm8"
			     :
			     : "m" (dq[0]), "m" (dq[64]), "m" (q[0]),
			       "m" (q[64]));

		/*
		 * 3 = q[0] ^ dq[0]
		 * 8 = q[64] ^ dq[64]
		 */

		/*
		 * Load the two 16-byte halves of the Q multiplier table and
		 * keep a second copy of each for the second vector.
		 */
		asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t"
			     "vmovapd %%zmm0, %%zmm13\n\t"
			     "vbroadcasti64x2 %1, %%zmm1\n\t"
			     "vmovapd %%zmm1, %%zmm14"
			     :
			     : "m" (qmul[0]), "m" (qmul[16]));

		/*
		 * Split each byte into its low and high nibble (shift by 4,
		 * mask with zmm7), look each nibble up with vpshufb and
		 * combine the two lookups with xor.
		 */
		asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t"
			     "vpsraw $4, %%zmm8, %%zmm12\n\t"
			     "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm8, %%zmm8\n\t"
			     "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
			     "vpandq %%zmm7, %%zmm12, %%zmm12\n\t"
			     "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t"
			     "vpshufb %%zmm8, %%zmm13, %%zmm13\n\t"
			     "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t"
			     "vpshufb %%zmm12, %%zmm14, %%zmm14\n\t"
			     "vpxorq %%zmm0, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm13, %%zmm14, %%zmm14"
			     :
			     : );

		/*
		 * 1  = qmul[q[0]  ^ dq[0]]
		 * 14 = qmul[q[64] ^ dq[64]]
		 */
		asm volatile("vmovdqa64 %0, %%zmm2\n\t"
			     "vmovdqa64 %1, %%zmm12\n\t"
			     "vpxorq %%zmm1, %%zmm2, %%zmm2\n\t"
			     "vpxorq %%zmm14, %%zmm12, %%zmm12"
			     :
			     : "m" (p[0]), "m" (p[64]));

		/*
		 * 2  = p[0]  ^ qmul[q[0]  ^ dq[0]]
		 * 12 = p[64] ^ qmul[q[64] ^ dq[64]]
		 */

		/* Write the recovered data to dq and the updated P to p */
		asm volatile("vmovdqa64 %%zmm1, %0\n\t"
			     "vmovdqa64 %%zmm14, %1\n\t"
			     "vmovdqa64 %%zmm2, %2\n\t"
			     "vmovdqa64 %%zmm12,%3"
			     :
			     : "m" (dq[0]), "m" (dq[64]), "m" (p[0]),
			       "m" (p[64]));

		bytes -= 128;
		p += 128;
		q += 128;
		dq += 128;
#else
		/* One 64-byte vector per iteration */
		asm volatile("vmovdqa64 %0, %%zmm3\n\t"
			     "vpxorq %1, %%zmm3, %%zmm3"
			     :
			     : "m" (dq[0]), "m" (q[0]));

		/* 3 = q ^ dq */

		/* Load the two 16-byte halves of the Q multiplier table */
		asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t"
			     "vbroadcasti64x2 %1, %%zmm1"
			     :
			     : "m" (qmul[0]), "m" (qmul[16]));

		asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t"
			     "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
			     "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t"
			     "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm0, %%zmm1, %%zmm1"
			     :
			     : );

		/* 1 = qmul[q ^ dq] */

		asm volatile("vmovdqa64 %0, %%zmm2\n\t"
			     "vpxorq %%zmm1, %%zmm2, %%zmm2"
			     :
			     : "m" (p[0]));

		/* 2 = p ^ qmul[q ^ dq] */

		/* Write the recovered data to dq and the updated P to p */
		asm volatile("vmovdqa64 %%zmm1, %0\n\t"
			     "vmovdqa64 %%zmm2, %1"
			     :
			     : "m" (dq[0]), "m" (p[0]));

		bytes -= 64;
		p += 64;
		q += 64;
		dq += 64;
#endif
	}

	kernel_fpu_end();
}
368*4882a593Smuzhiyun
369*4882a593Smuzhiyun const struct raid6_recov_calls raid6_recov_avx512 = {
370*4882a593Smuzhiyun .data2 = raid6_2data_recov_avx512,
371*4882a593Smuzhiyun .datap = raid6_datap_recov_avx512,
372*4882a593Smuzhiyun .valid = raid6_has_avx512,
373*4882a593Smuzhiyun #ifdef CONFIG_X86_64
374*4882a593Smuzhiyun .name = "avx512x2",
375*4882a593Smuzhiyun #else
376*4882a593Smuzhiyun .name = "avx512x1",
377*4882a593Smuzhiyun #endif
378*4882a593Smuzhiyun .priority = 3,
379*4882a593Smuzhiyun };
380*4882a593Smuzhiyun
381*4882a593Smuzhiyun #else
382*4882a593Smuzhiyun #warning "your version of binutils lacks AVX512 support"
383*4882a593Smuzhiyun #endif
384