xref: /OK3568_Linux_fs/kernel/lib/raid6/recov_ssse3.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-only
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun  * Copyright (C) 2012 Intel Corporation
4*4882a593Smuzhiyun  */
5*4882a593Smuzhiyun 
6*4882a593Smuzhiyun #include <linux/raid/pq.h>
7*4882a593Smuzhiyun #include "x86.h"
8*4882a593Smuzhiyun 
raid6_has_ssse3(void)9*4882a593Smuzhiyun static int raid6_has_ssse3(void)
10*4882a593Smuzhiyun {
11*4882a593Smuzhiyun 	return boot_cpu_has(X86_FEATURE_XMM) &&
12*4882a593Smuzhiyun 		boot_cpu_has(X86_FEATURE_XMM2) &&
13*4882a593Smuzhiyun 		boot_cpu_has(X86_FEATURE_SSSE3);
14*4882a593Smuzhiyun }
15*4882a593Smuzhiyun 
/*
 * Recover two lost data blocks (disks faila and failb, faila < failb)
 * from the P and Q syndromes using SSSE3.
 *
 * Method: regenerate the syndrome with the two dead blocks zeroed, which
 * yields delta-P and delta-Q in the dead pages; then solve the GF(2^8)
 * linear system
 *	Db = pbmul[P ^ dP] ^ qmul[Q ^ dQ]
 *	Da = (P ^ dP) ^ Db
 * byte-wise with PSHUFB table lookups (one 16-entry table per nibble).
 *
 * @disks: total number of disks (data + P + Q; P is disks-2, Q is disks-1)
 * @bytes: stripe length in bytes.
 *	NOTE(review): the loop consumes 32 bytes (x86_64) or 16 bytes per
 *	iteration with no tail handling — assumes the raid6 core always
 *	passes a suitably aligned multiple; confirm against callers.
 * @faila/@failb: indices of the two failed data disks
 * @ptrs: per-disk page pointers; dead pages are overwritten with the
 *	recovered data (in place).
 */
static void raid6_2data_recov_ssse3(int disks, size_t bytes, int faila,
		int failb, void **ptrs)
{
	u8 *p, *q, *dp, *dq;
	const u8 *pbmul;	/* P multiplier table for B data */
	const u8 *qmul;		/* Q multiplier table (for both) */
	/* 0x0f in every lane: mask used to split each byte into nibbles
	   for the 16-entry PSHUFB lookups */
	static const u8 __aligned(16) x0f[16] = {
		 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
		 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f};

	p = (u8 *)ptrs[disks-2];
	q = (u8 *)ptrs[disks-1];

	/* Compute syndrome with zero for the missing data pages
	   Use the dead data pages as temporary storage for
	   delta p and delta q */
	dp = (u8 *)ptrs[faila];
	ptrs[faila] = (void *)raid6_empty_zero_page;
	ptrs[disks-2] = dp;
	dq = (u8 *)ptrs[failb];
	ptrs[failb] = (void *)raid6_empty_zero_page;
	ptrs[disks-1] = dq;

	raid6_call.gen_syndrome(disks, bytes, ptrs);

	/* Restore pointer table */
	ptrs[faila]   = dp;
	ptrs[failb]   = dq;
	ptrs[disks-2] = p;
	ptrs[disks-1] = q;

	/* Now, pick the proper data tables */
	pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]];
	qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
		raid6_gfexp[failb]]];

	/* SSE state is not saved across preemption: everything between
	   kernel_fpu_begin()/end() must run with the xmm registers live */
	kernel_fpu_begin();

	/* xmm7 = nibble mask, held for the whole loop */
	asm volatile("movdqa %0,%%xmm7" : : "m" (x0f[0]));

#ifdef CONFIG_X86_64
	/* x86_64 has xmm8-15, so the multiplier tables can stay resident:
	   xmm6 = qmul low-nibble table, xmm14/15 = pbmul low/high tables */
	asm volatile("movdqa %0,%%xmm6" : : "m" (qmul[0]));
	asm volatile("movdqa %0,%%xmm14" : : "m" (pbmul[0]));
	asm volatile("movdqa %0,%%xmm15" : : "m" (pbmul[16]));
#endif

	/* Now do it... */
	while (bytes) {
#ifdef CONFIG_X86_64
		/* xmm6, xmm14, xmm15 */

		/* Two interleaved 16-byte stripes per iteration; the
		   xmm8-15 copies mirror the xmm0-7 computation */
		asm volatile("movdqa %0,%%xmm1" : : "m" (q[0]));
		asm volatile("movdqa %0,%%xmm9" : : "m" (q[16]));
		asm volatile("movdqa %0,%%xmm0" : : "m" (p[0]));
		asm volatile("movdqa %0,%%xmm8" : : "m" (p[16]));
		asm volatile("pxor   %0,%%xmm1" : : "m" (dq[0]));
		asm volatile("pxor   %0,%%xmm9" : : "m" (dq[16]));
		asm volatile("pxor   %0,%%xmm0" : : "m" (dp[0]));
		asm volatile("pxor   %0,%%xmm8" : : "m" (dp[16]));

		/* xmm0/8 = px */

		asm volatile("movdqa %xmm6,%xmm4");
		asm volatile("movdqa %0,%%xmm5" : : "m" (qmul[16]));
		asm volatile("movdqa %xmm6,%xmm12");
		asm volatile("movdqa %xmm5,%xmm13");
		asm volatile("movdqa %xmm1,%xmm3");
		asm volatile("movdqa %xmm9,%xmm11");
		asm volatile("movdqa %xmm0,%xmm2"); /* xmm2/10 = px */
		asm volatile("movdqa %xmm8,%xmm10");
		/* split qx bytes into nibbles and look each half up in the
		   qmul tables; psraw+pand isolates the high nibble */
		asm volatile("psraw  $4,%xmm1");
		asm volatile("psraw  $4,%xmm9");
		asm volatile("pand   %xmm7,%xmm3");
		asm volatile("pand   %xmm7,%xmm11");
		asm volatile("pand   %xmm7,%xmm1");
		asm volatile("pand   %xmm7,%xmm9");
		asm volatile("pshufb %xmm3,%xmm4");
		asm volatile("pshufb %xmm11,%xmm12");
		asm volatile("pshufb %xmm1,%xmm5");
		asm volatile("pshufb %xmm9,%xmm13");
		asm volatile("pxor   %xmm4,%xmm5");
		asm volatile("pxor   %xmm12,%xmm13");

		/* xmm5/13 = qx */

		/* same nibble-split lookup, now multiplying px by pbmul */
		asm volatile("movdqa %xmm14,%xmm4");
		asm volatile("movdqa %xmm15,%xmm1");
		asm volatile("movdqa %xmm14,%xmm12");
		asm volatile("movdqa %xmm15,%xmm9");
		asm volatile("movdqa %xmm2,%xmm3");
		asm volatile("movdqa %xmm10,%xmm11");
		asm volatile("psraw  $4,%xmm2");
		asm volatile("psraw  $4,%xmm10");
		asm volatile("pand   %xmm7,%xmm3");
		asm volatile("pand   %xmm7,%xmm11");
		asm volatile("pand   %xmm7,%xmm2");
		asm volatile("pand   %xmm7,%xmm10");
		asm volatile("pshufb %xmm3,%xmm4");
		asm volatile("pshufb %xmm11,%xmm12");
		asm volatile("pshufb %xmm2,%xmm1");
		asm volatile("pshufb %xmm10,%xmm9");
		asm volatile("pxor   %xmm4,%xmm1");
		asm volatile("pxor   %xmm12,%xmm9");

		/* xmm1/9 = pbmul[px] */
		asm volatile("pxor   %xmm5,%xmm1");
		asm volatile("pxor   %xmm13,%xmm9");
		/* xmm1/9 = db = DQ */
		asm volatile("movdqa %%xmm1,%0" : "=m" (dq[0]));
		asm volatile("movdqa %%xmm9,%0" : "=m" (dq[16]));

		/* da = px ^ db, stored in the dp page */
		asm volatile("pxor   %xmm1,%xmm0");
		asm volatile("pxor   %xmm9,%xmm8");
		asm volatile("movdqa %%xmm0,%0" : "=m" (dp[0]));
		asm volatile("movdqa %%xmm8,%0" : "=m" (dp[16]));

		bytes -= 32;
		p += 32;
		q += 32;
		dp += 32;
		dq += 32;
#else
		/* 32-bit: only xmm0-7, so one 16-byte stripe per iteration
		   and the multiplier tables are reloaded each pass */
		asm volatile("movdqa %0,%%xmm1" : : "m" (*q));
		asm volatile("movdqa %0,%%xmm0" : : "m" (*p));
		asm volatile("pxor   %0,%%xmm1" : : "m" (*dq));
		asm volatile("pxor   %0,%%xmm0" : : "m" (*dp));

		/* 1 = dq ^ q
		 * 0 = dp ^ p
		 */
		asm volatile("movdqa %0,%%xmm4" : : "m" (qmul[0]));
		asm volatile("movdqa %0,%%xmm5" : : "m" (qmul[16]));

		/* nibble-split lookup of qmul[qx] */
		asm volatile("movdqa %xmm1,%xmm3");
		asm volatile("psraw  $4,%xmm1");
		asm volatile("pand   %xmm7,%xmm3");
		asm volatile("pand   %xmm7,%xmm1");
		asm volatile("pshufb %xmm3,%xmm4");
		asm volatile("pshufb %xmm1,%xmm5");
		asm volatile("pxor   %xmm4,%xmm5");

		asm volatile("movdqa %xmm0,%xmm2"); /* xmm2 = px */

		/* xmm5 = qx */

		asm volatile("movdqa %0,%%xmm4" : : "m" (pbmul[0]));
		asm volatile("movdqa %0,%%xmm1" : : "m" (pbmul[16]));
		asm volatile("movdqa %xmm2,%xmm3");
		asm volatile("psraw  $4,%xmm2");
		asm volatile("pand   %xmm7,%xmm3");
		asm volatile("pand   %xmm7,%xmm2");
		asm volatile("pshufb %xmm3,%xmm4");
		asm volatile("pshufb %xmm2,%xmm1");
		asm volatile("pxor   %xmm4,%xmm1");

		/* xmm1 = pbmul[px] */
		asm volatile("pxor   %xmm5,%xmm1");
		/* xmm1 = db = DQ */
		asm volatile("movdqa %%xmm1,%0" : "=m" (*dq));

		/* da = px ^ db */
		asm volatile("pxor   %xmm1,%xmm0");
		asm volatile("movdqa %%xmm0,%0" : "=m" (*dp));

		bytes -= 16;
		p += 16;
		q += 16;
		dp += 16;
		dq += 16;
#endif
	}

	kernel_fpu_end();
}
189*4882a593Smuzhiyun 
190*4882a593Smuzhiyun 
/*
 * Recover one lost data block (disk faila) plus the P block from the Q
 * syndrome using SSSE3.
 *
 * Method: regenerate Q with the dead data page zeroed, giving delta-Q in
 * the dead page; the lost data is then
 *	D = qmul[Q ^ dQ]		(qmul = table for g^-faila)
 * and P is repaired in place as P ^= D.
 *
 * @disks: total number of disks (P is disks-2, Q is disks-1)
 * @bytes: stripe length in bytes.
 *	NOTE(review): consumed 32 (x86_64) / 16 bytes at a time with no
 *	tail handling — assumes the raid6 core guarantees the multiple.
 * @faila: index of the failed data disk
 * @ptrs: per-disk page pointers; the dead page receives the recovered
 *	data and the P page is corrected in place.
 */
static void raid6_datap_recov_ssse3(int disks, size_t bytes, int faila,
		void **ptrs)
{
	u8 *p, *q, *dq;
	const u8 *qmul;		/* Q multiplier table */
	/* 0x0f in every lane: nibble mask for the PSHUFB table lookups */
	static const u8 __aligned(16) x0f[16] = {
		 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
		 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f};

	p = (u8 *)ptrs[disks-2];
	q = (u8 *)ptrs[disks-1];

	/* Compute syndrome with zero for the missing data page
	   Use the dead data page as temporary storage for delta q */
	dq = (u8 *)ptrs[faila];
	ptrs[faila] = (void *)raid6_empty_zero_page;
	ptrs[disks-1] = dq;

	raid6_call.gen_syndrome(disks, bytes, ptrs);

	/* Restore pointer table */
	ptrs[faila]   = dq;
	ptrs[disks-1] = q;

	/* Now, pick the proper data tables */
	qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];

	kernel_fpu_begin();

	/* xmm7 = nibble mask, held for the whole loop */
	asm volatile("movdqa %0, %%xmm7" : : "m" (x0f[0]));

	while (bytes) {
#ifdef CONFIG_X86_64
		/* two interleaved 16-byte stripes per iteration */
		asm volatile("movdqa %0, %%xmm3" : : "m" (dq[0]));
		asm volatile("movdqa %0, %%xmm4" : : "m" (dq[16]));
		asm volatile("pxor %0, %%xmm3" : : "m" (q[0]));
		asm volatile("movdqa %0, %%xmm0" : : "m" (qmul[0]));

		/* xmm3 = q[0] ^ dq[0] */

		asm volatile("pxor %0, %%xmm4" : : "m" (q[16]));
		asm volatile("movdqa %0, %%xmm1" : : "m" (qmul[16]));

		/* xmm4 = q[16] ^ dq[16] */

		asm volatile("movdqa %xmm3, %xmm6");
		asm volatile("movdqa %xmm4, %xmm8");

		/* xmm4 = xmm8 = q[16] ^ dq[16] */

		/* nibble-split qmul lookup for stripe 0 */
		asm volatile("psraw $4, %xmm3");
		asm volatile("pand %xmm7, %xmm6");
		asm volatile("pand %xmm7, %xmm3");
		asm volatile("pshufb %xmm6, %xmm0");
		asm volatile("pshufb %xmm3, %xmm1");
		asm volatile("movdqa %0, %%xmm10" : : "m" (qmul[0]));
		asm volatile("pxor %xmm0, %xmm1");
		asm volatile("movdqa %0, %%xmm11" : : "m" (qmul[16]));

		/* xmm1 = qmul[q[0] ^ dq[0]] */

		/* nibble-split qmul lookup for stripe 1 */
		asm volatile("psraw $4, %xmm4");
		asm volatile("pand %xmm7, %xmm8");
		asm volatile("pand %xmm7, %xmm4");
		asm volatile("pshufb %xmm8, %xmm10");
		asm volatile("pshufb %xmm4, %xmm11");
		asm volatile("movdqa %0, %%xmm2" : : "m" (p[0]));
		asm volatile("pxor %xmm10, %xmm11");
		asm volatile("movdqa %0, %%xmm12" : : "m" (p[16]));

		/* xmm11 = qmul[q[16] ^ dq[16]] */

		asm volatile("pxor %xmm1, %xmm2");

		/* xmm2 = p[0] ^ qmul[q[0] ^ dq[0]] */

		asm volatile("pxor %xmm11, %xmm12");

		/* xmm12 = p[16] ^ qmul[q[16] ^ dq[16]] */

		/* recovered data into the dead page, corrected P in place */
		asm volatile("movdqa %%xmm1, %0" : "=m" (dq[0]));
		asm volatile("movdqa %%xmm11, %0" : "=m" (dq[16]));

		asm volatile("movdqa %%xmm2, %0" : "=m" (p[0]));
		asm volatile("movdqa %%xmm12, %0" : "=m" (p[16]));

		bytes -= 32;
		p += 32;
		q += 32;
		dq += 32;

#else
		/* 32-bit: one 16-byte stripe per iteration */
		asm volatile("movdqa %0, %%xmm3" : : "m" (dq[0]));
		asm volatile("movdqa %0, %%xmm0" : : "m" (qmul[0]));
		asm volatile("pxor %0, %%xmm3" : : "m" (q[0]));
		asm volatile("movdqa %0, %%xmm1" : : "m" (qmul[16]));

		/* xmm3 = *q ^ *dq */

		asm volatile("movdqa %xmm3, %xmm6");
		asm volatile("movdqa %0, %%xmm2" : : "m" (p[0]));
		asm volatile("psraw $4, %xmm3");
		asm volatile("pand %xmm7, %xmm6");
		asm volatile("pand %xmm7, %xmm3");
		asm volatile("pshufb %xmm6, %xmm0");
		asm volatile("pshufb %xmm3, %xmm1");
		asm volatile("pxor %xmm0, %xmm1");

		/* xmm1 = qmul[*q ^ *dq */

		asm volatile("pxor %xmm1, %xmm2");

		/* xmm2 = *p ^ qmul[*q ^ *dq] */

		asm volatile("movdqa %%xmm1, %0" : "=m" (dq[0]));
		asm volatile("movdqa %%xmm2, %0" : "=m" (p[0]));

		bytes -= 16;
		p += 16;
		q += 16;
		dq += 16;
#endif
	}

	kernel_fpu_end();
}
317*4882a593Smuzhiyun 
/*
 * Algorithm descriptor registered with the raid6 core: recovery entry
 * points, the runtime validity probe, and a selection priority.
 */
const struct raid6_recov_calls raid6_recov_ssse3 = {
	.data2 = raid6_2data_recov_ssse3,
	.datap = raid6_datap_recov_ssse3,
	.valid = raid6_has_ssse3,
#ifdef CONFIG_X86_64
	.name = "ssse3x2",	/* 64-bit build: 2 x 16-byte stripes per loop */
#else
	.name = "ssse3x1",	/* 32-bit build: 1 x 16-byte stripe per loop */
#endif
	.priority = 1,
};
329