// SPDX-License-Identifier: GPL-2.0
/*
 *	MMX 3DNow! library helper functions
 *
 *	To do:
 *	We can use MMX just for prefetch in IRQs. This may be a win.
 *		(reported so on K6-III)
 *	We should use a better code-neutral filler for the short jump
 *		leal ebx,[ebx] is apparently best for K6-2, but Cyrix ??
 *	We also want to clobber the filler register so we don't get any
 *		register forwarding stalls on the filler.
 *
 *	Add *user handling. Checksums are not a win with MMX on any CPU
 *	tested so far for any MMX solution figured.
 *
 *	22/09/2000 - Arjan van de Ven
 *		Improved for non-engineering-sample Athlons
 *
 */
#include <linux/hardirq.h>
#include <linux/string.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/types.h>

#include <asm/fpu/api.h>
#include <asm/asm.h>

/*
 * Use KFPU_387.  MMX instructions are not affected by MXCSR,
 * but both AMD and Intel documentation states that even integer MMX
 * operations will result in #MF if an exception is pending in FCW.
 *
 * EMMS is not needed afterwards because, after calling kernel_fpu_end(),
 * any subsequent user of the 387 stack will reinitialize it using
 * KFPU_387.
 */
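
/*
 * Every MMX user in this file follows the same pattern.  A minimal sketch
 * (src and dst are placeholder kernel pointers, not names used below):
 *
 *	kernel_fpu_begin_mask(KFPU_387);
 *	asm volatile("movq (%0), %%mm0\n\t"
 *		     "movq %%mm0, (%1)\n\t"
 *		     : : "r" (src), "r" (dst) : "memory");
 *	kernel_fpu_end();
 */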

void *_mmx_memcpy(void *to, const void *from, size_t len)
{
	void *p;
	int i;

	if (unlikely(in_interrupt()))
		return __memcpy(to, from, len);

	p = to;
	i = len >> 6; /* len/64 */

	kernel_fpu_begin_mask(KFPU_387);

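	/*
	 * Prefetch the first 320 bytes of the source.  Should a prefetch
	 * ever fault (the exception table entry below covers that case),
	 * the fixup at 3: rewrites the first two bytes at 1: with a short
	 * jmp (0x1AEB is "EB 1A", i.e. jmp +26, stored little-endian) so
	 * the whole 28-byte prefetch block is skipped and execution
	 * resumes at 2:.
	 */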
	__asm__ __volatile__ (
		"1: prefetch (%0)\n"		/* This set is 28 bytes */
		"   prefetch 64(%0)\n"
		"   prefetch 128(%0)\n"
		"   prefetch 192(%0)\n"
		"   prefetch 256(%0)\n"
		"2:  \n"
		".section .fixup, \"ax\"\n"
		"3: movw $0x1AEB, 1b\n"	/* jmp on 26 bytes */
		"   jmp 2b\n"
		".previous\n"
			_ASM_EXTABLE(1b, 3b)
			: : "r" (from));

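	/*
	 * Main loop: copy 64 bytes per iteration through %mm0-%mm3,
	 * prefetching 320 bytes ahead.  The final (up to five) 64-byte
	 * chunks are copied by the prefetch-free loop that follows, so we
	 * stop prefetching near the end of the source.  A faulting
	 * prefetch is patched into a short jmp over it by the 0x05EB
	 * fixup.
	 */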
	for ( ; i > 5; i--) {
		__asm__ __volatile__ (
		"1:  prefetch 320(%0)\n"
		"2:  movq (%0), %%mm0\n"
		"  movq 8(%0), %%mm1\n"
		"  movq 16(%0), %%mm2\n"
		"  movq 24(%0), %%mm3\n"
		"  movq %%mm0, (%1)\n"
		"  movq %%mm1, 8(%1)\n"
		"  movq %%mm2, 16(%1)\n"
		"  movq %%mm3, 24(%1)\n"
		"  movq 32(%0), %%mm0\n"
		"  movq 40(%0), %%mm1\n"
		"  movq 48(%0), %%mm2\n"
		"  movq 56(%0), %%mm3\n"
		"  movq %%mm0, 32(%1)\n"
		"  movq %%mm1, 40(%1)\n"
		"  movq %%mm2, 48(%1)\n"
		"  movq %%mm3, 56(%1)\n"
		".section .fixup, \"ax\"\n"
		"3: movw $0x05EB, 1b\n"	/* jmp on 5 bytes */
		"   jmp 2b\n"
		".previous\n"
			_ASM_EXTABLE(1b, 3b)
			: : "r" (from), "r" (to) : "memory");

		from += 64;
		to += 64;
	}

	for ( ; i > 0; i--) {
		__asm__ __volatile__ (
		"  movq (%0), %%mm0\n"
		"  movq 8(%0), %%mm1\n"
		"  movq 16(%0), %%mm2\n"
		"  movq 24(%0), %%mm3\n"
		"  movq %%mm0, (%1)\n"
		"  movq %%mm1, 8(%1)\n"
		"  movq %%mm2, 16(%1)\n"
		"  movq %%mm3, 24(%1)\n"
		"  movq 32(%0), %%mm0\n"
		"  movq 40(%0), %%mm1\n"
		"  movq 48(%0), %%mm2\n"
		"  movq 56(%0), %%mm3\n"
		"  movq %%mm0, 32(%1)\n"
		"  movq %%mm1, 40(%1)\n"
		"  movq %%mm2, 48(%1)\n"
		"  movq %%mm3, 56(%1)\n"
			: : "r" (from), "r" (to) : "memory");

		from += 64;
		to += 64;
	}
	/*
	 * Now do the tail of the block:
	 */
	__memcpy(to, from, len & 63);
	kernel_fpu_end();

	return p;
}
EXPORT_SYMBOL(_mmx_memcpy);

#ifdef CONFIG_MK7

/*
 *	The K7 has streaming cache bypass load/store. The Cyrix III, K6 and
 *	other MMX-using processors do not.
 */

static void fast_clear_page(void *page)
{
	int i;

	kernel_fpu_begin_mask(KFPU_387);

	__asm__ __volatile__ (
		"  pxor %%mm0, %%mm0\n" : :
	);

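	/*
	 * Clear the 4 KiB page 64 bytes per iteration with non-temporal
	 * (cache-bypassing) movntq stores.
	 */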
	for (i = 0; i < 4096/64; i++) {
		__asm__ __volatile__ (
		"  movntq %%mm0, (%0)\n"
		"  movntq %%mm0, 8(%0)\n"
		"  movntq %%mm0, 16(%0)\n"
		"  movntq %%mm0, 24(%0)\n"
		"  movntq %%mm0, 32(%0)\n"
		"  movntq %%mm0, 40(%0)\n"
		"  movntq %%mm0, 48(%0)\n"
		"  movntq %%mm0, 56(%0)\n"
		: : "r" (page) : "memory");
		page += 64;
	}

	/*
	 * Since movntq is weakly-ordered, an sfence is needed to order the
	 * non-temporal stores with respect to whatever follows:
	 */
	__asm__ __volatile__("sfence\n"::);

	kernel_fpu_end();
}

static void fast_copy_page(void *to, void *from)
{
	int i;

	kernel_fpu_begin_mask(KFPU_387);

	/*
	 * maybe the prefetch stuff can go before the expensive fnsave...
	 * but that is for later. -AV
	 */
	__asm__ __volatile__(
		"1: prefetch (%0)\n"
		"   prefetch 64(%0)\n"
		"   prefetch 128(%0)\n"
		"   prefetch 192(%0)\n"
		"   prefetch 256(%0)\n"
		"2:  \n"
		".section .fixup, \"ax\"\n"
		"3: movw $0x1AEB, 1b\n"	/* jmp on 26 bytes */
		"   jmp 2b\n"
		".previous\n"
			_ASM_EXTABLE(1b, 3b) : : "r" (from));

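	/*
	 * Copy everything but the last 320 bytes, prefetching 320 bytes
	 * ahead; the tail loop below runs without prefetch so we never
	 * prefetch past the end of the source page.
	 */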
	for (i = 0; i < (4096-320)/64; i++) {
		__asm__ __volatile__ (
		"1: prefetch 320(%0)\n"
		"2: movq (%0), %%mm0\n"
		"   movntq %%mm0, (%1)\n"
		"   movq 8(%0), %%mm1\n"
		"   movntq %%mm1, 8(%1)\n"
		"   movq 16(%0), %%mm2\n"
		"   movntq %%mm2, 16(%1)\n"
		"   movq 24(%0), %%mm3\n"
		"   movntq %%mm3, 24(%1)\n"
		"   movq 32(%0), %%mm4\n"
		"   movntq %%mm4, 32(%1)\n"
		"   movq 40(%0), %%mm5\n"
		"   movntq %%mm5, 40(%1)\n"
		"   movq 48(%0), %%mm6\n"
		"   movntq %%mm6, 48(%1)\n"
		"   movq 56(%0), %%mm7\n"
		"   movntq %%mm7, 56(%1)\n"
		".section .fixup, \"ax\"\n"
		"3: movw $0x05EB, 1b\n"	/* jmp on 5 bytes */
		"   jmp 2b\n"
		".previous\n"
		_ASM_EXTABLE(1b, 3b) : : "r" (from), "r" (to) : "memory");

		from += 64;
		to += 64;
	}

	for (i = (4096-320)/64; i < 4096/64; i++) {
		__asm__ __volatile__ (
		"2: movq (%0), %%mm0\n"
		"   movntq %%mm0, (%1)\n"
		"   movq 8(%0), %%mm1\n"
		"   movntq %%mm1, 8(%1)\n"
		"   movq 16(%0), %%mm2\n"
		"   movntq %%mm2, 16(%1)\n"
		"   movq 24(%0), %%mm3\n"
		"   movntq %%mm3, 24(%1)\n"
		"   movq 32(%0), %%mm4\n"
		"   movntq %%mm4, 32(%1)\n"
		"   movq 40(%0), %%mm5\n"
		"   movntq %%mm5, 40(%1)\n"
		"   movq 48(%0), %%mm6\n"
		"   movntq %%mm6, 48(%1)\n"
		"   movq 56(%0), %%mm7\n"
		"   movntq %%mm7, 56(%1)\n"
			: : "r" (from), "r" (to) : "memory");
		from += 64;
		to += 64;
	}
	/*
	 * Since movntq is weakly-ordered, an sfence is needed to order the
	 * non-temporal stores with respect to whatever follows:
	 */
	__asm__ __volatile__("sfence \n"::);
	kernel_fpu_end();
}

#else /* CONFIG_MK7 */

/*
 *	Generic MMX implementation without K7-specific streaming
 */
static void fast_clear_page(void *page)
{
	int i;

	kernel_fpu_begin_mask(KFPU_387);

	__asm__ __volatile__ (
		"  pxor %%mm0, %%mm0\n" : :
	);

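	/*
	 * Clear the page 128 bytes per iteration with ordinary (cached)
	 * movq stores; unlike the CONFIG_MK7 movntq variant, nothing here
	 * is weakly-ordered, so no sfence is needed afterwards.
	 */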
	for (i = 0; i < 4096/128; i++) {
		__asm__ __volatile__ (
		"  movq %%mm0, (%0)\n"
		"  movq %%mm0, 8(%0)\n"
		"  movq %%mm0, 16(%0)\n"
		"  movq %%mm0, 24(%0)\n"
		"  movq %%mm0, 32(%0)\n"
		"  movq %%mm0, 40(%0)\n"
		"  movq %%mm0, 48(%0)\n"
		"  movq %%mm0, 56(%0)\n"
		"  movq %%mm0, 64(%0)\n"
		"  movq %%mm0, 72(%0)\n"
		"  movq %%mm0, 80(%0)\n"
		"  movq %%mm0, 88(%0)\n"
		"  movq %%mm0, 96(%0)\n"
		"  movq %%mm0, 104(%0)\n"
		"  movq %%mm0, 112(%0)\n"
		"  movq %%mm0, 120(%0)\n"
			: : "r" (page) : "memory");
		page += 128;
	}

	kernel_fpu_end();
}

static void fast_copy_page(void *to, void *from)
{
	int i;

	kernel_fpu_begin_mask(KFPU_387);

	__asm__ __volatile__ (
		"1: prefetch (%0)\n"
		"   prefetch 64(%0)\n"
		"   prefetch 128(%0)\n"
		"   prefetch 192(%0)\n"
		"   prefetch 256(%0)\n"
		"2:  \n"
		".section .fixup, \"ax\"\n"
		"3: movw $0x1AEB, 1b\n"	/* jmp on 26 bytes */
		"   jmp 2b\n"
		".previous\n"
			_ASM_EXTABLE(1b, 3b) : : "r" (from));

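	/*
	 * Copy the whole page 64 bytes per iteration through %mm0-%mm3,
	 * prefetching 320 bytes ahead; a faulting prefetch is patched out
	 * by the 0x05EB ("jmp" over it) fixup, as in _mmx_memcpy().
	 */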
	for (i = 0; i < 4096/64; i++) {
		__asm__ __volatile__ (
		"1: prefetch 320(%0)\n"
		"2: movq (%0), %%mm0\n"
		"   movq 8(%0), %%mm1\n"
		"   movq 16(%0), %%mm2\n"
		"   movq 24(%0), %%mm3\n"
		"   movq %%mm0, (%1)\n"
		"   movq %%mm1, 8(%1)\n"
		"   movq %%mm2, 16(%1)\n"
		"   movq %%mm3, 24(%1)\n"
		"   movq 32(%0), %%mm0\n"
		"   movq 40(%0), %%mm1\n"
		"   movq 48(%0), %%mm2\n"
		"   movq 56(%0), %%mm3\n"
		"   movq %%mm0, 32(%1)\n"
		"   movq %%mm1, 40(%1)\n"
		"   movq %%mm2, 48(%1)\n"
		"   movq %%mm3, 56(%1)\n"
		".section .fixup, \"ax\"\n"
		"3: movw $0x05EB, 1b\n"	/* jmp on 5 bytes */
		"   jmp 2b\n"
		".previous\n"
			_ASM_EXTABLE(1b, 3b)
			: : "r" (from), "r" (to) : "memory");

		from += 64;
		to += 64;
	}
	kernel_fpu_end();
}

#endif /* !CONFIG_MK7 */

/*
 * Favour MMX for page clear and copy, but fall back to the plain string
 * operations below when called from interrupt context:
 */
static void slow_zero_page(void *page)
{
	int d0, d1;

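	/* rep stosl with ECX = 1024 clears 1024 * 4 = 4096 bytes, one page */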
	__asm__ __volatile__(
		"cld\n\t"
		"rep ; stosl"

			: "=&c" (d0), "=&D" (d1)
			:"a" (0), "1" (page), "0" (1024)
			:"memory");
}

void mmx_clear_page(void *page)
{
	if (unlikely(in_interrupt()))
		slow_zero_page(page);
	else
		fast_clear_page(page);
}
EXPORT_SYMBOL(mmx_clear_page);

static void slow_copy_page(void *to, void *from)
{
	int d0, d1, d2;

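	/* rep movsl with ECX = 1024 copies 1024 * 4 = 4096 bytes, one page */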
	__asm__ __volatile__(
		"cld\n\t"
		"rep ; movsl"
		: "=&c" (d0), "=&D" (d1), "=&S" (d2)
		: "0" (1024), "1" ((long) to), "2" ((long) from)
		: "memory");
}

void mmx_copy_page(void *to, void *from)
{
	if (unlikely(in_interrupt()))
		slow_copy_page(to, from);
	else
		fast_copy_page(to, from);
}
EXPORT_SYMBOL(mmx_copy_page);
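
/*
 * These exports are normally consumed via <asm/mmx.h>.  Roughly, when
 * CONFIG_X86_USE_3DNOW is enabled, the 32-bit page helpers are wired up
 * along these lines (a sketch of the expected caller side, not part of
 * this file):
 *
 *	#include <asm/mmx.h>
 *
 *	static inline void clear_page(void *page)
 *	{
 *		mmx_clear_page(page);
 *	}
 *
 *	static inline void copy_page(void *to, void *from)
 *	{
 *		mmx_copy_page(to, from);
 *	}
 */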