xref: /OK3568_Linux_fs/kernel/arch/x86/lib/copy_user_64.S (revision 4882a59341e53eb6f0b4789bf948001014eff981)
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
 * Copyright 2002 Andi Kleen, SuSE Labs.
 *
 * Functions to copy from and to user space.
 */

#include <linux/linkage.h>
#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/export.h>
#include <asm/trapnr.h>

.macro ALIGN_DESTINATION
	/* check for bad alignment of destination */
	movl %edi,%ecx
	andl $7,%ecx
	jz 102f				/* already aligned */
	subl $8,%ecx
	negl %ecx
	subl %ecx,%edx
100:	movb (%rsi),%al
101:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz 100b
102:
	.section .fixup,"ax"
103:	addl %ecx,%edx			/* %edx is now the remaining byte count */
	jmp .Lcopy_user_handle_tail
	.previous

	_ASM_EXTABLE_CPY(100b, 103b)
	_ASM_EXTABLE_CPY(101b, 103b)
	.endm
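
/*
 * Illustrative sketch (not part of the build): ALIGN_DESTINATION computes the
 * lead-in as (8 - (dst & 7)) & 7 bytes and copies them one at a time so the
 * main loops below see an 8-byte-aligned destination. A rough C-level
 * equivalent, with dst/src as unsigned char pointers and len the byte count:
 *
 *	size_t lead = (8 - ((unsigned long)dst & 7)) & 7;
 *
 *	len -= lead;
 *	while (lead--)
 *		*dst++ = *src++;
 */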

/*
 * copy_user_generic_unrolled - memory copy with exception handling.
 * This version is for CPUs like P4 that don't have efficient microcode
 * for rep movsq.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
SYM_FUNC_START(copy_user_generic_unrolled)
	ASM_STAC
	cmpl $8,%edx
	jb 20f		/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	movl %edx,%ecx
	andl $63,%edx
	shrl $6,%ecx
	jz .L_copy_short_string
1:	movq (%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movq %r8,(%rdi)
6:	movq %r9,1*8(%rdi)
7:	movq %r10,2*8(%rdi)
8:	movq %r11,3*8(%rdi)
9:	movq 4*8(%rsi),%r8
10:	movq 5*8(%rsi),%r9
11:	movq 6*8(%rsi),%r10
12:	movq 7*8(%rsi),%r11
13:	movq %r8,4*8(%rdi)
14:	movq %r9,5*8(%rdi)
15:	movq %r10,6*8(%rdi)
16:	movq %r11,7*8(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz 1b
.L_copy_short_string:
	movl %edx,%ecx
	andl $7,%edx
	shrl $3,%ecx
	jz 20f
18:	movq (%rsi),%r8
19:	movq %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz 18b
20:	andl %edx,%edx
	jz 23f
	movl %edx,%ecx
21:	movb (%rsi),%al
22:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz 21b
23:	xor %eax,%eax
	ASM_CLAC
	RET

	.section .fixup,"ax"
30:	shll $6,%ecx
	addl %ecx,%edx
	jmp 60f
40:	leal (%rdx,%rcx,8),%edx
	jmp 60f
50:	movl %ecx,%edx
60:	jmp .Lcopy_user_handle_tail /* %edx: remaining bytes to handle */
	.previous
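
/*
 * Fixup arithmetic above, spelled out (descriptive note): after a fault in
 * the 64-byte loop (30:), %ecx still counts 64-byte blocks, so the uncopied
 * byte count is rebuilt as edx += ecx * 64; in the 8-byte loop (40:) it is
 * edx += ecx * 8; in the byte loop (50:) it is simply edx = ecx. In all
 * cases %edx holds the bytes not yet copied on entry to
 * .Lcopy_user_handle_tail.
 */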

	_ASM_EXTABLE_CPY(1b, 30b)
	_ASM_EXTABLE_CPY(2b, 30b)
	_ASM_EXTABLE_CPY(3b, 30b)
	_ASM_EXTABLE_CPY(4b, 30b)
	_ASM_EXTABLE_CPY(5b, 30b)
	_ASM_EXTABLE_CPY(6b, 30b)
	_ASM_EXTABLE_CPY(7b, 30b)
	_ASM_EXTABLE_CPY(8b, 30b)
	_ASM_EXTABLE_CPY(9b, 30b)
	_ASM_EXTABLE_CPY(10b, 30b)
	_ASM_EXTABLE_CPY(11b, 30b)
	_ASM_EXTABLE_CPY(12b, 30b)
	_ASM_EXTABLE_CPY(13b, 30b)
	_ASM_EXTABLE_CPY(14b, 30b)
	_ASM_EXTABLE_CPY(15b, 30b)
	_ASM_EXTABLE_CPY(16b, 30b)
	_ASM_EXTABLE_CPY(18b, 40b)
	_ASM_EXTABLE_CPY(19b, 40b)
	_ASM_EXTABLE_CPY(21b, 50b)
	_ASM_EXTABLE_CPY(22b, 50b)
SYM_FUNC_END(copy_user_generic_unrolled)
EXPORT_SYMBOL(copy_user_generic_unrolled)
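
/*
 * Usage sketch (illustrative, not a definitive API description): all three
 * copy variants in this file share the same contract and return the number
 * of bytes they could not copy. Assuming the C declaration from
 * asm/uaccess_64.h and already-validated dst/src/len, a caller that wants
 * all-or-nothing behaviour does roughly:
 *
 *	unsigned long left = copy_user_generic_unrolled(dst, src, len);
 *
 *	if (left)
 *		return -EFAULT;
 *
 * Returning -EFAULT here is just one possible policy; iov_iter users instead
 * account the short copy as len - left bytes transferred.
 */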

/* Some CPUs run faster using the string copy instructions.
 * This is also a lot simpler. Use them when possible.
 *
 * Only 4GB of copy is supported. This shouldn't be a problem
 * because the kernel normally only writes from/to page sized chunks
 * even if user space passed a longer buffer.
 * Copying more would also be risky because both Intel and AMD have
 * errata for rep movsq with counts above 4GB; anyone lifting this
 * limit should take those errata into account.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
SYM_FUNC_START(copy_user_generic_string)
	ASM_STAC
	cmpl $8,%edx
	jb 2f		/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	movl %edx,%ecx
	shrl $3,%ecx
	andl $7,%edx
1:	rep
	movsq
2:	movl %edx,%ecx
3:	rep
	movsb
	xorl %eax,%eax
	ASM_CLAC
	RET

	.section .fixup,"ax"
11:	leal (%rdx,%rcx,8),%ecx
12:	movl %ecx,%edx		/* %edx is now the remaining byte count */
	jmp .Lcopy_user_handle_tail
	.previous

	_ASM_EXTABLE_CPY(1b, 11b)
	_ASM_EXTABLE_CPY(3b, 12b)
SYM_FUNC_END(copy_user_generic_string)
EXPORT_SYMBOL(copy_user_generic_string)
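
/*
 * Fault-handling note for the string variant (descriptive, not normative):
 * when rep movsq faults at 1:, %rcx holds the number of qwords not yet
 * moved, so the fixup at 11: rebuilds the uncopied byte count as
 * edx + ecx * 8; when rep movsb faults at 3:, %ecx already is the uncopied
 * byte count and 12: just moves it into %edx for .Lcopy_user_handle_tail.
 */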

/*
 * Some CPUs support enhanced REP MOVSB/STOSB (ERMS) instructions.
 * They are the recommended copy method when the feature is available.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
SYM_FUNC_START(copy_user_enhanced_fast_string)
	ASM_STAC
	cmpl $64,%edx
	jb .L_copy_short_string	/* less than 64 bytes, avoid the costly 'rep' */
	movl %edx,%ecx
1:	rep
	movsb
	xorl %eax,%eax
	ASM_CLAC
	RET

	.section .fixup,"ax"
12:	movl %ecx,%edx		/* %edx is now the remaining byte count */
	jmp .Lcopy_user_handle_tail
	.previous

	_ASM_EXTABLE_CPY(1b, 12b)
SYM_FUNC_END(copy_user_enhanced_fast_string)
EXPORT_SYMBOL(copy_user_enhanced_fast_string)
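
/*
 * Selection note (sketch, based on the surrounding kernel sources rather
 * than on this file): callers normally do not pick a variant by hand.
 * copy_user_generic() in arch/x86/include/asm/uaccess_64.h uses
 * ALTERNATIVE_2 to patch in copy_user_generic_string when
 * X86_FEATURE_REP_GOOD is set and copy_user_enhanced_fast_string when
 * X86_FEATURE_ERMS is set, falling back to the unrolled copy otherwise.
 */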

/*
 * Try to copy last bytes and clear the rest if needed.
 * Since a protection fault in copy_from/to_user is not a normal situation,
 * there is no need to optimize tail handling.
 * Don't try to copy the tail if a machine check happened.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail)
	movl %edx,%ecx
	cmp $X86_TRAP_MC,%eax		/* check if X86_TRAP_MC */
	je 3f
1:	rep movsb
2:	mov %ecx,%eax
	ASM_CLAC
	RET

	/*
	 * Return zero to pretend that this copy succeeded. This
	 * is counter-intuitive, but needed to prevent the code
	 * in lib/iov_iter.c from retrying and running back into
	 * the poison cache line again. The machine check handler
	 * will ensure that a SIGBUS is sent to the task.
	 */
3:	xorl %eax,%eax
	ASM_CLAC
	RET

	_ASM_EXTABLE_CPY(1b, 2b)
SYM_CODE_END(.Lcopy_user_handle_tail)
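
/*
 * Contract of the tail handler, spelled out (descriptive note): %edx is the
 * byte count still outstanding at the original fault and %eax is the trap
 * number recorded by the exception table handler. The range is retried
 * byte by byte with rep movsb, and whatever still cannot be copied is
 * returned in %eax as the number of uncopied bytes. The one exception is a
 * machine check, where 0 is returned so iov_iter code does not walk back
 * into the poisoned cache line; the #MC handler sends SIGBUS instead.
 */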

/*
 * __copy_user_nocache - Uncached memory copy with exception handling
 * This uses non-temporal stores to keep the destination out of the cache,
 * which helps performance for large copies.
 *
 * Note: Cached memory copy is used when destination or size is not
 * naturally aligned. That is:
 *  - Require 8-byte alignment when size is 8 bytes or larger.
 *  - Require 4-byte alignment when size is 4 bytes.
 */
SYM_FUNC_START(__copy_user_nocache)
	ASM_STAC

	/* If size is less than 8 bytes, go to 4-byte copy */
	cmpl $8,%edx
	jb .L_4b_nocache_copy_entry

	/* If destination is not 8-byte aligned, "cache" copy to align it */
	ALIGN_DESTINATION

	/* Set 4x8-byte copy count and remainder */
	movl %edx,%ecx
	andl $63,%edx
	shrl $6,%ecx
	jz .L_8b_nocache_copy_entry	/* jump if count is 0 */

	/* Perform 4x8-byte nocache loop-copy */
.L_4x8b_nocache_copy_loop:
1:	movq (%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movnti %r8,(%rdi)
6:	movnti %r9,1*8(%rdi)
7:	movnti %r10,2*8(%rdi)
8:	movnti %r11,3*8(%rdi)
9:	movq 4*8(%rsi),%r8
10:	movq 5*8(%rsi),%r9
11:	movq 6*8(%rsi),%r10
12:	movq 7*8(%rsi),%r11
13:	movnti %r8,4*8(%rdi)
14:	movnti %r9,5*8(%rdi)
15:	movnti %r10,6*8(%rdi)
16:	movnti %r11,7*8(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz .L_4x8b_nocache_copy_loop

	/* Set 8-byte copy count and remainder */
.L_8b_nocache_copy_entry:
	movl %edx,%ecx
	andl $7,%edx
	shrl $3,%ecx
	jz .L_4b_nocache_copy_entry	/* jump if count is 0 */

	/* Perform 8-byte nocache loop-copy */
.L_8b_nocache_copy_loop:
20:	movq (%rsi),%r8
21:	movnti %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz .L_8b_nocache_copy_loop

	/* If no bytes are left, we're done */
.L_4b_nocache_copy_entry:
	andl %edx,%edx
	jz .L_finish_copy

	/* If destination is not 4-byte aligned, go to byte copy: */
	movl %edi,%ecx
	andl $3,%ecx
	jnz .L_1b_cache_copy_entry

	/* Set 4-byte copy count (1 or 0) and remainder */
	movl %edx,%ecx
	andl $3,%edx
	shrl $2,%ecx
	jz .L_1b_cache_copy_entry	/* jump if count is 0 */

	/* Perform 4-byte nocache copy: */
30:	movl (%rsi),%r8d
31:	movnti %r8d,(%rdi)
	leaq 4(%rsi),%rsi
	leaq 4(%rdi),%rdi

	/* If no bytes left, we're done: */
	andl %edx,%edx
	jz .L_finish_copy

	/* Perform byte "cache" loop-copy for the remainder */
.L_1b_cache_copy_entry:
	movl %edx,%ecx
.L_1b_cache_copy_loop:
40:	movb (%rsi),%al
41:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz .L_1b_cache_copy_loop

	/* Finished copying; fence the prior stores */
.L_finish_copy:
	xorl %eax,%eax
	ASM_CLAC
	sfence
	RET

	.section .fixup,"ax"
.L_fixup_4x8b_copy:
	shll $6,%ecx
	addl %ecx,%edx
	jmp .L_fixup_handle_tail
.L_fixup_8b_copy:
	lea (%rdx,%rcx,8),%rdx
	jmp .L_fixup_handle_tail
.L_fixup_4b_copy:
	lea (%rdx,%rcx,4),%rdx
	jmp .L_fixup_handle_tail
.L_fixup_1b_copy:
	movl %ecx,%edx
.L_fixup_handle_tail:
	sfence
	jmp .Lcopy_user_handle_tail
	.previous
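
/*
 * As in the cached variants, the fixups above rebuild the uncopied byte
 * count in %edx (blocks * 64, qwords * 8, dwords * 4, or the plain byte
 * count) before jumping to .Lcopy_user_handle_tail. The extra sfence makes
 * the weakly ordered movnti stores globally visible before the tail is
 * retried with ordinary stores.
 */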

	_ASM_EXTABLE_CPY(1b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(2b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(3b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(4b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(5b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(6b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(7b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(8b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(9b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(10b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(11b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(12b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(13b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(14b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(15b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(16b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(20b, .L_fixup_8b_copy)
	_ASM_EXTABLE_CPY(21b, .L_fixup_8b_copy)
	_ASM_EXTABLE_CPY(30b, .L_fixup_4b_copy)
	_ASM_EXTABLE_CPY(31b, .L_fixup_4b_copy)
	_ASM_EXTABLE_CPY(40b, .L_fixup_1b_copy)
	_ASM_EXTABLE_CPY(41b, .L_fixup_1b_copy)
SYM_FUNC_END(__copy_user_nocache)
EXPORT_SYMBOL(__copy_user_nocache)
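
/*
 * Usage sketch (illustrative only; see the declaration in
 * arch/x86/include/asm/uaccess_64.h for the exact prototype): like the
 * cached helpers, __copy_user_nocache returns the number of bytes it could
 * not copy, so a wrapper wanting all-or-nothing behaviour does roughly:
 *
 *	if (__copy_user_nocache(dst, src, size, 0))
 *		return -EFAULT;
 */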