/* xref: /OK3568_Linux_fs/kernel/arch/sh/lib/copy_page.S
 * (revision 4882a59341e53eb6f0b4789bf948001014eff981)
 */
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * copy_page, __copy_user_page, __copy_user implementation of SuperH
 *
 * Copyright (C) 2001  Niibe Yutaka & Kaz Kojima
 * Copyright (C) 2002  Toshinobu Sugioka
 * Copyright (C) 2006  Paul Mundt
 */
#include <linux/linkage.h>
#include <asm/page.h>

/*
 * copy_page
 * @to: P1 address
 * @from: P1 address
 *
 * void copy_page(void *to, void *from)
 */

/*
 * r0, r1, r2, r3, r4, r5, r6, r7 --- scratch
 * r8 --- from + PAGE_SIZE (loop sentinel)
 * r9 --- not used
 * r10 --- to
 * r11 --- from
 */
ENTRY(copy_page)
	mov.l	r8,@-r15	! save the callee-saved regs used below
	mov.l	r10,@-r15
	mov.l	r11,@-r15
	mov	r4,r10		! r10 = to
	mov	r5,r11		! r11 = from
	mov	r5,r8
	mov	#(PAGE_SIZE >> 10), r0	! 8-bit immediate is sign-extended,
	shll8	r0		! so build PAGE_SIZE by loading
	shll2	r0		! (PAGE_SIZE >> 10) and shifting left 10
	add	r0,r8		! r8 = from + PAGE_SIZE
	!
	! Main loop: copy one 32-byte chunk per iteration.
1:	mov.l	@r11+,r0
	mov.l	@r11+,r1
	mov.l	@r11+,r2
	mov.l	@r11+,r3
	mov.l	@r11+,r4
	mov.l	@r11+,r5
	mov.l	@r11+,r6
	mov.l	@r11+,r7
#if defined(CONFIG_CPU_SH4)
	movca.l	r0,@r10		! SH-4: allocate dest cache line w/o fetch
#else
	mov.l	r0,@r10
#endif
	add	#32,r10
	mov.l	r7,@-r10	! store words 7..1 with pre-decrement,
	mov.l	r6,@-r10	! ending at to+4
	mov.l	r5,@-r10
	mov.l	r4,@-r10
	mov.l	r3,@-r10
	mov.l	r2,@-r10
	mov.l	r1,@-r10
	cmp/eq	r11,r8		! consumed the whole source page?
	bf/s	1b
	 add	#28,r10		! (delay slot) r10 -> next 32-byte chunk
	!
	mov.l	@r15+,r11	! restore saved regs and return
	mov.l	@r15+,r10
	mov.l	@r15+,r8
	rts
	 nop

/*
 * __kernel_size_t __copy_user(void *to, const void *from, __kernel_size_t n);
 * Return the number of bytes NOT copied
 *
 * In:  r4 = to, r5 = from, r6 = n
 * Out: r0 = bytes not copied (0 on full success)
 *
 * r3 holds to + n (the address one past the last destination byte) for
 * the whole copy; on a fault the fixup code computes r3 - r4 (r4 always
 * tracks the current destination) to report the bytes left uncopied.
 */
#define EX(...)			\
	9999: __VA_ARGS__ ;		\
	.section __ex_table, "a";	\
	.long 9999b, 6000f	;	\
	.previous
#define EX_NO_POP(...)			\
	9999: __VA_ARGS__ ;		\
	.section __ex_table, "a";	\
	.long 9999b, 6005f	;	\
	.previous
ENTRY(__copy_user)
	! Check if small number of bytes
	mov	#11,r0
	mov	r4,r3
	cmp/gt	r0,r6		! r6 (len) > r0 (11)
	bf/s	.L_cleanup_loop_no_pop
	 add	r6,r3		! last destination address

	! Calculate bytes needed to align to src
	! (register saves are interleaved to fill pipeline slots)
	mov.l	r11,@-r15
	neg	r5,r0
	mov.l	r10,@-r15
	add	#4,r0
	mov.l	r9,@-r15
	and	#3,r0		! r0 = (4 - (from & 3)) & 3
	mov.l	r8,@-r15
	tst	r0,r0
	bt	2f

1:
	! Copy bytes to long word align src
EX(	mov.b	@r5+,r1		)
	dt	r0
	add	#-1,r6
EX(	mov.b	r1,@r4		)
	bf/s	1b
	 add	#1,r4

	! Jump to appropriate routine depending on dest
2:	mov	#3,r1
	mov	r6, r2
	and	r4,r1		! r1 = (to & 3) * 4 = jump table offset
	shlr2	r2		! r2 = remaining longwords
	shll2	r1
	mova	.L_jump_tbl,r0
	mov.l	@(r0,r1),r1
	jmp	@r1
	 nop

	.align 2
.L_jump_tbl:
	.long	.L_dest00
	.long	.L_dest01
	.long	.L_dest10
	.long	.L_dest11

/*
 * Come here if there are less than 12 bytes to copy
 *
 * Keep the branch target close, so the bf/s callee doesn't overflow
 * and result in a more expensive branch being inserted. This is the
 * fast-path for small copies, the jump via the jump table will hit the
 * default slow-path cleanup. -PFM.
 */
.L_cleanup_loop_no_pop:
	tst	r6,r6		! Check explicitly for zero
	bt	1f

2:
EX_NO_POP(	mov.b	@r5+,r0		)
	dt	r6
EX_NO_POP(	mov.b	r0,@r4		)
	bf/s	2b
	 add	#1,r4

1:	mov	#0,r0		! normal return
5000:

# Exception handler (no saved regs to pop on this path):
.section .fixup, "ax"
6005:
	mov.l	8000f,r1
	mov	r3,r0
	jmp	@r1
	 sub	r4,r0		! r0 = (to + n) - cur dest = bytes left
	.align	2
8000:	.long	5000b

.previous
	rts
	 nop

! Destination = 00 (dest is longword-aligned)

.L_dest00:
	! Skip the large copy for small transfers
	mov	#(32+32-4), r0
	cmp/gt	r6, r0		! r0 (60) > r6 (len)
	bt	1f

	! Align dest to a 32 byte boundary
	neg	r4,r0
	add	#0x20, r0
	and	#0x1f, r0
	tst	r0, r0
	bt	2f

	sub	r0, r6
	shlr2	r0		! r0 = longwords needed for alignment
3:
EX(	mov.l	@r5+,r1		)
	dt	r0
EX(	mov.l	r1,@r4		)
	bf/s	3b
	 add	#4,r4

	! 32-bytes-per-iteration main loop; r8-r11 were saved above.
2:
EX(	mov.l	@r5+,r0		)
EX(	mov.l	@r5+,r1		)
EX(	mov.l	@r5+,r2		)
EX(	mov.l	@r5+,r7		)
EX(	mov.l	@r5+,r8		)
EX(	mov.l	@r5+,r9		)
EX(	mov.l	@r5+,r10	)
EX(	mov.l	@r5+,r11	)
#ifdef CONFIG_CPU_SH4
EX(	movca.l	r0,@r4		)
#else
EX(	mov.l	r0,@r4		)
#endif
	add	#-32, r6
EX(	mov.l	r1,@(4,r4)	)
	mov	#32, r0
EX(	mov.l	r2,@(8,r4)	)
	cmp/gt	r6, r0		! r0 (32) > r6 (len)
EX(	mov.l	r7,@(12,r4)	)
EX(	mov.l	r8,@(16,r4)	)
EX(	mov.l	r9,@(20,r4)	)
EX(	mov.l	r10,@(24,r4)	)
EX(	mov.l	r11,@(28,r4)	)
	bf/s	2b
	 add	#32,r4

1:	mov	r6, r0		! copy remaining longwords one at a time
	shlr2	r0
	tst	r0, r0
	bt	.L_cleanup
1:
EX(	mov.l	@r5+,r1		)
	dt	r0
EX(	mov.l	r1,@r4		)
	bf/s	1b
	 add	#4,r4

	bra	.L_cleanup
	 nop

! Destination = 10 (dest is 2 mod 4; src is longword-aligned)

.L_dest10:
	mov	r2,r7		! r7 = longwords / 8 = 32-byte chunks
	shlr2	r7
	shlr	r7
	tst	r7,r7
	mov	#7,r0
	bt/s	1f
	 and	r0,r2		! r2 = leftover longwords (< 8)
2:
	dt	r7
	! Read 8 longwords, recombine across the 2-byte misalignment
	! with xtrct, and write word + 7 longwords + word per chunk.
#ifdef CONFIG_CPU_LITTLE_ENDIAN
EX(	mov.l	@r5+,r0		)
EX(	mov.l	@r5+,r1		)
EX(	mov.l	@r5+,r8		)
EX(	mov.l	@r5+,r9		)
EX(	mov.l	@r5+,r10	)
EX(	mov.w	r0,@r4		)
	add	#2,r4
	xtrct	r1,r0
	xtrct	r8,r1
	xtrct	r9,r8
	xtrct	r10,r9

EX(	mov.l	r0,@r4		)
EX(	mov.l	r1,@(4,r4)	)
EX(	mov.l	r8,@(8,r4)	)
EX(	mov.l	r9,@(12,r4)	)

EX(	mov.l	@r5+,r1		)
EX(	mov.l	@r5+,r8		)
EX(	mov.l	@r5+,r0		)
	xtrct	r1,r10
	xtrct	r8,r1
	xtrct	r0,r8
	shlr16	r0
EX(	mov.l	r10,@(16,r4)	)
EX(	mov.l	r1,@(20,r4)	)
EX(	mov.l	r8,@(24,r4)	)
EX(	mov.w	r0,@(28,r4)	)
	bf/s	2b
	 add	#30,r4
#else
EX(	mov.l	@(28,r5),r0	)
EX(	mov.l	@(24,r5),r8	)
EX(	mov.l	@(20,r5),r9	)
EX(	mov.l	@(16,r5),r10	)
EX(	mov.w	r0,@(30,r4)	)
	add	#-2,r4
	xtrct	r8,r0
	xtrct	r9,r8
	xtrct	r10,r9
EX(	mov.l	r0,@(28,r4)	)
EX(	mov.l	r8,@(24,r4)	)
EX(	mov.l	r9,@(20,r4)	)

EX(	mov.l	@(12,r5),r0	)
EX(	mov.l	@(8,r5),r8	)
	xtrct	r0,r10
EX(	mov.l	@(4,r5),r9	)
	mov.l	r10,@(16,r4)	! NOTE(review): only store in this path not
				! wrapped in EX(); a fault here would not be
				! fixed up -- confirm this is intentional
EX(	mov.l	@r5,r10		)
	xtrct	r8,r0
	xtrct	r9,r8
	xtrct	r10,r9
EX(	mov.l	r0,@(12,r4)	)
EX(	mov.l	r8,@(8,r4)	)
	swap.w	r10,r0
EX(	mov.l	r9,@(4,r4)	)
EX(	mov.w	r0,@(2,r4)	)

	add	#32,r5
	bf/s	2b
	 add	#34,r4
#endif
	tst	r2,r2
	bt	.L_cleanup

1:	! Read longword, write two words per iteration
EX(	mov.l	@r5+,r0		)
	dt	r2
#ifdef CONFIG_CPU_LITTLE_ENDIAN
EX(	mov.w	r0,@r4		)
	shlr16	r0
EX(	mov.w	r0,@(2,r4)	)
#else
EX(	mov.w	r0,@(2,r4)	)
	shlr16	r0
EX(	mov.w	r0,@r4		)
#endif
	bf/s	1b
	 add	#4,r4

	bra	.L_cleanup
	 nop

! Destination = 01 or 11 (dest is odd; src is longword-aligned)

.L_dest01:
.L_dest11:
	! Read longword, write byte, word, byte per iteration
EX(	mov.l	@r5+,r0		)
	dt	r2
#ifdef CONFIG_CPU_LITTLE_ENDIAN
EX(	mov.b	r0,@r4		)
	shlr8	r0
	add	#1,r4
EX(	mov.w	r0,@r4		)
	shlr16	r0
EX(	mov.b	r0,@(2,r4)	)
	bf/s	.L_dest01
	 add	#3,r4
#else
EX(	mov.b	r0,@(3,r4)	)
	shlr8	r0
	swap.w	r0,r7
EX(	mov.b	r7,@r4		)
	add	#1,r4
EX(	mov.w	r0,@r4		)
	bf/s	.L_dest01
	 add	#3,r4
#endif

! Cleanup last few bytes (n & 3)
.L_cleanup:
	mov	r6,r0
	and	#3,r0
	tst	r0,r0
	bt	.L_exit
	mov	r0,r6

.L_cleanup_loop:
EX(	mov.b	@r5+,r0		)
	dt	r6
EX(	mov.b	r0,@r4		)
	bf/s	.L_cleanup_loop
	 add	#1,r4

.L_exit:
	mov	#0,r0		! normal return

5000:

# Exception handler (pops the regs saved on the large-copy path):
.section .fixup, "ax"
6000:
	mov.l	8000f,r1
	mov	r3,r0
	jmp	@r1
	 sub	r4,r0		! r0 = (to + n) - cur dest = bytes left
	.align	2
8000:	.long	5000b

.previous
	mov.l	@r15+,r8
	mov.l	@r15+,r9
	mov.l	@r15+,r10
	rts
	 mov.l	@r15+,r11