xref: /OK3568_Linux_fs/kernel/arch/arm/lib/copy_template.S (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun/* SPDX-License-Identifier: GPL-2.0-only */
2*4882a593Smuzhiyun/*
3*4882a593Smuzhiyun *  linux/arch/arm/lib/copy_template.s
4*4882a593Smuzhiyun *
5*4882a593Smuzhiyun *  Code template for optimized memory copy functions
6*4882a593Smuzhiyun *
7*4882a593Smuzhiyun *  Author:	Nicolas Pitre
8*4882a593Smuzhiyun *  Created:	Sep 28, 2005
9*4882a593Smuzhiyun *  Copyright:	MontaVista Software, Inc.
10*4882a593Smuzhiyun */
11*4882a593Smuzhiyun
12*4882a593Smuzhiyun/*
13*4882a593Smuzhiyun * Theory of operation
14*4882a593Smuzhiyun * -------------------
15*4882a593Smuzhiyun *
16*4882a593Smuzhiyun * This file provides the core code for a forward memory copy used in
17*4882a593Smuzhiyun * the implementation of memcpy(), copy_to_user() and copy_from_user().
18*4882a593Smuzhiyun *
19*4882a593Smuzhiyun * The including file must define the following accessor macros
20*4882a593Smuzhiyun * according to the need of the given function:
21*4882a593Smuzhiyun *
22*4882a593Smuzhiyun * ldr1w ptr reg abort
23*4882a593Smuzhiyun *
24*4882a593Smuzhiyun *	This loads one word from 'ptr', stores it in 'reg' and increments
25*4882a593Smuzhiyun *	'ptr' to the next word. The 'abort' argument is used for fixup tables.
26*4882a593Smuzhiyun *
27*4882a593Smuzhiyun * ldr4w ptr reg1 reg2 reg3 reg4 abort
28*4882a593Smuzhiyun * ldr8w ptr, reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
29*4882a593Smuzhiyun *
30*4882a593Smuzhiyun *	This loads four or eight words starting from 'ptr', stores them
31*4882a593Smuzhiyun *	in provided registers and increments 'ptr' past those words.
32*4882a593Smuzhiyun *	The 'abort' argument is used for fixup tables.
33*4882a593Smuzhiyun *
34*4882a593Smuzhiyun * ldr1b ptr reg cond abort
35*4882a593Smuzhiyun *
36*4882a593Smuzhiyun *	Similar to ldr1w, but it loads a byte and increments 'ptr' one byte.
37*4882a593Smuzhiyun *	It also must apply the condition code if provided, otherwise the
38*4882a593Smuzhiyun *	"al" condition is assumed by default.
39*4882a593Smuzhiyun *
40*4882a593Smuzhiyun * str1w ptr reg abort
41*4882a593Smuzhiyun * str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
42*4882a593Smuzhiyun * str1b ptr reg cond abort
43*4882a593Smuzhiyun *
44*4882a593Smuzhiyun *	Same as their ldr* counterparts, but data is stored to 'ptr' location
45*4882a593Smuzhiyun *	rather than being loaded.
46*4882a593Smuzhiyun *
47*4882a593Smuzhiyun * enter reg1 reg2
48*4882a593Smuzhiyun *
49*4882a593Smuzhiyun *	Preserve the provided registers on the stack plus any additional
50*4882a593Smuzhiyun *	data as needed by the implementation including this code. Called
51*4882a593Smuzhiyun *	upon code entry.
52*4882a593Smuzhiyun *
53*4882a593Smuzhiyun * usave reg1 reg2
54*4882a593Smuzhiyun *
55*4882a593Smuzhiyun *	Unwind annotation macro corresponding to the 'enter' macro.
56*4882a593Smuzhiyun *	It tells the unwinder that the provided registers, plus any
57*4882a593Smuzhiyun *	additional data, were pushed on the stack by a prior 'enter' macro.
58*4882a593Smuzhiyun *
59*4882a593Smuzhiyun * exit reg1 reg2
60*4882a593Smuzhiyun *
61*4882a593Smuzhiyun *	Restore registers with the values previously saved with the
62*4882a593Smuzhiyun *	'enter' macro. Called upon code termination.
63*4882a593Smuzhiyun *
64*4882a593Smuzhiyun * LDR1W_SHIFT
65*4882a593Smuzhiyun * STR1W_SHIFT
66*4882a593Smuzhiyun *
67*4882a593Smuzhiyun *	Correction to be applied to the "ip" register when branching into
68*4882a593Smuzhiyun *	the ldr1w or str1w instructions (some of these macros may expand to
69*4882a593Smuzhiyun *	more than one 32bit instruction in Thumb-2)
70*4882a593Smuzhiyun */
71*4882a593Smuzhiyun
72*4882a593Smuzhiyun
/*
 * Entry of the template. The 'enter' macro supplied by the including
 * file preserves r4 and lr on the stack (see "enter reg1 reg2" in the
 * header comment). It sits in an unwind region of its own.
 */
73*4882a593Smuzhiyun	UNWIND(	.fnstart			)
74*4882a593Smuzhiyun		enter	r4, lr
75*4882a593Smuzhiyun	UNWIND(	.fnend				)
76*4882a593Smuzhiyun
/*
 * Initial dispatch on size and alignment.
 *
 * Register roles as used by the code in this file:
 *   r0 = store pointer (every str* macro is given r0)
 *   r1 = load pointer  (every ldr* macro is given r1)
 *   r2 = remaining byte count, kept biased by the constants subtracted
 *        below so that its sign can be tested directly
 *   ip = scratch (alignment offsets, jump offsets)
 */
77*4882a593Smuzhiyun	UNWIND(	.fnstart			)
78*4882a593Smuzhiyun		usave	r4, lr			  @ in first stmdb block
79*4882a593Smuzhiyun
/* r2 = count - 4; negative means fewer than 4 bytes: byte tail at 8. */
80*4882a593Smuzhiyun		subs	r2, r2, #4
81*4882a593Smuzhiyun		blt	8f
/* ip = r0 & 3; a store pointer that is not word aligned is fixed at 9. */
82*4882a593Smuzhiyun		ands	ip, r0, #3
83*4882a593Smuzhiyun	PLD(	pld	[r1, #0]		)
84*4882a593Smuzhiyun		bne	9f
/* ip = r1 & 3; a load pointer that is not word aligned goes to 10. */
85*4882a593Smuzhiyun		ands	ip, r1, #3
86*4882a593Smuzhiyun		bne	10f
87*4882a593Smuzhiyun
/*
 * 1: both pointers are word aligned. r2 becomes count - 32. The flags
 * produced by this subs are tested by the "blt 5f" that follows the
 * stmfd, which itself leaves the flags alone. r5 - r8 are pushed as
 * extra working registers for the 8-word copy.
 */
88*4882a593Smuzhiyun1:		subs	r2, r2, #(28)
89*4882a593Smuzhiyun		stmfd	sp!, {r5 - r8}
90*4882a593Smuzhiyun	UNWIND(	.fnend				)
91*4882a593Smuzhiyun
/*
 * Copy with both pointers word aligned. r5 - r8 stay pushed for the whole
 * of this region, which is why aborts raised here use label 20.
 * On entry the flags are those of "subs r2, r2, #28" at label 1 and
 * r2 holds the remaining byte count minus 32.
 */
92*4882a593Smuzhiyun	UNWIND(	.fnstart			)
93*4882a593Smuzhiyun		usave	r4, lr
94*4882a593Smuzhiyun	UNWIND(	.save	{r5 - r8}		) @ in second stmfd block
/* Fewer than 32 bytes left: only the partial block at 5 is needed. */
95*4882a593Smuzhiyun		blt	5f
96*4882a593Smuzhiyun
/*
 * CALGN lines (the CALGN wrapper is defined outside this file and decides
 * whether they are assembled): ip = r0 & 31 and r3 = 32 - ip. If r0 is
 * already 32-byte aligned (sbcsne skipped, C still set) or r3 is not
 * below r2, continue at 2. Otherwise r3 is taken off r2 and execution
 * jumps ip bytes past label 6, entering the ldr1w table part-way so that
 * fewer than the full seven words are copied before returning to 2.
 */
97*4882a593Smuzhiyun	CALGN(	ands	ip, r0, #31		)
98*4882a593Smuzhiyun	CALGN(	rsb	r3, ip, #32		)
99*4882a593Smuzhiyun	CALGN(	sbcsne	r4, r3, r2		)  @ C is always set here
100*4882a593Smuzhiyun	CALGN(	bcs	2f			)
101*4882a593Smuzhiyun	CALGN(	adr	r4, 6f			)
102*4882a593Smuzhiyun	CALGN(	subs	r2, r2, r3		)  @ C gets set
103*4882a593Smuzhiyun	CALGN(	add	pc, r4, ip		)
104*4882a593Smuzhiyun
/*
 * PLD lines (wrapper defined outside this file) issue preloads ahead of
 * r1. When they are assembled r2 is biased by a further 96; if that makes
 * it negative the first pass enters at 4 and skips the preload at 3.
 */
105*4882a593Smuzhiyun	PLD(	pld	[r1, #0]		)
106*4882a593Smuzhiyun2:	PLD(	subs	r2, r2, #96		)
107*4882a593Smuzhiyun	PLD(	pld	[r1, #28]		)
108*4882a593Smuzhiyun	PLD(	blt	4f			)
109*4882a593Smuzhiyun	PLD(	pld	[r1, #60]		)
110*4882a593Smuzhiyun	PLD(	pld	[r1, #92]		)
111*4882a593Smuzhiyun
/*
 * Main loop: eight words (32 bytes) are loaded and stored per pass, and
 * r2 drops by 32 each time. The loop repeats from 3 while r2 >= 0; the
 * trailing PLD lines then keep looping from 4, without the preload, for
 * as long as r2 + 96 >= 0.
 */
112*4882a593Smuzhiyun3:	PLD(	pld	[r1, #124]		)
113*4882a593Smuzhiyun4:		ldr8w	r1, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f
114*4882a593Smuzhiyun		subs	r2, r2, #32
115*4882a593Smuzhiyun		str8w	r0, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f
116*4882a593Smuzhiyun		bge	3b
117*4882a593Smuzhiyun	PLD(	cmn	r2, #96			)
118*4882a593Smuzhiyun	PLD(	bge	4b			)
119*4882a593Smuzhiyun
/*
 * 5: fewer than 32 bytes remain. ip = r2 & 28 is the size in bytes of the
 * whole words still to copy (0, 4, ..., 28); the biases applied to r2 are
 * multiples of 32 and do not disturb these bits. ip is turned into
 * 32 - ip, scaled by LDR1W_SHIFT, and added to pc so that execution enters
 * the ldr1w table below past the loads that are not needed. In ARM state,
 * where pc reads as the current instruction + 8, an offset of zero would
 * correspond to label 6. The Z flag from the ands survives the rsb/lsl:
 * when no whole word is left the addne is skipped and "b 7f" bypasses
 * both tables.
 */
120*4882a593Smuzhiyun5:		ands	ip, r2, #28
121*4882a593Smuzhiyun		rsb	ip, ip, #32
122*4882a593Smuzhiyun#if LDR1W_SHIFT > 0
123*4882a593Smuzhiyun		lsl	ip, ip, #LDR1W_SHIFT
124*4882a593Smuzhiyun#endif
125*4882a593Smuzhiyun		addne	pc, pc, ip		@ C is always clear here
126*4882a593Smuzhiyun		b	7f
/*
 * 6: load table. One padding slot of (1 << LDR1W_SHIFT) nops, then seven
 * single-word loads; it is entered at a computed offset so that only the
 * last N loads run.
 */
127*4882a593Smuzhiyun6:
128*4882a593Smuzhiyun		.rept	(1 << LDR1W_SHIFT)
129*4882a593Smuzhiyun		W(nop)
130*4882a593Smuzhiyun		.endr
131*4882a593Smuzhiyun		ldr1w	r1, r3, abort=20f
132*4882a593Smuzhiyun		ldr1w	r1, r4, abort=20f
133*4882a593Smuzhiyun		ldr1w	r1, r5, abort=20f
134*4882a593Smuzhiyun		ldr1w	r1, r6, abort=20f
135*4882a593Smuzhiyun		ldr1w	r1, r7, abort=20f
136*4882a593Smuzhiyun		ldr1w	r1, r8, abort=20f
137*4882a593Smuzhiyun		ldr1w	r1, lr, abort=20f
138*4882a593Smuzhiyun
/*
 * The same offset, rescaled from ldr1w-sized slots to str1w-sized slots
 * when the two shifts differ, selects the matching entry point in the
 * store table so that exactly the registers just loaded are stored.
 */
139*4882a593Smuzhiyun#if LDR1W_SHIFT < STR1W_SHIFT
140*4882a593Smuzhiyun		lsl	ip, ip, #STR1W_SHIFT - LDR1W_SHIFT
141*4882a593Smuzhiyun#elif LDR1W_SHIFT > STR1W_SHIFT
142*4882a593Smuzhiyun		lsr	ip, ip, #LDR1W_SHIFT - STR1W_SHIFT
143*4882a593Smuzhiyun#endif
144*4882a593Smuzhiyun		add	pc, pc, ip
145*4882a593Smuzhiyun		nop
146*4882a593Smuzhiyun		.rept	(1 << STR1W_SHIFT)
147*4882a593Smuzhiyun		W(nop)
148*4882a593Smuzhiyun		.endr
149*4882a593Smuzhiyun		str1w	r0, r3, abort=20f
150*4882a593Smuzhiyun		str1w	r0, r4, abort=20f
151*4882a593Smuzhiyun		str1w	r0, r5, abort=20f
152*4882a593Smuzhiyun		str1w	r0, r6, abort=20f
153*4882a593Smuzhiyun		str1w	r0, r7, abort=20f
154*4882a593Smuzhiyun		str1w	r0, r8, abort=20f
155*4882a593Smuzhiyun		str1w	r0, lr, abort=20f
156*4882a593Smuzhiyun
/*
 * The tables are shared by two callers that differ in the carry flag:
 * the CALGN alignment pass arrives with C set and resumes the main copy
 * at 2, while the tail path from 5 arrives with C clear and falls through.
 */
157*4882a593Smuzhiyun	CALGN(	bcs	2b			)
158*4882a593Smuzhiyun
/* 7: restore the working registers pushed at label 1. */
159*4882a593Smuzhiyun7:		ldmfd	sp!, {r5 - r8}
160*4882a593Smuzhiyun	UNWIND(	.fnend				) @ end of second stmfd block
161*4882a593Smuzhiyun
/*
 * Byte tail (8), store-pointer alignment (9) and dispatch for a load
 * pointer that is not word aligned (10). Only the registers saved by
 * 'enter' are on the stack here, hence abort=21f throughout.
 */
162*4882a593Smuzhiyun	UNWIND(	.fnstart			)
163*4882a593Smuzhiyun		usave	r4, lr			  @ still in first stmdb block
/*
 * 8: copy the final 0-3 bytes. The shift leaves bit 0 of r2 in bit 31 of
 * the result (NE when it was set: copy one byte) and bit 1 of r2 in the
 * carry flag (CS when it was set: copy two bytes).
 */
164*4882a593Smuzhiyun8:		movs	r2, r2, lsl #31
165*4882a593Smuzhiyun		ldr1b	r1, r3, ne, abort=21f
166*4882a593Smuzhiyun		ldr1b	r1, r4, cs, abort=21f
167*4882a593Smuzhiyun		ldr1b	r1, ip, cs, abort=21f
168*4882a593Smuzhiyun		str1b	r0, r3, ne, abort=21f
169*4882a593Smuzhiyun		str1b	r0, r4, cs, abort=21f
170*4882a593Smuzhiyun		str1b	r0, ip, cs, abort=21f
171*4882a593Smuzhiyun
/*
 * 'exit' is supplied by the including file and restores what 'enter'
 * saved; pc is named where 'enter' named lr, presumably so that the
 * restore also performs the return (confirm against the includer).
 */
172*4882a593Smuzhiyun		exit	r4, pc
173*4882a593Smuzhiyun
/*
 * 9: r0 is not word aligned and ip holds r0 & 3 (1-3). ip becomes 4 - ip,
 * the number of bytes needed to reach a word boundary. The GT copy runs
 * when ip is 3, the GE copy when ip is 2 or 3, and the last copy always
 * runs. r2 is then reduced by ip: a negative result means fewer than four
 * bytes remain and the tail at 8 finishes the job. Otherwise the load
 * pointer alignment is re-tested; word aligned continues at 1, anything
 * else falls through to 10 with ip = r1 & 3.
 */
174*4882a593Smuzhiyun9:		rsb	ip, ip, #4
175*4882a593Smuzhiyun		cmp	ip, #2
176*4882a593Smuzhiyun		ldr1b	r1, r3, gt, abort=21f
177*4882a593Smuzhiyun		ldr1b	r1, r4, ge, abort=21f
178*4882a593Smuzhiyun		ldr1b	r1, lr, abort=21f
179*4882a593Smuzhiyun		str1b	r0, r3, gt, abort=21f
180*4882a593Smuzhiyun		str1b	r0, r4, ge, abort=21f
181*4882a593Smuzhiyun		subs	r2, r2, ip
182*4882a593Smuzhiyun		str1b	r0, lr, abort=21f
183*4882a593Smuzhiyun		blt	8b
184*4882a593Smuzhiyun		ands	ip, r1, #3
185*4882a593Smuzhiyun		beq	1b
186*4882a593Smuzhiyun
/*
 * 10: r1 is not word aligned, ip = r1 & 3 (1-3). r1 is rounded down to a
 * word boundary and the word found there is loaded into lr, which the
 * shifted copy code expects as its "previous word". ip == 2 branches to
 * 17, ip == 3 to 18, and ip == 1 falls through to the first
 * forward_copy_shift expansion that follows the macro definition.
 */
187*4882a593Smuzhiyun10:		bic	r1, r1, #3
188*4882a593Smuzhiyun		cmp	ip, #2
189*4882a593Smuzhiyun		ldr1w	r1, lr, abort=21f
190*4882a593Smuzhiyun		beq	17f
191*4882a593Smuzhiyun		bgt	18f
192*4882a593Smuzhiyun	UNWIND(	.fnend				)
193*4882a593Smuzhiyun
194*4882a593Smuzhiyun
/*
 * forward_copy_shift pull push
 *
 * Copy code for a word-aligned store pointer (r0) and a load pointer (r1)
 * that was not word aligned. On entry r1 has been rounded down to a word
 * boundary and advanced past the first word, and lr holds that word.
 * Every stored word is assembled from two consecutive loaded words as
 *
 *	(previous word lspull #pull) | (next word lspush #push)
 *
 * lspull and lspush are shift operators defined outside this file
 * (presumably an endian-dependent lsr/lsl pair - confirm in the headers
 * of the including file). pull + push is 32 in each expansion in this file.
 */
195*4882a593Smuzhiyun		.macro	forward_copy_shift pull push
196*4882a593Smuzhiyun
197*4882a593Smuzhiyun	UNWIND(	.fnstart			)
198*4882a593Smuzhiyun		usave	r4, lr			  @ still in first stmdb block
/* r2 becomes remaining - 32; negative means no full 32-byte block. */
199*4882a593Smuzhiyun		subs	r2, r2, #28
200*4882a593Smuzhiyun		blt	14f
201*4882a593Smuzhiyun
/*
 * CALGN lines: ip = 32 - (r0 & 31). When r0 is not 32-byte aligned and
 * ip is below r2 (carry clear after the sbcs), ip is taken off r2 and the
 * single-word loop at 15 copies ip bytes; the CALGN lines at the end of
 * that loop then return to 11.
 */
202*4882a593Smuzhiyun	CALGN(	ands	ip, r0, #31		)
203*4882a593Smuzhiyun	CALGN(	rsb	ip, ip, #32		)
204*4882a593Smuzhiyun	CALGN(	sbcsne	r4, ip, r2		)  @ C is always set here
205*4882a593Smuzhiyun	CALGN(	subcc	r2, r2, ip		)
206*4882a593Smuzhiyun	CALGN(	bcc	15f			)
207*4882a593Smuzhiyun
/* 11: push r5 - r9 as working registers; aborts from here on use 19. */
208*4882a593Smuzhiyun11:		stmfd	sp!, {r5 - r9}
209*4882a593Smuzhiyun	UNWIND(	.fnend				)
210*4882a593Smuzhiyun
211*4882a593Smuzhiyun	UNWIND(	.fnstart			)
212*4882a593Smuzhiyun		usave	r4, lr
213*4882a593Smuzhiyun	UNWIND(	.save	{r5 - r9}		) @ in new second stmfd block
214*4882a593Smuzhiyun	PLD(	pld	[r1, #0]		)
215*4882a593Smuzhiyun	PLD(	subs	r2, r2, #96		)
216*4882a593Smuzhiyun	PLD(	pld	[r1, #28]		)
217*4882a593Smuzhiyun	PLD(	blt	13f			)
218*4882a593Smuzhiyun	PLD(	pld	[r1, #60]		)
219*4882a593Smuzhiyun	PLD(	pld	[r1, #92]		)
220*4882a593Smuzhiyun
/*
 * Main loop, 32 bytes per pass. Two ldr4w load eight new words into
 * r4 - r9, ip and lr. The incoming lr (last word of the previous pass)
 * supplies the first partial word, each register is then combined with
 * its successor, and the eight results in r3 - r9 and ip are stored.
 * lr leaves the pass holding the newest loaded word. The mov/orr
 * instructions do not set flags, so "bge 12b" tests the result of
 * "subs r2, r2, #32".
 */
221*4882a593Smuzhiyun12:	PLD(	pld	[r1, #124]		)
222*4882a593Smuzhiyun13:		ldr4w	r1, r4, r5, r6, r7, abort=19f
223*4882a593Smuzhiyun		mov	r3, lr, lspull #\pull
224*4882a593Smuzhiyun		subs	r2, r2, #32
225*4882a593Smuzhiyun		ldr4w	r1, r8, r9, ip, lr, abort=19f
226*4882a593Smuzhiyun		orr	r3, r3, r4, lspush #\push
227*4882a593Smuzhiyun		mov	r4, r4, lspull #\pull
228*4882a593Smuzhiyun		orr	r4, r4, r5, lspush #\push
229*4882a593Smuzhiyun		mov	r5, r5, lspull #\pull
230*4882a593Smuzhiyun		orr	r5, r5, r6, lspush #\push
231*4882a593Smuzhiyun		mov	r6, r6, lspull #\pull
232*4882a593Smuzhiyun		orr	r6, r6, r7, lspush #\push
233*4882a593Smuzhiyun		mov	r7, r7, lspull #\pull
234*4882a593Smuzhiyun		orr	r7, r7, r8, lspush #\push
235*4882a593Smuzhiyun		mov	r8, r8, lspull #\pull
236*4882a593Smuzhiyun		orr	r8, r8, r9, lspush #\push
237*4882a593Smuzhiyun		mov	r9, r9, lspull #\pull
238*4882a593Smuzhiyun		orr	r9, r9, ip, lspush #\push
239*4882a593Smuzhiyun		mov	ip, ip, lspull #\pull
240*4882a593Smuzhiyun		orr	ip, ip, lr, lspush #\push
241*4882a593Smuzhiyun		str8w	r0, r3, r4, r5, r6, r7, r8, r9, ip, abort=19f
242*4882a593Smuzhiyun		bge	12b
243*4882a593Smuzhiyun	PLD(	cmn	r2, #96			)
244*4882a593Smuzhiyun	PLD(	bge	13b			)
245*4882a593Smuzhiyun
/* Restore the working registers pushed at 11. */
246*4882a593Smuzhiyun		ldmfd	sp!, {r5 - r9}
247*4882a593Smuzhiyun	UNWIND(	.fnend				) @ end of the second stmfd block
248*4882a593Smuzhiyun
249*4882a593Smuzhiyun	UNWIND(	.fnstart			)
250*4882a593Smuzhiyun		usave	r4, lr			  @ still in first stmdb block
/* 14: ip = size in bytes of the whole words still to copy; none -> 16. */
251*4882a593Smuzhiyun14:		ands	ip, r2, #28
252*4882a593Smuzhiyun		beq	16f
253*4882a593Smuzhiyun
/*
 * 15: single-word loop. Each pass builds one output word from lr and the
 * next loaded word, stores it and takes 4 off ip, repeating while ip > 0.
 * The str1w sits between the subs and the bgt, so it is relied upon not
 * to change the flags. The CALGN lines send an alignment pass back to 11
 * when r2 is still >= 0.
 */
253*4882a593Smuzhiyun15:		mov	r3, lr, lspull #\pull
254*4882a593Smuzhiyun		ldr1w	r1, lr, abort=21f
255*4882a593Smuzhiyun		subs	ip, ip, #4
256*4882a593Smuzhiyun		orr	r3, r3, lr, lspush #\push
257*4882a593Smuzhiyun		str1w	r0, r3, abort=21f
258*4882a593Smuzhiyun		bgt	15b
259*4882a593Smuzhiyun	CALGN(	cmp	r2, #0			)
260*4882a593Smuzhiyun	CALGN(	bge	11b			)
261*4882a593Smuzhiyun
/*
 * 16: r1 points past the last word loaded, but part of that word (still
 * in lr) has not been stored. Stepping r1 back by push / 8 bytes lets the
 * byte tail at 8 re-read the outstanding bytes directly from memory.
 */
262*4882a593Smuzhiyun16:		sub	r1, r1, #(\push / 8)
263*4882a593Smuzhiyun		b	8b
264*4882a593Smuzhiyun	UNWIND(	.fnend				)
265*4882a593Smuzhiyun
266*4882a593Smuzhiyun		.endm
268*4882a593Smuzhiyun
269*4882a593Smuzhiyun
/*
 * Three expansions of forward_copy_shift, one for each value of r1 & 3
 * tested at label 10: a value of 1 falls through into the first
 * expansion, 2 branches to label 17 and 3 branches to label 18.
 */
270*4882a593Smuzhiyun		forward_copy_shift	pull=8	push=24
271*4882a593Smuzhiyun
272*4882a593Smuzhiyun17:		forward_copy_shift	pull=16	push=16
273*4882a593Smuzhiyun
274*4882a593Smuzhiyun18:		forward_copy_shift	pull=24	push=8
275*4882a593Smuzhiyun
276*4882a593Smuzhiyun
277*4882a593Smuzhiyun/*
278*4882a593Smuzhiyun * Abort preamble and completion macros.
279*4882a593Smuzhiyun * If a fixup handler is required then those macros must surround it.
280*4882a593Smuzhiyun * It is assumed that the fixup code will handle the private part of
281*4882a593Smuzhiyun * the exit macro.
282*4882a593Smuzhiyun */
283*4882a593Smuzhiyun
/*
 * copy_abort_preamble: defines the landing labels named by the abort=
 * arguments in the copy code.
 *   19 - used while r5 - r9 are pushed (forward_copy_shift block loop)
 *   20 - used while r5 - r8 are pushed (word-aligned copy region)
 *   21 - used when no extra working registers are pushed
 * 19 and 20 pop their working registers and then reach 21, which is left
 * open at the end of the macro so that the fixup code of the including
 * file can follow it.
 */
284*4882a593Smuzhiyun	.macro	copy_abort_preamble
285*4882a593Smuzhiyun19:	ldmfd	sp!, {r5 - r9}
286*4882a593Smuzhiyun	b	21f
287*4882a593Smuzhiyun20:	ldmfd	sp!, {r5 - r8}
288*4882a593Smuzhiyun21:
289*4882a593Smuzhiyun	.endm
290*4882a593Smuzhiyun
/*
 * copy_abort_end: closes a fixup handler. It pops two words from the
 * stack into r4 and pc, matching the r4/lr pair named in "enter r4, lr",
 * so the load into pc leaves the routine. Anything else that the
 * including file's 'enter' pushed has to be dealt with by the fixup code
 * before this point.
 */
291*4882a593Smuzhiyun	.macro	copy_abort_end
292*4882a593Smuzhiyun	ldmfd	sp!, {r4, pc}
293*4882a593Smuzhiyun	.endm
294*4882a593Smuzhiyun
295