/* xref: /rk3399_rockchip-uboot/arch/arm/lib/memcpy.S (revision 34fe8281d7323784544e94d2f7218f52b8a2899d) */
/*
 *  linux/arch/arm/lib/memcpy.S
 *
 *  Author:	Nicolas Pitre
 *  Created:	Sep 28, 2005
 *  Copyright:	MontaVista Software, Inc.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License version 2 as
 *  published by the Free Software Foundation.
 */

#include <asm/assembler.h>

/*
 * W(instr) wraps the mnemonics used in the computed-jump ladders of the
 * copy routine. Here it expands to the bare mnemonic.
 */
#define W(instr)	instr

/*
 * Power-of-two scale applied to the byte offset used when jumping into the
 * ldr1w / str1w ladders, and the exponent for the number of padding nops
 * emitted in front of each ladder (1 << SHIFT). Both are 0 here: ldr1w and
 * str1w each expand to a single instruction, so the offset needs no scaling
 * and exactly one nop is emitted.
 */
#define LDR1W_SHIFT	0
#define STR1W_SHIFT	0

/*
 * Load primitives. Every macro post-increments \ptr by the number of bytes
 * it reads. The \abort parameter is accepted so that call sites can pass
 * abort=<label>, but none of these definitions uses it.
 */

	/* Load one word from [\ptr] into \reg, then \ptr += 4. */
	.macro ldr1w ptr reg abort
	W(ldr) \reg, [\ptr], #4
	.endm

	/* Load four consecutive words from [\ptr], then \ptr += 16. */
	.macro ldr4w ptr reg1 reg2 reg3 reg4 abort
	ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4}
	.endm

	/* Load eight consecutive words from [\ptr], then \ptr += 32. */
	.macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
	ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8}
	.endm

	/*
	 * Load one byte from [\ptr] into \reg, then \ptr += 1. The load is
	 * executed only when condition \cond holds (default: always).
	 */
	.macro ldr1b ptr reg cond=al abort
	ldr\cond\()b \reg, [\ptr], #1
	.endm

/*
 * Store primitives. Every macro post-increments \ptr by the number of bytes
 * it writes. The \abort parameter is accepted so that call sites can pass
 * abort=<label>, but none of these definitions uses it.
 */

	/* Store the word in \reg to [\ptr], then \ptr += 4. */
	.macro str1w ptr reg abort
	W(str) \reg, [\ptr], #4
	.endm

	/* Store eight consecutive words to [\ptr], then \ptr += 32. */
	.macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
	stmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8}
	.endm

	/*
	 * Store the low byte of \reg to [\ptr], then \ptr += 1. The store is
	 * executed only when condition \cond holds (default: always).
	 */
	.macro str1b ptr reg cond=al abort
	str\cond\()b \reg, [\ptr], #1
	.endm

48d8834a13SMatthias Weisser	.macro enter reg1 reg2
49d8834a13SMatthias Weisser	stmdb sp!, {r0, \reg1, \reg2}
50d8834a13SMatthias Weisser	.endm
51d8834a13SMatthias Weisser
52d8834a13SMatthias Weisser	.macro exit reg1 reg2
53d8834a13SMatthias Weisser	ldmfd sp!, {r0, \reg1, \reg2}
54d8834a13SMatthias Weisser	.endm
55d8834a13SMatthias Weisser
56d8834a13SMatthias Weisser	.text
57d8834a13SMatthias Weisser
58d8834a13SMatthias Weisser/* Prototype: void *memcpy(void *dest, const void *src, size_t n); */
59d8834a13SMatthias Weisser
60d8834a13SMatthias Weisser.globl memcpy
61d8834a13SMatthias Weissermemcpy:
62d8834a13SMatthias Weisser
63*34fe8281SMatthias Weisser		cmp	r0, r1
64*34fe8281SMatthias Weisser		moveq	pc, lr
65*34fe8281SMatthias Weisser
66d8834a13SMatthias Weisser		enter	r4, lr
67d8834a13SMatthias Weisser
68d8834a13SMatthias Weisser		subs	r2, r2, #4
69d8834a13SMatthias Weisser		blt	8f
70d8834a13SMatthias Weisser		ands	ip, r0, #3
71d8834a13SMatthias Weisser	PLD(	pld	[r1, #0]		)
72d8834a13SMatthias Weisser		bne	9f
73d8834a13SMatthias Weisser		ands	ip, r1, #3
74d8834a13SMatthias Weisser		bne	10f
75d8834a13SMatthias Weisser
76d8834a13SMatthias Weisser1:		subs	r2, r2, #(28)
77d8834a13SMatthias Weisser		stmfd	sp!, {r5 - r8}
78d8834a13SMatthias Weisser		blt	5f
79d8834a13SMatthias Weisser
80d8834a13SMatthias Weisser	CALGN(	ands	ip, r0, #31		)
81d8834a13SMatthias Weisser	CALGN(	rsb	r3, ip, #32		)
82d8834a13SMatthias Weisser	CALGN(	sbcnes	r4, r3, r2		)  @ C is always set here
83d8834a13SMatthias Weisser	CALGN(	bcs	2f			)
84d8834a13SMatthias Weisser	CALGN(	adr	r4, 6f			)
85d8834a13SMatthias Weisser	CALGN(	subs	r2, r2, r3		)  @ C gets set
86d8834a13SMatthias Weisser	CALGN(	add	pc, r4, ip		)
87d8834a13SMatthias Weisser
88d8834a13SMatthias Weisser	PLD(	pld	[r1, #0]		)
89d8834a13SMatthias Weisser2:	PLD(	subs	r2, r2, #96		)
90d8834a13SMatthias Weisser	PLD(	pld	[r1, #28]		)
91d8834a13SMatthias Weisser	PLD(	blt	4f			)
92d8834a13SMatthias Weisser	PLD(	pld	[r1, #60]		)
93d8834a13SMatthias Weisser	PLD(	pld	[r1, #92]		)
94d8834a13SMatthias Weisser
95d8834a13SMatthias Weisser3:	PLD(	pld	[r1, #124]		)
96d8834a13SMatthias Weisser4:		ldr8w	r1, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f
97d8834a13SMatthias Weisser		subs	r2, r2, #32
98d8834a13SMatthias Weisser		str8w	r0, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f
99d8834a13SMatthias Weisser		bge	3b
100d8834a13SMatthias Weisser	PLD(	cmn	r2, #96			)
101d8834a13SMatthias Weisser	PLD(	bge	4b			)
102d8834a13SMatthias Weisser
103d8834a13SMatthias Weisser5:		ands	ip, r2, #28
104d8834a13SMatthias Weisser		rsb	ip, ip, #32
105d8834a13SMatthias Weisser#if LDR1W_SHIFT > 0
106d8834a13SMatthias Weisser		lsl	ip, ip, #LDR1W_SHIFT
107d8834a13SMatthias Weisser#endif
108d8834a13SMatthias Weisser		addne	pc, pc, ip		@ C is always clear here
109d8834a13SMatthias Weisser		b	7f
110d8834a13SMatthias Weisser6:
111d8834a13SMatthias Weisser		.rept	(1 << LDR1W_SHIFT)
112d8834a13SMatthias Weisser		W(nop)
113d8834a13SMatthias Weisser		.endr
114d8834a13SMatthias Weisser		ldr1w	r1, r3, abort=20f
115d8834a13SMatthias Weisser		ldr1w	r1, r4, abort=20f
116d8834a13SMatthias Weisser		ldr1w	r1, r5, abort=20f
117d8834a13SMatthias Weisser		ldr1w	r1, r6, abort=20f
118d8834a13SMatthias Weisser		ldr1w	r1, r7, abort=20f
119d8834a13SMatthias Weisser		ldr1w	r1, r8, abort=20f
120d8834a13SMatthias Weisser		ldr1w	r1, lr, abort=20f
121d8834a13SMatthias Weisser
122d8834a13SMatthias Weisser#if LDR1W_SHIFT < STR1W_SHIFT
123d8834a13SMatthias Weisser		lsl	ip, ip, #STR1W_SHIFT - LDR1W_SHIFT
124d8834a13SMatthias Weisser#elif LDR1W_SHIFT > STR1W_SHIFT
125d8834a13SMatthias Weisser		lsr	ip, ip, #LDR1W_SHIFT - STR1W_SHIFT
126d8834a13SMatthias Weisser#endif
127d8834a13SMatthias Weisser		add	pc, pc, ip
128d8834a13SMatthias Weisser		nop
129d8834a13SMatthias Weisser		.rept	(1 << STR1W_SHIFT)
130d8834a13SMatthias Weisser		W(nop)
131d8834a13SMatthias Weisser		.endr
132d8834a13SMatthias Weisser		str1w	r0, r3, abort=20f
133d8834a13SMatthias Weisser		str1w	r0, r4, abort=20f
134d8834a13SMatthias Weisser		str1w	r0, r5, abort=20f
135d8834a13SMatthias Weisser		str1w	r0, r6, abort=20f
136d8834a13SMatthias Weisser		str1w	r0, r7, abort=20f
137d8834a13SMatthias Weisser		str1w	r0, r8, abort=20f
138d8834a13SMatthias Weisser		str1w	r0, lr, abort=20f
139d8834a13SMatthias Weisser
140d8834a13SMatthias Weisser	CALGN(	bcs	2b			)
141d8834a13SMatthias Weisser
142d8834a13SMatthias Weisser7:		ldmfd	sp!, {r5 - r8}
143d8834a13SMatthias Weisser
144d8834a13SMatthias Weisser8:		movs	r2, r2, lsl #31
145d8834a13SMatthias Weisser		ldr1b	r1, r3, ne, abort=21f
146d8834a13SMatthias Weisser		ldr1b	r1, r4, cs, abort=21f
147d8834a13SMatthias Weisser		ldr1b	r1, ip, cs, abort=21f
148d8834a13SMatthias Weisser		str1b	r0, r3, ne, abort=21f
149d8834a13SMatthias Weisser		str1b	r0, r4, cs, abort=21f
150d8834a13SMatthias Weisser		str1b	r0, ip, cs, abort=21f
151d8834a13SMatthias Weisser
152d8834a13SMatthias Weisser		exit	r4, pc
153d8834a13SMatthias Weisser
154d8834a13SMatthias Weisser9:		rsb	ip, ip, #4
155d8834a13SMatthias Weisser		cmp	ip, #2
156d8834a13SMatthias Weisser		ldr1b	r1, r3, gt, abort=21f
157d8834a13SMatthias Weisser		ldr1b	r1, r4, ge, abort=21f
158d8834a13SMatthias Weisser		ldr1b	r1, lr, abort=21f
159d8834a13SMatthias Weisser		str1b	r0, r3, gt, abort=21f
160d8834a13SMatthias Weisser		str1b	r0, r4, ge, abort=21f
161d8834a13SMatthias Weisser		subs	r2, r2, ip
162d8834a13SMatthias Weisser		str1b	r0, lr, abort=21f
163d8834a13SMatthias Weisser		blt	8b
164d8834a13SMatthias Weisser		ands	ip, r1, #3
165d8834a13SMatthias Weisser		beq	1b
166d8834a13SMatthias Weisser
167d8834a13SMatthias Weisser10:		bic	r1, r1, #3
168d8834a13SMatthias Weisser		cmp	ip, #2
169d8834a13SMatthias Weisser		ldr1w	r1, lr, abort=21f
170d8834a13SMatthias Weisser		beq	17f
171d8834a13SMatthias Weisser		bgt	18f
172d8834a13SMatthias Weisser
173d8834a13SMatthias Weisser
174d8834a13SMatthias Weisser		.macro	forward_copy_shift pull push
175d8834a13SMatthias Weisser
176d8834a13SMatthias Weisser		subs	r2, r2, #28
177d8834a13SMatthias Weisser		blt	14f
178d8834a13SMatthias Weisser
179d8834a13SMatthias Weisser	CALGN(	ands	ip, r0, #31		)
180d8834a13SMatthias Weisser	CALGN(	rsb	ip, ip, #32		)
181d8834a13SMatthias Weisser	CALGN(	sbcnes	r4, ip, r2		)  @ C is always set here
182d8834a13SMatthias Weisser	CALGN(	subcc	r2, r2, ip		)
183d8834a13SMatthias Weisser	CALGN(	bcc	15f			)
184d8834a13SMatthias Weisser
185d8834a13SMatthias Weisser11:		stmfd	sp!, {r5 - r9}
186d8834a13SMatthias Weisser
187d8834a13SMatthias Weisser	PLD(	pld	[r1, #0]		)
188d8834a13SMatthias Weisser	PLD(	subs	r2, r2, #96		)
189d8834a13SMatthias Weisser	PLD(	pld	[r1, #28]		)
190d8834a13SMatthias Weisser	PLD(	blt	13f			)
191d8834a13SMatthias Weisser	PLD(	pld	[r1, #60]		)
192d8834a13SMatthias Weisser	PLD(	pld	[r1, #92]		)
193d8834a13SMatthias Weisser
194d8834a13SMatthias Weisser12:	PLD(	pld	[r1, #124]		)
195d8834a13SMatthias Weisser13:		ldr4w	r1, r4, r5, r6, r7, abort=19f
196d8834a13SMatthias Weisser		mov	r3, lr, pull #\pull
197d8834a13SMatthias Weisser		subs	r2, r2, #32
198d8834a13SMatthias Weisser		ldr4w	r1, r8, r9, ip, lr, abort=19f
199d8834a13SMatthias Weisser		orr	r3, r3, r4, push #\push
200d8834a13SMatthias Weisser		mov	r4, r4, pull #\pull
201d8834a13SMatthias Weisser		orr	r4, r4, r5, push #\push
202d8834a13SMatthias Weisser		mov	r5, r5, pull #\pull
203d8834a13SMatthias Weisser		orr	r5, r5, r6, push #\push
204d8834a13SMatthias Weisser		mov	r6, r6, pull #\pull
205d8834a13SMatthias Weisser		orr	r6, r6, r7, push #\push
206d8834a13SMatthias Weisser		mov	r7, r7, pull #\pull
207d8834a13SMatthias Weisser		orr	r7, r7, r8, push #\push
208d8834a13SMatthias Weisser		mov	r8, r8, pull #\pull
209d8834a13SMatthias Weisser		orr	r8, r8, r9, push #\push
210d8834a13SMatthias Weisser		mov	r9, r9, pull #\pull
211d8834a13SMatthias Weisser		orr	r9, r9, ip, push #\push
212d8834a13SMatthias Weisser		mov	ip, ip, pull #\pull
213d8834a13SMatthias Weisser		orr	ip, ip, lr, push #\push
214d8834a13SMatthias Weisser		str8w	r0, r3, r4, r5, r6, r7, r8, r9, ip, , abort=19f
215d8834a13SMatthias Weisser		bge	12b
216d8834a13SMatthias Weisser	PLD(	cmn	r2, #96			)
217d8834a13SMatthias Weisser	PLD(	bge	13b			)
218d8834a13SMatthias Weisser
219d8834a13SMatthias Weisser		ldmfd	sp!, {r5 - r9}
220d8834a13SMatthias Weisser
221d8834a13SMatthias Weisser14:		ands	ip, r2, #28
222d8834a13SMatthias Weisser		beq	16f
223d8834a13SMatthias Weisser
224d8834a13SMatthias Weisser15:		mov	r3, lr, pull #\pull
225d8834a13SMatthias Weisser		ldr1w	r1, lr, abort=21f
226d8834a13SMatthias Weisser		subs	ip, ip, #4
227d8834a13SMatthias Weisser		orr	r3, r3, lr, push #\push
228d8834a13SMatthias Weisser		str1w	r0, r3, abort=21f
229d8834a13SMatthias Weisser		bgt	15b
230d8834a13SMatthias Weisser	CALGN(	cmp	r2, #0			)
231d8834a13SMatthias Weisser	CALGN(	bge	11b			)
232d8834a13SMatthias Weisser
233d8834a13SMatthias Weisser16:		sub	r1, r1, #(\push / 8)
234d8834a13SMatthias Weisser		b	8b
235d8834a13SMatthias Weisser
236d8834a13SMatthias Weisser		.endm
237d8834a13SMatthias Weisser
238d8834a13SMatthias Weisser
239d8834a13SMatthias Weisser		forward_copy_shift	pull=8	push=24
240d8834a13SMatthias Weisser
241d8834a13SMatthias Weisser17:		forward_copy_shift	pull=16	push=16
242d8834a13SMatthias Weisser
243d8834a13SMatthias Weisser18:		forward_copy_shift	pull=24	push=8
244