xref: /rk3399_rockchip-uboot/arch/arm/lib/memset.S (revision 75d7a0d7f11ef66dcd3c27ae141f6b59f2ffd00d)
/*
 *  linux/arch/arm/lib/memset.S
 *
 *  Copyright (C) 1995-2000 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 *  ASM optimised string functions
 */
#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.align	5

	.syntax unified
#ifdef CONFIG_SYS_THUMB_BUILD
	.thumb
	.thumb_func
#endif
/*
 * memset - fill a block of memory with one byte value.
 *
 * Register usage in this routine:
 *   r0  in : destination pointer; never modified, so it is still the
 *            destination pointer on return
 *   r1  in : fill value; only its low 8 bits are used
 *   r2  in : number of bytes to fill
 *   r3     : scratch (misalignment of r0, then a copy of the fill word)
 *   ip     : running store pointer (working copy of r0)
 *   r8, lr : extra fill registers for the bulk loop, saved and restored
 *            on the stack (r4-r8, lr in the cache-line variant)
 *
 * Strategy: byte-store until ip is word aligned (label 6), replicate the
 * fill byte into a full word, store 64 bytes per loop iteration, then
 * finish the remainder by testing individual bits of the count
 * (32, 16, 8, 4, 2, 1).
 *
 * Note: the count is compared with signed conditions (blt/ge/gt), so
 * counts of 2^31 bytes or more are not handled by the bulk path.
 *
 * CALGN() and the "ret" macro are not defined in this file; presumably
 * they come from <asm/assembler.h>.
 */
ENTRY(memset)
	ands	r3, r0, #3		@ 1 unaligned?
	mov	ip, r0			@ preserve r0 as return value
	bne	6f			@ 1
/*
 * we know that the pointer in ip is aligned to a word boundary.
 *
 * Reduce the fill value to a single byte before replicating it: without
 * the mask, a value outside 0..255 would spill its upper bits into the
 * neighbouring bytes of every word store below.
 */
1:	and	r1, r1, #255		@ keep only the low byte of the fill value
	orr	r1, r1, r1, lsl #8	@ byte -> halfword
	orr	r1, r1, r1, lsl #16	@ halfword -> word
	mov	r3, r1			@ second fill register
	cmp	r2, #16
	blt	4f			@ fewer than 16 bytes: skip the bulk code

#if ! CALGN(1)+0

/*
 * We need 2 extra registers for this loop - use r8 and the LR
 */
	stmfd	sp!, {r8, lr}
	mov	r8, r1
	mov	lr, r1

2:	subs	r2, r2, #64
	stmiage	ip!, {r1, r3, r8, lr}	@ 64 bytes at a time.
	stmiage	ip!, {r1, r3, r8, lr}
	stmiage	ip!, {r1, r3, r8, lr}
	stmiage	ip!, {r1, r3, r8, lr}
	bgt	2b
	ldmfdeq	sp!, {r8, pc}		@ Now <64 bytes to go.
/*
 * No need to correct the count; we're only testing bits from now on
 * (r2 is now "remaining - 64", whose low six bits equal the remaining
 * byte count).
 */
	tst	r2, #32
	stmiane	ip!, {r1, r3, r8, lr}	@ 32 bytes
	stmiane	ip!, {r1, r3, r8, lr}
	tst	r2, #16
	stmiane	ip!, {r1, r3, r8, lr}	@ 16 bytes
	ldmfd	sp!, {r8, lr}		@ restore; lr is needed for the return

#else

/*
 * This version aligns the destination pointer in order to write
 * whole cache lines at once.
 */

	stmfd	sp!, {r4-r8, lr}
	mov	r4, r1
	mov	r5, r1
	mov	r6, r1
	mov	r7, r1
	mov	r8, r1
	mov	lr, r1

	cmp	r2, #96
	tstgt	ip, #31			@ only align when >96 bytes remain
	ble	3f			@ short or already 32-byte aligned

	and	r8, ip, #31
	rsb	r8, r8, #32		@ r8 = bytes up to the next 32-byte boundary
	sub	r2, r2, r8
	movs	r8, r8, lsl #(32 - 4)	@ C = bit 4 (16), N = bit 3 (8)
	stmiacs	ip!, {r4, r5, r6, r7}	@ 16 bytes
	stmiami	ip!, {r4, r5}		@ 8 bytes
	tst	r8, #(1 << 30)		@ original bit 2 (4)
	mov	r8, r1			@ r8 becomes a fill register again
	strne	r1, [ip], #4		@ 4 bytes

3:	subs	r2, r2, #64
	stmiage	ip!, {r1, r3-r8, lr}	@ 2 x 32 bytes per iteration
	stmiage	ip!, {r1, r3-r8, lr}
	bgt	3b
	ldmfdeq	sp!, {r4-r8, pc}	@ nothing left: restore and return

	tst	r2, #32
	stmiane	ip!, {r1, r3-r8, lr}	@ 32 bytes
	tst	r2, #16
	stmiane	ip!, {r4-r7}		@ 16 bytes
	ldmfd	sp!, {r4-r8, lr}

#endif

4:	tst	r2, #8
	stmiane	ip!, {r1, r3}		@ 8 bytes
	tst	r2, #4
	strne	r1, [ip], #4		@ 4 bytes
/*
 * When we get here, we've got less than 4 bytes to zero.  We
 * may have an unaligned pointer as well.
 */
5:	tst	r2, #2
	strbne	r1, [ip], #1
	strbne	r1, [ip], #1
	tst	r2, #1
	strbne	r1, [ip], #1
	ret	lr

/*
 * Unaligned start: r3 = r0 & 3 (1..3).  Store 4 - r3 bytes to reach a
 * word boundary, then rejoin the aligned path.  If fewer than 4 bytes
 * were requested, go straight to the byte tail; the low two bits of
 * r2 - 4 are the same as those of the original count.
 */
6:	subs	r2, r2, #4		@ 1 do we have enough
	blt	5b			@ 1 bytes to align with?
	cmp	r3, #2			@ 1
	strblt	r1, [ip], #1		@ 1
	strble	r1, [ip], #1		@ 1
	strb	r1, [ip], #1		@ 1
	add	r2, r2, r3		@ 1 (r2 = r2 - (4 - r3))
	b	1b
ENDPROC(memset)
129