xref: /rk3399_rockchip-uboot/arch/arm/lib/memset.S (revision 3a649407a49b041ceb826d55b5919dc8297f8965)
/*
 *  linux/arch/arm/lib/memset.S
 *
 *  Copyright (C) 1995-2000 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 *  ASM optimised string functions
 */
#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.align	5

	.syntax unified
#if CONFIG_IS_ENABLED(SYS_THUMB_BUILD) && !defined(MEMSET_NO_THUMB_BUILD)
	.thumb
	.thumb_func
#endif
ENTRY(memset)
	ands	r3, r0, #3		@ 1 unaligned?
	mov	ip, r0			@ preserve r0 as return value
	bne	6f			@ 1
/*
 * we know that the pointer in ip is aligned to a word boundary.
 */
1:	orr	r1, r1, r1, lsl #8
	orr	r1, r1, r1, lsl #16
	mov	r3, r1
	cmp	r2, #16
	blt	4f
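/*
 * The two ORRs above replicate the low byte of r1 across the whole
 * word (this assumes the upper 24 bits of r1 are clear, as they are
 * for a normal memset() call).  In C this reads roughly:
 *
 *	r1 |= r1 << 8;		0x000000ab -> 0x0000abab
 *	r1 |= r1 << 16;		0x0000abab -> 0xabababab
 *
 * r3 gets a copy of the pattern so register pairs can be stored at once.
 */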

#if ! CALGN(1)+0
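/*
 * CALGN() comes from asm/assembler.h: it passes its argument through on
 * CPUs that benefit from cache-line-aligned bulk writes (historically
 * CONFIG_CPU_FEROCEON in the Linux original) and expands to nothing
 * elsewhere, so "! CALGN(1)+0" selects this simpler variant whenever
 * the alignment code is disabled.
 */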

/*
 * We need 2 extra registers for this loop - use r8 and the LR
 */
	stmfd	sp!, {r8, lr}
	mov	r8, r1
	mov	lr, r1

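/*
 * r1, r3, r8 and lr now all hold the fill pattern, so each STMIA below
 * stores 16 bytes; the loop is unrolled four times to clear 64 bytes
 * per iteration.
 */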
2:	subs	r2, r2, #64
	stmiage	ip!, {r1, r3, r8, lr}	@ 64 bytes at a time.
	stmiage	ip!, {r1, r3, r8, lr}
	stmiage	ip!, {r1, r3, r8, lr}
	stmiage	ip!, {r1, r3, r8, lr}
	bgt	2b
	ldmfdeq	sp!, {r8, pc}		@ Now <64 bytes to go.
/*
 * No need to correct the count; we're only testing bits from now on.
 * Subtracting 64 only touched bits 6 and up, so bits 0-5 of r2 still
 * hold the number of bytes remaining.
 */
	tst	r2, #32
	stmiane	ip!, {r1, r3, r8, lr}
	stmiane	ip!, {r1, r3, r8, lr}
	tst	r2, #16
	stmiane	ip!, {r1, r3, r8, lr}
	ldmfd	sp!, {r8, lr}

#else

/*
 * This version aligns the destination pointer in order to write
 * whole cache lines at once.
 */

	stmfd	sp!, {r4-r8, lr}
	mov	r4, r1
	mov	r5, r1
	mov	r6, r1
	mov	r7, r1
	mov	r8, r1
	mov	lr, r1

	cmp	r2, #96
	tstgt	ip, #31
	ble	3f

	and	r8, ip, #31
	rsb	r8, r8, #32
	sub	r2, r2, r8
	movs	r8, r8, lsl #(32 - 4)
	stmiacs	ip!, {r4, r5, r6, r7}
	stmiami	ip!, {r4, r5}
	tst	r8, #(1 << 30)
	mov	r8, r1
	strne	r1, [ip], #4
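/*
 * The MOVS above shifts the 4..28 byte alignment count left by 28, so
 * its bit 4 lands in the carry flag and bit 3 in the sign flag: the
 * STMIACS stores 16 bytes when bit 4 was set, the STMIAMI stores 8 when
 * bit 3 was set, and the TST/STRNE pair handles the final word (bit 2,
 * now sitting at bit 30 of r8).
 */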

3:	subs	r2, r2, #64
	stmiage	ip!, {r1, r3-r8, lr}
	stmiage	ip!, {r1, r3-r8, lr}
	bgt	3b
	ldmfdeq	sp!, {r4-r8, pc}

	tst	r2, #32
	stmiane	ip!, {r1, r3-r8, lr}
	tst	r2, #16
	stmiane	ip!, {r4-r7}
	ldmfd	sp!, {r4-r8, lr}

#endif

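/*
 * Fewer than 16 bytes left: peel the remaining count off one bit at a
 * time, from bit 3 (8 bytes) down to bit 0 (1 byte).  Note that the two
 * back-to-back conditional byte stores for the 2-byte case both reuse
 * the NE condition from the same TST.
 */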
4:	tst	r2, #8
	stmiane	ip!, {r1, r3}
	tst	r2, #4
	strne	r1, [ip], #4
/*
 * When we get here, we've got less than 4 bytes to zero.  We
 * may have an unaligned pointer as well.
 */
5:	tst	r2, #2
	strbne	r1, [ip], #1
	strbne	r1, [ip], #1
	tst	r2, #1
	strbne	r1, [ip], #1
	ret	lr

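/*
 * Unaligned destination: r3 holds the misalignment (1, 2 or 3), so
 * 4 - r3 head bytes must be stored to reach word alignment.  The CMP
 * arranges the conditions so that exactly three, two or one of the
 * STRBs below execute, and the ADD rolls the head bytes back into the
 * count before rejoining the aligned path.
 */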
6:	subs	r2, r2, #4		@ 1 do we have enough
	blt	5b			@ 1 bytes to align with?
	cmp	r3, #2			@ 1
	strblt	r1, [ip], #1		@ 1
	strble	r1, [ip], #1		@ 1
	strb	r1, [ip], #1		@ 1
	add	r2, r2, r3		@ 1 (r2 = r2 - (4 - r3))
	b	1b
ENDPROC(memset)