xref: /OK3568_Linux_fs/kernel/arch/sh/lib/movmem.S (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun/* SPDX-License-Identifier: GPL-2.0+ WITH GCC-exception-2.0
2*4882a593Smuzhiyun
3*4882a593Smuzhiyun   Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
4*4882a593Smuzhiyun   2004, 2005, 2006
5*4882a593Smuzhiyun   Free Software Foundation, Inc.
6*4882a593Smuzhiyun*/
7*4882a593Smuzhiyun
8*4882a593Smuzhiyun!! libgcc routines for the Renesas / SuperH SH CPUs.
9*4882a593Smuzhiyun!! Contributed by Steve Chamberlain.
10*4882a593Smuzhiyun!! sac@cygnus.com
11*4882a593Smuzhiyun
12*4882a593Smuzhiyun!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines
13*4882a593Smuzhiyun!! recoded in assembly by Toshiyasu Morita
14*4882a593Smuzhiyun!! tm@netcom.com
15*4882a593Smuzhiyun
16*4882a593Smuzhiyun/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and
17*4882a593Smuzhiyun   ELF local label prefixes by Jörn Rennecke
18*4882a593Smuzhiyun   amylaar@cygnus.com  */
19*4882a593Smuzhiyun
!------------------------------------------------------------------------
! __movmem (alias __movstr): bulk longword copy helper called by
! compiler-generated code.
!   r4 = destination, r5 = source, r6 = longword count
!   (the shll2 below converts r6 to a byte count; every access is mov.l)
!   Clobbers r0 and the T bit; pushes/pops pr on r15.
!   NOTE(review): register roles inferred from the code itself; the
!   exact caller contract is defined by GCC's SH backend -- confirm
!   there before changing anything.
!------------------------------------------------------------------------
20*4882a593Smuzhiyun	.text
21*4882a593Smuzhiyun	.balign	4
22*4882a593Smuzhiyun	.global	__movmem
23*4882a593Smuzhiyun	.global __movstr
! __movstr is the pre-GCC-4.x name for the same entry point.
24*4882a593Smuzhiyun	.set __movstr, __movmem
25*4882a593Smuzhiyun	/* This would be a lot simpler if r6 contained the byte count
26*4882a593Smuzhiyun	   minus 64, and we wouldn't be called here for a byte count of 64.  */
27*4882a593Smuzhiyun__movmem:
! Save the caller's return address; the bsr below overwrites pr.
28*4882a593Smuzhiyun	sts.l	pr,@-r15
! r6 <<= 2: longword count -> byte count.
29*4882a593Smuzhiyun	shll2	r6
! Call into the fall-through chain at __movmemSI52+2, i.e. just past
! its load insn: the load of @(48,r5) happens here in the bsr delay
! slot instead.  The chain then copies words at offsets 44..0 and its
! final rts returns to movmem_loop (the insn after the delay slot).
30*4882a593Smuzhiyun	bsr	__movmemSI52+2
31*4882a593Smuzhiyun	mov.l	@(48,r5),r0
32*4882a593Smuzhiyun	.balign	4
33*4882a593Smuzhiyunmovmem_loop: /* Reached with rts */
! On every arrival here, offsets 0..48 of the current 64-byte block are
! already copied.  Copy the words at 60, 56 and 52, interleaving the
! loop bookkeeping between loads and stores to hide load-use latency.
34*4882a593Smuzhiyun	mov.l	@(60,r5),r0
35*4882a593Smuzhiyun	add	#-64,r6
36*4882a593Smuzhiyun	mov.l	r0,@(60,r4)
! T = (r6 == 0): this was exactly the last 64-byte block.
37*4882a593Smuzhiyun	tst	r6,r6
38*4882a593Smuzhiyun	mov.l	@(56,r5),r0
39*4882a593Smuzhiyun	bt	movmem_done
40*4882a593Smuzhiyun	mov.l	r0,@(56,r4)
! T = (r6 > 0): at least one more full 64-byte block remains.
41*4882a593Smuzhiyun	cmp/pl	r6
42*4882a593Smuzhiyun	mov.l	@(52,r5),r0
43*4882a593Smuzhiyun	add	#64,r5
44*4882a593Smuzhiyun	mov.l	r0,@(52,r4)
45*4882a593Smuzhiyun	add	#64,r4
46*4882a593Smuzhiyun	bt	__movmemSI52
47*4882a593Smuzhiyun! done all the large groups, do the remainder
48*4882a593Smuzhiyun! jump to movmem+
! r6 is now negative.  Every chain entry is exactly two 16-bit insns
! (4 bytes), so __movmemSI4+4 + r6 lands on the entry that copies just
! the remaining words.  NOTE(review): this offset arithmetic is taken
! on trust from the original; re-verify against the chain layout if
! either side is ever modified.
49*4882a593Smuzhiyun	mova	__movmemSI4+4,r0
50*4882a593Smuzhiyun	add	r6,r0
! The jmp's delay slot is the lds.l below -- deliberately shared with
! the movmem_done path (see the original comment).  So pr is restored
! before entering the chain, and the chain's final rts returns straight
! to __movmem's original caller.
51*4882a593Smuzhiyun	jmp	@r0
52*4882a593Smuzhiyunmovmem_done: ! share slot insn, works out aligned.
53*4882a593Smuzhiyun	lds.l	@r15+,pr
! Finish the final block: store word 56, copy word 52, and return
! (last store rides in the rts delay slot).
54*4882a593Smuzhiyun	mov.l	r0,@(56,r4)
55*4882a593Smuzhiyun	mov.l	@(52,r5),r0
56*4882a593Smuzhiyun	rts
57*4882a593Smuzhiyun	mov.l	r0,@(52,r4)
58*4882a593Smuzhiyun	.balign	4
59*4882a593Smuzhiyun
!------------------------------------------------------------------------
! Fall-through copy chain: __movmemSI<n> copies n bytes (n/4 longwords)
! from r5 to r4, clobbering r0.  Entry SI<n> copies the longword at
! offset n-4 and then falls through into SI<n-4>; the chain ends at
! __movmemSI4, which copies offset 0 and returns (store in the rts
! delay slot).  Each entry is exactly two 16-bit insns (4 bytes) --
! __movmem's computed jmp depends on this spacing, so do not insert
! anything between entries.  The __movstrSI<n> symbols are aliases kept
! for older compilers that emitted the "movstr" names.
!------------------------------------------------------------------------
60*4882a593Smuzhiyun	.global	__movmemSI64
61*4882a593Smuzhiyun	.global __movstrSI64
62*4882a593Smuzhiyun	.set	__movstrSI64, __movmemSI64
63*4882a593Smuzhiyun__movmemSI64:
64*4882a593Smuzhiyun	mov.l	@(60,r5),r0
65*4882a593Smuzhiyun	mov.l	r0,@(60,r4)
66*4882a593Smuzhiyun	.global	__movmemSI60
67*4882a593Smuzhiyun	.global __movstrSI60
68*4882a593Smuzhiyun	.set	__movstrSI60, __movmemSI60
69*4882a593Smuzhiyun__movmemSI60:
70*4882a593Smuzhiyun	mov.l	@(56,r5),r0
71*4882a593Smuzhiyun	mov.l	r0,@(56,r4)
72*4882a593Smuzhiyun	.global	__movmemSI56
73*4882a593Smuzhiyun	.global __movstrSI56
74*4882a593Smuzhiyun	.set	__movstrSI56, __movmemSI56
75*4882a593Smuzhiyun__movmemSI56:
76*4882a593Smuzhiyun	mov.l	@(52,r5),r0
77*4882a593Smuzhiyun	mov.l	r0,@(52,r4)
78*4882a593Smuzhiyun	.global	__movmemSI52
79*4882a593Smuzhiyun	.global __movstrSI52
80*4882a593Smuzhiyun	.set	__movstrSI52, __movmemSI52
! __movmem bsr-calls __movmemSI52+2 (skipping the load below, which it
! performs in its own delay slot) and also branches to __movmemSI52
! itself from the top of its 64-byte loop.
81*4882a593Smuzhiyun__movmemSI52:
82*4882a593Smuzhiyun	mov.l	@(48,r5),r0
83*4882a593Smuzhiyun	mov.l	r0,@(48,r4)
84*4882a593Smuzhiyun	.global	__movmemSI48
85*4882a593Smuzhiyun	.global	__movstrSI48
86*4882a593Smuzhiyun	.set	__movstrSI48, __movmemSI48
87*4882a593Smuzhiyun__movmemSI48:
88*4882a593Smuzhiyun	mov.l	@(44,r5),r0
89*4882a593Smuzhiyun	mov.l	r0,@(44,r4)
90*4882a593Smuzhiyun	.global	__movmemSI44
91*4882a593Smuzhiyun	.global	__movstrSI44
92*4882a593Smuzhiyun	.set	__movstrSI44, __movmemSI44
93*4882a593Smuzhiyun__movmemSI44:
94*4882a593Smuzhiyun	mov.l	@(40,r5),r0
95*4882a593Smuzhiyun	mov.l	r0,@(40,r4)
96*4882a593Smuzhiyun	.global	__movmemSI40
97*4882a593Smuzhiyun	.global __movstrSI40
98*4882a593Smuzhiyun	.set	__movstrSI40, __movmemSI40
99*4882a593Smuzhiyun__movmemSI40:
100*4882a593Smuzhiyun	mov.l	@(36,r5),r0
101*4882a593Smuzhiyun	mov.l	r0,@(36,r4)
102*4882a593Smuzhiyun	.global	__movmemSI36
103*4882a593Smuzhiyun	.global	__movstrSI36
104*4882a593Smuzhiyun	.set	__movstrSI36, __movmemSI36
105*4882a593Smuzhiyun__movmemSI36:
106*4882a593Smuzhiyun	mov.l	@(32,r5),r0
107*4882a593Smuzhiyun	mov.l	r0,@(32,r4)
108*4882a593Smuzhiyun	.global	__movmemSI32
109*4882a593Smuzhiyun	.global	__movstrSI32
110*4882a593Smuzhiyun	.set	__movstrSI32, __movmemSI32
111*4882a593Smuzhiyun__movmemSI32:
112*4882a593Smuzhiyun	mov.l	@(28,r5),r0
113*4882a593Smuzhiyun	mov.l	r0,@(28,r4)
114*4882a593Smuzhiyun	.global	__movmemSI28
115*4882a593Smuzhiyun	.global	__movstrSI28
116*4882a593Smuzhiyun	.set	__movstrSI28, __movmemSI28
117*4882a593Smuzhiyun__movmemSI28:
118*4882a593Smuzhiyun	mov.l	@(24,r5),r0
119*4882a593Smuzhiyun	mov.l	r0,@(24,r4)
120*4882a593Smuzhiyun	.global	__movmemSI24
121*4882a593Smuzhiyun	.global	__movstrSI24
122*4882a593Smuzhiyun	.set	__movstrSI24, __movmemSI24
123*4882a593Smuzhiyun__movmemSI24:
124*4882a593Smuzhiyun	mov.l	@(20,r5),r0
125*4882a593Smuzhiyun	mov.l	r0,@(20,r4)
126*4882a593Smuzhiyun	.global	__movmemSI20
127*4882a593Smuzhiyun	.global	__movstrSI20
128*4882a593Smuzhiyun	.set	__movstrSI20, __movmemSI20
129*4882a593Smuzhiyun__movmemSI20:
130*4882a593Smuzhiyun	mov.l	@(16,r5),r0
131*4882a593Smuzhiyun	mov.l	r0,@(16,r4)
132*4882a593Smuzhiyun	.global	__movmemSI16
133*4882a593Smuzhiyun	.global	__movstrSI16
134*4882a593Smuzhiyun	.set	__movstrSI16, __movmemSI16
135*4882a593Smuzhiyun__movmemSI16:
136*4882a593Smuzhiyun	mov.l	@(12,r5),r0
137*4882a593Smuzhiyun	mov.l	r0,@(12,r4)
138*4882a593Smuzhiyun	.global	__movmemSI12
139*4882a593Smuzhiyun	.global	__movstrSI12
140*4882a593Smuzhiyun	.set	__movstrSI12, __movmemSI12
141*4882a593Smuzhiyun__movmemSI12:
142*4882a593Smuzhiyun	mov.l	@(8,r5),r0
143*4882a593Smuzhiyun	mov.l	r0,@(8,r4)
144*4882a593Smuzhiyun	.global	__movmemSI8
145*4882a593Smuzhiyun	.global	__movstrSI8
146*4882a593Smuzhiyun	.set	__movstrSI8, __movmemSI8
147*4882a593Smuzhiyun__movmemSI8:
148*4882a593Smuzhiyun	mov.l	@(4,r5),r0
149*4882a593Smuzhiyun	mov.l	r0,@(4,r4)
150*4882a593Smuzhiyun	.global	__movmemSI4
151*4882a593Smuzhiyun	.global	__movstrSI4
152*4882a593Smuzhiyun	.set	__movstrSI4, __movmemSI4
! Chain terminator: copy the last longword; the store executes in the
! rts delay slot.
153*4882a593Smuzhiyun__movmemSI4:
154*4882a593Smuzhiyun	mov.l	@(0,r5),r0
155*4882a593Smuzhiyun	rts
156*4882a593Smuzhiyun	mov.l	r0,@(0,r4)
157*4882a593Smuzhiyun
!------------------------------------------------------------------------
! SH4-tuned ("_i4") unrolled copy loops.
!   r4 = destination, r5 = source (post-incremented),
!   r6 = loop counter for the 4-longwords-per-pass main loop
!   Clobbers r0-r3 and the T bit.
!   "_even"/"_odd" select the entry matching the parity of the longword
!   count so the unrolled loop ends on a clean boundary.
!   NOTE(review): the precise meaning of r6 (and which counts each
!   entry handles) is fixed by GCC's SH backend when it emits calls to
!   these helpers -- confirm there before changing the loop structure.
!------------------------------------------------------------------------
158*4882a593Smuzhiyun	.global	__movmem_i4_even
159*4882a593Smuzhiyun	.global	__movstr_i4_even
160*4882a593Smuzhiyun	.set	__movstr_i4_even, __movmem_i4_even
161*4882a593Smuzhiyun
162*4882a593Smuzhiyun	.global	__movmem_i4_odd
163*4882a593Smuzhiyun	.global	__movstr_i4_odd
164*4882a593Smuzhiyun	.set	__movstr_i4_odd, __movmem_i4_odd
165*4882a593Smuzhiyun
166*4882a593Smuzhiyun	.global	__movmemSI12_i4
167*4882a593Smuzhiyun	.global	__movstrSI12_i4
168*4882a593Smuzhiyun	.set	__movstrSI12_i4, __movmemSI12_i4
169*4882a593Smuzhiyun
170*4882a593Smuzhiyun	.p2align	5
! Loop tail taken from L_movmem_loop via bt/s: r0 and r1 already hold
! the last two longwords (r0 stored here, r1 in the rts delay slot).
171*4882a593SmuzhiyunL_movmem_2mod4_end:
172*4882a593Smuzhiyun	mov.l	r0,@(16,r4)
173*4882a593Smuzhiyun	rts
174*4882a593Smuzhiyun	mov.l	r1,@(20,r4)
175*4882a593Smuzhiyun
176*4882a593Smuzhiyun	.p2align	2
177*4882a593Smuzhiyun
! Even entry: preload two longwords into r0/r1, then join the main
! loop at L_movmem_start_even (second load rides in the bra delay slot).
178*4882a593Smuzhiyun__movmem_i4_even:
179*4882a593Smuzhiyun	mov.l	@r5+,r0
180*4882a593Smuzhiyun	bra	L_movmem_start_even
181*4882a593Smuzhiyun	mov.l	@r5+,r1
182*4882a593Smuzhiyun
! Odd entry: copy three longwords up front; r4 is biased by -4 so the
! @(4,..)/@(8,..)/@(12,..) offsets used below line up with the shared
! loop body.
183*4882a593Smuzhiyun__movmem_i4_odd:
184*4882a593Smuzhiyun	mov.l	@r5+,r1
185*4882a593Smuzhiyun	add	#-4,r4
186*4882a593Smuzhiyun	mov.l	@r5+,r2
187*4882a593Smuzhiyun	mov.l	@r5+,r3
188*4882a593Smuzhiyun	mov.l	r1,@(4,r4)
189*4882a593Smuzhiyun	mov.l	r2,@(8,r4)
190*4882a593Smuzhiyun
! Main software-pipelined loop: loads for the next group are issued
! while the previous group's stores drain.  dt r6 = decrement r6 and
! set T when it reaches zero; bt/s and bf/s are delayed branches whose
! following insn executes in the delay slot.
191*4882a593SmuzhiyunL_movmem_loop:
192*4882a593Smuzhiyun	mov.l	r3,@(12,r4)
193*4882a593Smuzhiyun	dt	r6
194*4882a593Smuzhiyun	mov.l	@r5+,r0
195*4882a593Smuzhiyun	bt/s	L_movmem_2mod4_end
196*4882a593Smuzhiyun	mov.l	@r5+,r1
197*4882a593Smuzhiyun	add	#16,r4
198*4882a593SmuzhiyunL_movmem_start_even:
199*4882a593Smuzhiyun	mov.l	@r5+,r2
200*4882a593Smuzhiyun	mov.l	@r5+,r3
201*4882a593Smuzhiyun	mov.l	r0,@r4
202*4882a593Smuzhiyun	dt	r6
203*4882a593Smuzhiyun	mov.l	r1,@(4,r4)
204*4882a593Smuzhiyun	bf/s	L_movmem_loop
205*4882a593Smuzhiyun	mov.l	r2,@(8,r4)
! Even-count exit: store the final longword in the rts delay slot.
206*4882a593Smuzhiyun	rts
207*4882a593Smuzhiyun	mov.l	r3,@(12,r4)
208*4882a593Smuzhiyun
209*4882a593Smuzhiyun	.p2align	4
!------------------------------------------------------------------------
! __movmemSI12_i4: copy exactly 12 bytes (three longwords) from r5 to
! r4.  SH4-tuned: all three loads are issued before any store to hide
! load-use latency.  Clobbers r0-r2; the final store executes in the
! rts delay slot.
!------------------------------------------------------------------------
210*4882a593Smuzhiyun__movmemSI12_i4:
211*4882a593Smuzhiyun	mov.l	@r5,r0
212*4882a593Smuzhiyun	mov.l	@(4,r5),r1
213*4882a593Smuzhiyun	mov.l	@(8,r5),r2
214*4882a593Smuzhiyun	mov.l	r0,@r4
215*4882a593Smuzhiyun	mov.l	r1,@(4,r4)
216*4882a593Smuzhiyun	rts
217*4882a593Smuzhiyun	mov.l	r2,@(8,r4)
218