xref: /OK3568_Linux_fs/kernel/arch/sparc/lib/memcpy.S (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun/* SPDX-License-Identifier: GPL-2.0 */
2*4882a593Smuzhiyun/* memcpy.S: Sparc optimized memcpy and memmove code
3*4882a593Smuzhiyun * Hand optimized from GNU libc's memcpy and memmove
4*4882a593Smuzhiyun * Copyright (C) 1991,1996 Free Software Foundation
5*4882a593Smuzhiyun * Copyright (C) 1995 Linus Torvalds (Linus.Torvalds@helsinki.fi)
6*4882a593Smuzhiyun * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
7*4882a593Smuzhiyun * Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be)
8*4882a593Smuzhiyun * Copyright (C) 1996 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
9*4882a593Smuzhiyun */
10*4882a593Smuzhiyun
11*4882a593Smuzhiyun#include <asm/export.h>
/*
 * FUNC(x): open a function symbol named x.
 * Emits .globl and .type @function for x, aligns the location counter
 * to 4 bytes and then defines the label x: at that position.
 */
12*4882a593Smuzhiyun#define FUNC(x) 		\
13*4882a593Smuzhiyun	.globl	x;		\
14*4882a593Smuzhiyun	.type	x,@function;	\
15*4882a593Smuzhiyun	.align	4;		\
16*4882a593Smuzhiyunx:
17*4882a593Smuzhiyun
18*4882a593Smuzhiyun/* Both these macros have to start with exactly the same insn */
/*
 * Why: memcpy's "be 82f + 4" has the first insn of a MOVE_BIGCHUNK
 * expansion in its delay slot and, when taken, lands on the second insn
 * of a MOVE_BIGALIGNCHUNK expansion.  The two first insns must therefore
 * be identical.
 *
 * MOVE_BIGCHUNK: copy 32 bytes from [%src + offset] to [%dst + offset].
 *   src, dst : pointer registers, written without the leading '%'
 *   offset   : constant byte offset added to both pointers
 *   t0..t7   : scratch registers; each ldd fills a pair (t0/t1, t2/t3,
 *              t4/t5, t6/t7), so the names must form such pairs
 * Loads are doubleword (ldd), stores are eight word-sized st, so the
 * destination is only accessed with word granularity.
 * None of the insns modifies the condition codes.
 */
19*4882a593Smuzhiyun#define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
20*4882a593Smuzhiyun	ldd	[%src + (offset) + 0x00], %t0; \
21*4882a593Smuzhiyun	ldd	[%src + (offset) + 0x08], %t2; \
22*4882a593Smuzhiyun	ldd	[%src + (offset) + 0x10], %t4; \
23*4882a593Smuzhiyun	ldd	[%src + (offset) + 0x18], %t6; \
24*4882a593Smuzhiyun	st	%t0, [%dst + (offset) + 0x00]; \
25*4882a593Smuzhiyun	st	%t1, [%dst + (offset) + 0x04]; \
26*4882a593Smuzhiyun	st	%t2, [%dst + (offset) + 0x08]; \
27*4882a593Smuzhiyun	st	%t3, [%dst + (offset) + 0x0c]; \
28*4882a593Smuzhiyun	st	%t4, [%dst + (offset) + 0x10]; \
29*4882a593Smuzhiyun	st	%t5, [%dst + (offset) + 0x14]; \
30*4882a593Smuzhiyun	st	%t6, [%dst + (offset) + 0x18]; \
31*4882a593Smuzhiyun	st	%t7, [%dst + (offset) + 0x1c];
32*4882a593Smuzhiyun
/*
 * MOVE_BIGALIGNCHUNK: copy 32 bytes from [%src + offset] to
 * [%dst + offset] using four ldd followed by four std.
 *   src, dst : pointer registers, written without the leading '%'
 *   offset   : constant byte offset added to both pointers
 *   t0..t7   : scratch registers forming the pairs t0/t1, t2/t3, t4/t5,
 *              t6/t7 (only the first name of each pair appears in the
 *              body; t1, t3, t5, t7 are implied by ldd/std)
 * Both source and destination are accessed as doublewords; memcpy only
 * reaches this macro after testing that bit 2 of the destination pointer
 * is clear.  Its first insn is the same as MOVE_BIGCHUNK's first insn,
 * which memcpy's "be 82f + 4" depends on.
 */
33*4882a593Smuzhiyun#define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
34*4882a593Smuzhiyun	ldd	[%src + (offset) + 0x00], %t0; \
35*4882a593Smuzhiyun	ldd	[%src + (offset) + 0x08], %t2; \
36*4882a593Smuzhiyun	ldd	[%src + (offset) + 0x10], %t4; \
37*4882a593Smuzhiyun	ldd	[%src + (offset) + 0x18], %t6; \
38*4882a593Smuzhiyun	std	%t0, [%dst + (offset) + 0x00]; \
39*4882a593Smuzhiyun	std	%t2, [%dst + (offset) + 0x08]; \
40*4882a593Smuzhiyun	std	%t4, [%dst + (offset) + 0x10]; \
41*4882a593Smuzhiyun	std	%t6, [%dst + (offset) + 0x18];
42*4882a593Smuzhiyun
/*
 * MOVE_LASTCHUNK: copy the 16 bytes that end `offset` bytes below the
 * pointers, i.e. [%src - offset - 16, %src - offset) to the matching
 * range below %dst.  Two ldd loads, four word-sized st stores.
 *   src, dst : pointer registers (no leading '%'), already advanced past
 *              the data to be copied
 *   t0..t3   : scratch registers forming the ldd pairs t0/t1 and t2/t3
 * The expansion is exactly 6 insns (24 bytes of code) per 16 bytes of
 * data; the computed jump into memcpy_table (offset = n + n/2) depends
 * on that size, so do not add or remove insns here.
 */
43*4882a593Smuzhiyun#define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
44*4882a593Smuzhiyun	ldd	[%src - (offset) - 0x10], %t0; \
45*4882a593Smuzhiyun	ldd	[%src - (offset) - 0x08], %t2; \
46*4882a593Smuzhiyun	st	%t0, [%dst - (offset) - 0x10]; \
47*4882a593Smuzhiyun	st	%t1, [%dst - (offset) - 0x0c]; \
48*4882a593Smuzhiyun	st	%t2, [%dst - (offset) - 0x08]; \
49*4882a593Smuzhiyun	st	%t3, [%dst - (offset) - 0x04];
50*4882a593Smuzhiyun
/*
 * MOVE_LASTALIGNCHUNK: copy the 16 bytes that end `offset` bytes below
 * the pointers, using two ldd and two std (doubleword accesses on both
 * sides).
 *   src, dst : pointer registers (no leading '%'), already advanced past
 *              the data to be copied
 *   t0..t3   : scratch registers forming the pairs t0/t1 and t2/t3
 * The expansion is exactly 4 insns (16 bytes of code) per 16 bytes of
 * data; the computed jump into amemcpy_table subtracts the byte count
 * directly from the table end and depends on that size.
 */
51*4882a593Smuzhiyun#define MOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \
52*4882a593Smuzhiyun	ldd	[%src - (offset) - 0x10], %t0; \
53*4882a593Smuzhiyun	ldd	[%src - (offset) - 0x08], %t2; \
54*4882a593Smuzhiyun	std	%t0, [%dst - (offset) - 0x10]; \
55*4882a593Smuzhiyun	std	%t2, [%dst - (offset) - 0x08];
56*4882a593Smuzhiyun
/*
 * MOVE_SHORTCHUNK: copy the 2 bytes that end `offset` bytes below the
 * pointers, one byte at a time (ldub/stb), so no alignment is needed.
 *   src, dst : pointer registers (no leading '%'), already advanced past
 *              the data to be copied
 *   t0, t1   : scratch registers
 * The expansion is exactly 4 insns (16 bytes of code) per 2 bytes of
 * data; the computed jump in short_end (offset = count << 3) depends on
 * that size.
 */
57*4882a593Smuzhiyun#define MOVE_SHORTCHUNK(src, dst, offset, t0, t1) \
58*4882a593Smuzhiyun	ldub	[%src - (offset) - 0x02], %t0; \
59*4882a593Smuzhiyun	ldub	[%src - (offset) - 0x01], %t1; \
60*4882a593Smuzhiyun	stb	%t0, [%dst - (offset) - 0x02]; \
61*4882a593Smuzhiyun	stb	%t1, [%dst - (offset) - 0x01];
62*4882a593Smuzhiyun
63*4882a593Smuzhiyun	.text
64*4882a593Smuzhiyun	.align	4
65*4882a593Smuzhiyun
/*
 * memmove: %o0 = dst, %o1 = src, %o2 = len.  Returns the original dst
 * in %o0 (kept in %g7 meanwhile).
 *
 * Three cases:
 *   dst <= src            -> forward copy is safe, join memcpy at 9
 *   src + len <= dst      -> no overlap, join memcpy at 0
 *   otherwise             -> overlapping with dst above src: copy
 *                            byte by byte from the end (label 1)
 * The delay-slot insns of the two branches reproduce the insns memcpy
 * would have executed before the label being entered (%o4 = dst - src,
 * then the andcc that sets the flags tested at label 0).
 */
66*4882a593SmuzhiyunFUNC(memmove)
67*4882a593SmuzhiyunEXPORT_SYMBOL(memmove)
68*4882a593Smuzhiyun	cmp		%o0, %o1
69*4882a593Smuzhiyun	mov		%o0, %g7
	/* unsigned dst <= src: go to memcpy; delay slot: %o4 = dst - src */
70*4882a593Smuzhiyun	bleu		9f
71*4882a593Smuzhiyun	 sub		%o0, %o1, %o4
72*4882a593Smuzhiyun
	/* %o3 = src + len; if that does not reach dst there is no overlap */
73*4882a593Smuzhiyun	add		%o1, %o2, %o3
74*4882a593Smuzhiyun	cmp		%o3, %o0
	/* delay slot sets the flags memcpy tests with "bne 86f" at label 0 */
75*4882a593Smuzhiyun	bleu		0f
76*4882a593Smuzhiyun	 andcc		%o4, 3, %o5
77*4882a593Smuzhiyun
	/* Overlap: point %o1/%o0 at the last byte of source/destination.
	 * len is at least 1 here because src + len > dst > src. */
78*4882a593Smuzhiyun	add		%o1, %o2, %o1
79*4882a593Smuzhiyun	add		%o0, %o2, %o0
80*4882a593Smuzhiyun	sub		%o1, 1, %o1
81*4882a593Smuzhiyun	sub		%o0, 1, %o0
82*4882a593Smuzhiyun
83*4882a593Smuzhiyun1:	/* reverse_bytes */
84*4882a593Smuzhiyun
	/* Copy one byte per iteration, walking both pointers downwards.
	 * subcc sets the flags for bne; the insns in between leave them
	 * untouched. */
85*4882a593Smuzhiyun	ldub		[%o1], %o4
86*4882a593Smuzhiyun	subcc		%o2, 1, %o2
87*4882a593Smuzhiyun	stb		%o4, [%o0]
88*4882a593Smuzhiyun	sub		%o1, 1, %o1
89*4882a593Smuzhiyun	bne		1b
90*4882a593Smuzhiyun	 sub		%o0, 1, %o0
91*4882a593Smuzhiyun
	/* return the original dst saved in %g7 */
92*4882a593Smuzhiyun	retl
93*4882a593Smuzhiyun	 mov		%g7, %o0
94*4882a593Smuzhiyun
95*4882a593Smuzhiyun/* NOTE: This code is executed just for the cases,
96*4882a593Smuzhiyun         where %src (=%o1) & 3 is != 0.
97*4882a593Smuzhiyun	 We need to align it to 4. So, for (%src & 3)
98*4882a593Smuzhiyun	 1 we need to do ldub,lduh
99*4882a593Smuzhiyun	 2 lduh
100*4882a593Smuzhiyun	 3 just ldub
101*4882a593Smuzhiyun         so even if it looks weird, the branches
102*4882a593Smuzhiyun         are correct here. -jj
103*4882a593Smuzhiyun */
/*
 * Reached from memcpy's "bne 78b".  On entry %o0 = dst, %o1 = src,
 * %o2 = len, and (dst - src) & 3 == 0, so aligning src aligns dst too.
 * Copies 1, 2 or 3 bytes, adjusts all three registers, and continues at
 * memcpy's label 3 (the "andcc %o1, 4, %g0" insn).
 */
104*4882a593Smuzhiyun78:	/* dword_align */
105*4882a593Smuzhiyun
	/* src even (src & 3 == 2): only the halfword copy at 4 is needed.
	 * The delay slot tests bit 1 of the *unmodified* src; those flags
	 * are consumed by "bne 3f" below. */
106*4882a593Smuzhiyun	andcc		%o1, 1, %g0
107*4882a593Smuzhiyun	be		4f
108*4882a593Smuzhiyun	 andcc		%o1, 2, %g0
109*4882a593Smuzhiyun
	/* src odd: copy one byte */
110*4882a593Smuzhiyun	ldub		[%o1], %g2
111*4882a593Smuzhiyun	add		%o1, 1, %o1
112*4882a593Smuzhiyun	stb		%g2, [%o0]
113*4882a593Smuzhiyun	sub		%o2, 1, %o2
	/* Flags still come from "andcc %o1, 2" above (nothing in between
	 * sets them): bit 1 set means src & 3 was 3 and one byte was
	 * enough; otherwise src & 3 was 1 and a halfword must follow. */
114*4882a593Smuzhiyun	bne		3f
115*4882a593Smuzhiyun	 add		%o0, 1, %o0
116*4882a593Smuzhiyun4:
	/* copy one halfword, then rejoin memcpy */
117*4882a593Smuzhiyun	lduh		[%o1], %g2
118*4882a593Smuzhiyun	add		%o1, 2, %o1
119*4882a593Smuzhiyun	sth		%g2, [%o0]
120*4882a593Smuzhiyun	sub		%o2, 2, %o2
121*4882a593Smuzhiyun	b		3f
122*4882a593Smuzhiyun	 add		%o0, 2, %o0
123*4882a593Smuzhiyun
/*
 * memcpy: %o0 = dst, %o1 = src, %o2 = len.  Returns the original dst in
 * %o0; it is parked in %g7 for the whole routine.
 *
 * Dispatch performed below:
 *   (dst - src) & 3 != 0 -> 86f  non_aligned (shift-and-merge copy)
 *   len <= 15            -> 90f  short_aligned_end
 *   src & 3 != 0         -> 78b  dword_align, which comes back to label 3
 * Otherwise the pointers are mutually word aligned: src is brought to an
 * 8-byte boundary, then 128-byte blocks are copied (ldd/st at label 5, or
 * ldd/std at 82 when dst is 8-byte aligned as well), followed by the
 * 16-byte table and the final 0..15 bytes.
 *
 * memmove enters at labels 9 and 0 with %o4, %g7 and (for 0) the flags
 * already set up.
 */
124*4882a593SmuzhiyunFUNC(memcpy)	/* %o0=dst %o1=src %o2=len */
125*4882a593SmuzhiyunEXPORT_SYMBOL(memcpy)
126*4882a593Smuzhiyun
127*4882a593Smuzhiyun	sub		%o0, %o1, %o4
128*4882a593Smuzhiyun	mov		%o0, %g7
129*4882a593Smuzhiyun9:
	/* flags: are dst and src equally aligned modulo 4? */
130*4882a593Smuzhiyun	andcc		%o4, 3, %o5
131*4882a593Smuzhiyun0:
132*4882a593Smuzhiyun	bne		86f
133*4882a593Smuzhiyun	 cmp		%o2, 15
134*4882a593Smuzhiyun
	/* len <= 15: short path; delay slot tests src word alignment */
135*4882a593Smuzhiyun	bleu		90f
136*4882a593Smuzhiyun	 andcc		%o1, 3, %g0
137*4882a593Smuzhiyun
	/* Label 3 sits on the delay-slot insn of this branch: dword_align
	 * returns by branching to it, so the insn doubles as the first
	 * insn of the aligned path. */
138*4882a593Smuzhiyun	bne		78b
139*4882a593Smuzhiyun3:
140*4882a593Smuzhiyun	 andcc		%o1, 4, %g0
141*4882a593Smuzhiyun
	/* %g1 takes over as the remaining length, because %o2 is used as a
	 * scratch register by the MOVE_BIGCHUNK expansions below. */
142*4882a593Smuzhiyun	be		2f
143*4882a593Smuzhiyun	 mov		%o2, %g1
144*4882a593Smuzhiyun
	/* src & 4 set: copy one word so that src becomes 8-byte aligned */
145*4882a593Smuzhiyun	ld		[%o1], %o4
146*4882a593Smuzhiyun	sub		%g1, 4, %g1
147*4882a593Smuzhiyun	st		%o4, [%o0]
148*4882a593Smuzhiyun	add		%o1, 4, %o1
149*4882a593Smuzhiyun	add		%o0, 4, %o0
150*4882a593Smuzhiyun2:
	/* no bits at or above 128 set in the length: skip the block loops */
151*4882a593Smuzhiyun	andcc		%g1, 0xffffff80, %g0
152*4882a593Smuzhiyun	be		3f
153*4882a593Smuzhiyun	 andcc		%o0, 4, %g0
154*4882a593Smuzhiyun
	/* dst & 4 clear: use the ldd/std loop.  The delay slot is the first
	 * ldd of the MOVE_BIGCHUNK at label 5, identical to the first insn at
	 * 82, hence the branch target 82f + 4 (skip that one insn). */
155*4882a593Smuzhiyun	be		82f + 4
156*4882a593Smuzhiyun5:
	/* 128 bytes per iteration: ldd loads, word stores */
157*4882a593Smuzhiyun	MOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
158*4882a593Smuzhiyun	MOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
159*4882a593Smuzhiyun	MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
160*4882a593Smuzhiyun	MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
161*4882a593Smuzhiyun	sub		%g1, 128, %g1
162*4882a593Smuzhiyun	add		%o1, 128, %o1
163*4882a593Smuzhiyun	cmp		%g1, 128
	/* NOTE(review): bge is a signed test, so a remaining length with
	 * bit 31 set would leave the loop early - presumably such lengths
	 * never occur; confirm against callers. */
164*4882a593Smuzhiyun	bge		5b
165*4882a593Smuzhiyun	 add		%o0, 128, %o0
166*4882a593Smuzhiyun3:
	/* %g4 = number of bytes (multiple of 16, at most 0x70) to copy via
	 * memcpy_table.  The delay slot tests bit 3 of the length; those
	 * flags survive until label 80 because nothing on the way there
	 * writes the condition codes. */
167*4882a593Smuzhiyun	andcc		%g1, 0x70, %g4
168*4882a593Smuzhiyun	be		80f
169*4882a593Smuzhiyun	 andcc		%g1, 8, %g0
170*4882a593Smuzhiyun
	/* Computed jump into memcpy_table: target = 80f - (%g4 + %g4 / 2),
	 * since each MOVE_LASTCHUNK is 24 bytes of code per 16 bytes of
	 * data.  Both pointers are advanced by %g4 first; the table uses
	 * negative offsets from them. */
171*4882a593Smuzhiyun	sethi		%hi(80f), %o5
172*4882a593Smuzhiyun	srl		%g4, 1, %o4
173*4882a593Smuzhiyun	add		%g4, %o4, %o4
174*4882a593Smuzhiyun	add		%o1, %g4, %o1
175*4882a593Smuzhiyun	sub		%o5, %o4, %o5
176*4882a593Smuzhiyun	jmpl		%o5 + %lo(80f), %g0
177*4882a593Smuzhiyun	 add		%o0, %g4, %o0
178*4882a593Smuzhiyun
/*
 * Tail of the ldd/st path.  Entered by a computed jump that lands on one
 * of the seven MOVE_LASTCHUNK expansions, so that exactly
 * (length & 0x70) bytes are copied before label 80.  %o1/%o0 already
 * point past those bytes; %g1 holds the remaining length, of which only
 * bits 3..0 matter from label 80 on.  The flags on arrival at 80 come
 * from "andcc %g1, 8, %g0" executed before the jump.
 */
179*4882a593Smuzhiyun79:	/* memcpy_table */
180*4882a593Smuzhiyun
181*4882a593Smuzhiyun	MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
182*4882a593Smuzhiyun	MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
183*4882a593Smuzhiyun	MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
184*4882a593Smuzhiyun	MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
185*4882a593Smuzhiyun	MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
186*4882a593Smuzhiyun	MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
187*4882a593Smuzhiyun	MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
188*4882a593Smuzhiyun
189*4882a593Smuzhiyun80:	/* memcpy_table_end */
	/* bit 3 of the length clear: no 8-byte piece.  Each delay slot in
	 * this tail tests the next lower length bit for the next branch. */
190*4882a593Smuzhiyun	be		81f
191*4882a593Smuzhiyun	 andcc		%g1, 4, %g0
192*4882a593Smuzhiyun
	/* 8 bytes: doubleword load, two word stores */
193*4882a593Smuzhiyun	ldd		[%o1], %g2
194*4882a593Smuzhiyun	add		%o0, 8, %o0
195*4882a593Smuzhiyun	st		%g2, [%o0 - 0x08]
196*4882a593Smuzhiyun	add		%o1, 8, %o1
197*4882a593Smuzhiyun	st		%g3, [%o0 - 0x04]
198*4882a593Smuzhiyun
	/* Also entered from short_aligned_end with %g1 = len and the flags
	 * of "andcc %o2, 4, %g0". */
199*4882a593Smuzhiyun81:	/* memcpy_last7 */
200*4882a593Smuzhiyun
201*4882a593Smuzhiyun	be		1f
202*4882a593Smuzhiyun	 andcc		%g1, 2, %g0
203*4882a593Smuzhiyun
	/* 4 bytes */
204*4882a593Smuzhiyun	ld		[%o1], %g2
205*4882a593Smuzhiyun	add		%o1, 4, %o1
206*4882a593Smuzhiyun	st		%g2, [%o0]
207*4882a593Smuzhiyun	add		%o0, 4, %o0
208*4882a593Smuzhiyun1:
209*4882a593Smuzhiyun	be		1f
210*4882a593Smuzhiyun	 andcc		%g1, 1, %g0
211*4882a593Smuzhiyun
	/* 2 bytes */
212*4882a593Smuzhiyun	lduh		[%o1], %g2
213*4882a593Smuzhiyun	add		%o1, 2, %o1
214*4882a593Smuzhiyun	sth		%g2, [%o0]
215*4882a593Smuzhiyun	add		%o0, 2, %o0
216*4882a593Smuzhiyun1:
217*4882a593Smuzhiyun	be		1f
218*4882a593Smuzhiyun	 nop
219*4882a593Smuzhiyun
	/* last odd byte */
220*4882a593Smuzhiyun	ldub		[%o1], %g2
221*4882a593Smuzhiyun	stb		%g2, [%o0]
222*4882a593Smuzhiyun1:
	/* return the original dst saved in %g7 */
223*4882a593Smuzhiyun	retl
224*4882a593Smuzhiyun	 mov		%g7, %o0
225*4882a593Smuzhiyun
/*
 * Block loop for the case where memcpy found bit 2 of dst clear after
 * aligning src: 128 bytes per iteration with ldd loads and std stores.
 * %o1 = src, %o0 = dst, %g1 = remaining length, %g7 = value to return.
 * The first entry comes from "be 82f + 4", i.e. it starts at the second
 * insn below; the skipped first ldd was executed in that branch's delay
 * slot.  Loop iterations re-enter at 82 itself.
 */
226*4882a593Smuzhiyun82:	/* ldd_std */
227*4882a593Smuzhiyun	MOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
228*4882a593Smuzhiyun	MOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
229*4882a593Smuzhiyun	MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
230*4882a593Smuzhiyun	MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
	/* the flags written by subcc are overwritten by the cmp below */
231*4882a593Smuzhiyun	subcc		%g1, 128, %g1
232*4882a593Smuzhiyun	add		%o1, 128, %o1
233*4882a593Smuzhiyun	cmp		%g1, 128
234*4882a593Smuzhiyun	bge		82b
235*4882a593Smuzhiyun	 add		%o0, 128, %o0
236*4882a593Smuzhiyun
	/* %g4 = bytes (multiple of 16, at most 0x70) for amemcpy_table; the
	 * delay slot tests length bit 3 for the "be 85f" at label 84 */
237*4882a593Smuzhiyun	andcc		%g1, 0x70, %g4
238*4882a593Smuzhiyun	be		84f
239*4882a593Smuzhiyun	 andcc		%g1, 8, %g0
240*4882a593Smuzhiyun
	/* Computed jump: target = 84f - %g4, because each
	 * MOVE_LASTALIGNCHUNK is 16 bytes of code per 16 bytes of data.
	 * Pointers are advanced by %g4 first. */
241*4882a593Smuzhiyun	sethi		%hi(84f), %o5
242*4882a593Smuzhiyun	add		%o1, %g4, %o1
243*4882a593Smuzhiyun	sub		%o5, %g4, %o5
244*4882a593Smuzhiyun	jmpl		%o5 + %lo(84f), %g0
245*4882a593Smuzhiyun	 add		%o0, %g4, %o0
246*4882a593Smuzhiyun
247*4882a593Smuzhiyun83:	/* amemcpy_table */
248*4882a593Smuzhiyun
249*4882a593Smuzhiyun	MOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
250*4882a593Smuzhiyun	MOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
251*4882a593Smuzhiyun	MOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
252*4882a593Smuzhiyun	MOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
253*4882a593Smuzhiyun	MOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
254*4882a593Smuzhiyun	MOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
255*4882a593Smuzhiyun	MOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
256*4882a593Smuzhiyun
257*4882a593Smuzhiyun84:	/* amemcpy_table_end */
	/* from here on each delay slot tests the next lower length bit */
258*4882a593Smuzhiyun	be		85f
259*4882a593Smuzhiyun	 andcc		%g1, 4, %g0
260*4882a593Smuzhiyun
	/* 8 bytes as one doubleword */
261*4882a593Smuzhiyun	ldd		[%o1], %g2
262*4882a593Smuzhiyun	add		%o0, 8, %o0
263*4882a593Smuzhiyun	std		%g2, [%o0 - 0x08]
264*4882a593Smuzhiyun	add		%o1, 8, %o1
265*4882a593Smuzhiyun85:	/* amemcpy_last7 */
266*4882a593Smuzhiyun	be		1f
267*4882a593Smuzhiyun	 andcc		%g1, 2, %g0
268*4882a593Smuzhiyun
	/* 4 bytes */
269*4882a593Smuzhiyun	ld		[%o1], %g2
270*4882a593Smuzhiyun	add		%o1, 4, %o1
271*4882a593Smuzhiyun	st		%g2, [%o0]
272*4882a593Smuzhiyun	add		%o0, 4, %o0
273*4882a593Smuzhiyun1:
274*4882a593Smuzhiyun	be		1f
275*4882a593Smuzhiyun	 andcc		%g1, 1, %g0
276*4882a593Smuzhiyun
	/* 2 bytes */
277*4882a593Smuzhiyun	lduh		[%o1], %g2
278*4882a593Smuzhiyun	add		%o1, 2, %o1
279*4882a593Smuzhiyun	sth		%g2, [%o0]
280*4882a593Smuzhiyun	add		%o0, 2, %o0
281*4882a593Smuzhiyun1:
282*4882a593Smuzhiyun	be		1f
283*4882a593Smuzhiyun	 nop
284*4882a593Smuzhiyun
	/* last odd byte */
285*4882a593Smuzhiyun	ldub		[%o1], %g2
286*4882a593Smuzhiyun	stb		%g2, [%o0]
287*4882a593Smuzhiyun1:
	/* return the original dst saved in %g7 */
288*4882a593Smuzhiyun	retl
289*4882a593Smuzhiyun	 mov		%g7, %o0
290*4882a593Smuzhiyun
/*
 * Copy for the case (dst - src) & 3 != 0, reached from memcpy's
 * "bne 86f".  %o0 = dst, %o1 = src, %o2 = len, %g7 = value to return.
 *
 * Lengths up to 6 go to the byte-pair table at 88.  Otherwise a new
 * register window is opened (arguments become %i0/%i1/%i2), dst is
 * brought to a word boundary with byte copies, and the bulk is copied by
 * reading aligned source words and combining neighbouring words with
 * shifts:
 *     out = (earlier_word << %g4) | (later_word >> %l0)
 * with %g4 = 8 * (src & 3) and %l0 = 32 - %g4.  The loop at 5/7/8/9
 * stores four words per iteration, rotating the in-flight source words
 * through %i5, %g1, %i3, %i4.
 */
291*4882a593Smuzhiyun86:	/* non_aligned */
292*4882a593Smuzhiyun	cmp		%o2, 6
293*4882a593Smuzhiyun	bleu		88f
294*4882a593Smuzhiyun	 nop
295*4882a593Smuzhiyun
296*4882a593Smuzhiyun	save		%sp, -96, %sp
	/* dst already word aligned: skip to 61.  The delay slots pre-compute
	 * the tests on dst bit 0 and bit 1 for the following branches. */
297*4882a593Smuzhiyun	andcc		%i0, 3, %g0
298*4882a593Smuzhiyun	be		61f
299*4882a593Smuzhiyun	 andcc		%i0, 1, %g0
	/* dst even (dst & 3 == 2): two bytes at 60 are enough */
300*4882a593Smuzhiyun	be		60f
301*4882a593Smuzhiyun	 andcc		%i0, 2, %g0
302*4882a593Smuzhiyun
	/* dst odd: copy one byte */
303*4882a593Smuzhiyun	ldub		[%i1], %g5
304*4882a593Smuzhiyun	add		%i1, 1, %i1
305*4882a593Smuzhiyun	stb		%g5, [%i0]
306*4882a593Smuzhiyun	sub		%i2, 1, %i2
	/* flags are still those of "andcc %i0, 2" on the original dst:
	 * bit 1 set means dst & 3 was 3 and dst is now aligned; otherwise
	 * it was 1 and two more bytes follow */
307*4882a593Smuzhiyun	bne		61f
308*4882a593Smuzhiyun	 add		%i0, 1, %i0
309*4882a593Smuzhiyun60:
	/* copy two bytes individually (src alignment is unknown here) */
310*4882a593Smuzhiyun	ldub		[%i1], %g3
311*4882a593Smuzhiyun	add		%i1, 2, %i1
312*4882a593Smuzhiyun	stb		%g3, [%i0]
313*4882a593Smuzhiyun	sub		%i2, 2, %i2
314*4882a593Smuzhiyun	ldub		[%i1 - 1], %g3
315*4882a593Smuzhiyun	add		%i0, 2, %i0
316*4882a593Smuzhiyun	stb		%g3, [%i0 - 1]
317*4882a593Smuzhiyun61:
	/* dst is word aligned.  Set up:
	 *   %g4 = 8 * (src & 3)   left-shift count
	 *   %l0 = 32 - %g4        right-shift count
	 *   %i1 = src rounded down to a word boundary
	 *   %g3 = len & 0xc       selects where the unrolled loop is entered
	 */
318*4882a593Smuzhiyun	and		%i1, 3, %g2
319*4882a593Smuzhiyun	and		%i2, 0xc, %g3
320*4882a593Smuzhiyun	and		%i1, -4, %i1
321*4882a593Smuzhiyun	cmp		%g3, 4
322*4882a593Smuzhiyun	sll		%g2, 3, %g4
323*4882a593Smuzhiyun	mov		32, %g2
	/* len & 0xc == 4 -> label 4 */
324*4882a593Smuzhiyun	be		4f
325*4882a593Smuzhiyun	 sub		%g2, %g4, %l0
326*4882a593Smuzhiyun
	/* len & 0xc == 0 -> label 3 (flags still from "cmp %g3, 4") */
327*4882a593Smuzhiyun	blu		3f
328*4882a593Smuzhiyun	 cmp		%g3, 0x8
329*4882a593Smuzhiyun
	/* len & 0xc == 8 -> label 2; delay slot: %g3 = len / 4 (words) */
330*4882a593Smuzhiyun	be		2f
331*4882a593Smuzhiyun	 srl		%i2, 2, %g3
332*4882a593Smuzhiyun
	/* len & 0xc == 12: preload two words and enter the loop at 8.
	 * In each entry case %i0/%i1 are biased so that the fixed offsets
	 * used at the entry label address the current position, and %g3 is
	 * biased so that the addcc at label 9 reaches zero when exactly one
	 * word is left for label 10. */
333*4882a593Smuzhiyun	ld		[%i1], %i3
334*4882a593Smuzhiyun	add		%i0, -8, %i0
335*4882a593Smuzhiyun	ld		[%i1 + 4], %i4
336*4882a593Smuzhiyun	b		8f
337*4882a593Smuzhiyun	 add		%g3, 1, %g3
338*4882a593Smuzhiyun2:
	/* len & 0xc == 8: enter the loop at 9 */
339*4882a593Smuzhiyun	ld		[%i1], %i4
340*4882a593Smuzhiyun	add		%i0, -12, %i0
341*4882a593Smuzhiyun	ld		[%i1 + 4], %i5
342*4882a593Smuzhiyun	add		%g3, 2, %g3
343*4882a593Smuzhiyun	b		9f
344*4882a593Smuzhiyun	 add		%i1, -4, %i1
345*4882a593Smuzhiyun3:
	/* len & 0xc == 0: enter the loop at 7 */
346*4882a593Smuzhiyun	ld		[%i1], %g1
347*4882a593Smuzhiyun	add		%i0, -4, %i0
348*4882a593Smuzhiyun	ld		[%i1 + 4], %i3
349*4882a593Smuzhiyun	srl		%i2, 2, %g3
350*4882a593Smuzhiyun	b		7f
351*4882a593Smuzhiyun	 add		%i1, 4, %i1
352*4882a593Smuzhiyun4:
	/* len & 0xc == 4: with len <= 7 only the single word stored at 10
	 * is needed; otherwise enter the loop at its top (label 5) */
353*4882a593Smuzhiyun	ld		[%i1], %i5
354*4882a593Smuzhiyun	cmp		%i2, 7
355*4882a593Smuzhiyun	ld		[%i1 + 4], %g1
356*4882a593Smuzhiyun	srl		%i2, 2, %g3
357*4882a593Smuzhiyun	bleu		10f
358*4882a593Smuzhiyun	 add		%i1, 8, %i1
359*4882a593Smuzhiyun
360*4882a593Smuzhiyun	ld		[%i1], %i3
361*4882a593Smuzhiyun	add		%g3, -1, %g3
362*4882a593Smuzhiyun5:
	/* word 0 of the group: merge %i5 and %g1 */
363*4882a593Smuzhiyun	sll		%i5, %g4, %g2
364*4882a593Smuzhiyun	srl		%g1, %l0, %g5
365*4882a593Smuzhiyun	or		%g2, %g5, %g2
366*4882a593Smuzhiyun	st		%g2, [%i0]
367*4882a593Smuzhiyun7:
	/* word 1: merge %g1 and %i3; prefetch next source word into %i4 */
368*4882a593Smuzhiyun	ld		[%i1 + 4], %i4
369*4882a593Smuzhiyun	sll		%g1, %g4, %g2
370*4882a593Smuzhiyun	srl		%i3, %l0, %g5
371*4882a593Smuzhiyun	or		%g2, %g5, %g2
372*4882a593Smuzhiyun	st		%g2, [%i0 + 4]
373*4882a593Smuzhiyun8:
	/* word 2: merge %i3 and %i4; prefetch into %i5 */
374*4882a593Smuzhiyun	ld		[%i1 + 8], %i5
375*4882a593Smuzhiyun	sll		%i3, %g4, %g2
376*4882a593Smuzhiyun	srl		%i4, %l0, %g5
377*4882a593Smuzhiyun	or		%g2, %g5, %g2
378*4882a593Smuzhiyun	st		%g2, [%i0 + 8]
379*4882a593Smuzhiyun9:
	/* word 3: merge %i4 and %i5; prefetch into %g1; count down %g3 by
	 * four words and advance both pointers by 16 */
380*4882a593Smuzhiyun	ld		[%i1 + 12], %g1
381*4882a593Smuzhiyun	sll		%i4, %g4, %g2
382*4882a593Smuzhiyun	srl		%i5, %l0, %g5
383*4882a593Smuzhiyun	addcc		%g3, -4, %g3
384*4882a593Smuzhiyun	or		%g2, %g5, %g2
385*4882a593Smuzhiyun	add		%i1, 16, %i1
386*4882a593Smuzhiyun	st		%g2, [%i0 + 12]
387*4882a593Smuzhiyun	add		%i0, 16, %i0
	/* annulled branch: the load in the delay slot only executes when
	 * the loop continues */
388*4882a593Smuzhiyun	bne,a		5b
389*4882a593Smuzhiyun	 ld		[%i1], %i3
390*4882a593Smuzhiyun10:
	/* Final full word from %i5/%g1, stored at [%i0].  %i1 is one
	 * aligned word past the last word loaded; stepping it back by
	 * %l0 / 8 = 4 - (src & 3) bytes yields the address of the first
	 * source byte not copied yet.  %i0 is not advanced, so the
	 * remaining bytes go to [%i0 + 4] onwards. */
391*4882a593Smuzhiyun	sll		%i5, %g4, %g2
392*4882a593Smuzhiyun	srl		%g1, %l0, %g5
393*4882a593Smuzhiyun	srl		%l0, 3, %g3
394*4882a593Smuzhiyun	or		%g2, %g5, %g2
395*4882a593Smuzhiyun	sub		%i1, %g3, %i1
396*4882a593Smuzhiyun	andcc		%i2, 2, %g0
397*4882a593Smuzhiyun	st		%g2, [%i0]
	/* length bit 1 clear: no byte pair; delay slot tests bit 0 */
398*4882a593Smuzhiyun	be		1f
399*4882a593Smuzhiyun	 andcc		%i2, 1, %g0
400*4882a593Smuzhiyun
	/* two trailing bytes; %i0 moves on by 2 between the two stores,
	 * which is why the second offset is 3 rather than 5 */
401*4882a593Smuzhiyun	ldub		[%i1], %g2
402*4882a593Smuzhiyun	add		%i1, 2, %i1
403*4882a593Smuzhiyun	stb		%g2, [%i0 + 4]
404*4882a593Smuzhiyun	add		%i0, 2, %i0
405*4882a593Smuzhiyun	ldub		[%i1 - 1], %g2
406*4882a593Smuzhiyun	stb		%g2, [%i0 + 3]
407*4882a593Smuzhiyun1:
	/* flags still from "andcc %i2, 1": skip when no odd byte remains */
408*4882a593Smuzhiyun	be		1f
409*4882a593Smuzhiyun	 nop
410*4882a593Smuzhiyun	ldub		[%i1], %g2
411*4882a593Smuzhiyun	stb		%g2, [%i0 + 4]
412*4882a593Smuzhiyun1:
	/* close the register window; restore writes %g7 + %g0 (the
	 * original dst) into the caller's %o0 */
413*4882a593Smuzhiyun	ret
414*4882a593Smuzhiyun	 restore	%g7, %g0, %o0
415*4882a593Smuzhiyun
/*
 * Byte-wise copy for short lengths.  Entered from non_aligned with
 * len <= 6 and from short_aligned_end with len <= 15.
 * %o0 = dst, %o1 = src, %o2 = len, %g7 = value to return.
 *
 * %o3 = len & 0xe is the even part of the length.  Both pointers are
 * advanced by %o3 and control jumps to 89f - 8 * %o3, i.e. onto the
 * MOVE_SHORTCHUNK expansion that leaves exactly %o3 / 2 byte pairs to
 * run (each expansion is 16 bytes of code for 2 bytes of data).  The
 * delay slot of the jump tests length bit 0 for the odd byte at 89.
 */
416*4882a593Smuzhiyun88:	/* short_end */
417*4882a593Smuzhiyun
418*4882a593Smuzhiyun	and		%o2, 0xe, %o3
419*4882a593Smuzhiyun20:
420*4882a593Smuzhiyun	sethi		%hi(89f), %o5
421*4882a593Smuzhiyun	sll		%o3, 3, %o4
422*4882a593Smuzhiyun	add		%o0, %o3, %o0
423*4882a593Smuzhiyun	sub		%o5, %o4, %o5
424*4882a593Smuzhiyun	add		%o1, %o3, %o1
425*4882a593Smuzhiyun	jmpl		%o5 + %lo(89f), %g0
426*4882a593Smuzhiyun	 andcc		%o2, 1, %g0
427*4882a593Smuzhiyun
	/* seven pairs = up to 14 bytes, addressed backwards from the
	 * advanced pointers */
428*4882a593Smuzhiyun	MOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3)
429*4882a593Smuzhiyun	MOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3)
430*4882a593Smuzhiyun	MOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3)
431*4882a593Smuzhiyun	MOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3)
432*4882a593Smuzhiyun	MOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3)
433*4882a593Smuzhiyun	MOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3)
434*4882a593Smuzhiyun	MOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3)
435*4882a593Smuzhiyun
436*4882a593Smuzhiyun89:	/* short_table_end */
437*4882a593Smuzhiyun
	/* flags from "andcc %o2, 1" (the table insns do not change them):
	 * skip when the length is even */
438*4882a593Smuzhiyun	be		1f
439*4882a593Smuzhiyun	 nop
440*4882a593Smuzhiyun
441*4882a593Smuzhiyun	ldub		[%o1], %g2
442*4882a593Smuzhiyun	stb		%g2, [%o0]
443*4882a593Smuzhiyun1:
	/* return the original dst saved in %g7 */
444*4882a593Smuzhiyun	retl
445*4882a593Smuzhiyun	 mov		%g7, %o0
446*4882a593Smuzhiyun
/*
 * Short copy (len <= 15) for pointers whose difference is a multiple of
 * four.  Entered from memcpy's "bleu 90f" with the flags of
 * "andcc %o1, 3, %g0", %o0 = dst, %o1 = src, %o2 = len.
 *
 * If src is not word aligned the byte-pair table at 88 is used.
 * Otherwise an optional 8-byte piece is copied here with word accesses
 * and the remaining 0..7 bytes are handled by memcpy_last7 (label 81),
 * which expects the length in %g1 and the flags of a test on length
 * bit 2.
 */
447*4882a593Smuzhiyun90:	/* short_aligned_end */
448*4882a593Smuzhiyun	bne		88b
449*4882a593Smuzhiyun	 andcc		%o2, 8, %g0
450*4882a593Smuzhiyun
	/* length bit 3 clear: nothing to copy here; the delay slot tests
	 * bit 2, and no insn up to label 81 modifies the flags again */
451*4882a593Smuzhiyun	be		1f
452*4882a593Smuzhiyun	 andcc		%o2, 4, %g0
453*4882a593Smuzhiyun
	/* 8 bytes as two words */
454*4882a593Smuzhiyun	ld		[%o1 + 0x00], %g2
455*4882a593Smuzhiyun	ld		[%o1 + 0x04], %g3
456*4882a593Smuzhiyun	add		%o1, 8, %o1
457*4882a593Smuzhiyun	st		%g2, [%o0 + 0x00]
458*4882a593Smuzhiyun	st		%g3, [%o0 + 0x04]
459*4882a593Smuzhiyun	add		%o0, 8, %o0
460*4882a593Smuzhiyun1:
	/* continue with the shared tail; delay slot: %g1 = len */
461*4882a593Smuzhiyun	b		81b
462*4882a593Smuzhiyun	 mov		%o2, %g1
463