/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 *    User Space Access Routines
 *
 *    Copyright (C) 2000-2002 Hewlett-Packard (John Marvin)
 *    Copyright (C) 2000 Richard Hirst <rhirst with parisc-linux.org>
 *    Copyright (C) 2001 Matthieu Delahaye <delahaym at esiee.fr>
 *    Copyright (C) 2003 Randolph Chung <tausq with parisc-linux.org>
 *    Copyright (C) 2017 Helge Deller <deller@gmx.de>
 *    Copyright (C) 2017 John David Anglin <dave.anglin@bell.net>
 */

/*
 * These routines still have plenty of room for optimization
 * (word & doubleword load/store, dual issue, store hints, etc.).
 */

/*
 * The following routines assume that space register 3 (sr3) contains
 * the space id associated with the current user's address space.
 */


	.text

#include <asm/assembly.h>
#include <asm/errno.h>
#include <linux/linkage.h>

	/*
	 * get_sr gets the appropriate space value into
	 * sr1 for kernel/user space access, depending
	 * on the flag stored in the task structure.
	 */

	.macro  get_sr
	mfctl       %cr30,%r1
	ldw         TI_SEGMENT(%r1),%r22
	mfsp        %sr3,%r1
	or,<>       %r22,%r0,%r0
	copy        %r0,%r1
	mtsp        %r1,%sr1
	.endm
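
	/*
	 * A hedged C-level sketch of the selection above (not the
	 * implementation itself):
	 *
	 *   sr1 = (thread_info->segment != 0) ? sr3  // user space id
	 *                                     : 0;   // kernel space
	 *
	 * The or,<> nullifies the following copy when the TI_SEGMENT flag
	 * read from the thread info (via %cr30) is non-zero, so sr1 keeps
	 * the sr3 value for user accesses and is cleared to space 0 for
	 * kernel accesses.
	 */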

	/*
	 * unsigned long lclear_user(void *to, unsigned long n)
	 *
	 * Returns 0 for success;
	 * otherwise, returns the number of bytes not transferred.
	 */

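	/*
	 * A rough C equivalent (a sketch, assuming a hypothetical
	 * put_user_byte() helper that fails on a fault; the real fixup is
	 * the exception table entry below):
	 *
	 *   unsigned long lclear_user(void *to, unsigned long n)
	 *   {
	 *           char *p = to;
	 *           while (n--)
	 *                   if (put_user_byte(p++, 0))  // hypothetical
	 *                           return n + 1;       // bytes not cleared
	 *           return 0;
	 *   }
	 */
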
ENTRY_CFI(lclear_user)
	comib,=,n   0,%r25,$lclu_done
	get_sr
$lclu_loop:
	addib,<>    -1,%r25,$lclu_loop
1:      stbs,ma     %r0,1(%sr1,%r26)

$lclu_done:
	bv          %r0(%r2)
	copy        %r25,%r28

2:	b           $lclu_done
	ldo         1(%r25),%r25

	ASM_EXCEPTIONTABLE_ENTRY(1b,2b)
ENDPROC_CFI(lclear_user)


	/*
	 * long lstrnlen_user(char *s, long n)
	 *
	 * Returns 0 if an exception occurs before the zero byte or N is reached,
	 *         N+1 if N would be exceeded,
	 *         else strlen + 1 (i.e. including the zero byte).
	 */

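	/*
	 * A rough C equivalent (a sketch; get_user_byte() is a hypothetical
	 * helper that fails on a fault):
	 *
	 *   long lstrnlen_user(char *s, long n)
	 *   {
	 *           for (long i = 0; i < n; i++) {
	 *                   char c;
	 *                   if (get_user_byte(s + i, &c))
	 *                           return 0;        // faulted
	 *                   if (c == 0)
	 *                           return i + 1;    // strlen + 1
	 *           }
	 *           return n + 1;                    // N would be exceeded
	 *   }
	 */
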
ENTRY_CFI(lstrnlen_user)
	comib,=     0,%r25,$lslen_nzero
	copy	    %r26,%r24
	get_sr
1:      ldbs,ma     1(%sr1,%r26),%r1
$lslen_loop:
	comib,=,n   0,%r1,$lslen_done
	addib,<>    -1,%r25,$lslen_loop
2:      ldbs,ma     1(%sr1,%r26),%r1
$lslen_done:
	bv          %r0(%r2)
	sub	    %r26,%r24,%r28

$lslen_nzero:
	b           $lslen_done
	ldo         1(%r26),%r26 /* special case for N == 0 */

3:      b	    $lslen_done
	copy        %r24,%r26    /* reset r26 so 0 is returned on fault */

	ASM_EXCEPTIONTABLE_ENTRY(1b,3b)
	ASM_EXCEPTIONTABLE_ENTRY(2b,3b)

ENDPROC_CFI(lstrnlen_user)


/*
 * unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len)
 *
 * Inputs:
 * - sr1 already contains space of source region
 * - sr2 already contains space of destination region
 *
 * Returns:
 * - number of bytes that could not be copied.
 *   On success, this will be zero.
 *
 * This code is based on a C implementation of a copy routine written by
 * Randolph Chung, which in turn was derived from glibc.
 *
 * Several strategies are used to get the best performance for various
 * conditions. In the optimal case, we copy using loops that move 32 or 16
 * bytes at a time using general registers.  Unaligned copies are handled
 * either by aligning the destination and then using a shift-and-write
 * method, or in a few cases by falling back to a byte-at-a-time copy.
 *
 * Testing with various alignments and buffer sizes shows that this code is
 * often >10x faster than a simple byte-at-a-time copy, even for strangely
 * aligned operands. It is interesting to note that the glibc version of
 * memcpy (written in C) is actually quite fast already. This routine is able
 * to beat it by 30-40% for aligned copies because of the loop unrolling, but
 * in some cases the glibc version is still slightly faster. This lends
 * credibility to the claim that gcc can generate very good code as long as
 * we are careful.
 *
 * Possible optimizations:
 * - add cache prefetching
 * - try not to use the post-increment address modifiers; they may create
 *   additional interlocks. The assumption is that these were only efficient
 *   on old machines (pre-PA8000 processors).
 */

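/*
 * A hedged C-level sketch of the dispatch below (fault handling through
 * the exception tables is omitted; byte_copy/shift_copy/aligned_copy are
 * just names for the .Lbyte_loop, .Lunaligned_copy and
 * .Lcopy_loop_16/.Lcopy_loop_8 paths):
 *
 *   unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len)
 *   {
 *           if (len < 16)
 *                   return byte_copy(dstp, srcp, len);
 *           if (((unsigned long)dstp ^ (unsigned long)srcp) & 3)
 *                   return shift_copy(dstp, srcp, len);
 *           // equal word alignment: align dst byte by byte, then move
 *           // 16 bytes (32 with 64-bit loads) per iteration and finish
 *           // the tail in the word and byte loops
 *           return aligned_copy(dstp, srcp, len);
 *   }
 */
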
	dst = arg0
	src = arg1
	len = arg2
	end = arg3
	t1  = r19
	t2  = r20
	t3  = r21
	t4  = r22
	srcspc = sr1
	dstspc = sr2

	t0 = r1
	a1 = t1
	a2 = t2
	a3 = t3
	a0 = t4

	save_src = ret0
	save_dst = ret1
	save_len = r31

ENTRY_CFI(pa_memcpy)
	/* end = dst + len: first address past the destination */
	add	dst,len,end

	/* short copy with less than 16 bytes? */
	cmpib,COND(>>=),n 15,len,.Lbyte_loop

	/* same alignment? */
	xor	src,dst,t0
	extru	t0,31,2,t1
	cmpib,<>,n  0,t1,.Lunaligned_copy

#ifdef CONFIG_64BIT
	/* only do 64-bit copies if we can get aligned. */
	extru	t0,31,3,t1
	cmpib,<>,n  0,t1,.Lalign_loop32

	/* loop until we are 64-bit aligned */
.Lalign_loop64:
	extru	dst,31,3,t1
	cmpib,=,n	0,t1,.Lcopy_loop_16_start
20:	ldb,ma	1(srcspc,src),t1
21:	stb,ma	t1,1(dstspc,dst)
	b	.Lalign_loop64
	ldo	-1(len),len

	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)

.Lcopy_loop_16_start:
	ldi	31,t0
.Lcopy_loop_16:
	cmpb,COND(>>=),n t0,len,.Lword_loop

10:	ldd	0(srcspc,src),t1
11:	ldd	8(srcspc,src),t2
	ldo	16(src),src
12:	std,ma	t1,8(dstspc,dst)
13:	std,ma	t2,8(dstspc,dst)
14:	ldd	0(srcspc,src),t1
15:	ldd	8(srcspc,src),t2
	ldo	16(src),src
16:	std,ma	t1,8(dstspc,dst)
17:	std,ma	t2,8(dstspc,dst)

	ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy16_fault)
	ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy16_fault)
	ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done)

	b	.Lcopy_loop_16
	ldo	-32(len),len

.Lword_loop:
	cmpib,COND(>>=),n 3,len,.Lbyte_loop
20:	ldw,ma	4(srcspc,src),t1
21:	stw,ma	t1,4(dstspc,dst)
	b	.Lword_loop
	ldo	-4(len),len

	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)

#endif /* CONFIG_64BIT */

	/* loop until we are 32-bit aligned */
.Lalign_loop32:
	extru	dst,31,2,t1
	cmpib,=,n	0,t1,.Lcopy_loop_8
20:	ldb,ma	1(srcspc,src),t1
21:	stb,ma	t1,1(dstspc,dst)
	b	.Lalign_loop32
	ldo	-1(len),len

	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)


.Lcopy_loop_8:
	cmpib,COND(>>=),n 15,len,.Lbyte_loop

10:	ldw	0(srcspc,src),t1
11:	ldw	4(srcspc,src),t2
12:	stw,ma	t1,4(dstspc,dst)
13:	stw,ma	t2,4(dstspc,dst)
14:	ldw	8(srcspc,src),t1
15:	ldw	12(srcspc,src),t2
	ldo	16(src),src
16:	stw,ma	t1,4(dstspc,dst)
17:	stw,ma	t2,4(dstspc,dst)

	ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy8_fault)
	ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy8_fault)
	ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done)

	b	.Lcopy_loop_8
	ldo	-16(len),len

.Lbyte_loop:
	cmpclr,COND(<>) len,%r0,%r0
	b,n	.Lcopy_done
20:	ldb	0(srcspc,src),t1
	ldo	1(src),src
21:	stb,ma	t1,1(dstspc,dst)
	b	.Lbyte_loop
	ldo	-1(len),len

	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)

.Lcopy_done:
	bv	%r0(%r2)
	sub	end,dst,ret0


	/* src and dst are not aligned the same way; */
	/* we need to go the hard way */
.Lunaligned_copy:
	/* align until dst is 32-bit word-aligned */
	extru	dst,31,2,t1
	cmpib,=,n	0,t1,.Lcopy_dstaligned
20:	ldb	0(srcspc,src),t1
	ldo	1(src),src
21:	stb,ma	t1,1(dstspc,dst)
	b	.Lunaligned_copy
	ldo	-1(len),len

	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)

.Lcopy_dstaligned:

	/* store src, dst and len in a safe place */
	copy	src,save_src
	copy	dst,save_dst
	copy	len,save_len

	/* len now needs to hold the number of words to copy */
	SHRREG	len,2,len

	/*
	 * Copy from an unaligned src to an aligned dst using shifts.
	 * Handles 4 words per loop.
	 */

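	/*
	 * Hedged sketch of the shift-and-merge idea used below: SAR is set
	 * to 32 minus the bit offset of src within its word, src is rounded
	 * down to a word boundary, and each shrpw concatenates two adjacent
	 * source words and extracts one aligned 32-bit window.  Roughly,
	 * with sh = 8 * (src & 3) and a big-endian view:
	 *
	 *   dst_word[i] = (src_word[i] << sh) | (src_word[i + 1] >> (32 - sh));
	 */
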
	depw,z src,28,2,t0
	subi 32,t0,t0
	mtsar t0
	extru len,31,2,t0
	cmpib,= 2,t0,.Lcase2
	/* Make src aligned by rounding it down.  */
	depi 0,31,2,src

	cmpiclr,<> 3,t0,%r0
	b,n .Lcase3
	cmpiclr,<> 1,t0,%r0
	b,n .Lcase1
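
	/*
	 * The cases below form an unrolled software pipeline keyed on
	 * len % 4: each case preloads one or two source words into the
	 * a0..a3 rotation and biases len so that the shared .Ldo4...Ldo1
	 * loop, which stores four words and subtracts 4 from len per
	 * iteration, terminates with the correct count.
	 */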
.Lcase0:
	cmpb,COND(=) %r0,len,.Lcda_finish
	nop

1:	ldw,ma 4(srcspc,src), a3
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:	ldw,ma 4(srcspc,src), a0
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	b,n .Ldo3
.Lcase1:
1:	ldw,ma 4(srcspc,src), a2
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:	ldw,ma 4(srcspc,src), a3
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	ldo -1(len),len
	cmpb,COND(=),n %r0,len,.Ldo0
.Ldo4:
1:	ldw,ma 4(srcspc,src), a0
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	shrpw a2, a3, %sar, t0
1:	stw,ma t0, 4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
.Ldo3:
1:	ldw,ma 4(srcspc,src), a1
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	shrpw a3, a0, %sar, t0
1:	stw,ma t0, 4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
.Ldo2:
1:	ldw,ma 4(srcspc,src), a2
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	shrpw a0, a1, %sar, t0
1:	stw,ma t0, 4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
.Ldo1:
1:	ldw,ma 4(srcspc,src), a3
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	shrpw a1, a2, %sar, t0
1:	stw,ma t0, 4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
	ldo -4(len),len
	cmpb,COND(<>) %r0,len,.Ldo4
	nop
.Ldo0:
	shrpw a2, a3, %sar, t0
1:	stw,ma t0, 4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)

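	/*
	 * Read faults (and normal completion) land here.  dst only advances
	 * when a word has actually been stored, so dst - save_dst is the
	 * number of bytes already copied; src and len are recomputed from
	 * that delta and the remainder is handled (or the fault re-taken)
	 * by the byte-at-a-time loop.
	 */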
.Lcda_rdfault:
.Lcda_finish:
	/* calculate new src, dst and len and jump to the byte-copy loop */
	sub	dst,save_dst,t0
	add	save_src,t0,src
	b	.Lbyte_loop
	sub	save_len,t0,len

.Lcase3:
1:	ldw,ma 4(srcspc,src), a0
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:	ldw,ma 4(srcspc,src), a1
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	b .Ldo2
	ldo 1(len),len
.Lcase2:
1:	ldw,ma 4(srcspc,src), a1
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:	ldw,ma 4(srcspc,src), a2
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	b .Ldo1
	ldo 2(len),len


	/* fault exception fixup handlers: */
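	/*
	 * These run when the second load of a pair faults (labels 11 and 15
	 * above): the first value is still valid in t1, so it is stored on
	 * the way to .Lcopy_done, keeping the returned end - dst count exact.
	 */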
#ifdef CONFIG_64BIT
.Lcopy16_fault:
	b	.Lcopy_done
10:	std,ma	t1,8(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
#endif

.Lcopy8_fault:
	b	.Lcopy_done
10:	stw,ma	t1,4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
ENDPROC_CFI(pa_memcpy)

	.end