/* xref: /OK3568_Linux_fs/kernel/arch/powerpc/lib/copy_mc_64.S (revision 4882a59341e53eb6f0b4789bf948001014eff981) */
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) IBM Corporation, 2011
 * Derived from copyuser_power7.s by Anton Blanchard <anton@au.ibm.com>
 * Author - Balbir Singh <bsingharora@gmail.com>
 */
#include <asm/ppc_asm.h>
#include <asm/errno.h>
#include <asm/export.h>

/*
 * err1 - tag the next instruction with the .Ldo_err1 fixup.
 *
 * Written as a statement prefix ("err1; lbz r0,0(r4)"): local label 100
 * lands on the instruction that follows on the same line, and that address
 * is handed to EX_TABLE() together with the handler to continue at.
 * EX_TABLE() is not defined in this file (presumably it comes from
 * <asm/ppc_asm.h> and emits an exception-table entry - confirm there).
 *
 * Use this variant on accesses made while copy_mc_generic's stack frame is
 * not live; .Ldo_err1 does not pop a frame.
 */
	.macro err1
100:
	EX_TABLE(100b,.Ldo_err1)
	.endm

/*
 * err2 - tag the next instruction with the .Ldo_err2 fixup.
 *
 * Same usage as a statement prefix ("err2; ld r0,0(r4)"): local label 200
 * marks the instruction that follows and is passed to EX_TABLE() (defined
 * outside this file) with .Ldo_err2 as the handler.
 *
 * Use this variant on accesses made while the stack frame holding r14-r22
 * is live; .Ldo_err2 restores those registers and pops the frame before
 * continuing with the common fixup.
 */
	.macro err2
200:
	EX_TABLE(200b,.Ldo_err2)
	.endm

/*
 * err3 - tag the next instruction with the .Ldone fixup.
 *
 * Used only inside the byte-by-byte retry loop: local label 300 marks the
 * instruction that follows and is passed to EX_TABLE() (defined outside
 * this file) with .Ldone as the handler, which reports how many bytes were
 * left when the retry itself faulted.
 */
	.macro err3
300:
	EX_TABLE(300b,.Ldone)
	.endm

/*
 * Fixup code named by the err1/err2 exception-table tags.
 *
 * .Ldo_err2: the tagged access sat inside the window where the stack frame
 *            is live, so reload r14-r22 from it and pop the frame, then
 *            fall through into .Ldo_err1.
 * .Ldo_err1: retry the rest of the copy one byte at a time.
 *            In:  r3 = destination pointer, r4 = source pointer,
 *                 r7 = number of bytes not yet counted as copied.
 *            Out: r3 = 0 if the byte loop runs to completion; if a byte
 *                 access is diverted by its err3 tag, .Ldone supplies the
 *                 return value instead.
 */
.Ldo_err2:
	ld	r22,STK_REG(R22)(r1)
	ld	r21,STK_REG(R21)(r1)
	ld	r20,STK_REG(R20)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r14,STK_REG(R14)(r1)
	addi	r1,r1,STACKFRAMESIZE
.Ldo_err1:
	/* Do a byte by byte copy to get the exact remaining size */
	mtctr	r7			/* CTR = bytes still to copy */
46:
err3;	lbz	r0,0(r4)
	addi	r4,r4,1
err3;	stb	r0,0(r3)
	addi	r3,r3,1
	bdnz	46b			/* CTR drops only after a byte is stored */
	li	r3,0			/* everything copied after all */
	blr

/*
 * .Ldone - fixup named by the err3 tags in the byte-by-byte retry loop.
 *
 * The loop decrements CTR (bdnz) only after a byte has been both loaded and
 * stored, so CTR here is the number of bytes that were not copied, including
 * the one whose access was diverted.  Return that count in r3.
 */
.Ldone:
	mfctr	r3
	blr


/*
 * copy_mc_generic - copy r5 bytes from the buffer at r4 to the buffer at r3,
 * with every load and store tagged in the exception table so that a
 * faulting access is diverted to the fixup code instead of being fatal.
 *
 * In:   r3 = destination pointer
 *       r4 = source pointer
 *       r5 = number of bytes to copy
 * Out:  r3 = 0 when all bytes were copied.  When a tagged access is
 *       diverted, the fixup code retries byte by byte and r3 ends up as the
 *       number of bytes that could not be copied (0 if the retry succeeds).
 * Uses: r0, r5-r12, CTR, cr0 and cr7 as scratch.  r14-r22 are used by the
 *       128-byte and 64-byte stages and are saved/restored around them in a
 *       temporary stack frame.
 *
 * r7 is the running "bytes still to copy" count handed to the fixup code:
 * it starts as a copy of r5 and is decremented after each completed chunk.
 *
 * NOTE(review): presumably called from C with the usual 64-bit PowerPC
 * argument registers (r3-r5) - confirm against the C prototype.
 *
 * NOTE(review): each chunk advances r4 before its stores are issued, so if
 * a store (rather than a load) is the access that faults, the fixup starts
 * with r4 already past the chunk while r3 and r7 are not.  Confirm that only
 * loads are expected to fault on this path.
 */
_GLOBAL(copy_mc_generic)
	mr	r7,r5			/* r7 = bytes still to copy */
	cmpldi	r5,16
	blt	.Lshort_copy		/* under 16 bytes: tail code only */

.Lcopy:
	/* Get the source 8B aligned */
	neg	r6,r4
	mtocrf	0x01,r6			/* cr7 = low 4 bits of -source */
	clrldi	r6,r6,(64-3)		/* r6 = bytes needed to align (0-7) */

	bf	cr7*4+3,1f		/* 1 byte needed? */
err1;	lbz	r0,0(r4)
	addi	r4,r4,1
err1;	stb	r0,0(r3)
	addi	r3,r3,1
	subi	r7,r7,1

1:	bf	cr7*4+2,2f		/* 2 bytes needed? */
err1;	lhz	r0,0(r4)
	addi	r4,r4,2
err1;	sth	r0,0(r3)
	addi	r3,r3,2
	subi	r7,r7,2

2:	bf	cr7*4+1,3f		/* 4 bytes needed? */
err1;	lwz	r0,0(r4)
	addi	r4,r4,4
err1;	stw	r0,0(r3)
	addi	r3,r3,4
	subi	r7,r7,4

3:	sub	r5,r5,r6		/* r5 = bytes left after alignment */
	cmpldi	r5,128			/* cr0 is consumed by the blt below */

	/*
	 * Open a stack frame and save r14-r22 for the wide stages; from here
	 * until the frame is popped at label 7, accesses are tagged err2.
	 * None of these instructions modify cr0.  LR is stored 16 bytes into
	 * the caller's frame but is not reloaded anywhere in this file (the
	 * function makes no calls).
	 */
	mflr	r0
	stdu	r1,-STACKFRAMESIZE(r1)
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)
	std	r17,STK_REG(R17)(r1)
	std	r18,STK_REG(R18)(r1)
	std	r19,STK_REG(R19)(r1)
	std	r20,STK_REG(R20)(r1)
	std	r21,STK_REG(R21)(r1)
	std	r22,STK_REG(R22)(r1)
	std	r0,STACKFRAMESIZE+16(r1)

	blt	5f			/* fewer than 128 bytes left */
	srdi	r6,r5,7
	mtctr	r6			/* CTR = number of 128B blocks */

	/* Now do cacheline (128B) sized loads and stores. */
	.align	5
4:
err2;	ld	r0,0(r4)
err2;	ld	r6,8(r4)
err2;	ld	r8,16(r4)
err2;	ld	r9,24(r4)
err2;	ld	r10,32(r4)
err2;	ld	r11,40(r4)
err2;	ld	r12,48(r4)
err2;	ld	r14,56(r4)
err2;	ld	r15,64(r4)
err2;	ld	r16,72(r4)
err2;	ld	r17,80(r4)
err2;	ld	r18,88(r4)
err2;	ld	r19,96(r4)
err2;	ld	r20,104(r4)
err2;	ld	r21,112(r4)
err2;	ld	r22,120(r4)
	addi	r4,r4,128
err2;	std	r0,0(r3)
err2;	std	r6,8(r3)
err2;	std	r8,16(r3)
err2;	std	r9,24(r3)
err2;	std	r10,32(r3)
err2;	std	r11,40(r3)
err2;	std	r12,48(r3)
err2;	std	r14,56(r3)
err2;	std	r15,64(r3)
err2;	std	r16,72(r3)
err2;	std	r17,80(r3)
err2;	std	r18,88(r3)
err2;	std	r19,96(r3)
err2;	std	r20,104(r3)
err2;	std	r21,112(r3)
err2;	std	r22,120(r3)
	addi	r3,r3,128
	subi	r7,r7,128
	bdnz	4b

	clrldi	r5,r5,(64-7)		/* r5 = leftover below 128 */

	/* Up to 127B to go */
5:	srdi	r6,r5,4
	mtocrf	0x01,r6			/* cr7 bits select the 64/32/16B chunks */

6:	bf	cr7*4+1,7f		/* 64 bytes? */
err2;	ld	r0,0(r4)
err2;	ld	r6,8(r4)
err2;	ld	r8,16(r4)
err2;	ld	r9,24(r4)
err2;	ld	r10,32(r4)
err2;	ld	r11,40(r4)
err2;	ld	r12,48(r4)
err2;	ld	r14,56(r4)
	addi	r4,r4,64
err2;	std	r0,0(r3)
err2;	std	r6,8(r3)
err2;	std	r8,16(r3)
err2;	std	r9,24(r3)
err2;	std	r10,32(r3)
err2;	std	r11,40(r3)
err2;	std	r12,48(r3)
err2;	std	r14,56(r3)
	addi	r3,r3,64
	subi	r7,r7,64

	/* Wide stages done: restore r14-r22 and pop the frame (err1 from here) */
7:	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r20,STK_REG(R20)(r1)
	ld	r21,STK_REG(R21)(r1)
	ld	r22,STK_REG(R22)(r1)
	addi	r1,r1,STACKFRAMESIZE

	/* Up to 63B to go */
	bf	cr7*4+2,8f		/* 32 bytes? */
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
err1;	ld	r8,16(r4)
err1;	ld	r9,24(r4)
	addi	r4,r4,32
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
err1;	std	r8,16(r3)
err1;	std	r9,24(r3)
	addi	r3,r3,32
	subi	r7,r7,32

	/* Up to 31B to go */
8:	bf	cr7*4+3,9f		/* 16 bytes? */
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
	addi	r4,r4,16
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
	addi	r3,r3,16
	subi	r7,r7,16

9:	clrldi	r5,r5,(64-4)		/* r5 = leftover below 16 */

	/* Up to 15B to go */
.Lshort_copy:
	mtocrf	0x01,r5			/* cr7 bits select the 8/4/2/1B chunks */
	bf	cr7*4+0,12f		/* 8 bytes? */
err1;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
err1;	lwz	r6,4(r4)
	addi	r4,r4,8
err1;	stw	r0,0(r3)
err1;	stw	r6,4(r3)
	addi	r3,r3,8
	subi	r7,r7,8

12:	bf	cr7*4+1,13f		/* 4 bytes? */
err1;	lwz	r0,0(r4)
	addi	r4,r4,4
err1;	stw	r0,0(r3)
	addi	r3,r3,4
	subi	r7,r7,4

13:	bf	cr7*4+2,14f		/* 2 bytes? */
err1;	lhz	r0,0(r4)
	addi	r4,r4,2
err1;	sth	r0,0(r3)
	addi	r3,r3,2
	subi	r7,r7,2

14:	bf	cr7*4+3,15f		/* last byte? */
err1;	lbz	r0,0(r4)
err1;	stb	r0,0(r3)

15:	li	r3,0			/* success: nothing left uncopied */
	blr

EXPORT_SYMBOL_GPL(copy_mc_generic);