xref: /OK3568_Linux_fs/kernel/arch/powerpc/lib/string_64.S (revision 4882a59341e53eb6f0b4789bf948001014eff981)
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 *
 * Copyright (C) IBM Corporation, 2012
 *
 * Author: Anton Blanchard <anton@au.ibm.com>
 */
8*4882a593Smuzhiyun
#include <asm/ppc_asm.h>
#include <asm/linkage.h>
#include <asm/asm-offsets.h>
#include <asm/export.h>

/*
 * TOC entry holding the address of ppc64_caches.  The long-clear path of
 * __arch_clear_user loads it with "ld r5,PPC64_CACHES@toc(r2)" to read the
 * L1 data cache block size fields.
 */
	.section	".toc","aw"
PPC64_CACHES:
	.tc		ppc64_caches[TC],ppc64_caches
	.section	".text"
18*4882a593Smuzhiyun
/**
 * __arch_clear_user: - Zero a block of memory in user space, with less checking.
 * @to:   Destination address, in user space.
 * @n:    Number of bytes to zero.
 *
 * Zero a block of memory in user space.  Caller must check
 * the specified block with access_ok() before calling this function.
 *
 * Returns number of bytes that could not be cleared.
 * On success, this will be zero.
 */
30*4882a593Smuzhiyun
/*
 * Fault annotation macros.
 *
 * Written as a prefix on the same source line as a user-space access, e.g.
 *
 *	err1;	stb	r0,0(r3)
 *
 * Each macro drops a numeric local label at the current location (i.e. on
 * the instruction that follows) and passes that label together with a fixup
 * label to EX_TABLE(), so that a fault on the instruction resumes at the
 * fixup:
 *
 *	err1 -> .Ldo_err1
 *	err2 -> .Ldo_err2
 *	err3 -> .Ldo_err3
 */
	.macro err1
100:
	EX_TABLE(100b,.Ldo_err1)
	.endm

	.macro err2
200:
	EX_TABLE(200b,.Ldo_err2)
	.endm

	.macro err3
300:
	EX_TABLE(300b,.Ldo_err3)
	.endm
45*4882a593Smuzhiyun
/*
 * Fault fixups for __arch_clear_user.  The three entry points fall through
 * into one another:
 *
 * .Ldo_err1: entered from err1-annotated accesses.  r8 holds the address
 *            from which r4 bytes are still outstanding; r3 may already have
 *            moved past it, so r3 is reset to r8 first.
 * .Ldo_err2: entered from err2-annotated accesses, where r3/r4 already
 *            describe the outstanding range.  Retries that range with byte
 *            stores, decrementing r4 after every byte that succeeds.
 * .Ldo_err3: entered when a byte store in the retry loop faults, or by
 *            falling out of the loop.  Returns r4, the number of bytes that
 *            were not cleared.
 *
 * r0 is expected to still hold zero (the value being stored).
 */
.Ldo_err1:
	mr	r3,r8		/* rewind to the saved restart address */

.Ldo_err2:
	mtctr	r4		/* one loop iteration per outstanding byte */
1:
err3;	stb	r0,0(r3)
	addi	r3,r3,1
	addi	r4,r4,-1	/* this byte is done */
	bdnz	1b

.Ldo_err3:
	mr	r3,r4		/* return value: bytes left uncleared */
	blr
60*4882a593Smuzhiyun
/*
 * __arch_clear_user(to = r3, n = r4)
 *
 * Stores zeroes to the n bytes starting at to.  Returns 0 in r3 when the
 * whole range was written; on a fault the err1/err2 fixups take over and
 * return the number of bytes left uncleared.
 *
 * Register roles inside this routine:
 *   r0   zero, the value stored
 *   r3   current destination address
 *   r4   bytes still to clear
 *   r5   address of ppc64_caches, then bytes needed to reach block alignment
 *   r6   scratch: alignment byte count / loop trip counts
 *   r7   value loaded from DCACHEL1LOGBLOCKSIZE (shift count for block count)
 *   r8   restart address consumed by .Ldo_err1; must be refreshed before
 *        every group of err1-annotated accesses
 *   r9   value loaded from DCACHEL1BLOCKSIZE (dcbz stride)
 *   r10  scratch: 4 * r9, then r9 - 1 (block offset mask)
 *   cr7  low four bits of a byte count, tested one bit per store size
 *   ctr  loop counter
 *
 * Strategy: n < 32 goes straight to the tail code; otherwise the destination
 * is 8 byte aligned first, then up to 512 remaining bytes are cleared in 32
 * byte chunks, and larger ranges are cleared a cache block at a time with
 * dcbz when at least four blocks remain.
 */
_GLOBAL_TOC(__arch_clear_user)
	cmpdi	r4,32
	neg	r6,r3			/* low bits of -to = bytes up to alignment */
	li	r0,0
	blt	.Lshort_clear
	mr	r8,r3			/* err1 restart address for the head stores */
	mtocrf	0x01,r6			/* cr7 = low 4 bits of -to */
	clrldi	r6,r6,(64-3)		/* r6 = bytes needed for 8 byte alignment */

	/* Get the destination 8 byte aligned */
	bf	cr7*4+3,1f		/* bit value 1 */
err1;	stb	r0,0(r3)
	addi	r3,r3,1

1:	bf	cr7*4+2,2f		/* bit value 2 */
err1;	sth	r0,0(r3)
	addi	r3,r3,2

2:	bf	cr7*4+1,3f		/* bit value 4 */
err1;	stw	r0,0(r3)
	addi	r3,r3,4

3:	sub	r4,r4,r6		/* account for the alignment bytes in one go */

	cmpdi	r4,32
	cmpdi	cr1,r4,512
	blt	.Lshort_clear
	bgt	cr1,.Llong_clear

.Lmedium_clear:
	srdi	r6,r4,5			/* number of 32 byte chunks (callers ensure r4 >= 32) */
	mtctr	r6

	/* Do 32 byte chunks */
4:
err2;	std	r0,0(r3)
err2;	std	r0,8(r3)
err2;	std	r0,16(r3)
err2;	std	r0,24(r3)
	addi	r3,r3,32
	addi	r4,r4,-32
	bdnz	4b

.Lshort_clear:
	/* up to 31 bytes to go */
	cmpdi	r4,16
	blt	6f
err2;	std	r0,0(r3)
err2;	std	r0,8(r3)
	addi	r3,r3,16
	addi	r4,r4,-16

	/* Up to 15 bytes to go */
6:	mr	r8,r3			/* err1 restart address for the tail stores */
	clrldi	r4,r4,(64-4)		/* keep only the low 4 bits of the count */
	mtocrf	0x01,r4			/* cr7 = remaining count, one bit per store size */
	bf	cr7*4+0,7f		/* bit value 8 */
err1;	std	r0,0(r3)
	addi	r3,r3,8

7:	bf	cr7*4+1,8f		/* bit value 4 */
err1;	stw	r0,0(r3)
	addi	r3,r3,4

8:	bf	cr7*4+2,9f		/* bit value 2 */
err1;	sth	r0,0(r3)
	addi	r3,r3,2

9:	bf	cr7*4+3,10f		/* bit value 1 */
err1;	stb	r0,0(r3)

10:	li	r3,0			/* success: nothing left uncleared */
	blr

.Llong_clear:
	ld	r5,PPC64_CACHES@toc(r2)	/* r5 = &ppc64_caches via the TOC */

	/*
	 * cr7 still holds the low bits of -to from entry: the bit with value
	 * 8 says whether one more doubleword is needed for 16 byte alignment.
	 */
	bf	cr7*4+0,11f
err2;	std	r0,0(r3)
	addi	r3,r3,8
	addi	r4,r4,-8

	/* Destination is 16 byte aligned, need to get it cache block aligned */
11:	lwz	r7,DCACHEL1LOGBLOCKSIZE(r5)
	lwz	r9,DCACHEL1BLOCKSIZE(r5)

	/*
	 * With worst case alignment the long clear loop takes a minimum
	 * of 1 byte less than 2 cachelines.
	 *
	 * The threshold actually applied below is 4 * block size (r9 << 2);
	 * anything smaller is handled by the 32 byte chunk loop instead.
	 */
	sldi	r10,r9,2
	cmpd	r4,r10
	blt	.Lmedium_clear

	neg	r6,r3
	addi	r10,r9,-1		/* r10 = block size - 1, offset mask */
	and.	r5,r6,r10		/* r5 = bytes up to the next block boundary */
	beq	13f			/* already block aligned */

	srdi	r6,r5,4			/* 16 byte steps up to the boundary */
	mtctr	r6
	mr	r8,r3			/* err1 restart address; r4 not yet adjusted */
12:
err1;	std	r0,0(r3)
err1;	std	r0,8(r3)
	addi	r3,r3,16
	bdnz	12b

	sub	r4,r4,r5

13:	srd	r6,r4,r7		/* number of whole cache blocks */
	mtctr	r6
	mr	r8,r3			/* err1 restart address for the dcbz loop */
14:
err1;	dcbz	0,r3			/* zero the cache block at r3 */
	add	r3,r3,r9
	bdnz	14b

	and	r4,r4,r10		/* bytes left after the last whole block */

	cmpdi	r4,32
	blt	.Lshort_clear
	b	.Lmedium_clear
EXPORT_SYMBOL(__arch_clear_user)