xref: /OK3568_Linux_fs/kernel/arch/sparc/lib/memscan_64.S (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun/* SPDX-License-Identifier: GPL-2.0 */
2*4882a593Smuzhiyun/*
3*4882a593Smuzhiyun * memscan.S: Optimized memscan for Sparc64.
4*4882a593Smuzhiyun *
5*4882a593Smuzhiyun * Copyright (C) 1997,1998 Jakub Jelinek (jj@ultra.linux.cz)
6*4882a593Smuzhiyun * Copyright (C) 1998 David S. Miller (davem@redhat.com)
7*4882a593Smuzhiyun */
8*4882a593Smuzhiyun
9*4882a593Smuzhiyun	#include <asm/export.h>
10*4882a593Smuzhiyun
/* HI_MAGIC has bit 7 set in every byte and LO_MAGIC has bit 0 set in every
 * byte of a 64-bit word.  __memscan_zero builds HI_MAGIC in a register and
 * derives the LO_MAGIC value from it with a right shift by 7, so LO_MAGIC is
 * not referenced by name in the code visible in this section.
 */
11*4882a593Smuzhiyun#define HI_MAGIC	0x8080808080808080
12*4882a593Smuzhiyun#define LO_MAGIC	0x0101010101010101
/* ASI number used by the 8-byte ldxa loads in __memscan_zero.  The byte
 * checks there treat the least significant byte of the loaded word as the
 * lowest address, i.e. they expect a little-endian load.
 */
13*4882a593Smuzhiyun#define ASI_PL		0x88
14*4882a593Smuzhiyun
15*4882a593Smuzhiyun	.text
16*4882a593Smuzhiyun	.align	32
/* Three global entry points: __memscan_zero, __memscan_generic and memscan
 * (memscan and __memscan_generic label the same address further down).
 * Only the two __memscan_* names get a .type and an EXPORT_SYMBOL() in the
 * lines visible here.
 */
17*4882a593Smuzhiyun	.globl		__memscan_zero, __memscan_generic
18*4882a593Smuzhiyun	.type		__memscan_zero,#function
19*4882a593Smuzhiyun	.type		__memscan_generic,#function
20*4882a593Smuzhiyun	.globl		memscan
/* EXPORT_SYMBOL() comes from <asm/export.h>; presumably it makes the symbol
 * available to loadable modules -- confirm against that header.
 */
21*4882a593Smuzhiyun	EXPORT_SYMBOL(__memscan_zero)
22*4882a593Smuzhiyun	EXPORT_SYMBOL(__memscan_generic)
23*4882a593Smuzhiyun
24*4882a593Smuzhiyun__memscan_zero:
25*4882a593Smuzhiyun	/* %o0 = bufp, %o1 = size
	 *
	 * Find the first zero byte in the size bytes starting at bufp.
	 *
	 * Returns in %o0:
	 *   - the address of the first zero byte, if one lies inside the
	 *     buffer;
	 *   - bufp + size if no zero byte is found;
	 *   - bufp unchanged if size <= 0.
	 *
	 * Leaf routine (returns with retl, no register window is opened).
	 * Registers written: %o0-%o5, %g2, %g3, %g7 and the condition codes.
	 *
	 * Strategy: step byte by byte until the pointer is 8-byte aligned,
	 * then test 8 bytes per iteration with the "magic constant" trick
	 *     ((w - 0x0101..01) ^ w) & 0x8080..80
	 * which is non-zero whenever w contains a zero byte.  The test can
	 * also fire for non-zero bytes (e.g. 0x80), so check_bytes re-tests
	 * the bytes individually before reporting a hit.
	 *
	 * The aligned loop always loads a full 8-byte word, so it may read up
	 * to 7 bytes beyond bufp + size (never beyond the aligned word that
	 * contains the last byte).  The result is clamped to bufp + size.
	 *
	 * Reminder for readers: the instruction after a branch (indented by
	 * one extra space here) is its delay slot.  With ",a" the delay slot
	 * of a conditional branch is executed only when the branch is taken.
	 */
26*4882a593Smuzhiyun	brlez,pn	%o1, szzero	/* size <= 0: return bufp as is */
27*4882a593Smuzhiyun	 andcc		%o0, 7, %g0	/* delay: is bufp 8-byte aligned? */
28*4882a593Smuzhiyun	be,pt		%icc, we_are_aligned
29*4882a593Smuzhiyun	 sethi		%hi(HI_MAGIC), %o4	/* delay (always runs): start building the 0x80808080 mask */
30*4882a593Smuzhiyun	ldub		[%o0], %o5	/* first unaligned byte */
	/* Unaligned head: one byte per iteration until %o0 is aligned. */
31*4882a593Smuzhiyun1:	subcc		%o1, 1, %o1	/* one byte consumed; flags are used by the be below */
32*4882a593Smuzhiyun	brz,pn		%o5, 10f	/* zero byte found (register branch, flags untouched) */
33*4882a593Smuzhiyun	 add		%o0, 1, %o0	/* delay: advance; label 10 undoes this on a hit */
34*4882a593Smuzhiyun
35*4882a593Smuzhiyun	be,pn		%xcc, szzero	/* nothing left: %o0 now equals bufp + size */
36*4882a593Smuzhiyun	 andcc		%o0, 7, %g0	/* delay: aligned yet? */
37*4882a593Smuzhiyun	bne,a,pn	%icc, 1b	/* still unaligned: next byte */
38*4882a593Smuzhiyun	 ldub		[%o0], %o5	/* delay (only when looping) */
39*4882a593Smuzhiyunwe_are_aligned:
40*4882a593Smuzhiyun	ldxa		[%o0] ASI_PL, %o5	/* first aligned 8-byte word */
41*4882a593Smuzhiyun	or		%o4, %lo(HI_MAGIC), %o3	/* %o3 = 0x80808080 */
42*4882a593Smuzhiyun	sllx		%o3, 32, %o4
43*4882a593Smuzhiyun	or		%o4, %o3, %o3	/* %o3 = 0x8080808080808080 */
44*4882a593Smuzhiyun
45*4882a593Smuzhiyun	srlx		%o3, 7, %o2	/* %o2 = 0x0101010101010101 */
	/* Word loop.  On entry %o5 holds the word at %o0.  After the first
	 * two instructions %o0 points just past that word and %o1 is the
	 * byte count remaining after it (may be zero or negative).
	 */
46*4882a593Smuzhiyunmsloop:
47*4882a593Smuzhiyun	sub		%o1, 8, %o1
48*4882a593Smuzhiyun	add		%o0, 8, %o0
49*4882a593Smuzhiyun	sub		%o5, %o2, %o4	/* w - 0x01..01 */
50*4882a593Smuzhiyun	xor		%o4, %o5, %o4	/* (w - 0x01..01) ^ w */
51*4882a593Smuzhiyun	andcc		%o4, %o3, %g3	/* keep bit 7 of each byte; flags stay live into check_bytes */
52*4882a593Smuzhiyun	bne,pn		%xcc, check_bytes	/* some byte of the word may be zero */
53*4882a593Smuzhiyun	 srlx		%o4, 32, %g3	/* delay: %g3 = upper half of the xor value, tested later */
54*4882a593Smuzhiyun
55*4882a593Smuzhiyun	brgz,a,pt	%o1, msloop	/* more bytes remain: next word */
56*4882a593Smuzhiyun	 ldxa		[%o0] ASI_PL, %o5	/* delay (only when looping) */
	/* Reached either by the branch above (candidate in the word) or by
	 * falling out of the loop with nothing left.  The condition codes
	 * still come from the andcc in msloop.  %g2 tracks the address of
	 * the byte under test; it is kept one behind while a test is in
	 * flight because the "inc %g2" in each delay slot runs before the
	 * branch target.
	 */
57*4882a593Smuzhiyuncheck_bytes:
58*4882a593Smuzhiyun	bne,a,pn	%icc, 2f	/* candidate among the low 4 bytes */
59*4882a593Smuzhiyun	 andcc		%o5, 0xff, %g0	/* delay (only when taken): test byte 0 */
60*4882a593Smuzhiyun	add		%o0, -5, %g2	/* low half clean: one below the address of byte 4 */
61*4882a593Smuzhiyun	ba,pt		%xcc, 3f	/* go test bytes 4..7 */
62*4882a593Smuzhiyun	 srlx		%o5, 32, %g7	/* delay: bring byte 4 to the bottom */
63*4882a593Smuzhiyun
64*4882a593Smuzhiyun2:	srlx		%o5, 8, %g7	/* bring byte 1 to the bottom (flags untouched) */
65*4882a593Smuzhiyun	be,pn		%icc, 1f	/* byte 0 is zero */
66*4882a593Smuzhiyun	 add		%o0, -8, %g2	/* delay: %g2 = address of byte 0 */
67*4882a593Smuzhiyun	andcc		%g7, 0xff, %g0	/* test byte 1 */
68*4882a593Smuzhiyun	srlx		%g7, 8, %g7
69*4882a593Smuzhiyun	be,pn		%icc, 1f
70*4882a593Smuzhiyun	 inc		%g2	/* delay: %g2 = address of byte 1 */
71*4882a593Smuzhiyun	andcc		%g7, 0xff, %g0	/* test byte 2 */
72*4882a593Smuzhiyun
73*4882a593Smuzhiyun	srlx		%g7, 8, %g7
74*4882a593Smuzhiyun	be,pn		%icc, 1f
75*4882a593Smuzhiyun	 inc		%g2	/* delay: %g2 = address of byte 2 */
76*4882a593Smuzhiyun	andcc		%g7, 0xff, %g0	/* test byte 3 */
77*4882a593Smuzhiyun	srlx		%g7, 8, %g7	/* %g7 now holds bytes 4..7 */
78*4882a593Smuzhiyun	be,pn		%icc, 1f
79*4882a593Smuzhiyun	 inc		%g2	/* delay: %g2 = address of byte 3 */
80*4882a593Smuzhiyun	andcc		%g3, %o3, %g0	/* any candidate in the upper 4 bytes? */
81*4882a593Smuzhiyun
82*4882a593Smuzhiyun	be,a,pn		%icc, 2f	/* no: this word holds no zero byte */
83*4882a593Smuzhiyun	 mov		%o0, %g2	/* delay (only when taken): %g2 = address after the word */
	/* Bytes 4..7, same pattern as above. */
84*4882a593Smuzhiyun3:	andcc		%g7, 0xff, %g0	/* test byte 4 */
85*4882a593Smuzhiyun	srlx		%g7, 8, %g7
86*4882a593Smuzhiyun	be,pn		%icc, 1f
87*4882a593Smuzhiyun	 inc		%g2	/* delay: %g2 = address of byte 4 */
88*4882a593Smuzhiyun	andcc		%g7, 0xff, %g0	/* test byte 5 */
89*4882a593Smuzhiyun	srlx		%g7, 8, %g7
90*4882a593Smuzhiyun
91*4882a593Smuzhiyun	be,pn		%icc, 1f
92*4882a593Smuzhiyun	 inc		%g2	/* delay: %g2 = address of byte 5 */
93*4882a593Smuzhiyun	andcc		%g7, 0xff, %g0	/* test byte 6 */
94*4882a593Smuzhiyun	srlx		%g7, 8, %g7
95*4882a593Smuzhiyun	be,pn		%icc, 1f
96*4882a593Smuzhiyun	 inc		%g2	/* delay: %g2 = address of byte 6 */
97*4882a593Smuzhiyun	andcc		%g7, 0xff, %g0	/* test byte 7 */
98*4882a593Smuzhiyun	srlx		%g7, 8, %g7
99*4882a593Smuzhiyun
100*4882a593Smuzhiyun	be,pn		%icc, 1f
101*4882a593Smuzhiyun	 inc		%g2	/* delay: %g2 = address of byte 7 */
	/* No byte of this word was zero (the magic test was a false alarm,
	 * or the loop simply ran out of data).
	 */
102*4882a593Smuzhiyun2:	brgz,a,pt	%o1, msloop	/* bytes remain: resume the word loop */
103*4882a593Smuzhiyun	 ldxa		[%o0] ASI_PL, %o5	/* delay (only when looping) */
104*4882a593Smuzhiyun	inc		%g2	/* nothing left: %g2 is now >= the end address computed below */
	/* Common exit.  %o0 + %o1 is bufp + size.  A hit found in the part
	 * of the last word that lies beyond the buffer must not be reported,
	 * so return the smaller of %g2 and the end address (signed 64-bit
	 * compare).
	 */
105*4882a593Smuzhiyun1:	add		%o0, %o1, %o0	/* %o0 = bufp + size */
106*4882a593Smuzhiyun	cmp		%g2, %o0
107*4882a593Smuzhiyun	retl
108*4882a593Smuzhiyun
109*4882a593Smuzhiyun	 movle		%xcc, %g2, %o0	/* delay: if %g2 <= end, return %g2 */
	/* Hit in the unaligned head: %o0 was already advanced past the byte. */
110*4882a593Smuzhiyun10:	retl
111*4882a593Smuzhiyun	 sub		%o0, 1, %o0	/* delay: step back onto the zero byte */
	/* Return %o0 unchanged (bufp for size <= 0, or bufp + size). */
112*4882a593Smuzhiyunszzero:	retl
113*4882a593Smuzhiyun	 nop
114*4882a593Smuzhiyun
115*4882a593Smuzhiyunmemscan:
116*4882a593Smuzhiyun__memscan_generic:
117*4882a593Smuzhiyun	/* %o0 = addr, %o1 = c, %o2 = size
	 *
	 * Find the first byte equal to c in the size bytes starting at addr,
	 * one byte per iteration.
	 *
	 * Returns in %o0:
	 *   - the address of the first matching byte;
	 *   - addr + size if no byte matches;
	 *   - addr unchanged if size == 0.
	 *
	 * Each byte is loaded zero-extended and compared with the low 32
	 * bits of %o1 (be on %icc), so a match is only possible when that
	 * value is in the range 0..255.
	 *
	 * Leaf routine.  Registers written: %o0, %o3, %o4, %o5 and the
	 * condition codes.
	 *
	 * The loop indexes from the end of the buffer: %o3 = addr + size and
	 * %o4 runs from -size up to 0, so reaching zero ends the loop with
	 * no separate compare.
	 */
118*4882a593Smuzhiyun	brz,pn		%o2, 3f	/* size == 0: return addr */
119*4882a593Smuzhiyun	 add		%o0, %o2, %o3	/* delay: %o3 = addr + size (end) */
120*4882a593Smuzhiyun	ldub		[%o0], %o5	/* first byte */
121*4882a593Smuzhiyun	sub		%g0, %o2, %o4	/* %o4 = -size */
122*4882a593Smuzhiyun1:
123*4882a593Smuzhiyun	cmp		%o5, %o1
124*4882a593Smuzhiyun	be,pn		%icc, 2f	/* byte matches c */
125*4882a593Smuzhiyun	 addcc		%o4, 1, %o4	/* delay (always runs): step index, set flags for the bne */
126*4882a593Smuzhiyun	bne,a,pt 	%xcc, 1b	/* index != 0: more bytes to test */
127*4882a593Smuzhiyun	 ldub		[%o3 + %o4], %o5	/* delay (only when looping): next byte */
128*4882a593Smuzhiyun	retl	/* no match; %o4 == 0 here */
129*4882a593Smuzhiyun	/* The delay slot is the same as the next insn, this is just to make it look more awful */
	/* In other words: the add right after label 2 doubles as the delay
	 * slot of the retl above.  With %o4 == 0 it yields %o0 = end.
	 */
130*4882a593Smuzhiyun2:
131*4882a593Smuzhiyun	 add		%o3, %o4, %o0	/* match path: %o4 was already stepped, so this is one past the match */
132*4882a593Smuzhiyun	retl
133*4882a593Smuzhiyun	 sub		%o0, 1, %o0	/* delay: step back onto the matching byte */
134*4882a593Smuzhiyun3:
135*4882a593Smuzhiyun	retl	/* size == 0: %o0 still holds addr */
136*4882a593Smuzhiyun	 nop
137