xref: /OK3568_Linux_fs/kernel/arch/arc/lib/strchr-700.S (revision 4882a59341e53eb6f0b4789bf948001014eff981)
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 */

/* ARC700 has a relatively long pipeline and branch prediction, so we want
   to avoid branches that are hard to predict.  On the other hand, the
   presence of the norm instruction makes it easier to operate on whole
   words branch-free.  */

#include <linux/linkage.h>

;-----------------------------------------------------------------------
; char *strchr(const char *s, int c)
;
; In:   r0 = s, r1 = c (only the low byte is used, see extb_s)
; Out:  r0 = pointer to first occurrence of (char)c in s, or NULL.
;       Return via [blink].  Instructions suffixed ".d" execute the
;       following instruction in their delay slot.
;
; Strategy: scan a 32-bit word at a time.
;   r5 = c broadcast into all four byte lanes
;   r3 = 0x01010101,  r4 = ror(r3) = 0x80808080
; A word w contains a zero byte iff
;   (w - 0x01010101) & ~w & 0x80808080  != 0
; and contains a byte equal to c iff the same test fires on w ^ r5
; (matching bytes become zero in the xor).  The same trick is applied
; to a misaligned head word with r7 standing in for 0x01010101 so that
; the bytes before s cannot produce false hits.
;-----------------------------------------------------------------------
ENTRY_CFI(strchr)
	extb_s	r1,r1			; c = (unsigned char)c
	asl	r5,r1,8
	bmsk	r2,r0,1			; r2 = s & 3 (misalignment in bytes)
	or	r5,r5,r1		; r5 = c:c in the low 16 bits
	mov_s	r3,0x01010101
	breq.d	r2,r0,.Laligned		; s already word-aligned?
	asl	r4,r5,16		; (delay slot) r4 = c:c:0:0
	; Unaligned head: load the word containing s, and build a mask
	; (r7) that makes the zero/match tests ignore the bytes before s.
	sub_s	r0,r0,r2		; round r0 down to word boundary
	asl	r7,r2,3			; r7 = misalignment in bits
	ld_s	r2,[r0]			; first (partial) word
#ifdef __LITTLE_ENDIAN__
	asl	r7,r3,r7		; 0x01010101 with leading lanes cleared
#else
	lsr	r7,r3,r7		; same mask, big-endian direction
#endif
	or	r5,r5,r4		; r5 = c broadcast to all 4 bytes
	ror	r4,r3			; r4 = 0x80808080
	sub	r12,r2,r7		; zero-byte test on the head word,
	bic_s	r12,r12,r2		;   (w - r7) & ~w & 0x80808080,
	and	r12,r12,r4		;   with pre-s bytes masked out by r7
	brne.d	r12,0,.Lfound0_ua	; NUL in the head word?
	xor	r6,r2,r5		; (delay slot) r6: matching bytes -> 0
	ld.a	r2,[r0,4]		; fetch next word, r0 += 4
	sub	r12,r6,r7		; match-byte test on the head word
	bic	r12,r12,r6
#ifdef __LITTLE_ENDIAN__
	and	r7,r12,r4
	breq	r7,0,.Loop ; For speed, we want this branch to be unaligned.
	b	.Lfound_char ; Likewise this one.
#else
	and	r12,r12,r4
	breq	r12,0,.Loop ; For speed, we want this branch to be unaligned.
	lsr_s	r12,r12,7		; move 0x80 markers down within lanes
	bic 	r2,r7,r6
	b.d	.Lfound_char_b
	and_s	r2,r2,r12		; (delay slot) keep true match markers
#endif
; /* We require this code address to be unaligned for speed...  */
.Laligned:
	ld_s	r2,[r0]			; first word (s was already aligned)
	or	r5,r5,r4		; r5 = c broadcast to all 4 bytes
	ror	r4,r3			; r4 = 0x80808080
; /* ... so that this code address is aligned, for itself and ...  */
.Loop:
	; Invariant: r2 = word at [r0]; r3 = 0x01010101; r4 = 0x80808080.
	sub	r12,r2,r3		; zero-byte test:
	bic_s	r12,r12,r2		;   (w - 0x01010101) & ~w
	and	r12,r12,r4		;   & 0x80808080
	brne.d	r12,0,.Lfound0		; NUL byte in this word?
	xor	r6,r2,r5		; (delay slot) r6: matching bytes -> 0
	ld.a	r2,[r0,4]		; fetch next word, r0 += 4
	sub	r12,r6,r3		; match-byte test on r6
	bic	r12,r12,r6
	and	r7,r12,r4		; r7 = 0x80 marker per matching byte
	breq	r7,0,.Loop /* ... so that this branch is unaligned.  */
	; Found searched-for character.  r0 has already advanced to next word.
#ifdef __LITTLE_ENDIAN__
/* We only need the information about the first matching byte
   (i.e. the least significant matching byte) to be exact,
   hence there is no problem with carry effects.  */
.Lfound_char:
	; Locate the lowest-address (least significant) match marker in
	; r7 and rewind r0, which points past the word, back onto it.
	sub	r3,r7,1			; (r7 - 1) & ~r7 isolates everything
	bic	r3,r3,r7		;   below the lowest set marker
	norm	r2,r3			; norm -> bit position of that marker
	sub_s	r0,r0,1
	asr_s	r2,r2,3			; bit index -> byte index
	j.d	[blink]
	sub_s	r0,r0,r2		; (delay slot) r0 = &match

	.balign	4
.Lfound0_ua:
	mov	r3,r7			; head word: use r7 mask, not 0x01010101
.Lfound0:
	; The word holds a NUL; a match may also be present and wins only
	; if it sits at a lower address than the NUL.
	sub	r3,r6,r3		; match-byte test on r6
	bic	r3,r3,r6
	and	r2,r3,r4		; r2 = match markers
	or_s	r12,r12,r2		; r12 = NUL markers | match markers
	sub_s	r3,r12,1		; isolate the first (lowest) marker
	bic_s	r3,r3,r12
	norm	r3,r3			; -> position of first NUL-or-match
	add_s	r0,r0,3
	asr_s	r12,r3,3		; bit index -> byte index
	asl.f	0,r2,r3			; flags: is that first marker a match?
	sub_s	r0,r0,r12
	j_s.d	[blink]
	mov.pl	r0,0			; (delay slot) NUL first: return NULL
#else /* BIG ENDIAN */
.Lfound_char:
	lsr	r7,r7,7			; move 0x80 markers down within lanes

	bic	r2,r7,r6		; keep only true match markers
.Lfound_char_b:
	; First match is the most significant marker; norm gives its
	; bit position, converted to a byte offset into the word.
	norm	r2,r2
	sub_s	r0,r0,4			; undo the ld.a post-increment
	asr_s	r2,r2,3			; bit index -> byte index
	j.d	[blink]
	add_s	r0,r0,r2		; (delay slot) r0 = &match

.Lfound0_ua:
	mov_s	r3,r7			; head word: use r7 mask, not 0x01010101
.Lfound0:
	; Big-endian tail: decide whether the first match precedes the
	; first NUL in this word; if not, return NULL.
	asl_s	r2,r2,7
	or	r7,r6,r4
	bic_s	r12,r12,r2
	sub	r2,r7,r3		; match-byte test on r6
	or	r2,r2,r6
	bic	r12,r2,r12
	bic.f	r3,r4,r12		; flags from combined NUL/match markers
	norm	r3,r3			; position of first NUL-or-match

	add.pl	r3,r3,1
	asr_s	r12,r3,3		; bit index -> byte index
	asl.f	0,r2,r3			; flags: is that position a match?
	add_s	r0,r0,r12
	j_s.d	[blink]
	mov.mi	r0,0			; (delay slot) NUL first: return NULL
#endif /* ENDIAN */
END_CFI(strchr)
131