/* xref: /OK3568_Linux_fs/u-boot/arch/arc/lib/strchr-700.S (revision 4882a59341e53eb6f0b4789bf948001014eff981) */
/*
 * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
 *
 * SPDX-License-Identifier:	GPL-2.0+
 */

/*
 * ARC700 has a relatively long pipeline and branch prediction, so we want
 * to avoid branches that are hard to predict.  On the other hand, the
 * presence of the norm instruction makes it easier to operate on whole
 * words branch-free.
 */

/*
 * char *strchr(const char *s, int c)
 *
 * In:   r0 = s, r1 = c (only the low byte is used; see extb_s below)
 * Out:  r0 = pointer to the first occurrence of c in s, or 0 (NULL)
 *       when c is not found (mov.pl / mov.mi before return)
 * Clobbers: r1-r7, r12, flags; returns via blink.
 *
 * Strategy: scan one 32-bit word per iteration.  r5 holds the search
 * byte replicated into all four byte lanes; r3 = 0x01010101 and
 * r4 = ror(r3, 1) = 0x80808080 are the classic zero-byte-detection
 * constants:
 *   (x - 0x01010101) & ~x & 0x80808080  is nonzero iff x has a 0 byte.
 * Applying the same test to (word ^ r5) locates the searched-for byte.
 */

.global strchr
.align 4
strchr:
	extb_s	%r1, %r1		/* c &= 0xff */
	asl	%r5, %r1, 8
	bmsk	%r2, %r0, 1		/* r2 = s & 3: byte offset in word */
	or	%r5, %r5, %r1		/* c in the low two byte lanes of r5 */
	mov_s	%r3, 0x01010101
	/*
	 * NOTE(review): this compares (s & 3) with s itself, so .Laligned
	 * is reached only for pointer values < 4; aligned pointers fall
	 * through into the unaligned path, which also handles offset 0
	 * correctly -- confirm against upstream whether comparing with 0
	 * was intended (performance only, not correctness).
	 */
	breq.d	%r2, %r0, .Laligned
	asl	%r4, %r5, 16		/* delay slot: c into the upper lanes */
	sub_s	%r0, %r0, %r2		/* round s down to a word boundary */
	asl	%r7, %r2, 3		/* r7 = 8 * byte offset */
	ld_s	%r2, [%r0]		/* first (partial) word */
#ifdef __LITTLE_ENDIAN__
	asl	%r7, %r3, %r7		/* shift constant so lanes before the */
#else /* __BIG_ENDIAN__ */
	lsr	%r7, %r3, %r7		/* string start cannot false-match    */
#endif /* _ENDIAN__ */
	or	%r5, %r5, %r4		/* r5 = c replicated in all 4 lanes */
	ror	%r4, %r3		/* r4 = 0x80808080 */
	sub	%r12, %r2, %r7		/* zero-byte test on the first word, */
	bic_s	%r12, %r12, %r2		/* using the shifted constant r7     */
	and	%r12, %r12, %r4
	brne.d	%r12, 0, .Lfound0_ua
	xor	%r6, %r2, %r5		/* delay slot: r6 = word ^ cccc */
	ld.a	%r2, [%r0, 4]		/* next word, r0 += 4 (writeback) */
	sub	%r12, %r6, %r7		/* matched-char test on first word */
	bic	%r12, %r12, %r6
#ifdef __LITTLE_ENDIAN__
	and	%r7, %r12, %r4
	/* For speed, we want this branch to be unaligned. */
	breq	%r7, 0, .Loop
	/* Likewise this one */
	b	.Lfound_char
#else /* __BIG_ENDIAN__ */
	and	%r12, %r12, %r4
	/* For speed, we want this branch to be unaligned. */
	breq	%r12, 0, .Loop
	lsr_s	%r12, %r12, 7
	bic 	%r2, %r7, %r6
	b.d	.Lfound_char_b
	and_s	%r2, %r2, %r12
#endif /* _ENDIAN__ */
	/* We require this code address to be unaligned for speed...  */
.Laligned:
	ld_s	%r2, [%r0]
	or	%r5, %r5, %r4		/* r5 = c replicated in all 4 lanes */
	ror	%r4, %r3		/* r4 = 0x80808080 */
	/* ... so that this code address is aligned, for itself and ...  */
.Loop:
	sub	%r12, %r2, %r3		/* zero-byte test on current word */
	bic_s	%r12, %r12, %r2
	and	%r12, %r12, %r4
	brne.d	%r12, 0, .Lfound0
	xor	%r6, %r2, %r5		/* delay slot: r6 = word ^ cccc */
	ld.a	%r2, [%r0, 4]		/* next word, r0 += 4 (writeback) */
	sub	%r12, %r6, %r3		/* matched-char test (0 byte in r6) */
	bic	%r12, %r12, %r6
	and	%r7, %r12, %r4
	breq	%r7, 0, .Loop
	/*
	 *... so that this branch is unaligned.
	 * Found searched-for character.
	 * r0 has already advanced to next word.
	 */
#ifdef __LITTLE_ENDIAN__
	/*
	 * We only need the information about the first matching byte
	 * (i.e. the least significant matching byte) to be exact,
	 * hence there is no problem with carry effects.
	 */
.Lfound_char:
	sub	%r3, %r7, 1		/* isolate the lowest set match bit */
	bic	%r3, %r3, %r7
	norm	%r2, %r3		/* norm + asr 3 turn the hit mask */
	sub_s	%r0, %r0, 1		/* into a byte distance from the   */
	asr_s	%r2, %r2, 3		/* end of the word                 */
	j.d	[%blink]
	sub_s	%r0, %r0, %r2		/* delay slot: back up to the match */

	.balign	4
.Lfound0_ua:
	mov	%r3, %r7		/* first-word variant: shifted const */
.Lfound0:
	sub	%r3, %r6, %r3		/* matched-char mask for this word */
	bic	%r3, %r3, %r6
	and	%r2, %r3, %r4
	or_s	%r12, %r12, %r2		/* combine NUL and match indications */
	sub_s	%r3, %r12, 1
	bic_s	%r3, %r3, %r12		/* isolate first hit (NUL or match) */
	norm	%r3, %r3
	add_s	%r0, %r0, 3
	asr_s	%r12, %r3, 3		/* bit position -> byte offset */
	asl.f	0, %r2, %r3		/* flags: was that first hit a match? */
	sub_s	%r0, %r0, %r12
	j_s.d	[%blink]
	mov.pl	%r0, 0			/* no match before the NUL -> NULL */
#else /* __BIG_ENDIAN__ */
.Lfound_char:
	lsr	%r7, %r7, 7

	bic	%r2, %r7, %r6
.Lfound_char_b:
	norm	%r2, %r2		/* norm + asr 3: hit mask -> byte idx */
	sub_s	%r0, %r0, 4		/* r0 had advanced past this word */
	asr_s	%r2, %r2, 3
	j.d	[%blink]
	add_s	%r0, %r0, %r2		/* delay slot: point at the match */

.Lfound0_ua:
	mov_s	%r3, %r7		/* first-word variant: shifted const */
.Lfound0:
	asl_s	%r2, %r2, 7
	or	%r7, %r6, %r4
	bic_s	%r12, %r12, %r2
	sub	%r2, %r7, %r3
	or	%r2, %r2, %r6
	bic	%r12, %r2, %r12
	bic.f	%r3, %r4, %r12		/* flags + first-hit mask in r3 */
	norm	%r3, %r3

	add.pl	%r3, %r3, 1
	asr_s	%r12, %r3, 3		/* bit position -> byte offset */
	asl.f	0, %r2, %r3		/* flags: was that first hit a match? */
	add_s	%r0, %r0, %r12
	j_s.d	[%blink]
	mov.mi	%r0, 0			/* no match before the NUL -> NULL */
#endif /* _ENDIAN__ */