xref: /rk3399_rockchip-uboot/arch/arc/lib/strchr-700.S (revision 1ad6364eeb4f578e423081d1748e8a3fdf1ab01d)
/*
 * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
 *
 * SPDX-License-Identifier:	GPL-2.0+
 */

/*
 * ARC700 has a relatively long pipeline and branch prediction, so we want
 * to avoid branches that are hard to predict.  On the other hand, the
 * presence of the norm instruction makes it easier to operate on whole
 * words branch-free.
 */

/*
 * char *strchr(const char *s, int c)
 *
 * In:	r0 = s, r1 = c (reduced to its low 8 bits on entry)
 * Out:	r0 = address of the first byte of s equal to c, or 0 when the
 *	terminating NUL is reached first.  For c == 0 the address of the
 *	terminator itself is returned.
 * Scratch: r2-r7, r12 and the status flags.  Returns through blink.
 *
 * The string is read one aligned 32-bit word at a time.  For a word x,
 * (x - 0x01010101) & ~x & 0x80808080 is non-zero when x contains a zero
 * byte; applying the same test to x ^ cccc looks for c.  A "flag" below
 * is bit 7 of a byte lane in such a result.  Borrows can set spurious
 * flags in the lanes above a real hit, so the exit paths either trust
 * only the lowest flag (little endian) or re-check the flags (big endian).
 *
 * Register roles once set up:
 *	r0	address of the word held in r2
 *	r2	word under test
 *	r3	0x01010101
 *	r4	0x80808080
 *	r5	c replicated into all four bytes
 *	r6	word XOR r5 (a zero byte marks a match)
 *	r7	prologue: 0x01 in each byte lane that belongs to the string;
 *		main loop: match flags
 *	r12	NUL flags / scratch
 */
.global strchr
.align 4
strchr:
	extb_s	%r1, %r1		/* r1 = c & 0xff */
	asl	%r5, %r1, 8
	bmsk	%r2, %r0, 1		/* r2 = s & 3: byte offset inside the word */
	or	%r5, %r5, %r1		/* r5 = c in the two low bytes */
	mov_s	%r3, 0x01010101
	/*
	 * r2 is zero exactly when s is word aligned, so compare it with 0.
	 * Comparing it with r0 itself (s & 3 == s) only ever matched the
	 * addresses 0..3: aligned strings never reached .Laligned, and
	 * s = 1..3 reached it with a misaligned pointer.
	 */
	breq.d	%r2, 0, .Laligned
	asl	%r4, %r5, 16		/* (delay slot) r4 = c in the two high bytes */
	/* Unaligned start: round s down and ignore the bytes in front of it. */
	sub_s	%r0, %r0, %r2		/* r0 = address of the containing word */
	asl	%r7, %r2, 3		/* r7 = byte offset in bits */
	ld_s	%r2, [%r0]
#ifdef __LITTLE_ENDIAN__
	asl	%r7, %r3, %r7
#else /* __BIG_ENDIAN__ */
	lsr	%r7, %r3, %r7
#endif /* __LITTLE_ENDIAN__ */
	/* r7 = 0x01 in every lane at or after s, 0x00 in the lanes before it. */
	or	%r5, %r5, %r4		/* r5 = c in all four bytes */
	ror	%r4, %r3		/* r4 = 0x01010101 rotated right by 1 = 0x80808080 */
	sub	%r12, %r2, %r7
	bic_s	%r12, %r12, %r2
	and	%r12, %r12, %r4		/* r12 = NUL flags of the first word */
	brne.d	%r12, 0, .Lfound0_ua
	xor	%r6, %r2, %r5		/* (delay slot) bytes equal to c become 0 */
	ld.a	%r2, [%r0, 4]		/* no NUL seen: r0 += 4, load the next word */
	sub	%r12, %r6, %r7
	bic	%r12, %r12, %r6
#ifdef __LITTLE_ENDIAN__
	and	%r7, %r12, %r4		/* r7 = match flags of the first word */
	/* For speed, we want this branch to be unaligned. */
	breq	%r7, 0, .Loop
	/* Likewise this one */
	b	.Lfound_char
#else /* __BIG_ENDIAN__ */
	and	%r12, %r12, %r4		/* r12 = match flags of the first word */
	/* For speed, we want this branch to be unaligned. */
	breq	%r12, 0, .Loop
	lsr_s	%r12, %r12, 7		/* move each flag to bit 0 of its byte */
	bic 	%r2, %r7, %r6		/* string lanes whose r6 byte has bit 0 clear */
	b.d	.Lfound_char_b
	and_s	%r2, %r2, %r12		/* (delay slot) ... that are also flagged */
#endif /* __LITTLE_ENDIAN__ */
	/* We require this code address to be unaligned for speed...  */
.Laligned:
	ld_s	%r2, [%r0]
	or	%r5, %r5, %r4		/* r5 = c in all four bytes */
	ror	%r4, %r3		/* r4 = 0x80808080 */
	/* ... so that this code address is aligned, for itself and ...  */
.Loop:
	sub	%r12, %r2, %r3
	bic_s	%r12, %r12, %r2
	and	%r12, %r12, %r4		/* r12 = NUL flags of the word in r2 */
	brne.d	%r12, 0, .Lfound0
	xor	%r6, %r2, %r5		/* (delay slot) bytes equal to c become 0 */
	ld.a	%r2, [%r0, 4]		/* r0 += 4, load the next word */
	sub	%r12, %r6, %r3
	bic	%r12, %r12, %r6
	and	%r7, %r12, %r4		/* r7 = match flags of the previous word */
	breq	%r7, 0, .Loop
	/*
	 *... so that this branch is unaligned.
	 * Found searched-for character.
	 * r0 has already advanced to next word.
	 */
#ifdef __LITTLE_ENDIAN__
	/*
	 * We only need the information about the first matching byte
	 * (i.e. the least significant matching byte) to be exact,
	 * hence there is no problem with carry effects.
	 */
.Lfound_char:
	sub	%r3, %r7, 1
	bic	%r3, %r3, %r7		/* r3 = all bits below the lowest match flag */
	norm	%r2, %r3
	sub_s	%r0, %r0, 1
	asr_s	%r2, %r2, 3		/* r2 = 3 - index of the matching byte */
	j.d	[%blink]
	sub_s	%r0, %r0, %r2		/* (delay slot) r0 = address of the match */

	.balign	4
.Lfound0_ua:
	/* First word of an unaligned string: use the lane mask, not 0x01010101. */
	mov	%r3, %r7
.Lfound0:
	/* A NUL is in this word; r0 still points at it.  Is c in front of it? */
	sub	%r3, %r6, %r3
	bic	%r3, %r3, %r6
	and	%r2, %r3, %r4		/* r2 = match flags of this word */
	or_s	%r12, %r12, %r2		/* r12 = NUL flags | match flags */
	sub_s	%r3, %r12, 1
	bic_s	%r3, %r3, %r12		/* r3 = all bits below the lowest flag */
	norm	%r3, %r3		/* shift count that moves that flag to bit 31 */
	add_s	%r0, %r0, 3
	asr_s	%r12, %r3, 3		/* r12 = 3 - index of the first hit */
	asl.f	0, %r2, %r3		/* N set: the first hit is a match */
	sub_s	%r0, %r0, %r12		/* r0 = address of the first hit */
	j_s.d	[%blink]
	mov.pl	%r0, 0			/* (delay slot) NUL came first: return 0 */
#else /* __BIG_ENDIAN__ */
.Lfound_char:
	lsr	%r7, %r7, 7		/* move each flag to bit 0 of its byte */

	/* Drop flags whose r6 byte has bit 0 set: those bytes are not 0. */
	bic	%r2, %r7, %r6
.Lfound_char_b:
	norm	%r2, %r2
	sub_s	%r0, %r0, 4		/* back to the word that was tested */
	asr_s	%r2, %r2, 3		/* r2 = index of the first matching byte */
	j.d	[%blink]
	add_s	%r0, %r0, %r2		/* (delay slot) r0 = address of the match */

.Lfound0_ua:
	/*
	 * First word of an unaligned string.  A 0x00 byte just in front of
	 * s picks up a NUL flag when the lane below it borrows (for example
	 * when s is empty), and the bit-0 filter in .Lfound0 cannot remove
	 * it; the first hit would then be placed before s and 0 returned
	 * even though s[0] matches.  Keep only the flags of lanes that
	 * belong to the string.
	 */
	asl	%r3, %r7, 7		/* 0x80 in every lane at or after s */
	and	%r12, %r12, %r3
	mov_s	%r3, %r7		/* lane mask replaces 0x01010101 below */
.Lfound0:
	/* A NUL is in this word; r0 still points at it.  Is c in front of it? */
	asl_s	%r2, %r2, 7		/* bit 0 of each byte of the word -> bit 7 */
	or	%r7, %r6, %r4		/* bit 7 set everywhere: no borrow across lanes */
	bic_s	%r12, %r12, %r2		/* drop NUL flags of bytes that have bit 0 set */
	sub	%r2, %r7, %r3
	or	%r2, %r2, %r6		/* bit 7 clear only where the r6 byte is 0 (match) */
	bic	%r12, %r2, %r12		/* ... or where a NUL flag is set */
	bic.f	%r3, %r4, %r12		/* r3 = 0x80 in every hit lane; N: byte 0 is a hit */
	norm	%r3, %r3

	add.pl	%r3, %r3, 1		/* r3 = 8 * index of the first hit */
	asr_s	%r12, %r3, 3		/* r12 = index of the first hit */
	asl.f	0, %r2, %r3		/* N set: the first hit is not a match */
	add_s	%r0, %r0, %r12		/* r0 = address of the first hit */
	j_s.d	[%blink]
	mov.mi	%r0, 0			/* (delay slot) NUL came first: return 0 */
#endif /* __LITTLE_ENDIAN__ */