/*
 * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
 *
 * SPDX-License-Identifier: GPL-2.0+
 */

/*
 * ARC700 has a relatively long pipeline and branch prediction, so we want
 * to avoid branches that are hard to predict. On the other hand, the
 * presence of the norm instruction makes it easier to operate on whole
 * words branch-free.
 */

/*
 * char *strchr(const char *s, int c)
 * In:  r0 = s, r1 = c.
 * Out: r0 = pointer to the first occurrence of c in s, or 0 if there is none.
 */
.global strchr
.align 4
strchr:
	extb_s	%r1, %r1	/* truncate c to one byte */
	asl	%r5, %r1, 8
	bmsk	%r2, %r0, 1	/* r2 = byte offset of s within a word */
	or	%r5, %r5, %r1
	mov_s	%r3, 0x01010101
	breq.d	%r2, %r0, .Laligned
	asl	%r4, %r5, 16
	sub_s	%r0, %r0, %r2	/* round s down to a word boundary */
	asl	%r7, %r2, 3
	ld_s	%r2, [%r0]
#ifdef __LITTLE_ENDIAN__
	asl	%r7, %r3, %r7
#else /* __BIG_ENDIAN__ */
	lsr	%r7, %r3, %r7
#endif /* __LITTLE_ENDIAN__ */
	or	%r5, %r5, %r4	/* r5 = c replicated into all four bytes */
	ror	%r4, %r3	/* r4 = ror(0x01010101, 1) = 0x80808080 */
	sub	%r12, %r2, %r7
	bic_s	%r12, %r12, %r2
	and	%r12, %r12, %r4
	brne.d	%r12, 0, .Lfound0_ua
	xor	%r6, %r2, %r5
	ld.a	%r2, [%r0, 4]
	sub	%r12, %r6, %r7
	bic	%r12, %r12, %r6
#ifdef __LITTLE_ENDIAN__
	and	%r7, %r12, %r4
	/* For speed, we want this branch to be unaligned. */
	breq	%r7, 0, .Loop
	/* Likewise this one. */
	b	.Lfound_char
#else /* __BIG_ENDIAN__ */
	and	%r12, %r12, %r4
	/* For speed, we want this branch to be unaligned. */
	breq	%r12, 0, .Loop
	lsr_s	%r12, %r12, 7
	bic	%r2, %r7, %r6
	b.d	.Lfound_char_b
	and_s	%r2, %r2, %r12
#endif /* __LITTLE_ENDIAN__ */
	/* We require this code address to be unaligned for speed... */
.Laligned:
	ld_s	%r2, [%r0]
	or	%r5, %r5, %r4
	ror	%r4, %r3
	/* ... so that this code address is aligned, for itself and ... */
.Loop:
	sub	%r12, %r2, %r3
	bic_s	%r12, %r12, %r2
	and	%r12, %r12, %r4
	brne.d	%r12, 0, .Lfound0
	xor	%r6, %r2, %r5
	ld.a	%r2, [%r0, 4]
	sub	%r12, %r6, %r3
	bic	%r12, %r12, %r6
	and	%r7, %r12, %r4
	breq	%r7, 0, .Loop
	/*
	 * ... so that this branch is unaligned.
	 * Found searched-for character.
	 * r0 has already advanced to next word.
	 */
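/*
 * The loop above relies on the standard word-at-a-time byte test.  With
 * names that are illustrative only (the registers used here are noted on
 * the right), the idea in C-like form is:
 *
 *	ones  = 0x01010101;					// r3
 *	highs = 0x80808080;					// r4
 *	rep_c = c * 0x01010101;					// r5
 *	zero  = (w - ones) & ~w & highs;			// r12
 *	match = ((w ^ rep_c) - ones) & ~(w ^ rep_c) & highs;	// r7
 *
 * A 0x00 byte in the operand sets the corresponding 0x80 bit of the
 * result, so "zero" is nonzero iff the word w contains a NUL, and
 * "match" is nonzero iff it contains the byte c (w ^ rep_c has a zero
 * byte exactly where w has c).  Bits above the first set bit can be
 * perturbed by borrow propagation, but the first set bit itself is
 * exact, and that is all the code below needs: norm turns that bit into
 * a byte offset and decides whether a NUL or a match comes first.
 */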
#ifdef __LITTLE_ENDIAN__
	/*
	 * We only need the information about the first matching byte
	 * (i.e. the least significant matching byte) to be exact,
	 * hence there is no problem with carry effects.
	 */
.Lfound_char:
	sub	%r3, %r7, 1
	bic	%r3, %r3, %r7
	norm	%r2, %r3
	sub_s	%r0, %r0, 1
	asr_s	%r2, %r2, 3
	j.d	[%blink]
	sub_s	%r0, %r0, %r2

	.balign	4
.Lfound0_ua:
	mov	%r3, %r7
.Lfound0:
	sub	%r3, %r6, %r3
	bic	%r3, %r3, %r6
	and	%r2, %r3, %r4
	or_s	%r12, %r12, %r2
	sub_s	%r3, %r12, 1
	bic_s	%r3, %r3, %r12
	norm	%r3, %r3
	add_s	%r0, %r0, 3
	asr_s	%r12, %r3, 3
	asl.f	0, %r2, %r3
	sub_s	%r0, %r0, %r12
	j_s.d	[%blink]
	mov.pl	%r0, 0
#else /* __BIG_ENDIAN__ */
.Lfound_char:
	lsr	%r7, %r7, 7

	bic	%r2, %r7, %r6
.Lfound_char_b:
	norm	%r2, %r2
	sub_s	%r0, %r0, 4
	asr_s	%r2, %r2, 3
	j.d	[%blink]
	add_s	%r0, %r0, %r2

.Lfound0_ua:
	mov_s	%r3, %r7
.Lfound0:
	asl_s	%r2, %r2, 7
	or	%r7, %r6, %r4
	bic_s	%r12, %r12, %r2
	sub	%r2, %r7, %r3
	or	%r2, %r2, %r6
	bic	%r12, %r2, %r12
	bic.f	%r3, %r4, %r12
	norm	%r3, %r3

	add.pl	%r3, %r3, 1
	asr_s	%r12, %r3, 3
	asl.f	0, %r2, %r3
	add_s	%r0, %r0, %r12
	j_s.d	[%blink]
	mov.mi	%r0, 0
#endif /* __LITTLE_ENDIAN__ */
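/*
 * For reference, a portable C rendering of the same strategy, assuming
 * 32-bit words as on ARC700.  This is an illustrative sketch only
 * (strchr_ref is a made-up name), not the code U-Boot builds:
 *
 *	#include <stddef.h>
 *	#include <stdint.h>
 *	#include <string.h>
 *
 *	char *strchr_ref(const char *s, int c)
 *	{
 *		const uint32_t ones = 0x01010101, highs = 0x80808080;
 *		uint32_t rep = (uint8_t)c * ones;
 *
 *		// Step bytewise to a word boundary (the assembly instead
 *		// masks off the bytes before s in the first word).
 *		while ((uintptr_t)s & 3) {
 *			if (*s == (char)c)
 *				return (char *)s;
 *			if (*s == '\0')
 *				return NULL;
 *			s++;
 *		}
 *		for (;;) {
 *			uint32_t w, t, zero, match;
 *			memcpy(&w, s, sizeof(w));	// aligned word load
 *			t = w ^ rep;
 *			zero  = (w - ones) & ~w & highs;
 *			match = (t - ones) & ~t & highs;
 *			if (zero | match)
 *				break;	// word holds a NUL or a match
 *			s += 4;
 *		}
 *		// Rescan the final word bytewise; the assembly derives the
 *		// offset branch-free with norm instead.
 *		for (;; s++) {
 *			if (*s == (char)c)
 *				return (char *)s;
 *			if (*s == '\0')
 *				return NULL;
 *		}
 *	}
 */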