/*
 * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
 *
 * SPDX-License-Identifier:	GPL-2.0+
 */

/*
 * This is optimized primarily for the ARC700.
 * It would be possible to speed up the loops by one cycle per word
 * (one cycle per byte, respectively) by forcing double source 1 alignment,
 * unrolling by a factor of two, and speculatively loading the second
 * word / byte of source 1; however, that would increase the overhead for
 * loop setup / finish, and strcmp might often terminate early.
 */

.global strcmp
.align 4
strcmp:
	/* %r0 = s1, %r1 = s2; the result is returned in %r0 */
	or	%r2, %r0, %r1
	bmsk_s	%r2, %r2, 1	/* non-zero if either pointer is not word-aligned */
	brne	%r2, 0, .Lcharloop	/* unaligned: compare byte by byte */
	mov_s	%r12, 0x01010101
	ror	%r5, %r12	/* %r5 = 0x80808080 */
.Lwordloop:
	ld.ab	%r2, [%r0, 4]
	ld.ab	%r3, [%r1, 4]
	nop_s
	/* (w - 0x01010101) & ~w & 0x80808080: non-zero iff w has a NUL byte */
	sub	%r4, %r2, %r12
	bic	%r4, %r4, %r2
	and	%r4, %r4, %r5
	brne	%r4, 0, .Lfound0	/* source 1 word contains a NUL byte */
	breq	%r2, %r3, .Lwordloop
#ifdef __LITTLE_ENDIAN__
	xor	%r0, %r2, %r3	/* mask for difference */
	sub_s	%r1, %r0, 1
	bic_s	%r0, %r0, %r1	/* mask for least significant difference bit */
	sub	%r1, %r5, %r0
	xor	%r0, %r5, %r1	/* mask for least significant difference byte */
	and_s	%r2, %r2, %r0
	and_s	%r3, %r3, %r0
#endif /* __LITTLE_ENDIAN__ */
	cmp_s	%r2, %r3
	mov_s	%r0, 1
	j_s.d	[%blink]
	bset.lo	%r0, %r0, 31	/* bit 31 set -> negative result if s1 < s2 */

	.balign	4
#ifdef __LITTLE_ENDIAN__
.Lfound0:
	xor	%r0, %r2, %r3	/* mask for difference */
	or	%r0, %r0, %r4	/* or in zero indicator */
	sub_s	%r1, %r0, 1
	bic_s	%r0, %r0, %r1	/* mask for least significant difference bit */
	sub	%r1, %r5, %r0
	xor	%r0, %r5, %r1	/* mask for least significant difference byte */
	and_s	%r2, %r2, %r0
	and_s	%r3, %r3, %r0
	sub.f	%r0, %r2, %r3
	mov.hi	%r0, 1
	j_s.d	[%blink]
	bset.lo	%r0, %r0, 31
#else /* __BIG_ENDIAN__ */
	/*
	 * The zero-detection above can mis-detect 0x01 bytes as zeroes
	 * because of carry propagation from a less significant zero byte.
	 * We can compensate for this by checking that bit 0 is zero.
	 * This compensation is not necessary in the step where we
	 * get a low estimate for r2, because in any affected bytes
	 * we already have 0x00 or 0x01, which will remain unchanged
	 * when bit 7 is cleared.
	 */
	.balign	4
.Lfound0:
	lsr	%r0, %r4, 8
	lsr_s	%r1, %r2
	bic_s	%r2, %r2, %r0	/* get low estimate for r2 and get ... */
	bic_s	%r0, %r0, %r1	/* <this is the adjusted mask for zeros> */
	or_s	%r3, %r3, %r0	/* ... high estimate r3 so that r2 > r3 will */
	cmp_s	%r3, %r2	/* ... be independent of trailing garbage */
	or_s	%r2, %r2, %r0	/* likewise for r3 > r2 */
	bic_s	%r3, %r3, %r0
	rlc	%r0, 0		/* r0 := r2 > r3 ? 1 : 0 */
	cmp_s	%r2, %r3
	j_s.d	[%blink]
	bset.lo	%r0, %r0, 31
#endif /* __LITTLE_ENDIAN__ */

	.balign	4
.Lcharloop:
	ldb.ab	%r2, [%r0, 1]
	ldb.ab	%r3, [%r1, 1]
	nop_s
	breq	%r2, 0, .Lcmpend
	breq	%r2, %r3, .Lcharloop
.Lcmpend:
	j_s.d	[%blink]
	sub	%r0, %r2, %r3	/* difference of the last bytes read */
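
/*
 * Reference sketch (illustrative only, not part of the build): a C
 * rendering of the word-at-a-time NUL test used in .Lwordloop, assuming
 * 32-bit words.  The helper name nul_mask and its types are made up for
 * this sketch; the endian-specific fixups done at .Lfound0 are not
 * reproduced here.
 *
 *	#include <stdint.h>
 *
 *	// Non-zero iff w contains a 0x00 byte.  Bit 7 is set in every
 *	// byte of the result whose byte in w is zero; as the big-endian
 *	// comment above notes, a 0x01 byte may also be flagged when a
 *	// less significant byte is zero (borrow propagation).
 *	// In the asm, %r12 holds 0x01010101 and %r5 holds 0x80808080.
 *	static inline uint32_t nul_mask(uint32_t w)
 *	{
 *		return (w - 0x01010101u) & ~w & 0x80808080u;
 *	}
 *
 * The word loop applies this test to the source 1 word only; a NUL that
 * appears first in source 2 still ends the loop, because the two words
 * then differ and the breq back to .Lwordloop is not taken.
 */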