/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 */

#include <linux/linkage.h>

/*
 * int memcmp(const void *s1, const void *s2, size_t n)
 *
 * ARC ABI: r0 = s1, r1 = s2, r2 = n; result returned in r0.
 * Returns 0 if the buffers are equal; otherwise a negative/positive value
 * reflecting the ordering of the first differing byte (word path returns an
 * arbitrary-magnitude difference, big-endian tail paths return +/-1 with the
 * sign bit set via bset.cs).
 *
 * Strategy: if both pointers are word-aligned and n > 0, compare 8 bytes per
 * zero-overhead-loop iteration (two 32-bit words, software-pipelined loads);
 * otherwise fall back to a 2-bytes-per-iteration byte loop.
 *
 * WORD2/SHIFT alias r2/r3 differently per endianness so the "find first
 * differing byte" arithmetic below can share code between the two layouts.
 */
#ifdef __LITTLE_ENDIAN__
#define WORD2 r2
#define SHIFT r3
#else /* BIG ENDIAN */
#define WORD2 r3
#define SHIFT r2
#endif

ENTRY_CFI(memcmp)
	; Alignment + size gate: fold the low 2 bits of both pointers into
	; r12's top bits.  If either pointer is unaligned, r12 >= 2^30 and the
	; unsigned "n <= r12" test below is true; if both are aligned, r12 == 0
	; and the test is true only for n == 0.  Either way -> bytewise path.
	or	r12,r0,r1
	asl_s	r12,r12,30
	sub	r3,r2,1			; r3 = n - 1 (used for loop counts)
	brls	r2,r12,.Lbytewise	; unaligned or n == 0

	; Word path: software-pipeline the first pair of loads, then loop
	; 8 bytes (two words) per iteration.
	ld	r4,[r0,0]
	ld	r5,[r1,0]
	lsr.f	lp_count,r3,3		; iterations = (n-1)/8; flags used later
#ifdef CONFIG_ISA_ARCV2
	/* In ARCv2 a branch can't be the last instruction in a zero overhead
	 * loop.
	 * So we move the branch to the start of the loop, duplicate it
	 * after the end, and set up r12 so that the branch isn't taken
	 * initially.
	 */
	mov_s	r12,WORD2		; force first-iteration brne not-taken
	lpne	.Loop_end
	brne	WORD2,r12,.Lodd		; mismatch found in previous iteration?
	ld	WORD2,[r0,4]
#else
	lpne	.Loop_end
	ld_s	WORD2,[r0,4]
#endif
	ld_s	r12,[r1,4]
	brne	r4,r5,.Leven		; first word pair differs
	ld.a	r4,[r0,8]		; advance pointers (address writeback)
	ld.a	r5,[r1,8]
#ifdef CONFIG_ISA_ARCV2
.Loop_end:
	brne	WORD2,r12,.Lodd		; duplicated loop-tail branch (see above)
#else
	brne	WORD2,r12,.Lodd
.Loop_end:
#endif
	; Loop done with no mismatch: 1-8 tail bytes remain.  SHIFT holds the
	; residual byte count info (low bits of n-1); scale to a bit count.
	asl_s	SHIFT,SHIFT,3
	bhs_s	.Last_cmp		; carry from lsr.f above picks the tail shape
	brne	r4,r5,.Leven
	ld	r4,[r0,4]
	ld	r5,[r1,4]
#ifdef __LITTLE_ENDIAN__
	nop_s
	; one more load latency cycle
.Last_cmp:
	; Partial final word: bset masks off the don't-care bytes beyond n by
	; planting a set bit at position SHIFT, bounding the lowest-set-bit
	; search below.
	xor	r0,r4,r5
	bset	r0,r0,SHIFT
	; Isolate bits below the lowest set bit: (x-1) & ~x, then norm to get
	; its position; "and ,24" rounds down to a byte boundary (multiple of 8).
	sub_s	r1,r0,1
	bic_s	r1,r1,r0
	norm	r1,r1
	b.d	.Leven_cmp
	and	r1,r1,24		; executed in branch delay slot
.Leven:
	; Full-word mismatch in the even word (r4 vs r5): same
	; lowest-differing-byte computation, no mask bit needed.
	xor	r0,r4,r5
	sub_s	r1,r0,1
	bic_s	r1,r1,r0
	norm	r1,r1
	; slow track insn
	and	r1,r1,24
.Leven_cmp:
	; Shift so the first differing byte is most significant, then lsr #1
	; both operands so the 32-bit subtract can't wrap — sign of r0 then
	; reflects the unsigned ordering of that byte.
	asl	r2,r4,r1
	asl	r12,r5,r1
	lsr_s	r2,r2,1
	lsr_s	r12,r12,1
	j_s.d	[blink]			; return; result computed in delay slot
	sub	r0,r2,r12
	.balign	4
.Lodd:
	; Mismatch in the odd word (WORD2 vs r12): identical technique.
	xor	r0,WORD2,r12
	sub_s	r1,r0,1
	bic_s	r1,r1,r0
	norm	r1,r1
	; slow track insn
	and	r1,r1,24
	asl_s	r2,r2,r1
	asl_s	r12,r12,r1
	lsr_s	r2,r2,1
	lsr_s	r12,r12,1
	j_s.d	[blink]
	sub	r0,r2,r12
#else /* BIG ENDIAN */
.Last_cmp:
	; Big-endian partial word: shift out the don't-care low bytes, then
	; compare directly (byte order already matches significance order).
	neg_s	SHIFT,SHIFT
	lsr	r4,r4,SHIFT
	lsr	r5,r5,SHIFT
	; slow track insn
.Leven:
	sub.f	r0,r4,r5
	mov.ne	r0,1			; nonzero diff -> +1 ...
	j_s.d	[blink]
	bset.cs	r0,r0,31		; ... made negative if r4 < r5 (carry set)
.Lodd:
	cmp_s	WORD2,r12
	mov_s	r0,1
	j_s.d	[blink]
	bset.cs	r0,r0,31
#endif /* ENDIAN */
	.balign	4
.Lbytewise:
	; Fallback for unaligned pointers (or n == 0): 2 bytes per iteration,
	; software-pipelined like the word loop (r4/r5 = even byte, r3/r12 = odd).
	breq	r2,0,.Lnil
	ldb	r4,[r0,0]
	ldb	r5,[r1,0]
	lsr.f	lp_count,r3		; iterations = (n-1)/2; carry = n even/odd
#ifdef CONFIG_ISA_ARCV2
	mov	r12,r3			; same ARCv2 loop-tail trick as above
	lpne	.Lbyte_end
	brne	r3,r12,.Lbyte_odd
#else
	lpne	.Lbyte_end
#endif
	ldb_s	r3,[r0,1]
	ldb	r12,[r1,1]
	brne	r4,r5,.Lbyte_even
	ldb.a	r4,[r0,2]
	ldb.a	r5,[r1,2]
#ifdef CONFIG_ISA_ARCV2
.Lbyte_end:
	brne	r3,r12,.Lbyte_odd
#else
	brne	r3,r12,.Lbyte_odd
.Lbyte_end:
#endif
	; Tail: carry from lsr.f selects whether the last compared byte is the
	; even (r4/r5) or a freshly loaded odd pair.
	bcc	.Lbyte_even
	brne	r4,r5,.Lbyte_even
	ldb_s	r3,[r0,1]
	ldb_s	r12,[r1,1]
.Lbyte_odd:
	j_s.d	[blink]
	sub	r0,r3,r12		; byte difference is the return value
.Lbyte_even:
	j_s.d	[blink]
	sub	r0,r4,r5
.Lnil:
	; n == 0: buffers trivially equal.
	j_s.d	[blink]
	mov	r0,0
END_CFI(memcmp)