/* SPDX-License-Identifier: GPL-2.0 */
/*
 * arch/alpha/lib/strrchr.S
 * Contributed by Richard Henderson (rth@tamu.edu)
 *
 * Return the address of the last occurrence of a given character
 * within a null-terminated string, or null if it is not found.
 */
#include <asm/export.h>
#include <asm/regdef.h>

	.set noreorder
	.set noat

	.align 3
	.ent strrchr
	.globl strrchr
strrchr:
	.frame sp, 0, ra
	.prologue 0

	zapnot	a1, 1, a1	# e0    : zero extend our test character
	mov	zero, t6	# .. e1 : t6 is last match aligned addr
	sll	a1, 8, t5	# e0    : replicate our test character
	mov	zero, t8	# .. e1 : t8 is last match byte compare mask
	or	t5, a1, a1	# e0    :
	ldq_u	t0, 0(a0)	# .. e1 : load first quadword
	sll	a1, 16, t5	# e0    :
	andnot	a0, 7, v0	# .. e1 : align source addr
	or	t5, a1, a1	# e0    :
	lda	t4, -1		# .. e1 : build garbage mask
	sll	a1, 32, t5	# e0    :
	cmpbge	zero, t0, t1	# .. e1 : bits set iff byte == zero
	mskqh	t4, a0, t4	# e0    :
	or	t5, a1, a1	# .. e1 : character replication complete
	xor	t0, a1, t2	# e0    : make bytes == c zero
	cmpbge	zero, t4, t4	# .. e1 : bits set iff byte is garbage
	cmpbge	zero, t2, t3	# e0    : bits set iff byte == c
	andnot	t1, t4, t1	# .. e1 : clear garbage from null test
	andnot	t3, t4, t3	# e0    : clear garbage from char test
	bne	t1, $eos	# .. e1 : did we already hit the terminator?

	/* Character search main loop */
$loop:
	ldq	t0, 8(v0)	# e0    : load next quadword
	cmovne	t3, v0, t6	# .. e1 : save previous comparison's match
	cmovne	t3, t3, t8	# e0    :
	addq	v0, 8, v0	# .. e1 :
	xor	t0, a1, t2	# e0    :
	cmpbge	zero, t0, t1	# .. e1 : bits set iff byte == zero
	cmpbge	zero, t2, t3	# e0    : bits set iff byte == c
	beq	t1, $loop	# .. e1 : if we haven't seen a null, loop
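	/* The code below relies on the identity (t1 & -t1), which
	   isolates the lowest set bit of t1 -- i.e. the first null byte
	   found by cmpbge.  A worked example (the value is illustrative
	   only): if t1 = 00101000, then t1 & -t1 = 00001000; subtracting
	   1 gives 00000111, and the final OR yields 00001111 -- a mask
	   covering every byte up to and including the terminator.  */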
	/* Mask out character matches after terminator */
$eos:
	negq	t1, t4		# e0    : isolate first null byte match
	and	t1, t4, t4	# e1    :
	subq	t4, 1, t5	# e0    : build a mask of the bytes up to...
	or	t4, t5, t4	# e1    : ... and including the null

	and	t3, t4, t3	# e0    : mask out char matches after null
	cmovne	t3, t3, t8	# .. e1 : save it, if match found
	cmovne	t3, v0, t6	# e0    :

	/* Locate the address of the last matched character */

	/* Retain the early exit for the ev4 -- the ev5 mispredict penalty
	   is 5 cycles -- the same as just falling through.  */
	beq	t8, $retnull	# .. e1 :

	and	t8, 0xf0, t2	# e0    : binary search for the high bit set
	cmovne	t2, t2, t8	# .. e1 (zdb)
	cmovne	t2, 4, t2	# e0    :
	and	t8, 0xcc, t1	# .. e1 :
	cmovne	t1, t1, t8	# e0    :
	cmovne	t1, 2, t1	# .. e1 :
	and	t8, 0xaa, t0	# e0    :
	cmovne	t0, 1, t0	# .. e1 (zdb)
	addq	t2, t1, t1	# e0    :
	addq	t6, t0, v0	# .. e1 : add our aligned base ptr to the mix
	addq	v0, t1, v0	# e0    :
	ret			# .. e1 :

$retnull:
	mov	zero, v0	# e0    :
	ret			# .. e1 :

	.end strrchr
	EXPORT_SYMBOL(strrchr)
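/*
 * For reference, a minimal byte-at-a-time C sketch of what the routine
 * above computes (illustrative only; this function is not part of the
 * file and the standard strrchr semantics are assumed):
 *
 *	char *strrchr(const char *s, int c)
 *	{
 *		const char *last = NULL;
 *		do {
 *			if (*s == (char)c)
 *				last = s;
 *		} while (*s++);
 *		return (char *)last;
 *	}
 *
 * The assembly gains its speed by scanning a quadword (8 bytes) per
 * iteration, using cmpbge to compare all eight bytes at once and the
 * bit tricks above to recover the exact byte position of the last match.
 */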