xref: /OK3568_Linux_fs/kernel/arch/alpha/lib/strlen.S (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun/* SPDX-License-Identifier: GPL-2.0 */
2*4882a593Smuzhiyun/*
3*4882a593Smuzhiyun * strlen.S (c) 1995 David Mosberger (davidm@cs.arizona.edu)
4*4882a593Smuzhiyun *
5*4882a593Smuzhiyun * Finds length of a 0-terminated string.  Optimized for the
6*4882a593Smuzhiyun * Alpha architecture:
7*4882a593Smuzhiyun *
8*4882a593Smuzhiyun *	- memory accessed as aligned quadwords only
9*4882a593Smuzhiyun *	- uses cmpbge to compare 8 bytes in parallel
10*4882a593Smuzhiyun *	- does binary search to find 0 byte in last
11*4882a593Smuzhiyun *	  quadword (HAKMEM needed 12 instructions to
12*4882a593Smuzhiyun *	  do this instead of the 9 instructions that
13*4882a593Smuzhiyun *	  binary search needs).
14*4882a593Smuzhiyun */
15*4882a593Smuzhiyun#include <asm/export.h>
16*4882a593Smuzhiyun	.set noreorder		# ask the assembler to keep the instruction order as written
17*4882a593Smuzhiyun	.set noat		# ask the assembler not to use $at as a hidden temporary
18*4882a593Smuzhiyun
19*4882a593Smuzhiyun	.align 3		# NOTE(review): presumably 2^3 = 8-byte alignment; confirm for this assembler
20*4882a593Smuzhiyun
/*
 * strlen: length of a 0-terminated string.
 *
 *	In:	 $16 = address of the first byte of the string (any alignment)
 *	Out:	 $0  = address of the first 0 byte minus $16
 *	Scratch: $1, $2 and $3 are overwritten
 *	Return:	 jumps to the address held in $26
 *
 * Memory is read only as whole aligned quadwords, so bytes of the first
 * quadword that lie before $16, and bytes of the last quadword that lie
 * after the terminator, are loaded as well; they never affect the result.
 *
 * While scanning, $0 holds the address of the aligned quadword being
 * examined and $2 holds its cmpbge mask (bit i set <==> byte i is 0).
 */
21*4882a593Smuzhiyun	.globl	strlen
22*4882a593Smuzhiyun	.ent	strlen
23*4882a593Smuzhiyun
24*4882a593Smuzhiyunstrlen:
	/* First quadword: bytes that precede the string start must not match. */
25*4882a593Smuzhiyun	ldq_u	$1, 0($16)	# load first quadword ($16 may be misaligned; low 3 address bits are ignored)
26*4882a593Smuzhiyun	lda	$2, -1($31)	# $2 = all ones ($31 reads as zero)
27*4882a593Smuzhiyun	insqh	$2, $16, $2	# $2 = 0xff in each byte below offset ($16 & 7), 0 elsewhere
28*4882a593Smuzhiyun	andnot	$16, 7, $0	# $0 = $16 rounded down to an 8-byte boundary
29*4882a593Smuzhiyun	or	$2, $1, $1	# make the bytes before the string start non-zero
30*4882a593Smuzhiyun	cmpbge	$31, $1, $2	# $2 <- bitmask: bit i == 1 <==> i-th byte == 0
31*4882a593Smuzhiyun	bne	$2, found	# terminator is already inside the first quadword
32*4882a593Smuzhiyun
	/* Main scan: one aligned quadword per iteration until a 0 byte shows up. */
33*4882a593Smuzhiyunloop:	ldq	$1, 8($0)	# fetch the next aligned quadword
34*4882a593Smuzhiyun	addq	$0, 8, $0	# addr += 8
35*4882a593Smuzhiyun	nop			# helps dual issue last two insns
36*4882a593Smuzhiyun	cmpbge	$31, $1, $2	# $2 = zero-byte bitmask of that quadword
37*4882a593Smuzhiyun	beq	$2, loop	# no 0 byte in it: keep scanning
38*4882a593Smuzhiyun
	/* $2 != 0 here.  Turn the lowest set bit of $2 into a byte offset added to $0. */
39*4882a593Smuzhiyunfound:	blbs	$2, done	# make aligned case fast: bit 0 set means $0 already points at the 0 byte
40*4882a593Smuzhiyun	negq	$2, $3		# $3 = -$2
41*4882a593Smuzhiyun	and	$2, $3, $2	# $2 & -$2 keeps only the lowest set bit (the first 0 byte)
42*4882a593Smuzhiyun
	/* Binary search on the position p of that single bit: add 4, then 2, then 1. */
43*4882a593Smuzhiyun	and	$2, 0x0f, $1	# non-zero <==> p is in 0..3
44*4882a593Smuzhiyun	addq	$0, 4, $3	# candidate address with 4 added
45*4882a593Smuzhiyun	cmoveq	$1, $3, $0	# p >= 4: take the candidate
46*4882a593Smuzhiyun
47*4882a593Smuzhiyun	and	$2, 0x33, $1	# 0x33 = bits 0,1,4,5; zero <==> bit 1 of p is set
48*4882a593Smuzhiyun	addq	$0, 2, $3	# candidate address with 2 added
49*4882a593Smuzhiyun	cmoveq	$1, $3, $0	# take it when bit 1 of p is set
50*4882a593Smuzhiyun
51*4882a593Smuzhiyun	and	$2, 0x55, $1	# 0x55 = even bit positions; zero <==> p is odd
52*4882a593Smuzhiyun	addq	$0, 1, $3	# candidate address with 1 added
53*4882a593Smuzhiyun	cmoveq	$1, $3, $0	# take it when p is odd; $0 now addresses the 0 byte
54*4882a593Smuzhiyun
55*4882a593Smuzhiyundone:	subq	$0, $16, $0	# length = address of the 0 byte - start of string
56*4882a593Smuzhiyun	ret	$31, ($26)	# return to the address in $26; $31 discards the link value
57*4882a593Smuzhiyun
58*4882a593Smuzhiyun	.end	strlen
59*4882a593Smuzhiyun	EXPORT_SYMBOL(strlen)	/* macro from <asm/export.h>; presumably exports strlen to modules - confirm */
60