xref: /OK3568_Linux_fs/kernel/arch/powerpc/lib/strlen_32.S (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun/* SPDX-License-Identifier: GPL-2.0 */
2*4882a593Smuzhiyun/*
3*4882a593Smuzhiyun * strlen() for PPC32
4*4882a593Smuzhiyun *
5*4882a593Smuzhiyun * Copyright (C) 2018 Christophe Leroy CS Systemes d'Information.
6*4882a593Smuzhiyun *
7*4882a593Smuzhiyun * Inspired from glibc implementation
8*4882a593Smuzhiyun */
9*4882a593Smuzhiyun#include <asm/ppc_asm.h>
10*4882a593Smuzhiyun#include <asm/export.h>
11*4882a593Smuzhiyun#include <asm/cache.h>
12*4882a593Smuzhiyun
13*4882a593Smuzhiyun	.text
14*4882a593Smuzhiyun
15*4882a593Smuzhiyun/*
16*4882a593Smuzhiyun * Algorithm:
17*4882a593Smuzhiyun *
18*4882a593Smuzhiyun * 1) Given a word 'x', we can test to see if it contains any 0 bytes
19*4882a593Smuzhiyun *    by subtracting 0x01010101, and seeing if any of the high bits of each
20*4882a593Smuzhiyun *    byte changed from 0 to 1. This works because the least significant
21*4882a593Smuzhiyun *    0 byte must have had no incoming carry (otherwise it's not the least
22*4882a593Smuzhiyun *    significant), so it is 0x00 - 0x01 == 0xff. For all other
23*4882a593Smuzhiyun *    byte values, either they have the high bit set initially, or when
24*4882a593Smuzhiyun *    1 is subtracted you get a value in the range 0x00-0x7f, none of which
25*4882a593Smuzhiyun *    have their high bit set. The expression here is
26*4882a593Smuzhiyun *    ((x - 0x01010101) & ~x & 0x80808080), which gives 0x00000000 when
27*4882a593Smuzhiyun *    there were no 0x00 bytes in the word.  You get 0x80 in bytes that
28*4882a593Smuzhiyun *    match, but possibly false 0x80 matches in the next more significant
29*4882a593Smuzhiyun *    byte to a true match due to carries.  For little-endian this is
30*4882a593Smuzhiyun *    of no consequence since the least significant match is the one
31*4882a593Smuzhiyun *    we're interested in, but big-endian needs method 2 to find which
32*4882a593Smuzhiyun *    byte matches.
33*4882a593Smuzhiyun * 2) Given a word 'x', we can test to see _which_ byte was zero by
34*4882a593Smuzhiyun *    calculating ~(((x & ~0x80808080) - 0x80808080 - 1) | x | ~0x80808080).
35*4882a593Smuzhiyun *    This produces 0x80 in each byte that was zero, and 0x00 in all
36*4882a593Smuzhiyun *    the other bytes. The '| ~0x80808080' clears the low 7 bits in each
37*4882a593Smuzhiyun *    byte, and the '| x' part ensures that bytes with the high bit set
38*4882a593Smuzhiyun *    produce 0x00. The addition will carry into the high bit of each byte
39*4882a593Smuzhiyun *    iff that byte had one of its low 7 bits set. We can then just see
40*4882a593Smuzhiyun *    which was the most significant bit set and divide by 8 to find how
41*4882a593Smuzhiyun *    many to add to the index.
42*4882a593Smuzhiyun *    This is from the book 'The PowerPC Compiler Writer's Guide',
43*4882a593Smuzhiyun *    by Steve Hoxey, Faraydon Karim, Bill Hay and Hank Warren.
44*4882a593Smuzhiyun */
45*4882a593Smuzhiyun
/*
 * strlen: count the bytes preceding the first 0x00 byte of a string,
 * scanning one aligned 32-bit word at a time.
 *
 * In:   r3  = address of the first byte of the string
 * Out:  r3  = number of bytes before the first zero byte
 *
 * Register roles:
 *   r0  = (r3 & 3): misalignment in bytes, later converted to bits
 *   r6  = 0x80808080 (himagic)
 *   r7  = 0x01010101 (lomagic)
 *   r8  = scratch / zero-byte flags
 *   r9  = word currently being examined
 *   r10 = address of the word currently being examined (updated by lwzu)
 *
 * Writes: r0, r3, r6-r10, CR0 and XER[CA].
 *
 * Every load is a word-aligned lwzu, so the word holding the terminator
 * (and, for a misaligned start, the word holding the first byte) is read
 * in full, including bytes that lie outside the string.
 *
 * XER[CA] is cleared by the addic below and must still be clear when the
 * subfe is reached; none of the instructions in between write it.
 */
46*4882a593Smuzhiyun_GLOBAL(strlen)
47*4882a593Smuzhiyun	andi.   r0, r3, 3	/* r0 = byte misalignment of the pointer; CR0 tested by bne- below */
48*4882a593Smuzhiyun	lis	r7, 0x0101	/* r7 = 0x01010000 */
49*4882a593Smuzhiyun	addi	r10, r3, -4	/* bias by -4 so the first 'lwzu 4(r10)' reads the word at r3 */
50*4882a593Smuzhiyun	addic	r7, r7, 0x0101	/* r7 = 0x01010101 (lomagic) & clear XER[CA] */
51*4882a593Smuzhiyun	rotlwi	r6, r7, 31 	/* r6 = 0x80808080 (himagic) */
52*4882a593Smuzhiyun	bne-	3f		/* pointer not word aligned: handle the first word at 3: */
53*4882a593Smuzhiyun	.balign IFETCH_ALIGN_BYTES	/* align the entry of the scan loop */
54*4882a593Smuzhiyun1:	lwzu	r9, 4(r10)	/* r10 += 4; r9 = x = word at the new r10 */
55*4882a593Smuzhiyun2:	subf	r8, r7, r9	/* r8 = x - lomagic (the misaligned path joins here) */
56*4882a593Smuzhiyun	and.	r8, r8, r6	/* r8 = (x - lomagic) & himagic */
57*4882a593Smuzhiyun	beq+	1b		/* no high bit set after the subtraction: no zero byte, next word */
58*4882a593Smuzhiyun	andc.	r8, r8, r9	/* r8 &= ~x: drop hits from bytes whose high bit was already set in x */
59*4882a593Smuzhiyun	beq+	1b		/* nothing left: still no zero byte, next word */
	/* The word holds a zero byte: locate it with method 2 of the algorithm */
60*4882a593Smuzhiyun	andc	r8, r9, r6	/* r8 = x & ~himagic (low 7 bits of each byte) */
61*4882a593Smuzhiyun	orc	r9, r9, r6	/* r9 = x | ~himagic */
62*4882a593Smuzhiyun	subfe	r8, r6, r8	/* r8 = r8 + ~himagic + CA = (x & ~himagic) - himagic - 1, since CA == 0 */
63*4882a593Smuzhiyun	nor	r8, r8, r9	/* r8 = ~(r8 | x | ~himagic): 0x80 in every byte of x that is zero */
64*4882a593Smuzhiyun	cntlzw	r8, r8		/* bit position of the most significant flagged byte */
65*4882a593Smuzhiyun	subf	r3, r3, r10	/* r3 = word address - string start (negative by r3 & 3 in a misaligned first word) */
66*4882a593Smuzhiyun	srwi	r8, r8, 3	/* bits -> bytes: index of the zero byte within the word */
67*4882a593Smuzhiyun	add	r3, r3, r8	/* length = word offset + byte index */
68*4882a593Smuzhiyun	blr
69*4882a593Smuzhiyun
70*4882a593Smuzhiyun	/* Misaligned string: make sure bytes before string are seen not 0 */
71*4882a593Smuzhiyun3:	xor	r10, r10, r0	/* clear the low 2 bits of r10 (r3 - 4 has the same low bits as r3) */
72*4882a593Smuzhiyun	orc	r8, r8, r8	/* r8 = r8 | ~r8 = 0xffffffff, whatever r8 held before */
73*4882a593Smuzhiyun	lwzu	r9, 4(r10)	/* r10 += 4; r9 = aligned word containing the first string byte */
74*4882a593Smuzhiyun	slwi	r0, r0, 3	/* misalignment: bytes -> bits */
75*4882a593Smuzhiyun	srw	r8, r8, r0	/* r8 = mask with zeros over the most significant (r3 & 3) bytes */
76*4882a593Smuzhiyun	orc	r9, r9, r8	/* r9 |= ~r8: force those leading bytes to 0xff */
77*4882a593Smuzhiyun	b	2b		/* examine this word, then continue with the aligned loop */
78*4882a593SmuzhiyunEXPORT_SYMBOL(strlen)
79