xref: /OK3568_Linux_fs/kernel/arch/hexagon/mm/strnlen_user.S (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun/* SPDX-License-Identifier: GPL-2.0-only */
2*4882a593Smuzhiyun/*
3*4882a593Smuzhiyun * User string length functions for kernel
4*4882a593Smuzhiyun *
5*4882a593Smuzhiyun * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved.
6*4882a593Smuzhiyun */
7*4882a593Smuzhiyun
/*
 * Register aliases used by the __strnlen_user routine in this file.
 *
 *   isrc  (r0)      incoming source address, also the base that is
 *                   subtracted when the result is computed
 *   max   (r1)      incoming limit, only ever read by the routine
 *                   (end = isrc + max, and max + 1 is the over-limit result)
 *   end   (r2)      isrc + max
 *   tmp1  (r3)      scratch: loaded byte, then predicate mask, then the
 *                   trailing-zero count of that mask
 *   obo   (r6)      start + 1, taken when a doubleword is loaded
 *   start (r7)      current scan address
 *   mod8  (r8)      low three bits of an address (isrc, start or end)
 *   dbuf  (r15:14)  the eight bytes loaded from start
 *   dcmp  (r13:12)  constant zero, compared bytewise against dbuf
 */
8*4882a593Smuzhiyun#define isrc	r0
9*4882a593Smuzhiyun#define max	r1	/*  Do not change!  */
10*4882a593Smuzhiyun
11*4882a593Smuzhiyun#define end	r2
12*4882a593Smuzhiyun#define tmp1	r3
13*4882a593Smuzhiyun
14*4882a593Smuzhiyun#define obo	r6	/*  off-by-one  */
15*4882a593Smuzhiyun#define start	r7
16*4882a593Smuzhiyun#define mod8	r8
17*4882a593Smuzhiyun#define dbuf    r15:14
18*4882a593Smuzhiyun#define dcmp	r13:12
19*4882a593Smuzhiyun
20*4882a593Smuzhiyun/*
21*4882a593Smuzhiyun * The vector mask version of this turned out *really* badly.
22*4882a593Smuzhiyun * The hardware loop version also turned out *really* badly.
23*4882a593Smuzhiyun * Seems straight pointer arithmetic basically wins here.
24*4882a593Smuzhiyun */
25*4882a593Smuzhiyun
26*4882a593Smuzhiyun#define fname __strnlen_user
27*4882a593Smuzhiyun
/*
 * fname (__strnlen_user): look for a zero byte starting at isrc, with
 * max as the limit (end = isrc + max).
 *
 * In:   isrc (r0) = address of the first byte to examine
 *       max  (r1) = limit, never written by this routine
 * Out (R0):
 *   start - isrc   a zero byte was found, start is one past that byte, so
 *                  the terminator is included in the count
 *   max + 1        the limit was reached first (exit_error)
 *   0              via fix_1, the fixup label paired with the load packets
 *                  fail_1 and fail_2 in __ex_table
 * Writes: r2, r3, r6, r7, r8, r13:12, r15:14, P0, P1 and R0.
 *
 * Method: single byte loads until start is a multiple of 8
 * (alignment_loop), then one 8-byte load per iteration with a bytewise
 * compare against zero (dw_loop).
 *
 * The packets below depend on Hexagon packet semantics: instructions in
 * one packet read the register values from before the packet, and a
 * .new predicate is the one produced inside the same packet.
 *
 * NOTE(review): both the byte load and the doubleword load are issued
 * before start is compared against end, so memory is read even when max
 * is 0 -- confirm that callers tolerate this.
 * NOTE(review): in dw_loop a zero byte anywhere in the loaded doubleword
 * is accepted whenever end > start, so the returned value can be larger
 * than max. Presumably callers treat any result above max as over-limit
 * -- confirm.
 */
28*4882a593Smuzhiyun	.text
29*4882a593Smuzhiyun	.global fname
30*4882a593Smuzhiyun	.type fname, @function
31*4882a593Smuzhiyun	.p2align 5  /*  why?  */
32*4882a593Smuzhiyunfname:
	/*  mod8 = isrc & 7, end = isrc + max, start = isrc  */
33*4882a593Smuzhiyun	{
34*4882a593Smuzhiyun		mod8 = and(isrc,#7);
35*4882a593Smuzhiyun		end = add(isrc,max);
36*4882a593Smuzhiyun		start = isrc;
37*4882a593Smuzhiyun	}
	/*
	 * P0 tests the mod8 from the previous packet (isrc & 7): when the
	 * source is already 8-byte aligned, skip the byte loop.
	 * dcmp = 0 is the pattern of eight zero bytes used by vcmpb.eq.
	 * The mod8 = end & 7 written here appears to be unused: both
	 * successor paths rewrite mod8 before reading it.
	 */
38*4882a593Smuzhiyun	{
39*4882a593Smuzhiyun		P0 = cmp.eq(mod8,#0);
40*4882a593Smuzhiyun		mod8 = and(end,#7);
41*4882a593Smuzhiyun		dcmp = #0;
42*4882a593Smuzhiyun		if (P0.new) jump:t dw_loop;	/*  fire up the oven  */
43*4882a593Smuzhiyun	}
44*4882a593Smuzhiyun
	/*
	 * Byte-at-a-time loop, runs until start is 8-byte aligned.
	 * fail_1 labels the load packet for the __ex_table entry.
	 */
45*4882a593Smuzhiyunalignment_loop:
	/*  load one byte into tmp1, then advance start by 1  */
46*4882a593Smuzhiyunfail_1:	{
47*4882a593Smuzhiyun		tmp1 = memb(start++#1);
48*4882a593Smuzhiyun	}
	/*
	 * Zero byte: leave through exit_found (start already points one
	 * past it).  Otherwise P1 = end > start (unsigned, start already
	 * advanced) and mod8 = start & 7 feed the next two packets.
	 */
49*4882a593Smuzhiyun	{
50*4882a593Smuzhiyun		P0 = cmp.eq(tmp1,#0);
51*4882a593Smuzhiyun		if (P0.new) jump:nt exit_found;
52*4882a593Smuzhiyun		P1 = cmp.gtu(end,start);
53*4882a593Smuzhiyun		mod8 = and(start,#7);
54*4882a593Smuzhiyun	}
	/*  start has reached end: over-limit result.  P0 = start is aligned  */
55*4882a593Smuzhiyun	{
56*4882a593Smuzhiyun		if (!P1) jump exit_error;  /*  hit the end  */
57*4882a593Smuzhiyun		P0 = cmp.eq(mod8,#0);
58*4882a593Smuzhiyun	}
	/*  not aligned yet: next byte.  Aligned: fall through to dw_loop  */
59*4882a593Smuzhiyun	{
60*4882a593Smuzhiyun		if (!P0) jump alignment_loop;
61*4882a593Smuzhiyun	}
62*4882a593Smuzhiyun
63*4882a593Smuzhiyun
64*4882a593Smuzhiyun
	/*
	 * Doubleword loop: start is 8-byte aligned here.
	 * fail_2 labels the load packet for the __ex_table entry.
	 */
65*4882a593Smuzhiyundw_loop:
	/*  dbuf = eight bytes at start, obo = start + 1  */
66*4882a593Smuzhiyunfail_2:	{
67*4882a593Smuzhiyun		dbuf = memd(start);
68*4882a593Smuzhiyun		obo = add(start,#1);
69*4882a593Smuzhiyun	}
	/*  bytewise compare of dbuf with zero, one result bit per byte in P0  */
70*4882a593Smuzhiyun	{
71*4882a593Smuzhiyun		P0 = vcmpb.eq(dbuf,dcmp);
72*4882a593Smuzhiyun	}
	/*
	 * tmp1 takes the compare mask (the old P0), and P0 is reused for
	 * the bounds test end > start (unsigned).
	 */
73*4882a593Smuzhiyun	{
74*4882a593Smuzhiyun		tmp1 = P0;
75*4882a593Smuzhiyun		P0 = cmp.gtu(end,start);
76*4882a593Smuzhiyun	}
	/*
	 * tmp1 = number of trailing zero bits of the mask, i.e. the index
	 * of the first matching byte, or 32 when no byte matched (the
	 * value tested in the next packet).  Assumes mask bit i belongs to
	 * the byte at start + i -- confirm byte order.
	 * mod8 = end & 7 is consumed by end_check.
	 * start is at or beyond end: go to end_check.
	 */
77*4882a593Smuzhiyun	{
78*4882a593Smuzhiyun		tmp1 = ct0(tmp1);
79*4882a593Smuzhiyun		mod8 = and(end,#7);
80*4882a593Smuzhiyun		if (!P0) jump end_check;
81*4882a593Smuzhiyun	}
	/*
	 * tmp1 != 32 means a zero byte was found: start = obo + tmp1
	 * (one past the zero byte) and leave through exit_found.
	 */
82*4882a593Smuzhiyun	{
83*4882a593Smuzhiyun		P0 = cmp.eq(tmp1,#32);
84*4882a593Smuzhiyun		if (!P0.new) jump:nt exit_found;
85*4882a593Smuzhiyun		if (!P0.new) start = add(obo,tmp1);
86*4882a593Smuzhiyun	}
	/*  no zero byte in this doubleword: advance by 8 and repeat  */
87*4882a593Smuzhiyun	{
88*4882a593Smuzhiyun		start = add(start,#8);
89*4882a593Smuzhiyun		jump dw_loop;
90*4882a593Smuzhiyun	}	/*  might be nice to combine these jumps...   */
91*4882a593Smuzhiyun
92*4882a593Smuzhiyun
	/*
	 * Reached from dw_loop when end <= start.  tmp1 is the index of
	 * the first zero byte in the doubleword just loaded (32 if none)
	 * and mod8 is end & 7.  A signed tmp1 > mod8 gives the over-limit
	 * result.  Otherwise start = obo + tmp1 and control falls through
	 * to exit_found.  start is written on both paths, exit_error does
	 * not read it.  Because start >= end on entry, the exit_found
	 * result from here is at least max + 1 as well (address
	 * wraparound aside).
	 */
93*4882a593Smuzhiyunend_check:
94*4882a593Smuzhiyun	{
95*4882a593Smuzhiyun		P0 = cmp.gt(tmp1,mod8);
96*4882a593Smuzhiyun		if (P0.new) jump:nt exit_error;	/*  neverfound!  */
97*4882a593Smuzhiyun		start = add(obo,tmp1);
98*4882a593Smuzhiyun	}
99*4882a593Smuzhiyun
	/*  R0 = start - isrc, then return through R31  */
100*4882a593Smuzhiyunexit_found:
101*4882a593Smuzhiyun	{
102*4882a593Smuzhiyun		R0 = sub(start,isrc);
103*4882a593Smuzhiyun		jumpr R31;
104*4882a593Smuzhiyun	}
105*4882a593Smuzhiyun
	/*  R0 = max + 1, then return through R31  */
106*4882a593Smuzhiyunexit_error:
107*4882a593Smuzhiyun	{
108*4882a593Smuzhiyun		R0 = add(max,#1);
109*4882a593Smuzhiyun		jumpr R31;
110*4882a593Smuzhiyun	}
111*4882a593Smuzhiyun
112*4882a593Smuzhiyun	/*  Uh, what does the "fixup" return here?  */
	/*
	 * As written, fix_1 returns 0 in R0.  It is the second member of
	 * both __ex_table pairs at the end of this file, so presumably it
	 * is entered when the load at fail_1 or fail_2 faults -- confirm
	 * against the exception fixup code.
	 */
113*4882a593Smuzhiyun	.falign
114*4882a593Smuzhiyunfix_1:
115*4882a593Smuzhiyun	{
116*4882a593Smuzhiyun		R0 = #0;
117*4882a593Smuzhiyun		jumpr R31;
118*4882a593Smuzhiyun	}
119*4882a593Smuzhiyun
	/*  symbol size = distance from fname to this point  */
120*4882a593Smuzhiyun	.size fname,.-fname
121*4882a593Smuzhiyun
122*4882a593Smuzhiyun
/*
 * Entries emitted into the __ex_table section.  Each .long pair is
 * (label of a load packet, label of its fixup code): the byte load at
 * fail_1 and the doubleword load at fail_2 are both paired with fix_1,
 * which returns 0.  Presumably the fault handler looks up the faulting
 * address in this table and resumes at the second address -- confirm.
 * .previous switches back to the section that was active before.
 */
123*4882a593Smuzhiyun.section __ex_table,"a"
124*4882a593Smuzhiyun.long fail_1,fix_1
125*4882a593Smuzhiyun.long fail_2,fix_1
126*4882a593Smuzhiyun.previous
127