/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved.
 */
/*
 * Taken from: /OK3568_Linux_fs/kernel/arch/hexagon/mm/copy_user_template.S
 * (revision 4882a59341e53eb6f0b4789bf948001014eff981)
 */

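/* Numerology:
 * The numeric labels in this file are four-digit codes WXYZ.
 * W: width of the access in bytes (1, 2, 4 or 8)
 * X: kind of access: 0 = load, 1 = store
 * Y: where the access sits: 0 = preamble, 8 = loop, 9 = epilog
 * Z: 0 = the location of the access itself, 9 = its handler
 *    (no labels ending in 9 are defined in this file)
 * Example: 8190 is the 8-byte store in the epilog of the doubleword loop.
 */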
/*
 * FUNCNAME - copy `bytes` bytes from `src` to `dst`, lowest address first.
 *
 * This is a template.  FUNCNAME and the register aliases bytes, src, dst,
 * src_dst_sav, loopcount, d_dbuf and w_dbuf are not defined in this file;
 * they are expected to be provided by whatever includes it.
 *
 * Strategy: use the widest access that both pointers allow.
 *   - bytes == 0                                  -> return at once
 *   - src and dst both 8-byte aligned, bytes >= 16 -> doubleword loop
 *   - src and dst share the same offset within 8 bytes, bytes >= 16
 *         -> .Lalign copies a byte / halfword / word as needed so that both
 *            become 8-byte aligned, then restarts at FUNCNAME
 *   - src and dst both 4-byte aligned, bytes >= 8  -> word loop
 *   - src and dst both 2-byte aligned, bytes >= 4  -> halfword loop
 *   - everything else, and every leftover tail     -> byte loop
 *
 * Every loop is software pipelined with sp1loop0: p3 is false on the first
 * pass (only the load runs) and true afterwards, so each pass stores the
 * value loaded by the previous pass while loading the next one.  The packet
 * after the loop (the "epilog") stores the final value.
 *
 * The numeric labels (8080, 8180, 8190, ...) mark the individual user-visible
 * loads and stores.  NOTE(review): presumably they are referenced by fault
 * fixup tables in the including file - confirm there.
 *
 * Returns: r0 = 0 on every path through this file.
 * Clobbers: r3, r4, p0, p1, p3, hardware loop 0, plus the aliased registers.
 */
	.text
	.global FUNCNAME
	.type FUNCNAME, @function
	.p2align 5
FUNCNAME:
	/* Zero-length copy leaves straight away; r3/r4 feed the alignment tests. */
	{
		p0 = cmp.gtu(bytes,#0)
		if (!p0.new) jump:nt .Ldone
		r3 = or(dst,src)	/* a low bit is set if either pointer has it set */
		r4 = xor(dst,src)	/* a low bit is set where the pointers disagree */
	}
	/*
	 * p1 = "at least 16 bytes".  It is still live in .Lalign.
	 * src_dst_sav keeps the pointer values seen at this point; it is not
	 * read again in this file (NOTE(review): presumably for the fault
	 * handlers - confirm in the including file).
	 */
	{
		p1 = cmp.gtu(bytes,#15)
		p0 = bitsclr(r3,#7)	/* both pointers 8-byte aligned? */
		if (!p0.new) jump:nt .Loop_not_aligned_8
		src_dst_sav = combine(src,dst)
	}

	/* Doubleword path: too short for the pipelined loop -> byte tail. */
	{
		loopcount = lsr(bytes,#3)	/* number of whole doublewords */
		if (!p1) jump .Lsmall
	}
	p3=sp1loop0(.Loop8,loopcount)
.Loop8:
8080:
8180:
	{
		if (p3) memd(dst++#8) = d_dbuf	/* store what the previous pass loaded */
		d_dbuf = memd(src++#8)
	}:endloop0
8190:
	/* Epilog: flush the last doubleword, keep only the remainder in bytes. */
	{
		memd(dst++#8) = d_dbuf
		bytes -= asl(loopcount,#3)
		jump .Lsmall
	}

.Loop_not_aligned_8:
	/* Same offset within 8 bytes on both sides: align first, then retry. */
	{
		p0 = bitsclr(r4,#7)
		if (p0.new) jump:nt .Lalign
	}
	/* Otherwise try words: both 4-byte aligned and at least 8 bytes. */
	{
		p0 = bitsclr(r3,#3)
		if (!p0.new) jump:nt .Loop_not_aligned_4
		p1 = cmp.gtu(bytes,#7)
	}

	{
		if (!p1) jump .Lsmall
		loopcount = lsr(bytes,#2)	/* number of whole words */
	}
	p3=sp1loop0(.Loop4,loopcount)
.Loop4:
4080:
4180:
	{
		if (p3) memw(dst++#4) = w_dbuf	/* store what the previous pass loaded */
		w_dbuf = memw(src++#4)
	}:endloop0
4190:
	/* Epilog: flush the last word, keep only the remainder in bytes. */
	{
		memw(dst++#4) = w_dbuf
		bytes -= asl(loopcount,#2)
		jump .Lsmall
	}

.Loop_not_aligned_4:
	/* Try halfwords: both 2-byte aligned and at least 4 bytes. */
	{
		p0 = bitsclr(r3,#1)
		if (!p0.new) jump:nt .Loop_not_aligned
		p1 = cmp.gtu(bytes,#3)
	}

	{
		if (!p1) jump .Lsmall
		loopcount = lsr(bytes,#1)	/* number of whole halfwords */
	}
	p3=sp1loop0(.Loop2,loopcount)
.Loop2:
2080:
2180:
	{
		if (p3) memh(dst++#2) = w_dbuf	/* store what the previous pass loaded */
		w_dbuf = memuh(src++#2)
	}:endloop0
2190:
	/* Epilog: flush the last halfword, keep only the remainder in bytes. */
	{
		memh(dst++#2) = w_dbuf
		bytes -= asl(loopcount,#1)
		jump .Lsmall
	}

.Loop_not_aligned: /* Works for as small as one byte */
	/* Byte loop: runs once per remaining byte; also used for every tail. */
	p3=sp1loop0(.Loop1,bytes)
.Loop1:
1080:
1180:
	{
		if (p3) memb(dst++#1) = w_dbuf	/* store what the previous pass loaded */
		w_dbuf = memub(src++#1)
	}:endloop0
	/* Done */
1190:
	/* Epilog: store the final byte and return 0 in the same packet. */
	{
		memb(dst) = w_dbuf
		jumpr r31
		r0 = #0
	}

.Lsmall:
	/* Tail dispatch: anything left goes through the byte loop. */
	{
		p0 = cmp.gtu(bytes,#0)
		if (p0.new) jump:nt .Loop_not_aligned
	}
.Ldone:
	{
		r0 = #0
		jumpr r31
	}
	.falign
.Lalign:
	/*
	 * Reached only when src and dst have identical low three bits.
	 * Each step tests one address bit of src and, if it is set, copies one
	 * unit of that size so the bit becomes clear: byte, halfword, word.
	 * p1 still holds "bytes > 15" from the entry sequence; when it is false
	 * the first step hands the whole copy to the byte loop via .Lsmall.
	 */
1000:
	{
		if (p0.new) w_dbuf = memub(src)
		p0 = tstbit(src,#0)	/* odd address? */
		if (!p1) jump .Lsmall
	}
1100:
	{
		if (p0) memb(dst++#1) = w_dbuf
		if (p0) bytes = add(bytes,#-1)
		if (p0) src = add(src,#1)
	}
2000:
	{
		if (p0.new) w_dbuf = memuh(src)
		p0 = tstbit(src,#1)	/* not yet 4-byte aligned? */
		if (!p1) jump .Lsmall
	}
2100:
	{
		if (p0) memh(dst++#2) = w_dbuf
		if (p0) bytes = add(bytes,#-2)
		if (p0) src = add(src,#2)
	}
4000:
	{
		if (p0.new) w_dbuf = memw(src)
		p0 = tstbit(src,#2)	/* not yet 8-byte aligned? */
		if (!p1) jump .Lsmall
	}
4100:
	/* Pointers are now 8-byte aligned: start over and pick the fast path. */
	{
		if (p0) memw(dst++#4) = w_dbuf
		if (p0) bytes = add(bytes,#-4)
		if (p0) src = add(src,#4)
		jump FUNCNAME
	}
	.size FUNCNAME,.-FUNCNAME