xref: /OK3568_Linux_fs/kernel/arch/ia64/lib/xor.S (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun/* SPDX-License-Identifier: GPL-2.0-or-later */
2*4882a593Smuzhiyun/*
3*4882a593Smuzhiyun * arch/ia64/lib/xor.S
4*4882a593Smuzhiyun *
5*4882a593Smuzhiyun * Optimized RAID-5 checksumming functions for IA-64.
6*4882a593Smuzhiyun */
7*4882a593Smuzhiyun
8*4882a593Smuzhiyun#include <asm/asmmacro.h>
9*4882a593Smuzhiyun#include <asm/export.h>
10*4882a593Smuzhiyun
/*
 * xor_ia64_2(in0, in1, in2)
 *
 * XORs the 8-byte words read through in2 into the 8-byte words read
 * through in1 and stores each result back through in1.
 * The trip count is in0 >> 3, so in0 is presumably a byte count -- confirm
 * against the C prototype.
 *
 * The loop is software-pipelined with rotating registers and predicates:
 * stage 0 (p[0]) issues the loads, stage 6 (p[6]) does the xor and
 * stage 7 (p[6+1]) does the store.  ar.ec = 6 + 2 covers those eight stages.
 *
 * NOTE(review): there is no guard for in0 < 8. In that case ar.lc is
 * loaded with -1. Presumably callers always pass at least one full word --
 * verify.
 */
11*4882a593SmuzhiyunGLOBAL_ENTRY(xor_ia64_2)
12*4882a593Smuzhiyun	.prologue
13*4882a593Smuzhiyun	.fframe 0
14*4882a593Smuzhiyun	.save ar.pfs, r31
	/* 3 inputs, 0 locals, 13 outputs, 16 rotating: the whole frame rotates */
15*4882a593Smuzhiyun	alloc r31 = ar.pfs, 3, 0, 13, 16
16*4882a593Smuzhiyun	.save ar.lc, r30
	/* ar.lc and pr are overwritten below, keep copies in r30 and r29 */
17*4882a593Smuzhiyun	mov r30 = ar.lc
18*4882a593Smuzhiyun	.save pr, r29
19*4882a593Smuzhiyun	mov r29 = pr
20*4882a593Smuzhiyun	;;
21*4882a593Smuzhiyun	.body
	/* r8 = store pointer: results are written back over the in1 buffer */
22*4882a593Smuzhiyun	mov r8 = in1
	/* epilogue count = number of pipeline stages (p[0] .. p[6+1]) */
23*4882a593Smuzhiyun	mov ar.ec = 6 + 2
	/* in0 / 8 = number of 8-byte words to process */
24*4882a593Smuzhiyun	shr in0 = in0, 3
25*4882a593Smuzhiyun	;;
	/* ar.lc takes the trip count minus one */
26*4882a593Smuzhiyun	adds in0 = -1, in0
	/*
	 * Load pointers are kept in static registers r16 and r17. The input
	 * registers lie inside the rotating region declared by alloc, so
	 * they cannot be used as pointers once the loop starts rotating.
	 */
27*4882a593Smuzhiyun	mov r16 = in1
28*4882a593Smuzhiyun	mov r17 = in2
29*4882a593Smuzhiyun	;;
30*4882a593Smuzhiyun	mov ar.lc = in0
	/* set p16 (p[0]) and clear the other rotating predicates: only stage 0 is live on entry */
31*4882a593Smuzhiyun	mov pr.rot = 1 << 16
32*4882a593Smuzhiyun	;;
	/*
	 * s1 and s2 are 7 deep because a loaded word is consumed 6 rotations
	 * later. d is 2 deep because a result is stored one rotation after
	 * it is computed.
	 */
33*4882a593Smuzhiyun	.rotr s1[6+1], s2[6+1], d[2]
34*4882a593Smuzhiyun	.rotp p[6+2]
35*4882a593Smuzhiyun0:
	/* stage 0: fetch one word from each buffer, post-incrementing by 8 */
36*4882a593Smuzhiyun(p[0])	ld8.nta s1[0] = [r16], 8
37*4882a593Smuzhiyun(p[0])	ld8.nta s2[0] = [r17], 8
	/* stage 6: combine the words loaded six rotations ago */
38*4882a593Smuzhiyun(p[6])	xor d[0] = s1[6], s2[6]
	/* stage 7: store the result produced by the previous rotation */
39*4882a593Smuzhiyun(p[6+1])st8.nta [r8] = d[1], 8
	/* presumably only fills the remaining bundle slot -- no architectural effect */
40*4882a593Smuzhiyun	nop.f 0
	/* counted loop branch: rotates registers and predicates, then drains via ar.ec */
41*4882a593Smuzhiyun	br.ctop.dptk.few 0b
42*4882a593Smuzhiyun	;;
	/* restore the loop counter and predicates saved in the prologue */
43*4882a593Smuzhiyun	mov ar.lc = r30
44*4882a593Smuzhiyun	mov pr = r29, -1
45*4882a593Smuzhiyun	br.ret.sptk.few rp
46*4882a593SmuzhiyunEND(xor_ia64_2)
47*4882a593SmuzhiyunEXPORT_SYMBOL(xor_ia64_2)
48*4882a593Smuzhiyun
/*
 * xor_ia64_3(in0, in1, in2, in3)
 *
 * For each 8-byte word, computes in1 ^ in2 ^ in3 and stores the result
 * back through in1.  The trip count is in0 >> 3, so in0 is presumably a
 * byte count -- confirm against the C prototype.
 *
 * The loop is software-pipelined: loads are issued under p[0], the xor
 * chain runs under p[6] and the store under p[6+1].  Each iteration is
 * split into two instruction groups because the second xor consumes the
 * result of the first.
 *
 * NOTE(review): there is no guard for in0 < 8. In that case ar.lc is
 * loaded with -1. Presumably callers always pass at least one full word --
 * verify.
 */
49*4882a593SmuzhiyunGLOBAL_ENTRY(xor_ia64_3)
50*4882a593Smuzhiyun	.prologue
51*4882a593Smuzhiyun	.fframe 0
52*4882a593Smuzhiyun	.save ar.pfs, r31
	/* 4 inputs, 0 locals, 20 outputs, 24 rotating: the whole frame rotates */
53*4882a593Smuzhiyun	alloc r31 = ar.pfs, 4, 0, 20, 24
54*4882a593Smuzhiyun	.save ar.lc, r30
	/* ar.lc and pr are overwritten below, keep copies in r30 and r29 */
55*4882a593Smuzhiyun	mov r30 = ar.lc
56*4882a593Smuzhiyun	.save pr, r29
57*4882a593Smuzhiyun	mov r29 = pr
58*4882a593Smuzhiyun	;;
59*4882a593Smuzhiyun	.body
	/* r8 = store pointer: results are written back over the in1 buffer */
60*4882a593Smuzhiyun	mov r8 = in1
	/* epilogue count = number of pipeline stages (p[0] .. p[6+1]) */
61*4882a593Smuzhiyun	mov ar.ec = 6 + 2
	/* in0 / 8 = number of 8-byte words to process */
62*4882a593Smuzhiyun	shr in0 = in0, 3
63*4882a593Smuzhiyun	;;
	/* ar.lc takes the trip count minus one */
64*4882a593Smuzhiyun	adds in0 = -1, in0
	/*
	 * Load pointers are kept in static registers r16, r17 and r18. The
	 * input registers lie inside the rotating region declared by alloc.
	 */
65*4882a593Smuzhiyun	mov r16 = in1
66*4882a593Smuzhiyun	mov r17 = in2
67*4882a593Smuzhiyun	;;
68*4882a593Smuzhiyun	mov r18 = in3
69*4882a593Smuzhiyun	mov ar.lc = in0
	/* set p16 (p[0]) and clear the other rotating predicates: only stage 0 is live on entry */
70*4882a593Smuzhiyun	mov pr.rot = 1 << 16
71*4882a593Smuzhiyun	;;
	/*
	 * Source registers are 7 deep because a loaded word is consumed 6
	 * rotations later. d is 2 deep because a result is stored one
	 * rotation after it is computed.
	 */
72*4882a593Smuzhiyun	.rotr s1[6+1], s2[6+1], s3[6+1], d[2]
73*4882a593Smuzhiyun	.rotp p[6+2]
74*4882a593Smuzhiyun0:
	/* group 1: load the first two sources and start the xor chain */
75*4882a593Smuzhiyun(p[0])	ld8.nta s1[0] = [r16], 8
76*4882a593Smuzhiyun(p[0])	ld8.nta s2[0] = [r17], 8
77*4882a593Smuzhiyun(p[6])	xor d[0] = s1[6], s2[6]
78*4882a593Smuzhiyun	;;
	/* group 2: load the third source, store the previous rotation's result */
79*4882a593Smuzhiyun(p[0])	ld8.nta s3[0] = [r18], 8
80*4882a593Smuzhiyun(p[6+1])st8.nta [r8] = d[1], 8
	/* fold the third source into the partial result from group 1 */
81*4882a593Smuzhiyun(p[6])	xor d[0] = d[0], s3[6]
	/* counted loop branch: rotates registers and predicates, then drains via ar.ec */
82*4882a593Smuzhiyun	br.ctop.dptk.few 0b
83*4882a593Smuzhiyun	;;
	/* restore the loop counter and predicates saved in the prologue */
84*4882a593Smuzhiyun	mov ar.lc = r30
85*4882a593Smuzhiyun	mov pr = r29, -1
86*4882a593Smuzhiyun	br.ret.sptk.few rp
87*4882a593SmuzhiyunEND(xor_ia64_3)
88*4882a593SmuzhiyunEXPORT_SYMBOL(xor_ia64_3)
89*4882a593Smuzhiyun
/*
 * xor_ia64_4(in0, in1, in2, in3, in4)
 *
 * For each 8-byte word, computes in1 ^ in2 ^ in3 ^ in4 and stores the
 * result back through in1.  The trip count is in0 >> 3, so in0 is
 * presumably a byte count -- confirm against the C prototype.
 *
 * The loop is software-pipelined: loads are issued under p[0], the xors
 * run under p[6] and the store under p[6+1].  The two pairwise xors are
 * done in the first instruction group and merged in the second.
 *
 * NOTE(review): there is no guard for in0 < 8. In that case ar.lc is
 * loaded with -1. Presumably callers always pass at least one full word --
 * verify.
 */
90*4882a593SmuzhiyunGLOBAL_ENTRY(xor_ia64_4)
91*4882a593Smuzhiyun	.prologue
92*4882a593Smuzhiyun	.fframe 0
93*4882a593Smuzhiyun	.save ar.pfs, r31
	/* 5 inputs, 0 locals, 27 outputs, 32 rotating: the whole frame rotates */
94*4882a593Smuzhiyun	alloc r31 = ar.pfs, 5, 0, 27, 32
95*4882a593Smuzhiyun	.save ar.lc, r30
	/* ar.lc and pr are overwritten below, keep copies in r30 and r29 */
96*4882a593Smuzhiyun	mov r30 = ar.lc
97*4882a593Smuzhiyun	.save pr, r29
98*4882a593Smuzhiyun	mov r29 = pr
99*4882a593Smuzhiyun	;;
100*4882a593Smuzhiyun	.body
	/* r8 = store pointer: results are written back over the in1 buffer */
101*4882a593Smuzhiyun	mov r8 = in1
	/* epilogue count = number of pipeline stages (p[0] .. p[6+1]) */
102*4882a593Smuzhiyun	mov ar.ec = 6 + 2
	/* in0 / 8 = number of 8-byte words to process */
103*4882a593Smuzhiyun	shr in0 = in0, 3
104*4882a593Smuzhiyun	;;
	/* ar.lc takes the trip count minus one */
105*4882a593Smuzhiyun	adds in0 = -1, in0
	/*
	 * Load pointers are kept in static registers r16 to r19. The input
	 * registers lie inside the rotating region declared by alloc.
	 */
106*4882a593Smuzhiyun	mov r16 = in1
107*4882a593Smuzhiyun	mov r17 = in2
108*4882a593Smuzhiyun	;;
109*4882a593Smuzhiyun	mov r18 = in3
110*4882a593Smuzhiyun	mov ar.lc = in0
	/* set p16 (p[0]) and clear the other rotating predicates: only stage 0 is live on entry */
111*4882a593Smuzhiyun	mov pr.rot = 1 << 16
112*4882a593Smuzhiyun	mov r19 = in4
113*4882a593Smuzhiyun	;;
	/*
	 * Source registers are 7 deep because a loaded word is consumed 6
	 * rotations later. d is 2 deep because a result is stored one
	 * rotation after it is computed.
	 */
114*4882a593Smuzhiyun	.rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], d[2]
115*4882a593Smuzhiyun	.rotp p[6+2]
116*4882a593Smuzhiyun0:
	/* group 1: load all four sources and xor them pairwise */
117*4882a593Smuzhiyun(p[0])	ld8.nta s1[0] = [r16], 8
118*4882a593Smuzhiyun(p[0])	ld8.nta s2[0] = [r17], 8
119*4882a593Smuzhiyun(p[6])	xor d[0] = s1[6], s2[6]
120*4882a593Smuzhiyun(p[0])	ld8.nta s3[0] = [r18], 8
121*4882a593Smuzhiyun(p[0])	ld8.nta s4[0] = [r19], 8
	/* r20 is a static scratch register: written here, consumed in group 2 of the same iteration */
122*4882a593Smuzhiyun(p[6])	xor r20 = s3[6], s4[6]
123*4882a593Smuzhiyun	;;
	/* group 2: store the previous rotation's result, then merge the two partial xors */
124*4882a593Smuzhiyun(p[6+1])st8.nta [r8] = d[1], 8
125*4882a593Smuzhiyun(p[6])	xor d[0] = d[0], r20
	/* counted loop branch: rotates registers and predicates, then drains via ar.ec */
126*4882a593Smuzhiyun	br.ctop.dptk.few 0b
127*4882a593Smuzhiyun	;;
	/* restore the loop counter and predicates saved in the prologue */
128*4882a593Smuzhiyun	mov ar.lc = r30
129*4882a593Smuzhiyun	mov pr = r29, -1
130*4882a593Smuzhiyun	br.ret.sptk.few rp
131*4882a593SmuzhiyunEND(xor_ia64_4)
132*4882a593SmuzhiyunEXPORT_SYMBOL(xor_ia64_4)
133*4882a593Smuzhiyun
/*
 * xor_ia64_5(in0, in1, in2, in3, in4, in5)
 *
 * For each 8-byte word, computes in1 ^ in2 ^ in3 ^ in4 ^ in5 and stores
 * the result back through in1.  The trip count is in0 >> 3, so in0 is
 * presumably a byte count -- confirm against the C prototype.
 *
 * The loop is software-pipelined: loads are issued under p[0], the xors
 * run under p[6] and the store under p[6+1].  Each iteration uses three
 * instruction groups: pairwise xors, merge of the two pairs, then the
 * fold of the fifth source.
 *
 * NOTE(review): there is no guard for in0 < 8. In that case ar.lc is
 * loaded with -1. Presumably callers always pass at least one full word --
 * verify.
 */
134*4882a593SmuzhiyunGLOBAL_ENTRY(xor_ia64_5)
135*4882a593Smuzhiyun	.prologue
136*4882a593Smuzhiyun	.fframe 0
137*4882a593Smuzhiyun	.save ar.pfs, r31
	/* 6 inputs, 0 locals, 34 outputs, 40 rotating: the whole frame rotates */
138*4882a593Smuzhiyun	alloc r31 = ar.pfs, 6, 0, 34, 40
139*4882a593Smuzhiyun	.save ar.lc, r30
	/* ar.lc and pr are overwritten below, keep copies in r30 and r29 */
140*4882a593Smuzhiyun	mov r30 = ar.lc
141*4882a593Smuzhiyun	.save pr, r29
142*4882a593Smuzhiyun	mov r29 = pr
143*4882a593Smuzhiyun	;;
144*4882a593Smuzhiyun	.body
	/* r8 = store pointer: results are written back over the in1 buffer */
145*4882a593Smuzhiyun	mov r8 = in1
	/* epilogue count = number of pipeline stages (p[0] .. p[6+1]) */
146*4882a593Smuzhiyun	mov ar.ec = 6 + 2
	/* in0 / 8 = number of 8-byte words to process */
147*4882a593Smuzhiyun	shr in0 = in0, 3
148*4882a593Smuzhiyun	;;
	/* ar.lc takes the trip count minus one */
149*4882a593Smuzhiyun	adds in0 = -1, in0
	/*
	 * Load pointers are kept in static registers r16 to r20. The input
	 * registers lie inside the rotating region declared by alloc.
	 */
150*4882a593Smuzhiyun	mov r16 = in1
151*4882a593Smuzhiyun	mov r17 = in2
152*4882a593Smuzhiyun	;;
153*4882a593Smuzhiyun	mov r18 = in3
154*4882a593Smuzhiyun	mov ar.lc = in0
	/* set p16 (p[0]) and clear the other rotating predicates: only stage 0 is live on entry */
155*4882a593Smuzhiyun	mov pr.rot = 1 << 16
156*4882a593Smuzhiyun	mov r19 = in4
	/* unlike xor_ia64_4, r20 is the fifth load pointer here, r21 is the scratch register */
157*4882a593Smuzhiyun	mov r20 = in5
158*4882a593Smuzhiyun	;;
	/*
	 * Source registers are 7 deep because a loaded word is consumed 6
	 * rotations later. d is 2 deep because a result is stored one
	 * rotation after it is computed.
	 */
159*4882a593Smuzhiyun	.rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], s5[6+1], d[2]
160*4882a593Smuzhiyun	.rotp p[6+2]
161*4882a593Smuzhiyun0:
	/* group 1: load the first four sources and xor them pairwise */
162*4882a593Smuzhiyun(p[0])	ld8.nta s1[0] = [r16], 8
163*4882a593Smuzhiyun(p[0])	ld8.nta s2[0] = [r17], 8
164*4882a593Smuzhiyun(p[6])	xor d[0] = s1[6], s2[6]
165*4882a593Smuzhiyun(p[0])	ld8.nta s3[0] = [r18], 8
166*4882a593Smuzhiyun(p[0])	ld8.nta s4[0] = [r19], 8
	/* r21 is a static scratch register: written here, consumed in group 2 of the same iteration */
167*4882a593Smuzhiyun(p[6])	xor r21 = s3[6], s4[6]
168*4882a593Smuzhiyun	;;
	/* group 2: load the fifth source, store the previous rotation's result, merge the pairs */
169*4882a593Smuzhiyun(p[0])	ld8.nta s5[0] = [r20], 8
170*4882a593Smuzhiyun(p[6+1])st8.nta [r8] = d[1], 8
171*4882a593Smuzhiyun(p[6])	xor d[0] = d[0], r21
172*4882a593Smuzhiyun	;;
	/* group 3: fold in the fifth source. d[0] is now final and is stored on the next rotation */
173*4882a593Smuzhiyun(p[6])	  xor d[0] = d[0], s5[6]
	/* presumably only fills the remaining bundle slot -- no architectural effect */
174*4882a593Smuzhiyun	nop.f 0
	/* counted loop branch: rotates registers and predicates, then drains via ar.ec */
175*4882a593Smuzhiyun	br.ctop.dptk.few 0b
176*4882a593Smuzhiyun	;;
	/* restore the loop counter and predicates saved in the prologue */
177*4882a593Smuzhiyun	mov ar.lc = r30
178*4882a593Smuzhiyun	mov pr = r29, -1
179*4882a593Smuzhiyun	br.ret.sptk.few rp
180*4882a593SmuzhiyunEND(xor_ia64_5)
181*4882a593SmuzhiyunEXPORT_SYMBOL(xor_ia64_5)
182