/* xref: /OK3568_Linux_fs/kernel/arch/sparc/crypto/camellia_asm.S (revision 4882a59341e53eb6f0b4789bf948001014eff981) */
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/linkage.h>
#include <asm/visasm.h>

#include "opcodes.h"

/*
 * CAMELLIA_6ROUNDS(KEY_BASE, I0, I1)
 *
 * Six consecutive CAMELLIA_F steps.  The first argument of each step walks
 * KEY_BASE, KEY_BASE + 2, ... KEY_BASE + 10, and the roles of I0 and I1 are
 * swapped on every step.
 *
 * CAMELLIA_F / CAMELLIA_FL / CAMELLIA_FLI are provided by opcodes.h.  They
 * are chained below with no separator, so each one presumably expands to a
 * complete, self-terminated statement (TODO: confirm against opcodes.h).
 * Throughout this file the arguments line up with the numbers of the FP
 * registers that hold the subkeys and the two data halves.
 */
#define CAMELLIA_6ROUNDS(KEY_BASE, I0, I1) \
	CAMELLIA_F(KEY_BASE +  0, I1, I0, I1) \
	CAMELLIA_F(KEY_BASE +  2, I0, I1, I0) \
	CAMELLIA_F(KEY_BASE +  4, I1, I0, I1) \
	CAMELLIA_F(KEY_BASE +  6, I0, I1, I0) \
	CAMELLIA_F(KEY_BASE +  8, I1, I0, I1) \
	CAMELLIA_F(KEY_BASE + 10, I0, I1, I0)

/*
 * CAMELLIA_6ROUNDS_FL_FLI(KEY_BASE, I0, I1)
 *
 * CAMELLIA_6ROUNDS followed by CAMELLIA_FL on I0 (key operand KEY_BASE + 12)
 * and CAMELLIA_FLI on I1 (key operand KEY_BASE + 14).  One invocation
 * therefore consumes eight consecutive even-numbered key operands starting
 * at KEY_BASE.
 */
#define CAMELLIA_6ROUNDS_FL_FLI(KEY_BASE, I0, I1) \
	CAMELLIA_6ROUNDS(KEY_BASE, I0, I1) \
	CAMELLIA_FL(KEY_BASE + 12, I0, I0) \
	CAMELLIA_FLI(KEY_BASE + 14, I1, I1)

20*4882a593Smuzhiyun	.data
21*4882a593Smuzhiyun
22*4882a593Smuzhiyun	.align	8
23*4882a593SmuzhiyunSIGMA:	.xword	0xA09E667F3BCC908B
24*4882a593Smuzhiyun	.xword	0xB67AE8584CAA73B2
25*4882a593Smuzhiyun	.xword	0xC6EF372FE94F82BE
26*4882a593Smuzhiyun	.xword	0x54FF53A5F1D36F1C
27*4882a593Smuzhiyun	.xword	0x10E527FADE682D1D
28*4882a593Smuzhiyun	.xword	0xB05688C2B3E6C1FD
29*4882a593Smuzhiyun
30*4882a593Smuzhiyun	.text
31*4882a593Smuzhiyun
/*
 * camellia_sparc64_key_expand - derive the encryption and decryption key
 * schedules from a raw key.
 *
 * In:   %o0 = in_key       raw key bytes, read with 32-bit loads
 *       %o1 = encrypt_key  out: bytes 0x00-0xcf are written for a 16-byte
 *                          key, bytes 0x00-0x10f for any other length
 *       %o2 = key_len      16 takes the short path; 24 synthesises the
 *                          missing last 8 key bytes; every other value is
 *                          handled as a 32-byte key (no validation here)
 *       %o3 = decrypt_key  out: same size as the encrypt schedule
 *
 * Uses: %o0, %o1, %o3, %o4, %o5, %g2, %g3, the integer condition codes and
 *       FP registers %f0-%f10 and %f16-%f30.  FP use is bracketed by
 *       VISEntry / VISExit (from asm/visasm.h).
 *
 * Lines indented by one extra space sit in a branch delay slot.
 */
	.align	32
ENTRY(camellia_sparc64_key_expand)
	/* %o0=in_key, %o1=encrypt_key, %o2=key_len, %o3=decrypt_key */
	VISEntry
	/*
	 * Key bytes 0..15 are stored unchanged as k[0..3] and a copy is kept
	 * in %f28/%f30 for the XOR steps further down.
	 */
	ld	[%o0 + 0x00], %f0	! i0, k[0]
	ld	[%o0 + 0x04], %f1	! i1, k[1]
	ld	[%o0 + 0x08], %f2	! i2, k[2]
	ld	[%o0 + 0x0c], %f3	! i3, k[3]
	std	%f0, [%o1 + 0x00]	! k[0, 1]
	fsrc2	%f0, %f28
	std	%f2, [%o1 + 0x08]	! k[2, 3]
	cmp	%o2, 16
	be	10f
	 fsrc2	%f2, %f30

	/* Longer keys only: key bytes 16..23 become k[8, 9]. */
	ld	[%o0 + 0x10], %f0
	ld	[%o0 + 0x14], %f1
	std	%f0, [%o1 + 0x20]	! k[8, 9]
	cmp	%o2, 24
	fone	%f10
	/*
	 * 24-byte key: the annulled delay slot runs only when the branch is
	 * taken and sets %f2 = ~%f0 (XOR with the all-ones %f10) in place of
	 * the absent key bytes 24..31.  Otherwise those bytes are loaded.
	 */
	be,a	1f
	 fxor	%f10, %f0, %f2
	ld	[%o0 + 0x18], %f2
	ld	[%o0 + 0x1c], %f3
1:
	std	%f2, [%o1 + 0x28]	! k[10, 11]
	/* Fold the first 16 key bytes into the second 16. */
	fxor	%f28, %f0, %f0
	fxor	%f30, %f2, %f2

10:
	/* %f16-%f26 = the six SIGMA constants. */
	sethi	%hi(SIGMA), %g3
	or	%g3, %lo(SIGMA), %g3
	ldd	[%g3 + 0x00], %f16
	ldd	[%g3 + 0x08], %f18
	ldd	[%g3 + 0x10], %f20
	ldd	[%g3 + 0x18], %f22
	ldd	[%g3 + 0x20], %f24
	ldd	[%g3 + 0x28], %f26
	/*
	 * Two F steps keyed by %f16/%f18, an XOR with the saved first 16 key
	 * bytes, then two more F steps keyed by %f20/%f22.
	 */
	CAMELLIA_F(16, 2, 0, 2)
	CAMELLIA_F(18, 0, 2, 0)
	fxor	%f28, %f0, %f0
	fxor	%f30, %f2, %f2
	CAMELLIA_F(20, 2, 0, 2)
	CAMELLIA_F(22, 0, 2, 0)

/*
 * ROTL128(S01, S23, TMP1, TMP2, N)
 *
 * Rotate the 128-bit value S01:S23 (S01 = upper 64 bits) left by N bits, in
 * place.  TMP1 and TMP2 are clobbered.  Only meaningful for 0 < N < 64,
 * because each half is shifted by N and by (64 - N).
 */
#define ROTL128(S01, S23, TMP1, TMP2, N)	\
	srlx	S01, (64 - N), TMP1;		\
	sllx	S01, N, S01;			\
	srlx	S23, (64 - N), TMP2;		\
	sllx	S23, N, S23;			\
	or	S01, TMP2, S01;			\
	or	S23, TMP1, S23

	cmp	%o2, 16
	bne	1f
	 nop
	/* 128-bit key */
	std	%f0, [%o1 + 0x10]	! k[ 4,  5]
	std	%f2, [%o1 + 0x18]	! k[ 6,  7]
	/*
	 * MOVDTOX_F0_O4 / MOVDTOX_F2_O5 come from opcodes.h; judging by the
	 * names they copy %f0 -> %o4 and %f2 -> %o5 (TODO: confirm).  The
	 * subkeys below are successive cumulative rotations of that value.
	 */
	MOVDTOX_F0_O4
	MOVDTOX_F2_O5
	ROTL128(%o4, %o5, %g2, %g3, 15)
	stx	%o4, [%o1 + 0x30]	! k[12, 13]
	stx	%o5, [%o1 + 0x38]	! k[14, 15]
	ROTL128(%o4, %o5, %g2, %g3, 15)
	stx	%o4, [%o1 + 0x40]	! k[16, 17]
	stx	%o5, [%o1 + 0x48]	! k[18, 19]
	ROTL128(%o4, %o5, %g2, %g3, 15)
	/* Only the upper half of this rotation is stored. */
	stx	%o4, [%o1 + 0x60]	! k[24, 25]
	ROTL128(%o4, %o5, %g2, %g3, 15)
	stx	%o4, [%o1 + 0x70]	! k[28, 29]
	stx	%o5, [%o1 + 0x78]	! k[30, 31]
	ROTL128(%o4, %o5, %g2, %g3, 34)
	stx	%o4, [%o1 + 0xa0]	! k[40, 41]
	stx	%o5, [%o1 + 0xa8]	! k[42, 43]
	ROTL128(%o4, %o5, %g2, %g3, 17)
	stx	%o4, [%o1 + 0xc0]	! k[48, 49]
	stx	%o5, [%o1 + 0xc8]	! k[50, 51]

	/* Remaining subkeys: cumulative rotations of k[0..3] (the raw key). */
	ldx	[%o1 + 0x00], %o4	! k[ 0,  1]
	ldx	[%o1 + 0x08], %o5	! k[ 2,  3]
	ROTL128(%o4, %o5, %g2, %g3, 15)
	stx	%o4, [%o1 + 0x20]	! k[ 8,  9]
	stx	%o5, [%o1 + 0x28]	! k[10, 11]
	ROTL128(%o4, %o5, %g2, %g3, 30)
	stx	%o4, [%o1 + 0x50]	! k[20, 21]
	stx	%o5, [%o1 + 0x58]	! k[22, 23]
	ROTL128(%o4, %o5, %g2, %g3, 15)
	/* Only the lower half of this rotation is stored. */
	stx	%o5, [%o1 + 0x68]	! k[26, 27]
	ROTL128(%o4, %o5, %g2, %g3, 17)
	stx	%o4, [%o1 + 0x80]	! k[32, 33]
	stx	%o5, [%o1 + 0x88]	! k[34, 35]
	ROTL128(%o4, %o5, %g2, %g3, 17)
	stx	%o4, [%o1 + 0x90]	! k[36, 37]
	stx	%o5, [%o1 + 0x98]	! k[38, 39]
	ROTL128(%o4, %o5, %g2, %g3, 17)
	stx	%o4, [%o1 + 0xb0]	! k[44, 45]
	stx	%o5, [%o1 + 0xb8]	! k[46, 47]

	/* %o0 = 0xc0: byte offset of the last 16-byte subkey pair. */
	ba,pt	%xcc, 2f
	 mov	(3 * 16 * 4), %o0

1:
	/* 192-bit or 256-bit key */
	std	%f0, [%o1 + 0x30]	! k[12, 13]
	std	%f2, [%o1 + 0x38]	! k[14, 15]
	/*
	 * XOR in the second 16 key bytes (stored at k[8..11] above), then run
	 * two more F steps keyed by the last two SIGMA constants.
	 */
	ldd	[%o1 + 0x20], %f4	! k[ 8,  9]
	ldd	[%o1 + 0x28], %f6	! k[10, 11]
	fxor	%f0, %f4, %f0
	fxor	%f2, %f6, %f2
	CAMELLIA_F(24, 2, 0, 2)
	CAMELLIA_F(26, 0, 2, 0)
	std	%f0, [%o1 + 0x10]	! k[ 4,  5]
	std	%f2, [%o1 + 0x18]	! k[ 6,  7]
	/* Cumulative rotations of the value just stored at k[4..7]. */
	MOVDTOX_F0_O4
	MOVDTOX_F2_O5
	ROTL128(%o4, %o5, %g2, %g3, 30)
	stx	%o4, [%o1 + 0x50]	! k[20, 21]
	stx	%o5, [%o1 + 0x58]	! k[22, 23]
	ROTL128(%o4, %o5, %g2, %g3, 30)
	stx	%o4, [%o1 + 0xa0]	! k[40, 41]
	stx	%o5, [%o1 + 0xa8]	! k[42, 43]
	ROTL128(%o4, %o5, %g2, %g3, 51)
	stx	%o4, [%o1 + 0x100]	! k[64, 65]
	stx	%o5, [%o1 + 0x108]	! k[66, 67]
	/*
	 * Cumulative rotations of k[8..11]; the first result overwrites the
	 * unrotated copy in place.
	 */
	ldx	[%o1 + 0x20], %o4	! k[ 8,  9]
	ldx	[%o1 + 0x28], %o5	! k[10, 11]
	ROTL128(%o4, %o5, %g2, %g3, 15)
	stx	%o4, [%o1 + 0x20]	! k[ 8,  9]
	stx	%o5, [%o1 + 0x28]	! k[10, 11]
	ROTL128(%o4, %o5, %g2, %g3, 15)
	stx	%o4, [%o1 + 0x40]	! k[16, 17]
	stx	%o5, [%o1 + 0x48]	! k[18, 19]
	ROTL128(%o4, %o5, %g2, %g3, 30)
	stx	%o4, [%o1 + 0x90]	! k[36, 37]
	stx	%o5, [%o1 + 0x98]	! k[38, 39]
	ROTL128(%o4, %o5, %g2, %g3, 34)
	stx	%o4, [%o1 + 0xd0]	! k[52, 53]
	stx	%o5, [%o1 + 0xd8]	! k[54, 55]
	/* Cumulative rotations of k[12..15], again overwriting the source. */
	ldx	[%o1 + 0x30], %o4	! k[12, 13]
	ldx	[%o1 + 0x38], %o5	! k[14, 15]
	ROTL128(%o4, %o5, %g2, %g3, 15)
	stx	%o4, [%o1 + 0x30]	! k[12, 13]
	stx	%o5, [%o1 + 0x38]	! k[14, 15]
	ROTL128(%o4, %o5, %g2, %g3, 30)
	stx	%o4, [%o1 + 0x70]	! k[28, 29]
	stx	%o5, [%o1 + 0x78]	! k[30, 31]
	/*
	 * k[48..51] = low(%o4), high(%o5), low(%o5), high(%o4): the current
	 * value rotated left by 32 bits, written word by word so that
	 * %o4/%o5 stay unmodified for the next ROTL128.
	 */
	srlx	%o4, 32, %g2
	srlx	%o5, 32, %g3
	stw	%o4, [%o1 + 0xc0]	! k[48]
	stw	%g3, [%o1 + 0xc4]	! k[49]
	stw	%o5, [%o1 + 0xc8]	! k[50]
	stw	%g2, [%o1 + 0xcc]	! k[51]
	ROTL128(%o4, %o5, %g2, %g3, 49)
	stx	%o4, [%o1 + 0xe0]	! k[56, 57]
	stx	%o5, [%o1 + 0xe8]	! k[58, 59]
	/* Cumulative rotations of k[0..3] (the first 16 raw key bytes). */
	ldx	[%o1 + 0x00], %o4	! k[ 0,  1]
	ldx	[%o1 + 0x08], %o5	! k[ 2,  3]
	ROTL128(%o4, %o5, %g2, %g3, 45)
	stx	%o4, [%o1 + 0x60]	! k[24, 25]
	stx	%o5, [%o1 + 0x68]	! k[26, 27]
	ROTL128(%o4, %o5, %g2, %g3, 15)
	stx	%o4, [%o1 + 0x80]	! k[32, 33]
	stx	%o5, [%o1 + 0x88]	! k[34, 35]
	ROTL128(%o4, %o5, %g2, %g3, 17)
	stx	%o4, [%o1 + 0xb0]	! k[44, 45]
	stx	%o5, [%o1 + 0xb8]	! k[46, 47]
	ROTL128(%o4, %o5, %g2, %g3, 34)
	stx	%o4, [%o1 + 0xf0]	! k[60, 61]
	stx	%o5, [%o1 + 0xf8]	! k[62, 63]
	/* %o0 = 0x100: byte offset of the last 16-byte subkey pair. */
	mov	(4 * 16 * 4), %o0
2:
	/*
	 * Build the decrypt schedule.  Its first 16 bytes are the last 16
	 * bytes of the encrypt schedule (at offset %o0), copied in order.
	 */
	add	%o1, %o0, %o1
	ldd	[%o1 + 0x00], %f0
	ldd	[%o1 + 0x08], %f2
	std	%f0, [%o3 + 0x00]
	std	%f2, [%o3 + 0x08]
	add	%o3, 0x10, %o3
1:
	/*
	 * Walk the encrypt schedule downwards 64 bytes at a time and write
	 * its eight 8-byte words to the decrypt schedule in reverse order.
	 * %o0 counts the bytes still to be copied.
	 */
	sub	%o1, (16 * 4), %o1
	ldd	[%o1 + 0x38], %f0
	ldd	[%o1 + 0x30], %f2
	ldd	[%o1 + 0x28], %f4
	ldd	[%o1 + 0x20], %f6
	ldd	[%o1 + 0x18], %f8
	ldd	[%o1 + 0x10], %f10
	std	%f0, [%o3 + 0x00]
	std	%f2, [%o3 + 0x08]
	std	%f4, [%o3 + 0x10]
	std	%f6, [%o3 + 0x18]
	std	%f8, [%o3 + 0x20]
	std	%f10, [%o3 + 0x28]

	ldd	[%o1 + 0x08], %f0
	ldd	[%o1 + 0x00], %f2
	std	%f0, [%o3 + 0x30]
	std	%f2, [%o3 + 0x38]
	subcc	%o0, (16 * 4), %o0
	bne,pt	%icc, 1b
	 add	%o3, (16 * 4), %o3

	/*
	 * The final iteration stored encrypt words 0 and 1 swapped; %f2/%f0
	 * still hold them, so rewrite the last 16 decrypt bytes in the
	 * original order.
	 */
	std	%f2, [%o3 - 0x10]
	std	%f0, [%o3 - 0x08]

	retl
	 VISExit
ENDPROC(camellia_sparc64_key_expand)

/*
 * camellia_sparc64_crypt - transform a single 16-byte block.
 *
 * In:   %o0 = key      expanded key schedule
 *       %o1 = input    16 bytes, read with four 32-bit loads
 *       %o2 = output   16 bytes, written with four 32-bit stores
 *       %o3 = key_len  16 runs three six-round stages; any other value
 *                      runs one additional leading stage
 *
 * There is no direction argument: the routine applies whatever schedule it
 * is given (presumably the encrypt or the decrypt schedule produced by
 * camellia_sparc64_key_expand; confirm against the C caller).
 *
 * Uses: %o0, the integer condition codes and %f0-%f54, bracketed by
 *       VISEntry / VISExit.
 */
	.align	32
ENTRY(camellia_sparc64_crypt)
	/* %o0=key, %o1=input, %o2=output, %o3=key_len */
	VISEntry

	ld	[%o1 + 0x00], %f0
	ld	[%o1 + 0x04], %f1
	ld	[%o1 + 0x08], %f2
	ld	[%o1 + 0x0c], %f3

	ldd	[%o0 + 0x00], %f4
	ldd	[%o0 + 0x08], %f6

	/* XOR the block with the first 16 schedule bytes. */
	cmp	%o3, 16
	fxor	%f4, %f0, %f0
	be	1f
	 fxor	%f6, %f2, %f2

	/*
	 * Longer keys: run one extra stage with schedule bytes 0x10-0x4f,
	 * then advance %o0 by 0x40 so the common code below sees the
	 * remaining subkeys at the same offsets as in the 16-byte case.
	 */
	ldd	[%o0 + 0x10], %f8
	ldd	[%o0 + 0x18], %f10
	ldd	[%o0 + 0x20], %f12
	ldd	[%o0 + 0x28], %f14
	ldd	[%o0 + 0x30], %f16
	ldd	[%o0 + 0x38], %f18
	ldd	[%o0 + 0x40], %f20
	ldd	[%o0 + 0x48], %f22
	add	%o0, 0x40, %o0

	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)

1:
	/* Common tail: subkeys at %o0 + 0x10 .. 0xcf go to %f8-%f54. */
	ldd	[%o0 + 0x10], %f8
	ldd	[%o0 + 0x18], %f10
	ldd	[%o0 + 0x20], %f12
	ldd	[%o0 + 0x28], %f14
	ldd	[%o0 + 0x30], %f16
	ldd	[%o0 + 0x38], %f18
	ldd	[%o0 + 0x40], %f20
	ldd	[%o0 + 0x48], %f22
	ldd	[%o0 + 0x50], %f24
	ldd	[%o0 + 0x58], %f26
	ldd	[%o0 + 0x60], %f28
	ldd	[%o0 + 0x68], %f30
	ldd	[%o0 + 0x70], %f32
	ldd	[%o0 + 0x78], %f34
	ldd	[%o0 + 0x80], %f36
	ldd	[%o0 + 0x88], %f38
	ldd	[%o0 + 0x90], %f40
	ldd	[%o0 + 0x98], %f42
	ldd	[%o0 + 0xa0], %f44
	ldd	[%o0 + 0xa8], %f46
	ldd	[%o0 + 0xb0], %f48
	ldd	[%o0 + 0xb8], %f50
	ldd	[%o0 + 0xc0], %f52
	ldd	[%o0 + 0xc8], %f54

	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
	CAMELLIA_6ROUNDS(40, 0, 2)
	/* Final XOR with the last 16 schedule bytes; the halves are swapped. */
	fxor	%f52, %f2, %f2
	fxor	%f54, %f0, %f0

	/* %f2 is written first: the output is stored with halves swapped. */
	st	%f2, [%o2 + 0x00]
	st	%f3, [%o2 + 0x04]
	st	%f0, [%o2 + 0x08]
	st	%f1, [%o2 + 0x0c]

	retl
	 VISExit
ENDPROC(camellia_sparc64_crypt)

/*
 * camellia_sparc64_load_keys - preload the first 0xd0 bytes of a key
 * schedule into FP registers.
 *
 * In:   %o0 = key      expanded key schedule
 *       %o1 = key_len  not referenced by this routine
 *
 * Out:  %f4-%f54 (even registers) hold schedule bytes 0x00-0xcf; schedule
 *       offset X lands in %f(4 + X / 4).
 *
 * VISEntry is issued here with no matching VISExit in this routine, so the
 * FP state is intentionally left live on return (presumably for the
 * *_grand_rounds routines in this file, which read %f4-%f54 without loading
 * them; the matching exit must happen in the caller - confirm there).
 */
	.align	32
ENTRY(camellia_sparc64_load_keys)
	/* %o0=key, %o1=key_len */
	VISEntry
	ldd	[%o0 + 0x00], %f4
	ldd	[%o0 + 0x08], %f6
	ldd	[%o0 + 0x10], %f8
	ldd	[%o0 + 0x18], %f10
	ldd	[%o0 + 0x20], %f12
	ldd	[%o0 + 0x28], %f14
	ldd	[%o0 + 0x30], %f16
	ldd	[%o0 + 0x38], %f18
	ldd	[%o0 + 0x40], %f20
	ldd	[%o0 + 0x48], %f22
	ldd	[%o0 + 0x50], %f24
	ldd	[%o0 + 0x58], %f26
	ldd	[%o0 + 0x60], %f28
	ldd	[%o0 + 0x68], %f30
	ldd	[%o0 + 0x70], %f32
	ldd	[%o0 + 0x78], %f34
	ldd	[%o0 + 0x80], %f36
	ldd	[%o0 + 0x88], %f38
	ldd	[%o0 + 0x90], %f40
	ldd	[%o0 + 0x98], %f42
	ldd	[%o0 + 0xa0], %f44
	ldd	[%o0 + 0xa8], %f46
	ldd	[%o0 + 0xb0], %f48
	ldd	[%o0 + 0xb8], %f50
	ldd	[%o0 + 0xc0], %f52
	/* The last load rides in the delay slot of the return. */
	retl
	 ldd	[%o0 + 0xc8], %f54
ENDPROC(camellia_sparc64_load_keys)

/*
 * camellia_sparc64_ecb_crypt_3_grand_rounds - ECB over a run of 16-byte
 * blocks using three six-round stages per block.
 *
 * In:   %o0 = input   read with 8-byte ldd
 *       %o1 = output  written with 8-byte std
 *       %o2 = len     byte count
 *       %o3 = key     not referenced by this variant
 *
 * Precondition: %f4-%f54 already hold the key schedule; this routine only
 * reads them (camellia_sparc64_load_keys fills exactly that register set).
 * No VISEntry / VISExit is issued here.
 *
 * The loop body runs before the length is tested and exits only when the
 * remaining count reaches exactly zero (tested on %icc), so len has to be a
 * non-zero multiple of 16.
 *
 * Uses: %o0, %o1, %o2, the integer condition codes, %f0-%f3.
 */
	.align	32
ENTRY(camellia_sparc64_ecb_crypt_3_grand_rounds)
	/* %o0=input, %o1=output, %o2=len, %o3=key */
1:	ldd	[%o0 + 0x00], %f0
	ldd	[%o0 + 0x08], %f2
	add	%o0, 0x10, %o0
	/* XOR with the first 16 schedule bytes. */
	fxor	%f4, %f0, %f0
	fxor	%f6, %f2, %f2
	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
	CAMELLIA_6ROUNDS(40, 0, 2)
	/* Final XOR with %f52/%f54; the halves are swapped on output. */
	fxor	%f52, %f2, %f2
	fxor	%f54, %f0, %f0
	std	%f2, [%o1 + 0x00]
	std	%f0, [%o1 + 0x08]
	subcc	%o2, 0x10, %o2
	bne,pt	%icc, 1b
	 add	%o1, 0x10, %o1
	retl
	 nop
ENDPROC(camellia_sparc64_ecb_crypt_3_grand_rounds)

/*
 * camellia_sparc64_ecb_crypt_4_grand_rounds - ECB over a run of 16-byte
 * blocks using four six-round stages per block.
 *
 * In:   %o0 = input   read with 8-byte ldd
 *       %o1 = output  written with 8-byte std
 *       %o2 = len     byte count
 *       %o3 = key     expanded key schedule (bytes 0x10-0x4f and 0xd0-0x10f
 *                     are read from memory inside the loop)
 *
 * Precondition: %f4-%f54 already hold schedule bytes 0x00-0xcf, as loaded
 * by camellia_sparc64_load_keys.  No VISEntry / VISExit is issued here.
 *
 * The schedule is longer than the preloaded register set, so %f8-%f22 are
 * borrowed for schedule bytes 0xd0-0x10f once the first stage has consumed
 * them, and are reloaded from bytes 0x10-0x4f before the next block.
 *
 * The loop body runs before the length is tested and exits only when the
 * remaining count reaches exactly zero (tested on %icc), so len has to be a
 * non-zero multiple of 16.
 *
 * Uses: %o0, %o1, %o2, the integer condition codes, %f0-%f3; %f8-%f22 are
 *       overwritten and restored within each iteration.
 */
	.align	32
ENTRY(camellia_sparc64_ecb_crypt_4_grand_rounds)
	/* %o0=input, %o1=output, %o2=len, %o3=key */
1:	ldd	[%o0 + 0x00], %f0
	ldd	[%o0 + 0x08], %f2
	add	%o0, 0x10, %o0
	fxor	%f4, %f0, %f0
	fxor	%f6, %f2, %f2
	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
	/* %f8-%f22 are free now: borrow them for schedule bytes 0xd0-0x10f. */
	ldd	[%o3 + 0xd0], %f8
	ldd	[%o3 + 0xd8], %f10
	ldd	[%o3 + 0xe0], %f12
	ldd	[%o3 + 0xe8], %f14
	ldd	[%o3 + 0xf0], %f16
	ldd	[%o3 + 0xf8], %f18
	ldd	[%o3 + 0x100], %f20
	ldd	[%o3 + 0x108], %f22
	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
	CAMELLIA_6ROUNDS_FL_FLI(40, 0, 2)
	/*
	 * Last six F steps use the borrowed registers.  Each pair is put
	 * back to its preloaded value (bytes 0x10-0x4f) right after use.
	 */
	CAMELLIA_F(8, 2, 0, 2)
	CAMELLIA_F(10, 0, 2, 0)
	ldd	[%o3 + 0x10], %f8
	ldd	[%o3 + 0x18], %f10
	CAMELLIA_F(12, 2, 0, 2)
	CAMELLIA_F(14, 0, 2, 0)
	ldd	[%o3 + 0x20], %f12
	ldd	[%o3 + 0x28], %f14
	CAMELLIA_F(16, 2, 0, 2)
	CAMELLIA_F(18, 0, 2, 0)
	ldd	[%o3 + 0x30], %f16
	ldd	[%o3 + 0x38], %f18
	/* Final XOR with schedule bytes 0x100-0x10f; halves are swapped. */
	fxor	%f20, %f2, %f2
	fxor	%f22, %f0, %f0
	ldd	[%o3 + 0x40], %f20
	ldd	[%o3 + 0x48], %f22
	std	%f2, [%o1 + 0x00]
	std	%f0, [%o1 + 0x08]
	subcc	%o2, 0x10, %o2
	bne,pt	%icc, 1b
	 add	%o1, 0x10, %o1
	retl
	 nop
ENDPROC(camellia_sparc64_ecb_crypt_4_grand_rounds)

/*
 * camellia_sparc64_cbc_encrypt_3_grand_rounds - CBC encryption over a run
 * of 16-byte blocks using three six-round stages per block.
 *
 * In:   %o0 = input   read with 8-byte ldd
 *       %o1 = output  written with 8-byte std
 *       %o2 = len     byte count
 *       %o3 = key     not referenced by this variant
 *       %o4 = IV      16 bytes; read on entry, overwritten with the last
 *                     output block on return
 *
 * Precondition: %f4-%f54 already hold the key schedule, as loaded by
 * camellia_sparc64_load_keys.  No VISEntry / VISExit is issued here.
 *
 * %f60/%f62 carry the chaining value: the IV at first, then each output
 * block as it is produced.
 *
 * The loop body runs before the length is tested and exits only when the
 * remaining count reaches exactly zero (tested on %icc), so len has to be a
 * non-zero multiple of 16.
 *
 * Uses: %o0, %o1, %o2, the integer condition codes, %f0-%f3, %f60-%f63.
 */
	.align	32
ENTRY(camellia_sparc64_cbc_encrypt_3_grand_rounds)
	/* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */
	ldd	[%o4 + 0x00], %f60
	ldd	[%o4 + 0x08], %f62
1:	ldd	[%o0 + 0x00], %f0
	ldd	[%o0 + 0x08], %f2
	add	%o0, 0x10, %o0
	/* Chain first, then XOR with the first 16 schedule bytes. */
	fxor	%f60, %f0, %f0
	fxor	%f62, %f2, %f2
	fxor	%f4, %f0, %f0
	fxor	%f6, %f2, %f2
	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
	CAMELLIA_6ROUNDS(40, 0, 2)
	/* The result goes straight into the chaining registers. */
	fxor	%f52, %f2, %f60
	fxor	%f54, %f0, %f62
	std	%f60, [%o1 + 0x00]
	std	%f62, [%o1 + 0x08]
	subcc	%o2, 0x10, %o2
	bne,pt	%icc, 1b
	 add	%o1, 0x10, %o1
	/* Hand the last output block back as the next IV. */
	std	%f60, [%o4 + 0x00]
	retl
	 std	%f62, [%o4 + 0x08]
ENDPROC(camellia_sparc64_cbc_encrypt_3_grand_rounds)

/*
 * camellia_sparc64_cbc_encrypt_4_grand_rounds - CBC encryption over a run
 * of 16-byte blocks using four six-round stages per block.
 *
 * In:   %o0 = input   read with 8-byte ldd
 *       %o1 = output  written with 8-byte std
 *       %o2 = len     byte count
 *       %o3 = key     expanded key schedule (bytes 0x10-0x4f and 0xd0-0x10f
 *                     are read from memory inside the loop)
 *       %o4 = IV      16 bytes; read on entry, overwritten with the last
 *                     output block on return
 *
 * Precondition: %f4-%f54 already hold schedule bytes 0x00-0xcf, as loaded
 * by camellia_sparc64_load_keys.  No VISEntry / VISExit is issued here.
 *
 * %f60/%f62 carry the chaining value.  %f8-%f22 are borrowed for schedule
 * bytes 0xd0-0x10f after the first stage and reloaded from bytes 0x10-0x4f
 * before the next block.
 *
 * The loop body runs before the length is tested and exits only when the
 * remaining count reaches exactly zero (tested on %icc), so len has to be a
 * non-zero multiple of 16.
 *
 * Uses: %o0, %o1, %o2, the integer condition codes, %f0-%f3, %f60-%f63;
 *       %f8-%f22 are overwritten and restored within each iteration.
 */
	.align	32
ENTRY(camellia_sparc64_cbc_encrypt_4_grand_rounds)
	/* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */
	ldd	[%o4 + 0x00], %f60
	ldd	[%o4 + 0x08], %f62
1:	ldd	[%o0 + 0x00], %f0
	ldd	[%o0 + 0x08], %f2
	add	%o0, 0x10, %o0
	/* Chain first, then XOR with the first 16 schedule bytes. */
	fxor	%f60, %f0, %f0
	fxor	%f62, %f2, %f2
	fxor	%f4, %f0, %f0
	fxor	%f6, %f2, %f2
	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
	/* %f8-%f22 are free now: borrow them for schedule bytes 0xd0-0x10f. */
	ldd	[%o3 + 0xd0], %f8
	ldd	[%o3 + 0xd8], %f10
	ldd	[%o3 + 0xe0], %f12
	ldd	[%o3 + 0xe8], %f14
	ldd	[%o3 + 0xf0], %f16
	ldd	[%o3 + 0xf8], %f18
	ldd	[%o3 + 0x100], %f20
	ldd	[%o3 + 0x108], %f22
	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
	CAMELLIA_6ROUNDS_FL_FLI(40, 0, 2)
	/* Last six F steps; each borrowed pair is restored right after use. */
	CAMELLIA_F(8, 2, 0, 2)
	CAMELLIA_F(10, 0, 2, 0)
	ldd	[%o3 + 0x10], %f8
	ldd	[%o3 + 0x18], %f10
	CAMELLIA_F(12, 2, 0, 2)
	CAMELLIA_F(14, 0, 2, 0)
	ldd	[%o3 + 0x20], %f12
	ldd	[%o3 + 0x28], %f14
	CAMELLIA_F(16, 2, 0, 2)
	CAMELLIA_F(18, 0, 2, 0)
	ldd	[%o3 + 0x30], %f16
	ldd	[%o3 + 0x38], %f18
	/* The result goes straight into the chaining registers. */
	fxor	%f20, %f2, %f60
	fxor	%f22, %f0, %f62
	ldd	[%o3 + 0x40], %f20
	ldd	[%o3 + 0x48], %f22
	std	%f60, [%o1 + 0x00]
	std	%f62, [%o1 + 0x08]
	subcc	%o2, 0x10, %o2
	bne,pt	%icc, 1b
	 add	%o1, 0x10, %o1
	/* Hand the last output block back as the next IV. */
	std	%f60, [%o4 + 0x00]
	retl
	 std	%f62, [%o4 + 0x08]
ENDPROC(camellia_sparc64_cbc_encrypt_4_grand_rounds)

/*
 * camellia_sparc64_cbc_decrypt_3_grand_rounds - CBC decryption over a run
 * of 16-byte blocks using three six-round stages per block.
 *
 * In:   %o0 = input   read with 8-byte ldd
 *       %o1 = output  written with 8-byte std
 *       %o2 = len     byte count
 *       %o3 = key     not referenced by this variant
 *       %o4 = IV      16 bytes; read on entry, overwritten with the last
 *                     input block on return
 *
 * Precondition: %f4-%f54 already hold the key schedule, as loaded by
 * camellia_sparc64_load_keys (presumably the decrypt schedule; confirm
 * against the C caller).  No VISEntry / VISExit is issued here.
 *
 * %f56/%f58 keep the untouched input block so that it can become the
 * chaining value (%f60/%f62) for the following block.
 *
 * The loop body runs before the length is tested and exits only when the
 * remaining count reaches exactly zero (tested on %icc), so len has to be a
 * non-zero multiple of 16.
 *
 * Uses: %o0, %o1, %o2, the integer condition codes, %f0-%f3, %f56-%f63.
 */
	.align	32
ENTRY(camellia_sparc64_cbc_decrypt_3_grand_rounds)
	/* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */
	ldd	[%o4 + 0x00], %f60
	ldd	[%o4 + 0x08], %f62
1:	ldd	[%o0 + 0x00], %f56
	ldd	[%o0 + 0x08], %f58
	add	%o0, 0x10, %o0
	/* Work on a copy in %f0/%f2; %f56/%f58 stay intact. */
	fxor	%f4, %f56, %f0
	fxor	%f6, %f58, %f2
	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
	CAMELLIA_6ROUNDS(40, 0, 2)
	fxor	%f52, %f2, %f2
	fxor	%f54, %f0, %f0
	/* Undo the chaining, then promote this input block to chain value. */
	fxor	%f60, %f2, %f2
	fxor	%f62, %f0, %f0
	fsrc2	%f56, %f60
	fsrc2	%f58, %f62
	std	%f2, [%o1 + 0x00]
	std	%f0, [%o1 + 0x08]
	subcc	%o2, 0x10, %o2
	bne,pt	%icc, 1b
	 add	%o1, 0x10, %o1
	/* Hand the last input block back as the next IV. */
	std	%f60, [%o4 + 0x00]
	retl
	 std	%f62, [%o4 + 0x08]
ENDPROC(camellia_sparc64_cbc_decrypt_3_grand_rounds)

/*
 * camellia_sparc64_cbc_decrypt_4_grand_rounds - CBC decryption over a run
 * of 16-byte blocks using four six-round stages per block.
 *
 * In:   %o0 = input   read with 8-byte ldd
 *       %o1 = output  written with 8-byte std
 *       %o2 = len     byte count
 *       %o3 = key     expanded key schedule (bytes 0x10-0x4f and 0xd0-0x10f
 *                     are read from memory inside the loop)
 *       %o4 = IV      16 bytes; read on entry, overwritten with the last
 *                     input block on return
 *
 * Precondition: %f4-%f54 already hold schedule bytes 0x00-0xcf, as loaded
 * by camellia_sparc64_load_keys (presumably from the decrypt schedule;
 * confirm against the C caller).  No VISEntry / VISExit is issued here.
 *
 * %f56/%f58 keep the untouched input block so that it can become the
 * chaining value (%f60/%f62) for the following block.  %f8-%f22 are
 * borrowed for schedule bytes 0xd0-0x10f after the first stage and reloaded
 * from bytes 0x10-0x4f before the next block.
 *
 * The loop body runs before the length is tested and exits only when the
 * remaining count reaches exactly zero (tested on %icc), so len has to be a
 * non-zero multiple of 16.
 *
 * Uses: %o0, %o1, %o2, the integer condition codes, %f0-%f3, %f56-%f63;
 *       %f8-%f22 are overwritten and restored within each iteration.
 */
	.align	32
ENTRY(camellia_sparc64_cbc_decrypt_4_grand_rounds)
	/* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */
	ldd	[%o4 + 0x00], %f60
	ldd	[%o4 + 0x08], %f62
1:	ldd	[%o0 + 0x00], %f56
	ldd	[%o0 + 0x08], %f58
	add	%o0, 0x10, %o0
	/* Work on a copy in %f0/%f2; %f56/%f58 stay intact. */
	fxor	%f4, %f56, %f0
	fxor	%f6, %f58, %f2
	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
	/* %f8-%f22 are free now: borrow them for schedule bytes 0xd0-0x10f. */
	ldd	[%o3 + 0xd0], %f8
	ldd	[%o3 + 0xd8], %f10
	ldd	[%o3 + 0xe0], %f12
	ldd	[%o3 + 0xe8], %f14
	ldd	[%o3 + 0xf0], %f16
	ldd	[%o3 + 0xf8], %f18
	ldd	[%o3 + 0x100], %f20
	ldd	[%o3 + 0x108], %f22
	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
	CAMELLIA_6ROUNDS_FL_FLI(40, 0, 2)
	/* Last six F steps; each borrowed pair is restored right after use. */
	CAMELLIA_F(8, 2, 0, 2)
	CAMELLIA_F(10, 0, 2, 0)
	ldd	[%o3 + 0x10], %f8
	ldd	[%o3 + 0x18], %f10
	CAMELLIA_F(12, 2, 0, 2)
	CAMELLIA_F(14, 0, 2, 0)
	ldd	[%o3 + 0x20], %f12
	ldd	[%o3 + 0x28], %f14
	CAMELLIA_F(16, 2, 0, 2)
	CAMELLIA_F(18, 0, 2, 0)
	ldd	[%o3 + 0x30], %f16
	ldd	[%o3 + 0x38], %f18
	fxor	%f20, %f2, %f2
	fxor	%f22, %f0, %f0
	ldd	[%o3 + 0x40], %f20
	ldd	[%o3 + 0x48], %f22
	/* Undo the chaining, then promote this input block to chain value. */
	fxor	%f60, %f2, %f2
	fxor	%f62, %f0, %f0
	fsrc2	%f56, %f60
	fsrc2	%f58, %f62
	std	%f2, [%o1 + 0x00]
	std	%f0, [%o1 + 0x08]
	subcc	%o2, 0x10, %o2
	bne,pt	%icc, 1b
	 add	%o1, 0x10, %o1
	/* Hand the last input block back as the next IV. */
	std	%f60, [%o4 + 0x00]
	retl
	 std	%f62, [%o4 + 0x08]
ENDPROC(camellia_sparc64_cbc_decrypt_4_grand_rounds)