xref: /OK3568_Linux_fs/kernel/arch/arm64/crypto/sha3-ce-core.S (revision 4882a59341e53eb6f0b4789bf948001014eff981)
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * sha3-ce-core.S - core SHA-3 transform using v8.2 Crypto Extensions
 *
 * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	/*
	 * Define one assembler symbol per vector-register operand spelling
	 * (.Lv0.2d = 0, .Lv0.16b = 0, ... .Lv31.2d = 31, .Lv31.16b = 31).
	 * The eor3/rax1/bcax/xar macros in this file paste ".L" in front of
	 * each operand they are given, which turns an operand such as
	 * "v29.16b" into the symbol ".Lv29.16b" and thereby into the register
	 * number that is OR-ed into the raw .inst encoding.  Only the .2d and
	 * .16b arrangements are defined, so those are the only spellings the
	 * macros accept.
	 */
	.irp	b,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
	.set	.Lv\b\().2d, \b
	.set	.Lv\b\().16b, \b
	.endr

/*
 * ARMv8.2 Crypto Extensions instructions
 *
 * Each macro emits the instruction as a raw opcode with .inst instead of
 * using the mnemonic (presumably so the file still builds with assemblers
 * that do not know these instructions - confirm against the supported
 * toolchains).
 *
 * Operands must be written exactly as vN.2d or vN.16b: the macro body
 * prefixes each one with ".L" and relies on the matching .Lv<N>.<arr>
 * symbol holding the register number N.
 *
 * Field layout used by the expressions below:
 *   bits  4:0   rd
 *   bits  9:5   rn
 *   bits 15:10  ra (eor3, bcax) or imm6 (xar)
 *   bits 20:16  rm
 */

	/* eor3 rd, rn, rm, ra: three-way XOR, rd = rn ^ rm ^ ra (Arm ARM: EOR3) */
	.macro	eor3, rd, rn, rm, ra
	.inst	0xce000000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
	.endm

	/* rax1 rd, rn, rm: rd = rn ^ rol64(rm, 1) per 64-bit lane (Arm ARM: RAX1) */
	.macro	rax1, rd, rn, rm
	.inst	0xce608c00 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
	.endm

	/* bcax rd, rn, rm, ra: bit clear and XOR, rd = rn ^ (rm & ~ra) (Arm ARM: BCAX) */
	.macro	bcax, rd, rn, rm, ra
	.inst	0xce200000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
	.endm

	/*
	 * xar rd, rn, rm, imm6: rd = ror64(rn ^ rm, imm6) per 64-bit lane
	 * (Arm ARM: XAR).  imm6 is inserted unmasked, so it must be 0..63.
	 */
	.macro	xar, rd, rn, rm, imm6
	.inst	0xce800000 | .L\rd | (.L\rn << 5) | ((\imm6) << 10) | (.L\rm << 16)
	.endm

/*
 * int sha3_ce_transform(u64 *st, const u8 *data, int blocks, int dg_size)
 *
 * Absorb 'blocks' input blocks from 'data' into the 25-lane state at 'st',
 * running 24 permutation rounds after each block is XOR-ed in.
 *
 * In:   x0 = st       25 x 64-bit lanes; lane i is held in the low half
 *                     of v<i> (v0-v24) for the whole call and written
 *                     back on exit
 *       x1 = data     input bytes; post-incremented as each block is read
 *       w2 = blocks   number of blocks to process.  Must be >= 1: the
 *                     counter is decremented before it is first tested,
 *                     so 0 would wrap around instead of returning.
 *       x3 = dg_size  digest size in bytes.  Only bits 6, 4 and 2 are
 *                     examined, which is enough to tell 64, 48, 32 and
 *                     28 apart and hence how many lanes one block fills
 *                     (9, 13, 17 or 18).
 *
 * Out:  w0 = value of w2 at exit, i.e. the number of blocks that were
 *            NOT processed.  This is 0 unless cond_yield branched to the
 *            save-state path early.
 *
 * Uses: x8 (cursor for the state load, then round counter), x9 (round
 *       constant pointer), v0-v31.  x0 and x1 are advanced.
 *
 * NOTE(review): v8-v15 are callee-saved under AAPCS64 but are overwritten
 * here without being preserved.  Presumably every caller brackets the call
 * with kernel_neon_begin()/kernel_neon_end() - confirm in the C glue code.
 */
	.text
SYM_FUNC_START(sha3_ce_transform)
	/* load state: v0-v3 from [x0], the remaining 21 lanes through x8 */
	add	x8, x0, #32
	ld1	{ v0.1d- v3.1d}, [x0]
	ld1	{ v4.1d- v7.1d}, [x8], #32
	ld1	{ v8.1d-v11.1d}, [x8], #32
	ld1	{v12.1d-v15.1d}, [x8], #32
	ld1	{v16.1d-v19.1d}, [x8], #32
	ld1	{v20.1d-v23.1d}, [x8], #32
	ld1	{v24.1d}, [x8]

	/* per-block loop: after the sub, w2 = blocks left after this one */
0:	sub	w2, w2, #1
	mov	w8, #24			// rounds to run for this block
	adr_l	x9, .Lsha3_rcon		// rewind to the first round constant

	/* load input: lanes 0-6 (56 bytes) are absorbed for every dg_size */
	ld1	{v25.8b-v28.8b}, [x1], #32
	ld1	{v29.8b-v31.8b}, [x1], #24
	eor	v0.8b, v0.8b, v25.8b
	eor	v1.8b, v1.8b, v26.8b
	eor	v2.8b, v2.8b, v27.8b
	eor	v3.8b, v3.8b, v28.8b
	eor	v4.8b, v4.8b, v29.8b
	eor	v5.8b, v5.8b, v30.8b
	eor	v6.8b, v6.8b, v31.8b

	tbnz	x3, #6, 2f		// SHA3-512

	/* lanes 7-12 (48 bytes): shared by SHA3-384, SHA3-256 and SHA3-224 */
	ld1	{v25.8b-v28.8b}, [x1], #32
	ld1	{v29.8b-v30.8b}, [x1], #16
	eor	 v7.8b,  v7.8b, v25.8b
	eor	 v8.8b,  v8.8b, v26.8b
	eor	 v9.8b,  v9.8b, v27.8b
	eor	v10.8b, v10.8b, v28.8b
	eor	v11.8b, v11.8b, v29.8b
	eor	v12.8b, v12.8b, v30.8b

	tbnz	x3, #4, 1f		// SHA3-384 or SHA3-224

	// SHA3-256: lanes 13-16 (32 bytes), 17 lanes in total
	ld1	{v25.8b-v28.8b}, [x1], #32
	eor	v13.8b, v13.8b, v25.8b
	eor	v14.8b, v14.8b, v26.8b
	eor	v15.8b, v15.8b, v27.8b
	eor	v16.8b, v16.8b, v28.8b
	b	3f

	// SHA3-384 needs nothing beyond the 13 lanes already absorbed
1:	tbz	x3, #2, 3f		// bit 2 cleared? SHA3-384

	// SHA3-224: lanes 13-17 (40 bytes), 18 lanes in total
	ld1	{v25.8b-v28.8b}, [x1], #32
	ld1	{v29.8b}, [x1], #8
	eor	v13.8b, v13.8b, v25.8b
	eor	v14.8b, v14.8b, v26.8b
	eor	v15.8b, v15.8b, v27.8b
	eor	v16.8b, v16.8b, v28.8b
	eor	v17.8b, v17.8b, v29.8b
	b	3f

	// SHA3-512: lanes 7-8 (16 bytes), 9 lanes in total
2:	ld1	{v25.8b-v26.8b}, [x1], #16
	eor	 v7.8b,  v7.8b, v25.8b
	eor	 v8.8b,  v8.8b, v26.8b

	/* round loop: after the sub, w8 = rounds left after this one */
3:	sub	w8, w8, #1

	/*
	 * XOR the five lanes that share an index mod 5 into one value per
	 * column: v25..v29 = parity of lanes {0,5,10,15,20} .. {4,9,14,19,24}.
	 * Each eor3 folds in two more lanes, so two passes cover all five.
	 */
	eor3	v29.16b,  v4.16b,  v9.16b, v14.16b
	eor3	v26.16b,  v1.16b,  v6.16b, v11.16b
	eor3	v28.16b,  v3.16b,  v8.16b, v13.16b
	eor3	v25.16b,  v0.16b,  v5.16b, v10.16b
	eor3	v27.16b,  v2.16b,  v7.16b, v12.16b
	eor3	v29.16b, v29.16b, v19.16b, v24.16b
	eor3	v26.16b, v26.16b, v16.16b, v21.16b
	eor3	v28.16b, v28.16b, v18.16b, v23.16b
	eor3	v25.16b, v25.16b, v15.16b, v20.16b
	eor3	v27.16b, v27.16b, v17.16b, v22.16b

	/*
	 * bc[x] = parity[x - 1] ^ rol64(parity[x + 1], 1).  The results
	 * overwrite the parity registers, so the order is chosen such that
	 * every input is read before it is replaced (v29 is read last).
	 */
	rax1	v30.2d, v29.2d, v26.2d	// bc[0]
	rax1	v26.2d, v26.2d, v28.2d	// bc[2]
	rax1	v28.2d, v28.2d, v25.2d	// bc[4]
	rax1	v25.2d, v25.2d, v27.2d	// bc[1]
	rax1	v27.2d, v27.2d, v29.2d	// bc[3]

	/*
	 * XOR the matching bc[] value into each lane, rotate it and move it
	 * to its new position, all in one xar per lane.  xar rotates right,
	 * so a left rotation by n is written as (64 - n).  Lane 0 is neither
	 * rotated nor moved and only needs the plain eor.
	 *
	 * The destination of one xar is the source of a later one, so this
	 * sequence must not be reordered.  v29, v31 and v28/v27/v25/v26/v30
	 * hold lanes temporarily; the bc[] registers are overwritten only by
	 * the instruction that reads them for the last time.
	 */
	eor	 v0.16b,  v0.16b, v30.16b
	xar	 v29.2d,   v1.2d,  v25.2d, (64 - 1)
	xar	  v1.2d,   v6.2d,  v25.2d, (64 - 44)
	xar	  v6.2d,   v9.2d,  v28.2d, (64 - 20)
	xar	  v9.2d,  v22.2d,  v26.2d, (64 - 61)
	xar	 v22.2d,  v14.2d,  v28.2d, (64 - 39)
	xar	 v14.2d,  v20.2d,  v30.2d, (64 - 18)
	xar	 v31.2d,   v2.2d,  v26.2d, (64 - 62)
	xar	  v2.2d,  v12.2d,  v26.2d, (64 - 43)
	xar	 v12.2d,  v13.2d,  v27.2d, (64 - 25)
	xar	 v13.2d,  v19.2d,  v28.2d, (64 - 8)
	xar	 v19.2d,  v23.2d,  v27.2d, (64 - 56)
	xar	 v23.2d,  v15.2d,  v30.2d, (64 - 41)
	xar	 v15.2d,   v4.2d,  v28.2d, (64 - 27)
	xar	 v28.2d,  v24.2d,  v28.2d, (64 - 14)
	xar	 v24.2d,  v21.2d,  v25.2d, (64 - 2)
	xar	  v8.2d,   v8.2d,  v27.2d, (64 - 55)
	xar	  v4.2d,  v16.2d,  v25.2d, (64 - 45)
	xar	 v16.2d,   v5.2d,  v30.2d, (64 - 36)
	xar	  v5.2d,   v3.2d,  v27.2d, (64 - 28)
	xar	 v27.2d,  v18.2d,  v27.2d, (64 - 21)
	xar	  v3.2d,  v17.2d,  v26.2d, (64 - 15)
	xar	 v25.2d,  v11.2d,  v25.2d, (64 - 10)
	xar	 v26.2d,   v7.2d,  v26.2d, (64 - 6)
	xar	 v30.2d,  v10.2d,  v30.2d, (64 - 3)

	/*
	 * Non-linear step, one group of five bcax per row of five lanes:
	 * each output is one lane XOR-ed with (next-but-one AND NOT next).
	 * Within a group the first two results go to registers that are not
	 * inputs of that group, so the remaining three can update in place.
	 */
	bcax	v20.16b, v31.16b, v22.16b,  v8.16b
	bcax	v21.16b,  v8.16b, v23.16b, v22.16b
	bcax	v22.16b, v22.16b, v24.16b, v23.16b
	bcax	v23.16b, v23.16b, v31.16b, v24.16b
	bcax	v24.16b, v24.16b,  v8.16b, v31.16b

	/*
	 * v31 is free from here on: fetch this round's constant into both
	 * halves of it and step x9 to the next table entry.
	 */
	ld1r	{v31.2d}, [x9], #8

	bcax	v17.16b, v25.16b, v19.16b,  v3.16b
	bcax	v18.16b,  v3.16b, v15.16b, v19.16b
	bcax	v19.16b, v19.16b, v16.16b, v15.16b
	bcax	v15.16b, v15.16b, v25.16b, v16.16b
	bcax	v16.16b, v16.16b,  v3.16b, v25.16b

	bcax	v10.16b, v29.16b, v12.16b, v26.16b
	bcax	v11.16b, v26.16b, v13.16b, v12.16b
	bcax	v12.16b, v12.16b, v14.16b, v13.16b
	bcax	v13.16b, v13.16b, v29.16b, v14.16b
	bcax	v14.16b, v14.16b, v26.16b, v29.16b

	bcax	 v7.16b, v30.16b,  v9.16b,  v4.16b
	bcax	 v8.16b,  v4.16b,  v5.16b,  v9.16b
	bcax	 v9.16b,  v9.16b,  v6.16b,  v5.16b
	bcax	 v5.16b,  v5.16b, v30.16b,  v6.16b
	bcax	 v6.16b,  v6.16b,  v4.16b, v30.16b

	bcax	 v3.16b, v27.16b,  v0.16b, v28.16b
	bcax	 v4.16b, v28.16b,  v1.16b,  v0.16b
	bcax	 v0.16b,  v0.16b,  v2.16b,  v1.16b
	bcax	 v1.16b,  v1.16b, v27.16b,  v2.16b
	bcax	 v2.16b,  v2.16b, v28.16b, v27.16b

	/* XOR the round constant into lane 0 */
	eor	 v0.16b,  v0.16b, v31.16b

	cbnz	w8, 3b			// more rounds for this block?
	/*
	 * cond_yield is defined outside this file (<asm/assembler.h>).  It is
	 * handed the save-state label and x8/x9 as scratch; presumably it
	 * branches to 4 when the task should give up the CPU, in which case
	 * w2 still counts the blocks not yet processed - confirm against the
	 * macro definition.  x8 and x9 are both reinitialised at label 0.
	 */
	cond_yield 4f, x8, x9
	cbnz	w2, 0b			// more blocks?

	/* save state (x0 is advanced here, then replaced by the return value) */
4:	st1	{ v0.1d- v3.1d}, [x0], #32
	st1	{ v4.1d- v7.1d}, [x0], #32
	st1	{ v8.1d-v11.1d}, [x0], #32
	st1	{v12.1d-v15.1d}, [x0], #32
	st1	{v16.1d-v19.1d}, [x0], #32
	st1	{v20.1d-v23.1d}, [x0], #32
	st1	{v24.1d}, [x0]
	mov	w0, w2			// return the number of blocks left
	ret
SYM_FUNC_END(sha3_ce_transform)

/*
 * Round constants: 24 64-bit values, one per permutation round, in round
 * order.  sha3_ce_transform points x9 at the start of the table for every
 * block and reads one entry per round with a post-incrementing ld1r.
 *
 * NOTE(review): GNU as treats the .align operand as a power-of-two
 * exponent on AArch64, so ".align 8" requests 256-byte alignment, not
 * 8-byte.  Harmless, but confirm whether that much alignment is intended.
 */
	.section	".rodata", "a"
	.align		8
.Lsha3_rcon:
	.quad	0x0000000000000001, 0x0000000000008082, 0x800000000000808a
	.quad	0x8000000080008000, 0x000000000000808b, 0x0000000080000001
	.quad	0x8000000080008081, 0x8000000000008009, 0x000000000000008a
	.quad	0x0000000000000088, 0x0000000080008009, 0x000000008000000a
	.quad	0x000000008000808b, 0x800000000000008b, 0x8000000000008089
	.quad	0x8000000000008003, 0x8000000000008002, 0x8000000000000080
	.quad	0x000000000000800a, 0x800000008000000a, 0x8000000080008081
	.quad	0x8000000000008080, 0x0000000080000001, 0x8000000080008008