xref: /OK3568_Linux_fs/kernel/arch/arm64/crypto/sm3-ce-core.S (revision 4882a59341e53eb6f0b4789bf948001014eff981)
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * sm3-ce-core.S - SM3 secure hash using ARMv8.2 Crypto Extensions
 *
 * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
 */
7*4882a593Smuzhiyun
8*4882a593Smuzhiyun#include <linux/linkage.h>
9*4882a593Smuzhiyun#include <asm/assembler.h>
10*4882a593Smuzhiyun
11*4882a593Smuzhiyun	.irp		b, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12
12*4882a593Smuzhiyun	.set		.Lv\b\().4s, \b
13*4882a593Smuzhiyun	.endr
14*4882a593Smuzhiyun
15*4882a593Smuzhiyun	.macro		sm3partw1, rd, rn, rm
16*4882a593Smuzhiyun	.inst		0xce60c000 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
17*4882a593Smuzhiyun	.endm
18*4882a593Smuzhiyun
19*4882a593Smuzhiyun	.macro		sm3partw2, rd, rn, rm
20*4882a593Smuzhiyun	.inst		0xce60c400 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
21*4882a593Smuzhiyun	.endm
22*4882a593Smuzhiyun
23*4882a593Smuzhiyun	.macro		sm3ss1, rd, rn, rm, ra
24*4882a593Smuzhiyun	.inst		0xce400000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
25*4882a593Smuzhiyun	.endm
26*4882a593Smuzhiyun
27*4882a593Smuzhiyun	.macro		sm3tt1a, rd, rn, rm, imm2
28*4882a593Smuzhiyun	.inst		0xce408000 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
29*4882a593Smuzhiyun	.endm
30*4882a593Smuzhiyun
31*4882a593Smuzhiyun	.macro		sm3tt1b, rd, rn, rm, imm2
32*4882a593Smuzhiyun	.inst		0xce408400 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
33*4882a593Smuzhiyun	.endm
34*4882a593Smuzhiyun
35*4882a593Smuzhiyun	.macro		sm3tt2a, rd, rn, rm, imm2
36*4882a593Smuzhiyun	.inst		0xce408800 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
37*4882a593Smuzhiyun	.endm
38*4882a593Smuzhiyun
39*4882a593Smuzhiyun	.macro		sm3tt2b, rd, rn, rm, imm2
40*4882a593Smuzhiyun	.inst		0xce408c00 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
41*4882a593Smuzhiyun	.endm
42*4882a593Smuzhiyun
43*4882a593Smuzhiyun	.macro		round, ab, s0, t0, t1, i
44*4882a593Smuzhiyun	sm3ss1		v5.4s, v8.4s, \t0\().4s, v9.4s
45*4882a593Smuzhiyun	shl		\t1\().4s, \t0\().4s, #1
46*4882a593Smuzhiyun	sri		\t1\().4s, \t0\().4s, #31
47*4882a593Smuzhiyun	sm3tt1\ab	v8.4s, v5.4s, v10.4s, \i
48*4882a593Smuzhiyun	sm3tt2\ab	v9.4s, v5.4s, \s0\().4s, \i
49*4882a593Smuzhiyun	.endm
50*4882a593Smuzhiyun
51*4882a593Smuzhiyun	.macro		qround, ab, s0, s1, s2, s3, s4
52*4882a593Smuzhiyun	.ifnb		\s4
53*4882a593Smuzhiyun	ext		\s4\().16b, \s1\().16b, \s2\().16b, #12
54*4882a593Smuzhiyun	ext		v6.16b, \s0\().16b, \s1\().16b, #12
55*4882a593Smuzhiyun	ext		v7.16b, \s2\().16b, \s3\().16b, #8
56*4882a593Smuzhiyun	sm3partw1	\s4\().4s, \s0\().4s, \s3\().4s
57*4882a593Smuzhiyun	.endif
58*4882a593Smuzhiyun
59*4882a593Smuzhiyun	eor		v10.16b, \s0\().16b, \s1\().16b
60*4882a593Smuzhiyun
61*4882a593Smuzhiyun	round		\ab, \s0, v11, v12, 0
62*4882a593Smuzhiyun	round		\ab, \s0, v12, v11, 1
63*4882a593Smuzhiyun	round		\ab, \s0, v11, v12, 2
64*4882a593Smuzhiyun	round		\ab, \s0, v12, v11, 3
65*4882a593Smuzhiyun
66*4882a593Smuzhiyun	.ifnb		\s4
67*4882a593Smuzhiyun	sm3partw2	\s4\().4s, v7.4s, v6.4s
68*4882a593Smuzhiyun	.endif
69*4882a593Smuzhiyun	.endm
70*4882a593Smuzhiyun
71*4882a593Smuzhiyun	/*
72*4882a593Smuzhiyun	 * void sm3_ce_transform(struct sm3_state *sst, u8 const *src,
73*4882a593Smuzhiyun	 *                       int blocks)
74*4882a593Smuzhiyun	 */
75*4882a593Smuzhiyun	.text
76*4882a593SmuzhiyunSYM_FUNC_START(sm3_ce_transform)
77*4882a593Smuzhiyun	/* load state */
78*4882a593Smuzhiyun	ld1		{v8.4s-v9.4s}, [x0]
79*4882a593Smuzhiyun	rev64		v8.4s, v8.4s
80*4882a593Smuzhiyun	rev64		v9.4s, v9.4s
81*4882a593Smuzhiyun	ext		v8.16b, v8.16b, v8.16b, #8
82*4882a593Smuzhiyun	ext		v9.16b, v9.16b, v9.16b, #8
83*4882a593Smuzhiyun
84*4882a593Smuzhiyun	adr_l		x8, .Lt
85*4882a593Smuzhiyun	ldp		s13, s14, [x8]
86*4882a593Smuzhiyun
87*4882a593Smuzhiyun	/* load input */
88*4882a593Smuzhiyun0:	ld1		{v0.16b-v3.16b}, [x1], #64
89*4882a593Smuzhiyun	sub		w2, w2, #1
90*4882a593Smuzhiyun
91*4882a593Smuzhiyun	mov		v15.16b, v8.16b
92*4882a593Smuzhiyun	mov		v16.16b, v9.16b
93*4882a593Smuzhiyun
94*4882a593SmuzhiyunCPU_LE(	rev32		v0.16b, v0.16b		)
95*4882a593SmuzhiyunCPU_LE(	rev32		v1.16b, v1.16b		)
96*4882a593SmuzhiyunCPU_LE(	rev32		v2.16b, v2.16b		)
97*4882a593SmuzhiyunCPU_LE(	rev32		v3.16b, v3.16b		)
98*4882a593Smuzhiyun
99*4882a593Smuzhiyun	ext		v11.16b, v13.16b, v13.16b, #4
100*4882a593Smuzhiyun
101*4882a593Smuzhiyun	qround		a, v0, v1, v2, v3, v4
102*4882a593Smuzhiyun	qround		a, v1, v2, v3, v4, v0
103*4882a593Smuzhiyun	qround		a, v2, v3, v4, v0, v1
104*4882a593Smuzhiyun	qround		a, v3, v4, v0, v1, v2
105*4882a593Smuzhiyun
106*4882a593Smuzhiyun	ext		v11.16b, v14.16b, v14.16b, #4
107*4882a593Smuzhiyun
108*4882a593Smuzhiyun	qround		b, v4, v0, v1, v2, v3
109*4882a593Smuzhiyun	qround		b, v0, v1, v2, v3, v4
110*4882a593Smuzhiyun	qround		b, v1, v2, v3, v4, v0
111*4882a593Smuzhiyun	qround		b, v2, v3, v4, v0, v1
112*4882a593Smuzhiyun	qround		b, v3, v4, v0, v1, v2
113*4882a593Smuzhiyun	qround		b, v4, v0, v1, v2, v3
114*4882a593Smuzhiyun	qround		b, v0, v1, v2, v3, v4
115*4882a593Smuzhiyun	qround		b, v1, v2, v3, v4, v0
116*4882a593Smuzhiyun	qround		b, v2, v3, v4, v0, v1
117*4882a593Smuzhiyun	qround		b, v3, v4
118*4882a593Smuzhiyun	qround		b, v4, v0
119*4882a593Smuzhiyun	qround		b, v0, v1
120*4882a593Smuzhiyun
121*4882a593Smuzhiyun	eor		v8.16b, v8.16b, v15.16b
122*4882a593Smuzhiyun	eor		v9.16b, v9.16b, v16.16b
123*4882a593Smuzhiyun
124*4882a593Smuzhiyun	/* handled all input blocks? */
125*4882a593Smuzhiyun	cbnz		w2, 0b
126*4882a593Smuzhiyun
127*4882a593Smuzhiyun	/* save state */
128*4882a593Smuzhiyun	rev64		v8.4s, v8.4s
129*4882a593Smuzhiyun	rev64		v9.4s, v9.4s
130*4882a593Smuzhiyun	ext		v8.16b, v8.16b, v8.16b, #8
131*4882a593Smuzhiyun	ext		v9.16b, v9.16b, v9.16b, #8
132*4882a593Smuzhiyun	st1		{v8.4s-v9.4s}, [x0]
133*4882a593Smuzhiyun	ret
134*4882a593SmuzhiyunSYM_FUNC_END(sm3_ce_transform)
135*4882a593Smuzhiyun
136*4882a593Smuzhiyun	.section	".rodata", "a"
137*4882a593Smuzhiyun	.align		3
138*4882a593Smuzhiyun.Lt:	.word		0x79cc4519, 0x9d8a7a87
139