/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * aesce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.arch	armv8-a+crypto

14*4882a593Smuzhiyun	/*
15*4882a593Smuzhiyun	 * void ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
16*4882a593Smuzhiyun	 *			     u32 *macp, u8 const rk[], u32 rounds);
17*4882a593Smuzhiyun	 */
18*4882a593SmuzhiyunSYM_FUNC_START(ce_aes_ccm_auth_data)
19*4882a593Smuzhiyun	ldr	w8, [x3]			/* leftover from prev round? */
20*4882a593Smuzhiyun	ld1	{v0.16b}, [x0]			/* load mac */
21*4882a593Smuzhiyun	cbz	w8, 1f
22*4882a593Smuzhiyun	sub	w8, w8, #16
23*4882a593Smuzhiyun	eor	v1.16b, v1.16b, v1.16b
24*4882a593Smuzhiyun0:	ldrb	w7, [x1], #1			/* get 1 byte of input */
25*4882a593Smuzhiyun	subs	w2, w2, #1
26*4882a593Smuzhiyun	add	w8, w8, #1
27*4882a593Smuzhiyun	ins	v1.b[0], w7
28*4882a593Smuzhiyun	ext	v1.16b, v1.16b, v1.16b, #1	/* rotate in the input bytes */
29*4882a593Smuzhiyun	beq	8f				/* out of input? */
30*4882a593Smuzhiyun	cbnz	w8, 0b
31*4882a593Smuzhiyun	eor	v0.16b, v0.16b, v1.16b
32*4882a593Smuzhiyun1:	ld1	{v3.4s}, [x4]			/* load first round key */
33*4882a593Smuzhiyun	prfm	pldl1strm, [x1]
34*4882a593Smuzhiyun	cmp	w5, #12				/* which key size? */
35*4882a593Smuzhiyun	add	x6, x4, #16
36*4882a593Smuzhiyun	sub	w7, w5, #2			/* modified # of rounds */
37*4882a593Smuzhiyun	bmi	2f
38*4882a593Smuzhiyun	bne	5f
39*4882a593Smuzhiyun	mov	v5.16b, v3.16b
40*4882a593Smuzhiyun	b	4f
41*4882a593Smuzhiyun2:	mov	v4.16b, v3.16b
42*4882a593Smuzhiyun	ld1	{v5.4s}, [x6], #16		/* load 2nd round key */
43*4882a593Smuzhiyun3:	aese	v0.16b, v4.16b
44*4882a593Smuzhiyun	aesmc	v0.16b, v0.16b
45*4882a593Smuzhiyun4:	ld1	{v3.4s}, [x6], #16		/* load next round key */
46*4882a593Smuzhiyun	aese	v0.16b, v5.16b
47*4882a593Smuzhiyun	aesmc	v0.16b, v0.16b
48*4882a593Smuzhiyun5:	ld1	{v4.4s}, [x6], #16		/* load next round key */
49*4882a593Smuzhiyun	subs	w7, w7, #3
50*4882a593Smuzhiyun	aese	v0.16b, v3.16b
51*4882a593Smuzhiyun	aesmc	v0.16b, v0.16b
52*4882a593Smuzhiyun	ld1	{v5.4s}, [x6], #16		/* load next round key */
53*4882a593Smuzhiyun	bpl	3b
54*4882a593Smuzhiyun	aese	v0.16b, v4.16b
55*4882a593Smuzhiyun	subs	w2, w2, #16			/* last data? */
56*4882a593Smuzhiyun	eor	v0.16b, v0.16b, v5.16b		/* final round */
57*4882a593Smuzhiyun	bmi	6f
58*4882a593Smuzhiyun	ld1	{v1.16b}, [x1], #16		/* load next input block */
59*4882a593Smuzhiyun	eor	v0.16b, v0.16b, v1.16b		/* xor with mac */
60*4882a593Smuzhiyun	bne	1b
61*4882a593Smuzhiyun6:	st1	{v0.16b}, [x0]			/* store mac */
62*4882a593Smuzhiyun	beq	10f
63*4882a593Smuzhiyun	adds	w2, w2, #16
64*4882a593Smuzhiyun	beq	10f
65*4882a593Smuzhiyun	mov	w8, w2
66*4882a593Smuzhiyun7:	ldrb	w7, [x1], #1
67*4882a593Smuzhiyun	umov	w6, v0.b[0]
68*4882a593Smuzhiyun	eor	w6, w6, w7
69*4882a593Smuzhiyun	strb	w6, [x0], #1
70*4882a593Smuzhiyun	subs	w2, w2, #1
71*4882a593Smuzhiyun	beq	10f
72*4882a593Smuzhiyun	ext	v0.16b, v0.16b, v0.16b, #1	/* rotate out the mac bytes */
73*4882a593Smuzhiyun	b	7b
74*4882a593Smuzhiyun8:	cbz	w8, 91f
75*4882a593Smuzhiyun	mov	w7, w8
76*4882a593Smuzhiyun	add	w8, w8, #16
77*4882a593Smuzhiyun9:	ext	v1.16b, v1.16b, v1.16b, #1
78*4882a593Smuzhiyun	adds	w7, w7, #1
79*4882a593Smuzhiyun	bne	9b
80*4882a593Smuzhiyun91:	eor	v0.16b, v0.16b, v1.16b
81*4882a593Smuzhiyun	st1	{v0.16b}, [x0]
82*4882a593Smuzhiyun10:	str	w8, [x3]
83*4882a593Smuzhiyun	ret
84*4882a593SmuzhiyunSYM_FUNC_END(ce_aes_ccm_auth_data)
85*4882a593Smuzhiyun
86*4882a593Smuzhiyun	/*
87*4882a593Smuzhiyun	 * void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u8 const rk[],
88*4882a593Smuzhiyun	 * 			 u32 rounds);
89*4882a593Smuzhiyun	 */
90*4882a593SmuzhiyunSYM_FUNC_START(ce_aes_ccm_final)
91*4882a593Smuzhiyun	ld1	{v3.4s}, [x2], #16		/* load first round key */
92*4882a593Smuzhiyun	ld1	{v0.16b}, [x0]			/* load mac */
93*4882a593Smuzhiyun	cmp	w3, #12				/* which key size? */
94*4882a593Smuzhiyun	sub	w3, w3, #2			/* modified # of rounds */
95*4882a593Smuzhiyun	ld1	{v1.16b}, [x1]			/* load 1st ctriv */
96*4882a593Smuzhiyun	bmi	0f
97*4882a593Smuzhiyun	bne	3f
98*4882a593Smuzhiyun	mov	v5.16b, v3.16b
99*4882a593Smuzhiyun	b	2f
100*4882a593Smuzhiyun0:	mov	v4.16b, v3.16b
101*4882a593Smuzhiyun1:	ld1	{v5.4s}, [x2], #16		/* load next round key */
102*4882a593Smuzhiyun	aese	v0.16b, v4.16b
103*4882a593Smuzhiyun	aesmc	v0.16b, v0.16b
104*4882a593Smuzhiyun	aese	v1.16b, v4.16b
105*4882a593Smuzhiyun	aesmc	v1.16b, v1.16b
106*4882a593Smuzhiyun2:	ld1	{v3.4s}, [x2], #16		/* load next round key */
107*4882a593Smuzhiyun	aese	v0.16b, v5.16b
108*4882a593Smuzhiyun	aesmc	v0.16b, v0.16b
109*4882a593Smuzhiyun	aese	v1.16b, v5.16b
110*4882a593Smuzhiyun	aesmc	v1.16b, v1.16b
111*4882a593Smuzhiyun3:	ld1	{v4.4s}, [x2], #16		/* load next round key */
112*4882a593Smuzhiyun	subs	w3, w3, #3
113*4882a593Smuzhiyun	aese	v0.16b, v3.16b
114*4882a593Smuzhiyun	aesmc	v0.16b, v0.16b
115*4882a593Smuzhiyun	aese	v1.16b, v3.16b
116*4882a593Smuzhiyun	aesmc	v1.16b, v1.16b
117*4882a593Smuzhiyun	bpl	1b
118*4882a593Smuzhiyun	aese	v0.16b, v4.16b
119*4882a593Smuzhiyun	aese	v1.16b, v4.16b
120*4882a593Smuzhiyun	/* final round key cancels out */
121*4882a593Smuzhiyun	eor	v0.16b, v0.16b, v1.16b		/* en-/decrypt the mac */
122*4882a593Smuzhiyun	st1	{v0.16b}, [x0]			/* store result */
123*4882a593Smuzhiyun	ret
124*4882a593SmuzhiyunSYM_FUNC_END(ce_aes_ccm_final)
125*4882a593Smuzhiyun
126*4882a593Smuzhiyun	.macro	aes_ccm_do_crypt,enc
127*4882a593Smuzhiyun	ldr	x8, [x6, #8]			/* load lower ctr */
128*4882a593Smuzhiyun	ld1	{v0.16b}, [x5]			/* load mac */
129*4882a593SmuzhiyunCPU_LE(	rev	x8, x8			)	/* keep swabbed ctr in reg */
130*4882a593Smuzhiyun0:	/* outer loop */
131*4882a593Smuzhiyun	ld1	{v1.8b}, [x6]			/* load upper ctr */
132*4882a593Smuzhiyun	prfm	pldl1strm, [x1]
133*4882a593Smuzhiyun	add	x8, x8, #1
134*4882a593Smuzhiyun	rev	x9, x8
135*4882a593Smuzhiyun	cmp	w4, #12				/* which key size? */
136*4882a593Smuzhiyun	sub	w7, w4, #2			/* get modified # of rounds */
137*4882a593Smuzhiyun	ins	v1.d[1], x9			/* no carry in lower ctr */
138*4882a593Smuzhiyun	ld1	{v3.4s}, [x3]			/* load first round key */
139*4882a593Smuzhiyun	add	x10, x3, #16
140*4882a593Smuzhiyun	bmi	1f
141*4882a593Smuzhiyun	bne	4f
142*4882a593Smuzhiyun	mov	v5.16b, v3.16b
143*4882a593Smuzhiyun	b	3f
144*4882a593Smuzhiyun1:	mov	v4.16b, v3.16b
145*4882a593Smuzhiyun	ld1	{v5.4s}, [x10], #16		/* load 2nd round key */
146*4882a593Smuzhiyun2:	/* inner loop: 3 rounds, 2x interleaved */
147*4882a593Smuzhiyun	aese	v0.16b, v4.16b
148*4882a593Smuzhiyun	aesmc	v0.16b, v0.16b
149*4882a593Smuzhiyun	aese	v1.16b, v4.16b
150*4882a593Smuzhiyun	aesmc	v1.16b, v1.16b
151*4882a593Smuzhiyun3:	ld1	{v3.4s}, [x10], #16		/* load next round key */
152*4882a593Smuzhiyun	aese	v0.16b, v5.16b
153*4882a593Smuzhiyun	aesmc	v0.16b, v0.16b
154*4882a593Smuzhiyun	aese	v1.16b, v5.16b
155*4882a593Smuzhiyun	aesmc	v1.16b, v1.16b
156*4882a593Smuzhiyun4:	ld1	{v4.4s}, [x10], #16		/* load next round key */
157*4882a593Smuzhiyun	subs	w7, w7, #3
158*4882a593Smuzhiyun	aese	v0.16b, v3.16b
159*4882a593Smuzhiyun	aesmc	v0.16b, v0.16b
160*4882a593Smuzhiyun	aese	v1.16b, v3.16b
161*4882a593Smuzhiyun	aesmc	v1.16b, v1.16b
162*4882a593Smuzhiyun	ld1	{v5.4s}, [x10], #16		/* load next round key */
163*4882a593Smuzhiyun	bpl	2b
164*4882a593Smuzhiyun	aese	v0.16b, v4.16b
165*4882a593Smuzhiyun	aese	v1.16b, v4.16b
166*4882a593Smuzhiyun	subs	w2, w2, #16
167*4882a593Smuzhiyun	bmi	6f				/* partial block? */
168*4882a593Smuzhiyun	ld1	{v2.16b}, [x1], #16		/* load next input block */
169*4882a593Smuzhiyun	.if	\enc == 1
170*4882a593Smuzhiyun	eor	v2.16b, v2.16b, v5.16b		/* final round enc+mac */
171*4882a593Smuzhiyun	eor	v1.16b, v1.16b, v2.16b		/* xor with crypted ctr */
172*4882a593Smuzhiyun	.else
173*4882a593Smuzhiyun	eor	v2.16b, v2.16b, v1.16b		/* xor with crypted ctr */
174*4882a593Smuzhiyun	eor	v1.16b, v2.16b, v5.16b		/* final round enc */
175*4882a593Smuzhiyun	.endif
176*4882a593Smuzhiyun	eor	v0.16b, v0.16b, v2.16b		/* xor mac with pt ^ rk[last] */
177*4882a593Smuzhiyun	st1	{v1.16b}, [x0], #16		/* write output block */
178*4882a593Smuzhiyun	bne	0b
179*4882a593SmuzhiyunCPU_LE(	rev	x8, x8			)
180*4882a593Smuzhiyun	st1	{v0.16b}, [x5]			/* store mac */
181*4882a593Smuzhiyun	str	x8, [x6, #8]			/* store lsb end of ctr (BE) */
182*4882a593Smuzhiyun5:	ret
183*4882a593Smuzhiyun
184*4882a593Smuzhiyun6:	eor	v0.16b, v0.16b, v5.16b		/* final round mac */
185*4882a593Smuzhiyun	eor	v1.16b, v1.16b, v5.16b		/* final round enc */
186*4882a593Smuzhiyun	st1	{v0.16b}, [x5]			/* store mac */
187*4882a593Smuzhiyun	add	w2, w2, #16			/* process partial tail block */
188*4882a593Smuzhiyun7:	ldrb	w9, [x1], #1			/* get 1 byte of input */
189*4882a593Smuzhiyun	umov	w6, v1.b[0]			/* get top crypted ctr byte */
190*4882a593Smuzhiyun	umov	w7, v0.b[0]			/* get top mac byte */
191*4882a593Smuzhiyun	.if	\enc == 1
192*4882a593Smuzhiyun	eor	w7, w7, w9
193*4882a593Smuzhiyun	eor	w9, w9, w6
194*4882a593Smuzhiyun	.else
195*4882a593Smuzhiyun	eor	w9, w9, w6
196*4882a593Smuzhiyun	eor	w7, w7, w9
197*4882a593Smuzhiyun	.endif
198*4882a593Smuzhiyun	strb	w9, [x0], #1			/* store out byte */
199*4882a593Smuzhiyun	strb	w7, [x5], #1			/* store mac byte */
200*4882a593Smuzhiyun	subs	w2, w2, #1
201*4882a593Smuzhiyun	beq	5b
202*4882a593Smuzhiyun	ext	v0.16b, v0.16b, v0.16b, #1	/* shift out mac byte */
203*4882a593Smuzhiyun	ext	v1.16b, v1.16b, v1.16b, #1	/* shift out ctr byte */
204*4882a593Smuzhiyun	b	7b
205*4882a593Smuzhiyun	.endm
206*4882a593Smuzhiyun
207*4882a593Smuzhiyun	/*
208*4882a593Smuzhiyun	 * void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
209*4882a593Smuzhiyun	 * 			   u8 const rk[], u32 rounds, u8 mac[],
210*4882a593Smuzhiyun	 * 			   u8 ctr[]);
211*4882a593Smuzhiyun	 * void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
212*4882a593Smuzhiyun	 * 			   u8 const rk[], u32 rounds, u8 mac[],
213*4882a593Smuzhiyun	 * 			   u8 ctr[]);
214*4882a593Smuzhiyun	 */
215*4882a593SmuzhiyunSYM_FUNC_START(ce_aes_ccm_encrypt)
216*4882a593Smuzhiyun	aes_ccm_do_crypt	1
217*4882a593SmuzhiyunSYM_FUNC_END(ce_aes_ccm_encrypt)
218*4882a593Smuzhiyun
SYM_FUNC_START(ce_aes_ccm_decrypt)
	aes_ccm_do_crypt	0
SYM_FUNC_END(ce_aes_ccm_decrypt)