xref: /OK3568_Linux_fs/kernel/arch/powerpc/crypto/aes-spe-keys.S (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun/* SPDX-License-Identifier: GPL-2.0-or-later */
2*4882a593Smuzhiyun/*
3*4882a593Smuzhiyun * Key handling functions for PPC AES implementation
4*4882a593Smuzhiyun *
5*4882a593Smuzhiyun * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
6*4882a593Smuzhiyun */
7*4882a593Smuzhiyun
8*4882a593Smuzhiyun#include <asm/ppc_asm.h>
9*4882a593Smuzhiyun
/*
 * LOAD_KEY(dst, base, off): fetch the 32 bit key word found at byte offset
 * `off` from the address in `base` and place it in `dst`.
 *
 * Big endian builds use a plain word load. All other builds load the word
 * byte-reversed (lwbrx) and need r0 to hold the offset, so r0 is clobbered
 * on that path.
 */
#ifndef __BIG_ENDIAN__
#define LOAD_KEY(dst, base, off) \
	li		r0,off; \
	lwbrx		dst,base,r0;
#else
#define LOAD_KEY(dst, base, off) \
	lwz		dst,off(base);
#endif
18*4882a593Smuzhiyun
/*
 * INITIALIZE_KEY: prologue shared by the key expansion routines.
 * Opens a 32 byte stack frame and stores r14, r15 and r16 in it at
 * offsets 8, 12 and 16. FINALIZE_KEY is the matching epilogue.
 */
#define INITIALIZE_KEY \
	stwu		r1,-32(r1);	/* push a 32 byte frame		*/ \
	stw		r16,16(r1);	/* stash r14-r16 in the frame	*/ \
	stw		r15,12(r1);					   \
	stw		r14,8(r1);
24*4882a593Smuzhiyun
/*
 * FINALIZE_KEY: epilogue shared by the key expansion routines.
 * Reloads r14-r16 from the stack frame set up by INITIALIZE_KEY, zeroes the
 * scratch registers that held key material and releases the 32 byte frame.
 *
 * r4 is wiped together with r5-r12: every expansion routine hands r4 to
 * LS_BOX as the table address temporary, so when the 192/256 bit routines
 * leave their loop early it still contains a table index taken from a
 * round key byte.
 */
#define FINALIZE_KEY \
	lwz		r14,8(r1);	/* restore registers		*/ \
	lwz		r15,12(r1);					   \
	lwz		r16,16(r1);					   \
	xor		r4,r4,r4;	/* clear sensitive data		*/ \
	xor		r5,r5,r5;					   \
	xor		r6,r6,r6;					   \
	xor		r7,r7,r7;					   \
	xor		r8,r8,r8;					   \
	xor		r9,r9,r9;					   \
	xor		r10,r10,r10;					   \
	xor		r11,r11,r11;					   \
	xor		r12,r12,r12;					   \
	addi		r1,r1,32;	/* cleanup stack		*/
38*4882a593Smuzhiyun
/*
 * LS_BOX(word, byte, addr): replace each of the four bytes of register
 * `word` by a byte looked up in PPC_AES_4K_ENCTAB.
 *
 * `addr` is loaded with the table address. For every byte of `word`,
 * starting with the least significant one, the byte value is inserted into
 * bits 20-27 of `addr` (i.e. scaled by 16), the byte at offset 8 from that
 * address is loaded into `byte`, and `byte` is inserted back into `word`
 * at the position the index came from. `byte` and `addr` are clobbered.
 *
 * NOTE(review): inserting the index into the address bits only works if
 * the low 12 bits of PPC_AES_4K_ENCTAB are zero (4 KiB alignment, as the
 * name suggests) -- confirm at the table definition.
 */
#define LS_BOX(word, byte, addr) \
	lis		addr,PPC_AES_4K_ENCTAB@h;			   \
	ori		addr,addr,PPC_AES_4K_ENCTAB@l;			   \
	rlwimi		addr,word,4,20,27;	/* index = bits 24-31	*/ \
	lbz		byte,8(addr);					   \
	rlwimi		word,byte,0,24,31;				   \
	rlwimi		addr,word,28,20,27;	/* index = bits 16-23	*/ \
	lbz		byte,8(addr);					   \
	rlwimi		word,byte,8,16,23;				   \
	rlwimi		addr,word,20,20,27;	/* index = bits 8-15	*/ \
	lbz		byte,8(addr);					   \
	rlwimi		word,byte,16,8,15;				   \
	rlwimi		addr,word,12,20,27;	/* index = bits 0-7	*/ \
	lbz		byte,8(addr);					   \
	rlwimi		word,byte,24,0,7;
54*4882a593Smuzhiyun
/*
 * GF8_MUL(out, in, carry, dbl): multiply each of the four bytes packed in
 * `in` by 2 in GF(2^8) and leave the result in `out`.
 *
 * dbl   = (in & 0x7f7f7f7f) << 1	   every byte shifted left by one
 * carry = ((in & 0x80808080) >> 7) * 0x1b  0x1b for every byte whose top
 *					   bit was set
 * out   = carry ^ dbl
 *
 * `carry` and `dbl` are clobbered and neither may be the same register as
 * `in`. `out` is written last, so it may share a register with `in` or with
 * one of the temporaries.
 */
#define GF8_MUL(out, in, carry, dbl) \
	lis		dbl,0x7f7f;	/* low seven bits of each byte	*/ \
	ori		dbl,dbl,0x7f7f;					   \
	and		dbl,dbl,in;					   \
	slwi		dbl,dbl,1;					   \
	lis		carry,0x8080;	/* top bit of each byte		*/ \
	ori		carry,carry,0x8080;				   \
	and		carry,carry,in;					   \
	srwi		carry,carry,7;					   \
	mulli		carry,carry,0x1b;				   \
	xor		out,carry,dbl;
66*4882a593Smuzhiyun
/*
 * ppc_expand_key_128(u32 *key_enc, const u8 *key)
 *
 * Expand a 128 bit key into the 176 byte encryption key: the four key
 * words themselves followed by 10 round keys of 16 bytes each.
 *
 * r3      key_enc, advanced by 16 bytes per round
 * r4      key; reused as scratch once the key words are loaded
 * r5-r8   the four most recent words of the schedule
 * r0      round constant
 * r16     rounds left
 * r14,r15 scratch
 */
_GLOBAL(ppc_expand_key_128)
	INITIALIZE_KEY
	LOAD_KEY(r5,r4,0)		/* first four words = the key	*/
	LOAD_KEY(r6,r4,4)
	LOAD_KEY(r7,r4,8)
	LOAD_KEY(r8,r4,12)
	stw		r5,0(r3)
	stw		r6,4(r3)
	stw		r7,8(r3)
	stw		r8,12(r3)
	lis		r0,0x0100	/* round constant starts at 1	*/
	li		r16,10		/* ten round keys to derive	*/
.Lexpand_128_round:
	addi		r3,r3,16	/* next 16 byte output slot	*/
	rotlwi		r14,r8,8	/* rotate the 4th word ...	*/
	LS_BOX(r14, r15, r4)		/* ... and substitute its bytes	*/
	xor		r14,r14,r0	/* mix in the round constant	*/
	xor		r5,r5,r14	/* chain through the 4 words	*/
	xor		r6,r6,r5
	xor		r7,r7,r6
	xor		r8,r8,r7
	stw		r5,0(r3)	/* emit the new round key	*/
	stw		r6,4(r3)
	stw		r7,8(r3)
	stw		r8,12(r3)
	GF8_MUL(r0, r0, r4, r14)	/* round constant * 2 in GF	*/
	subi		r16,r16,1
	cmpwi		r16,0
	bne		.Lexpand_128_round
	FINALIZE_KEY
	blr
108*4882a593Smuzhiyun
/*
 * ppc_expand_key_192(u32 *key_enc, const u8 *key)
 *
 * Expand a 192 bit key into the 208 byte encryption key: the six key
 * words themselves plus eight expansion rounds. Each round derives six
 * new words; the last round stores only the first four of them.
 *
 * r3      key_enc, advanced by 24 bytes per round
 * r4      key; reused as scratch once the key words are loaded
 * r5-r10  the six most recent words of the schedule
 * r0      round constant
 * r16     rounds left
 * r14,r15 scratch
 */
_GLOBAL(ppc_expand_key_192)
	INITIALIZE_KEY
	LOAD_KEY(r5,r4,0)		/* first six words = the key	*/
	LOAD_KEY(r6,r4,4)
	LOAD_KEY(r7,r4,8)
	LOAD_KEY(r8,r4,12)
	LOAD_KEY(r9,r4,16)
	LOAD_KEY(r10,r4,20)
	stw		r5,0(r3)
	stw		r6,4(r3)
	stw		r7,8(r3)
	stw		r8,12(r3)
	stw		r9,16(r3)
	stw		r10,20(r3)
	lis		r0,0x0100	/* round constant starts at 1	*/
	li		r16,8		/* eight expansion rounds	*/
.Lexpand_192_round:
	addi		r3,r3,24	/* next 24 byte output slot	*/
	rotlwi		r14,r10,8	/* rotate the 6th word ...	*/
	LS_BOX(r14, r15, r4)		/* ... and substitute its bytes	*/
	xor		r14,r14,r0	/* mix in the round constant	*/
	xor		r5,r5,r14	/* chain through the 6 words	*/
	xor		r6,r6,r5
	xor		r7,r7,r6
	xor		r8,r8,r7
	xor		r9,r9,r8
	xor		r10,r10,r9
	stw		r5,0(r3)
	stw		r6,4(r3)
	stw		r7,8(r3)
	stw		r8,12(r3)
	subi		r16,r16,1
	cmpwi		r16,0
	beq		.Lexpand_192_done /* last round: 4 words only	*/
	stw		r9,16(r3)
	stw		r10,20(r3)
	GF8_MUL(r0, r0, r4, r14)	/* round constant * 2 in GF	*/
	b		.Lexpand_192_round
.Lexpand_192_done:
	FINALIZE_KEY
	blr
158*4882a593Smuzhiyun
/*
 * ppc_expand_key_256(u32 *key_enc, const u8 *key)
 *
 * Expand a 256 bit key into the 240 byte encryption key: the eight key
 * words themselves plus seven expansion rounds. Each round derives eight
 * new words; the last round stores only the first four of them.
 *
 * r3      key_enc, advanced by 32 bytes per round
 * r4      key; reused as scratch once the key words are loaded
 * r5-r12  the eight most recent words of the schedule
 * r0      round constant
 * r16     rounds left
 * r14,r15 scratch
 */
_GLOBAL(ppc_expand_key_256)
	INITIALIZE_KEY
	LOAD_KEY(r5,r4,0)		/* first eight words = the key	*/
	LOAD_KEY(r6,r4,4)
	LOAD_KEY(r7,r4,8)
	LOAD_KEY(r8,r4,12)
	LOAD_KEY(r9,r4,16)
	LOAD_KEY(r10,r4,20)
	LOAD_KEY(r11,r4,24)
	LOAD_KEY(r12,r4,28)
	stw		r5,0(r3)
	stw		r6,4(r3)
	stw		r7,8(r3)
	stw		r8,12(r3)
	stw		r9,16(r3)
	stw		r10,20(r3)
	stw		r11,24(r3)
	stw		r12,28(r3)
	lis		r0,0x0100	/* round constant starts at 1	*/
	li		r16,7		/* seven expansion rounds	*/
.Lexpand_256_round:
	addi		r3,r3,32	/* next 32 byte output slot	*/
	rotlwi		r14,r12,8	/* rotate the 8th word ...	*/
	LS_BOX(r14, r15, r4)		/* ... and substitute its bytes	*/
	xor		r14,r14,r0	/* mix in the round constant	*/
	xor		r5,r5,r14	/* chain through words 1-4	*/
	xor		r6,r6,r5
	xor		r7,r7,r6
	xor		r8,r8,r7
	mr		r14,r8		/* new 4th word, not rotated,	*/
	LS_BOX(r14, r15, r4)		/* goes through LS_BOX as well	*/
	xor		r9,r9,r14	/* chain through words 5-8	*/
	xor		r10,r10,r9
	xor		r11,r11,r10
	xor		r12,r12,r11
	stw		r5,0(r3)
	stw		r6,4(r3)
	stw		r7,8(r3)
	stw		r8,12(r3)
	subi		r16,r16,1
	cmpwi		r16,0
	beq		.Lexpand_256_done /* last round: 4 words only	*/
	stw		r9,16(r3)
	stw		r10,20(r3)
	stw		r11,24(r3)
	stw		r12,28(r3)
	GF8_MUL(r0, r0, r4, r14)	/* round constant * 2 in GF	*/
	b		.Lexpand_256_round
.Lexpand_256_done:
	FINALIZE_KEY
	blr
218*4882a593Smuzhiyun
/*
 * ppc_generate_decrypt_key: derive decryption key from encryption key
 * number of bytes to handle are calculated from length of key (16/24/32)
 *
 * In:  r3 = decryption key (written)
 *      r4 = encryption key (read)
 *      r5 = key length in bytes (16/24/32)
 *
 * The first and the last round key swap places unchanged. The
 * (length + 20) / 4 round keys in between are read from the encryption
 * key back to front, and every word w is stored as
 *
 *   14w ^ rotr(11w, 24) ^ rotr(13w, 16) ^ rotr(9w, 8)
 *
 * where the multiples are taken byte-wise in GF(2^8) by repeated GF8_MUL
 * (these are the AES InvMixColumns coefficients 0e, 0b, 0d, 09).
 *
 * Clobbers r0, r3-r12, CTR and CR0. The registers that held key words or
 * values derived from them (r0, r6-r12) are zeroed before returning, like
 * the key expansion routines do through FINALIZE_KEY.
 */
_GLOBAL(ppc_generate_decrypt_key)
	addi		r6,r5,24
	slwi		r6,r6,2		/* byte offset of last round key */
	lwzx		r7,r4,r6	/* first/last 4 words are same	*/
	stw		r7,0(r3)
	lwz		r7,0(r4)
	stwx		r7,r3,r6
	addi		r6,r6,4
	lwzx		r7,r4,r6
	stw		r7,4(r3)
	lwz		r7,4(r4)
	stwx		r7,r3,r6
	addi		r6,r6,4
	lwzx		r7,r4,r6
	stw		r7,8(r3)
	lwz		r7,8(r4)
	stwx		r7,r3,r6
	addi		r6,r6,4
	lwzx		r7,r4,r6
	stw		r7,12(r3)
	lwz		r7,12(r4)
	stwx		r7,r3,r6
	addi		r3,r3,16	/* output continues at 2nd key	*/
	add		r4,r4,r6	/* r6 = last key offset + 12,	*/
	subi		r4,r4,28	/* so r4 -> second to last key	*/
	addi		r5,r5,20
	srwi		r5,r5,2		/* number of inner round keys	*/
ppc_generate_decrypt_block:
	li	r6,4			/* four words per round key	*/
	mtctr	r6
ppc_generate_decrypt_word:
	lwz		r6,0(r4)	/* r6 = w			*/
	GF8_MUL(r7, r6, r0, r7)		/* r7 = 2w			*/
	GF8_MUL(r8, r7, r0, r8)		/* r8 = 4w			*/
	GF8_MUL(r9, r8, r0, r9)		/* r9 = 8w			*/
	xor		r10,r9,r6	/* r10 = 9w			*/
	xor		r11,r7,r8
	xor		r11,r11,r9	/* r11 = 14w			*/
	xor		r12,r7,r10	/* 11w				*/
	rotrwi		r12,r12,24
	xor		r11,r11,r12
	xor		r12,r8,r10	/* 13w				*/
	rotrwi		r12,r12,16
	xor		r11,r11,r12
	rotrwi		r12,r10,8	/* 9w				*/
	xor		r11,r11,r12
	stw		r11,0(r3)
	addi		r3,r3,4
	addi		r4,r4,4
	bdnz		ppc_generate_decrypt_word
	subi		r4,r4,32	/* step back to previous key	*/
	subi		r5,r5,1
	cmpwi		r5,0
	bt		gt,ppc_generate_decrypt_block
	xor		r0,r0,r0	/* clear sensitive data		*/
	xor		r6,r6,r6
	xor		r7,r7,r7
	xor		r8,r8,r8
	xor		r9,r9,r9
	xor		r10,r10,r10
	xor		r11,r11,r11
	xor		r12,r12,r12
	blr
279