/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * AES modes (ECB/CBC/CTR/XTS) for PPC AES implementation
 *
 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
 */

#include <asm/ppc_asm.h>
#include "aes-spe-regs.h"

/*
 * Data and IV access helpers, selected by __BIG_ENDIAN__.
 *
 * Big endian:    plain lwz/stw using the "off" displacement. The
 *                pointers do not move while a block is handled, so
 *                NEXT_BLOCK advances rSP and rDP by 16 and START_IV
 *                expands to nothing.
 * Little endian: byte reversed lwbrx/stwbrx followed by a pointer
 *                increment of 4. The "off" argument is not used, so
 *                the accesses of one block must be issued in ascending
 *                order. NEXT_BLOCK expands to nothing and START_IV
 *                moves rIP back by 16, i.e. by four IV accesses.
 *
 * CBC_DEC is what ppc_decrypt_cbc subtracts from rSP/rDP to step one
 * block backwards, CTR_DEC is what ppc_crypt_ctr subtracts from rIP
 * before its pre-incrementing byte loop. The little endian values are
 * 16 larger because the accessors have already advanced the pointer.
 */
#ifdef __BIG_ENDIAN__			/* Macros for big endian builds	*/

#define LOAD_DATA(reg, off) \
	lwz		reg,off(rSP);	/* load with offset		*/
#define SAVE_DATA(reg, off) \
	stw		reg,off(rDP);	/* save with offset		*/
#define NEXT_BLOCK \
	addi		rSP,rSP,16;	/* increment pointers per block	*/ \
	addi		rDP,rDP,16;
#define LOAD_IV(reg, off) \
	lwz		reg,off(rIP);	/* IV loading with offset	*/
#define SAVE_IV(reg, off) \
	stw		reg,off(rIP);	/* IV saving with offset	*/
#define START_IV			/* nothing to reset		*/
#define CBC_DEC 16			/* CBC decrement per block	*/
#define CTR_DEC 1			/* CTR decrement one byte	*/

#else					/* Macros for little endian	*/

#define LOAD_DATA(reg, off) \
	lwbrx		reg,0,rSP;	/* load reversed		*/ \
	addi		rSP,rSP,4;	/* and increment pointer	*/
#define SAVE_DATA(reg, off) \
	stwbrx		reg,0,rDP;	/* save reversed		*/ \
	addi		rDP,rDP,4;	/* and increment pointer	*/
#define NEXT_BLOCK			/* nothing to do		*/
#define LOAD_IV(reg, off) \
	lwbrx		reg,0,rIP;	/* load reversed		*/ \
	addi		rIP,rIP,4;	/* and increment pointer	*/
#define SAVE_IV(reg, off) \
	stwbrx		reg,0,rIP;	/* save reversed		*/ \
	addi		rIP,rIP,4;	/* and increment pointer	*/
#define START_IV \
	subi		rIP,rIP,16;	/* must reset pointer		*/
#define CBC_DEC 32			/* 2 blocks because of incs	*/
#define CTR_DEC 17			/* 1 block because of incs	*/

#endif

/*
 * Save/restore hooks for routines that need no additional 32 bit
 * registers. Both expand to nothing; they exist so that
 * INITIALIZE_CRYPT/FINALIZE_CRYPT can paste a register count of 0 into
 * SAVE_##n##_REGS / LOAD_##n##_REGS.
 */
#define SAVE_0_REGS
#define LOAD_0_REGS

/*
 * Save rI0-rI3 as 32 bit words at offsets 96-108 of the stack frame
 * addressed by r1. Counterpart of LOAD_4_REGS.
 */
#define SAVE_4_REGS \
	stw		rI0,96(r1);	/* save 32 bit registers	*/ \
	stw		rI1,100(r1);					   \
	stw		rI2,104(r1);					   \
	stw		rI3,108(r1);

/*
 * Reload rI0-rI3 from the frame slots written by SAVE_4_REGS.
 */
#define LOAD_4_REGS \
	lwz		rI0,96(r1);	/* restore 32 bit registers	*/ \
	lwz		rI1,100(r1);					   \
	lwz		rI2,104(r1);					   \
	lwz		rI3,108(r1);

/*
 * Save rI0-rI3 (via SAVE_4_REGS) plus rG0-rG3 as 32 bit words; rG0-rG3
 * go to offsets 112-124 of the stack frame addressed by r1.
 * Counterpart of LOAD_8_REGS.
 */
#define SAVE_8_REGS \
	SAVE_4_REGS							   \
	stw		rG0,112(r1);	/* save 32 bit registers	*/ \
	stw		rG1,116(r1);					   \
	stw		rG2,120(r1);					   \
	stw		rG3,124(r1);

/*
 * Reload rI0-rI3 (via LOAD_4_REGS) and rG0-rG3 from the frame slots
 * written by SAVE_8_REGS.
 */
#define LOAD_8_REGS \
	LOAD_4_REGS							   \
	lwz		rG0,112(r1);	/* restore 32 bit registers	*/ \
	lwz		rG1,116(r1);					   \
	lwz		rG2,120(r1);					   \
	lwz		rG3,124(r1);

/*
 * INITIALIZE_CRYPT(tab, nr32bitregs) - common function prologue
 *
 *   tab          symbol of the lookup table; its address is put into rT0
 *   nr32bitregs  0, 4 or 8, selects SAVE_0_REGS/SAVE_4_REGS/SAVE_8_REGS
 *
 * Opens a 160 byte stack frame and fills it as follows (offsets are
 * relative to the new r1):
 *
 *     8        link register
 *    16 - 95   r14-r23, stored as 64 bit values with evstdw
 *    96 - 127  32 bit saves done by SAVE_##nr32bitregs##_REGS
 *
 * It also copies the key pointer rKP to rKS, so that routines can
 * reload rKP from rKS for every block. Clobbers r0.
 * Must be paired with FINALIZE_CRYPT using the same nr32bitregs.
 */
#define INITIALIZE_CRYPT(tab,nr32bitregs) \
	mflr		r0;						   \
	stwu		r1,-160(r1);	/* create stack frame		*/ \
	lis		rT0,tab@h;	/* en-/decryption table pointer	*/ \
	stw		r0,8(r1);	/* save link register		*/ \
	ori		rT0,rT0,tab@l;					   \
	evstdw		r14,16(r1);					   \
	mr		rKS,rKP;					   \
	evstdw		r15,24(r1);	/* We must save non volatile	*/ \
	evstdw		r16,32(r1);	/* registers. Take the chance	*/ \
	evstdw		r17,40(r1);	/* and save the SPE part too	*/ \
	evstdw		r18,48(r1);					   \
	evstdw		r19,56(r1);					   \
	evstdw		r20,64(r1);					   \
	evstdw		r21,72(r1);					   \
	evstdw		r22,80(r1);					   \
	evstdw		r23,88(r1);					   \
	SAVE_##nr32bitregs##_REGS

/*
 * FINALIZE_CRYPT(nr32bitregs) - common function epilogue
 *
 *   nr32bitregs  0, 4 or 8; must match the INITIALIZE_CRYPT of the
 *                same routine
 *
 * Reloads the link register value and r14-r23 (64 bit, evldw) from the
 * frame, runs LOAD_##nr32bitregs##_REGS, then overwrites the first
 * 32 bit word of each of the ten 8 byte r14-r23 slots with zero before
 * releasing the 160 byte frame. The caller still has to issue blr.
 * Clobbers r0.
 * NOTE(review): on big endian the zeroed word is presumably the upper
 * (SPE) half of each saved register - confirm.
 */
#define FINALIZE_CRYPT(nr32bitregs) \
	lwz		r0,8(r1);					   \
	evldw		r14,16(r1);	/* restore SPE registers	*/ \
	evldw		r15,24(r1);					   \
	evldw		r16,32(r1);					   \
	evldw		r17,40(r1);					   \
	evldw		r18,48(r1);					   \
	evldw		r19,56(r1);					   \
	evldw		r20,64(r1);					   \
	evldw		r21,72(r1);					   \
	evldw		r22,80(r1);					   \
	evldw		r23,88(r1);					   \
	LOAD_##nr32bitregs##_REGS					   \
	mtlr		r0;		/* restore link register	*/ \
	xor		r0,r0,r0;					   \
	stw		r0,16(r1);	/* delete sensitive data	*/ \
	stw		r0,24(r1);	/* that we might have pushed	*/ \
	stw		r0,32(r1);	/* from other context that runs	*/ \
	stw		r0,40(r1);	/* the same code		*/ \
	stw		r0,48(r1);					   \
	stw		r0,56(r1);					   \
	stw		r0,64(r1);					   \
	stw		r0,72(r1);					   \
	stw		r0,80(r1);					   \
	stw		r0,88(r1);					   \
	addi		r1,r1,160;	/* cleanup stack frame		*/

/*
 * ENDIAN_SWAP(t0, t1, s0, s1) - byte reverse two 32 bit registers
 *
 *   t0 = byte reversed s0, t1 = byte reversed s1
 *
 * With s = [B0 B1 B2 B3] the rotate right by 8 yields [B3 B0 B1 B2];
 * the two rlwimi then insert bytes 1 and 3 of (s rotated left by 8),
 * i.e. B2 and B0, giving [B3 B2 B1 B0].
 * The sources are read again after the targets have been written, so
 * t0/t1 must be different registers than s0/s1.
 */
#define ENDIAN_SWAP(t0, t1, s0, s1) \
	rotrwi		t0,s0,8;	/* swap endianness for 2 GPRs	*/ \
	rotrwi		t1,s1,8;					   \
	rlwimi		t0,s0,8,8,15;					   \
	rlwimi		t1,s1,8,8,15;					   \
	rlwimi		t0,s0,8,24,31;					   \
	rlwimi		t1,s1,8,24,31;

/*
 * GF128_MUL(d0, d1, d2, d3, t0) - multiply a 128 bit value by x
 *
 *   d0-d3  the value, d0 = least and d3 = most significant word;
 *          updated in place
 *   t0     scratch register
 *
 * Shifts d3:d2:d1:d0 left by one bit. If the most significant bit of
 * d3 was set before the shift (d3 is negative as a signed word),
 * 0x87 is xored into d0, otherwise t0 is forced to 0 by isel.
 * Each word first takes over the top bit of its lower neighbour
 * (rlwimi ...,0,0,0) and is then rotated left by one, which moves that
 * bit into the least significant position.
 * Clobbers t0 and cr0.
 */
#define GF128_MUL(d0, d1, d2, d3, t0) \
	li		t0,0x87;	/* multiplication in GF128	*/ \
	cmpwi		d3,-1;						   \
	iselgt		t0,0,t0;					   \
	rlwimi		d3,d2,0,0,0;	/* propagate "carry" bits	*/ \
	rotlwi		d3,d3,1;					   \
	rlwimi		d2,d1,0,0,0;					   \
	rotlwi		d2,d2,1;					   \
	rlwimi		d1,d0,0,0,0;					   \
	slwi		d0,d0,1;	/* shift left 128 bit		*/ \
	rotlwi		d1,d1,1;					   \
	xor		d0,d0,t0;

/*
 * START_KEY(d0, d1, d2, d3) - initial key addition for one block
 *
 * Loads the four key words at rKP into rW0-rW3, loads the count
 * register from rRR and sets rD0-rD3 = d0-d3 xor rW0-rW3.
 * d0-d3 may be rD0-rD3 themselves or any other registers.
 * rKP is not advanced here.
 */
#define START_KEY(d0, d1, d2, d3) \
	lwz		rW0,0(rKP);					   \
	mtctr		rRR;						   \
	lwz		rW1,4(rKP);					   \
	lwz		rW2,8(rKP);					   \
	lwz		rW3,12(rKP);					   \
	xor		rD0,d0,rW0;					   \
	xor		rD1,d1,rW1;					   \
	xor		rD2,d2,rW2;					   \
	xor		rD3,d3,rW3;

/*
 * ppc_encrypt_aes(u8 *out, const u8 *in, u32 *key_enc,
 *		   u32 rounds)
 *
 * Called from the glue layer to encrypt a single 16 byte block.
 * Round values are AES128 = 4, AES192 = 5, AES256 = 6.
 *
 * The block is read through rSP, xored with the first four key words
 * (START_KEY), passed to ppc_encrypt_block and finally xored with
 * rW0-rW3 while it is written through rDP.
 * NOTE(review): the assignment of out/in/key_enc/rounds to
 * rDP/rSP/rKP/rRR is made in aes-spe-regs.h, and ppc_encrypt_block
 * (defined elsewhere) is expected to return with the last round key
 * words in rW0-rW3 - confirm both.
 */
_GLOBAL(ppc_encrypt_aes)
	INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 0)
	LOAD_DATA(rD0, 0)
	LOAD_DATA(rD1, 4)
	LOAD_DATA(rD2, 8)
	LOAD_DATA(rD3, 12)
	START_KEY(rD0, rD1, rD2, rD3)
	bl		ppc_encrypt_block
	xor		rD0,rD0,rW0	/* final key addition		*/
	SAVE_DATA(rD0, 0)
	xor		rD1,rD1,rW1
	SAVE_DATA(rD1, 4)
	xor		rD2,rD2,rW2
	SAVE_DATA(rD2, 8)
	xor		rD3,rD3,rW3
	SAVE_DATA(rD3, 12)
	FINALIZE_CRYPT(0)
	blr

/*
 * ppc_decrypt_aes(u8 *out, const u8 *in, u32 *key_dec,
 *		   u32 rounds)
 *
 * Called from the glue layer to decrypt a single 16 byte block.
 * Round values are AES128 = 4, AES192 = 5, AES256 = 6.
 *
 * Same flow as ppc_encrypt_aes, but with PPC_AES_4K_DECTAB in rT0,
 * ppc_decrypt_block as worker and rT1 set to rT0 + 4096.
 * NOTE(review): rT1 presumably addresses a second table that
 * ppc_decrypt_block needs behind the 4K decryption table - confirm.
 */
_GLOBAL(ppc_decrypt_aes)
	INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 0)
	LOAD_DATA(rD0, 0)
	addi		rT1,rT0,4096	/* second table pointer		*/
	LOAD_DATA(rD1, 4)
	LOAD_DATA(rD2, 8)
	LOAD_DATA(rD3, 12)
	START_KEY(rD0, rD1, rD2, rD3)
	bl		ppc_decrypt_block
	xor		rD0,rD0,rW0	/* final key addition		*/
	SAVE_DATA(rD0, 0)
	xor		rD1,rD1,rW1
	SAVE_DATA(rD1, 4)
	xor		rD2,rD2,rW2
	SAVE_DATA(rD2, 8)
	xor		rD3,rD3,rW3
	SAVE_DATA(rD3, 12)
	FINALIZE_CRYPT(0)
	blr

/*
 * ppc_encrypt_ecb(u8 *out, const u8 *in, u32 *key_enc,
 *		   u32 rounds, u32 bytes);
 *
 * Called from the glue layer to encrypt multiple blocks via ECB.
 * Bytes must be larger or equal 16 and only whole blocks are
 * processed. Round values are AES128 = 4, AES192 = 5 and
 * AES256 = 6.
 *
 * The loop body runs before the length is tested, so one block is
 * always processed. rLN is reduced by 16 per block and the loop
 * continues while more than 15 bytes remain; the compare is issued
 * early and its result is consumed by the branch at the loop end.
 * NOTE(review): this relies on ppc_encrypt_block and the macros in
 * between leaving the "gt" result of that compare intact - confirm.
 */
_GLOBAL(ppc_encrypt_ecb)
	INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 0)
ppc_encrypt_ecb_loop:
	LOAD_DATA(rD0, 0)
	mr		rKP,rKS		/* rewind key pointer		*/
	LOAD_DATA(rD1, 4)
	subi		rLN,rLN,16	/* one block less to do		*/
	LOAD_DATA(rD2, 8)
	cmpwi		rLN,15		/* another whole block left?	*/
	LOAD_DATA(rD3, 12)
	START_KEY(rD0, rD1, rD2, rD3)
	bl		ppc_encrypt_block
	xor		rD0,rD0,rW0	/* final key addition		*/
	SAVE_DATA(rD0, 0)
	xor		rD1,rD1,rW1
	SAVE_DATA(rD1, 4)
	xor		rD2,rD2,rW2
	SAVE_DATA(rD2, 8)
	xor		rD3,rD3,rW3
	SAVE_DATA(rD3, 12)
	NEXT_BLOCK
	bt		gt,ppc_encrypt_ecb_loop
	FINALIZE_CRYPT(0)
	blr

/*
 * ppc_decrypt_ecb(u8 *out, const u8 *in, u32 *key_dec,
 *		   u32 rounds, u32 bytes);
 *
 * Called from the glue layer to decrypt multiple blocks via ECB.
 * Bytes must be larger or equal 16 and only whole blocks are
 * processed. Round values are AES128 = 4, AES192 = 5 and
 * AES256 = 6.
 *
 * Same loop structure as ppc_encrypt_ecb: one block is always
 * processed, rLN drops by 16 per block and the loop continues while
 * more than 15 bytes remain. rT1 is set once to rT0 + 4096 before the
 * loop.
 * NOTE(review): the early compare must survive ppc_decrypt_block
 * until the branch at the loop end - confirm.
 */
_GLOBAL(ppc_decrypt_ecb)
	INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 0)
	addi		rT1,rT0,4096	/* second table pointer		*/
ppc_decrypt_ecb_loop:
	LOAD_DATA(rD0, 0)
	mr		rKP,rKS		/* rewind key pointer		*/
	LOAD_DATA(rD1, 4)
	subi		rLN,rLN,16	/* one block less to do		*/
	LOAD_DATA(rD2, 8)
	cmpwi		rLN,15		/* another whole block left?	*/
	LOAD_DATA(rD3, 12)
	START_KEY(rD0, rD1, rD2, rD3)
	bl		ppc_decrypt_block
	xor		rD0,rD0,rW0	/* final key addition		*/
	SAVE_DATA(rD0, 0)
	xor		rD1,rD1,rW1
	SAVE_DATA(rD1, 4)
	xor		rD2,rD2,rW2
	SAVE_DATA(rD2, 8)
	xor		rD3,rD3,rW3
	SAVE_DATA(rD3, 12)
	NEXT_BLOCK
	bt		gt,ppc_decrypt_ecb_loop
	FINALIZE_CRYPT(0)
	blr

/*
 * ppc_encrypt_cbc(u8 *out, const u8 *in, u32 *key_enc,
 *		   u32 rounds, u32 bytes, u8 *iv);
 *
 * Called from the glue layer to encrypt multiple blocks via CBC.
 * Bytes must be larger or equal 16 and only whole blocks are
 * processed. Round values are AES128 = 4, AES192 = 5 and
 * AES256 = 6.
 *
 * rI0-rI3 hold the chaining value: they are loaded from the IV buffer,
 * xored into every plaintext block before encryption and replaced by
 * the resulting ciphertext block. After the last block they are
 * written back to the IV buffer. rI0-rI3 are saved and restored through
 * the 4 register variant of INITIALIZE_CRYPT/FINALIZE_CRYPT.
 */
_GLOBAL(ppc_encrypt_cbc)
	INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 4)
	LOAD_IV(rI0, 0)
	LOAD_IV(rI1, 4)
	LOAD_IV(rI2, 8)
	LOAD_IV(rI3, 12)
ppc_encrypt_cbc_loop:
	LOAD_DATA(rD0, 0)
	mr		rKP,rKS		/* rewind key pointer		*/
	LOAD_DATA(rD1, 4)
	subi		rLN,rLN,16	/* one block less to do		*/
	LOAD_DATA(rD2, 8)
	cmpwi		rLN,15		/* another whole block left?	*/
	LOAD_DATA(rD3, 12)
	xor		rD0,rD0,rI0	/* chain in previous block	*/
	xor		rD1,rD1,rI1
	xor		rD2,rD2,rI2
	xor		rD3,rD3,rI3
	START_KEY(rD0, rD1, rD2, rD3)
	bl		ppc_encrypt_block
	xor		rI0,rD0,rW0	/* ciphertext = next chain value	*/
	SAVE_DATA(rI0, 0)
	xor		rI1,rD1,rW1
	SAVE_DATA(rI1, 4)
	xor		rI2,rD2,rW2
	SAVE_DATA(rI2, 8)
	xor		rI3,rD3,rW3
	SAVE_DATA(rI3, 12)
	NEXT_BLOCK
	bt		gt,ppc_encrypt_cbc_loop
	START_IV
	SAVE_IV(rI0, 0)			/* hand back last ciphertext	*/
	SAVE_IV(rI1, 4)
	SAVE_IV(rI2, 8)
	SAVE_IV(rI3, 12)
	FINALIZE_CRYPT(4)
	blr

/*
 * ppc_decrypt_cbc(u8 *out, const u8 *in, u32 *key_dec,
 *		   u32 rounds, u32 bytes, u8 *iv);
 *
 * Called from the glue layer to decrypt multiple blocks via CBC.
 * Round values are AES128 = 4, AES192 = 5, AES256 = 6.
 *
 * The length is rounded down to a multiple of 16 and the blocks are
 * processed from the last one to the first one:
 *
 *  - the incoming IV is kept in rI0-rI3,
 *  - the last ciphertext block is loaded and immediately stored into
 *    the IV buffer as the IV for a following call,
 *  - in the loop the current block is decrypted, the ciphertext block
 *    in front of it is loaded (it is both the chaining value for the
 *    current block and the next block to decrypt) and the plaintext is
 *    stored; rSP/rDP then step back by CBC_DEC,
 *  - the first block is decrypted after the loop and chained with the
 *    incoming IV from rI0-rI3.
 *
 * rT1 is first used as scratch for the mask 15 and afterwards set to
 * rT0 + 4096 before the first call of ppc_decrypt_block.
 */
_GLOBAL(ppc_decrypt_cbc)
	INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 4)
	li		rT1,15
	LOAD_IV(rI0, 0)
	andc		rLN,rLN,rT1	/* whole blocks only		*/
	LOAD_IV(rI1, 4)
	subi		rLN,rLN,16	/* offset of the last block	*/
	LOAD_IV(rI2, 8)
	add		rSP,rSP,rLN	/* reverse processing		*/
	LOAD_IV(rI3, 12)
	add		rDP,rDP,rLN
	LOAD_DATA(rD0, 0)
	addi		rT1,rT0,4096	/* second table pointer		*/
	LOAD_DATA(rD1, 4)
	LOAD_DATA(rD2, 8)
	LOAD_DATA(rD3, 12)
	START_IV
	SAVE_IV(rD0, 0)			/* last ciphertext = new IV	*/
	SAVE_IV(rD1, 4)
	SAVE_IV(rD2, 8)
	cmpwi		rLN,16
	SAVE_IV(rD3, 12)
	bt		lt,ppc_decrypt_cbc_end	/* only one block	*/
ppc_decrypt_cbc_loop:
	mr		rKP,rKS		/* rewind key pointer		*/
	START_KEY(rD0, rD1, rD2, rD3)
	bl		ppc_decrypt_block
	subi		rLN,rLN,16
	subi		rSP,rSP,CBC_DEC	/* step to preceding block	*/
	xor		rW0,rD0,rW0	/* final key addition		*/
	LOAD_DATA(rD0, 0)
	xor		rW1,rD1,rW1
	LOAD_DATA(rD1, 4)
	xor		rW2,rD2,rW2
	LOAD_DATA(rD2, 8)
	xor		rW3,rD3,rW3
	LOAD_DATA(rD3, 12)
	xor		rW0,rW0,rD0	/* chain with preceding block	*/
	SAVE_DATA(rW0, 0)
	xor		rW1,rW1,rD1
	SAVE_DATA(rW1, 4)
	xor		rW2,rW2,rD2
	SAVE_DATA(rW2, 8)
	xor		rW3,rW3,rD3
	SAVE_DATA(rW3, 12)
	cmpwi		rLN,15
	subi		rDP,rDP,CBC_DEC
	bt		gt,ppc_decrypt_cbc_loop
ppc_decrypt_cbc_end:
	mr		rKP,rKS		/* rewind key pointer		*/
	START_KEY(rD0, rD1, rD2, rD3)
	bl		ppc_decrypt_block
	xor		rW0,rW0,rD0	/* final key addition		*/
	xor		rW1,rW1,rD1
	xor		rW2,rW2,rD2
	xor		rW3,rW3,rD3
	xor		rW0,rW0,rI0	/* decrypt with initial IV	*/
	SAVE_DATA(rW0, 0)
	xor		rW1,rW1,rI1
	SAVE_DATA(rW1, 4)
	xor		rW2,rW2,rI2
	SAVE_DATA(rW2, 8)
	xor		rW3,rW3,rI3
	SAVE_DATA(rW3, 12)
	FINALIZE_CRYPT(4)
	blr

/*
 * ppc_crypt_ctr(u8 *out, const u8 *in, u32 *key_enc,
 *		 u32 rounds, u32 bytes, u8 *iv);
 *
 * Called from the glue layer to encrypt/decrypt multiple blocks
 * via CTR. Number of bytes does not need to be a multiple of
 * 16. Round values are AES128 = 4, AES192 = 5, AES256 = 6.
 *
 * rI0-rI3 hold the counter (rI3 is the least significant word).
 *
 * Whole blocks: the counter is encrypted, the result is xored with 16
 * input bytes and stored, then the counter is incremented by one with
 * carry propagation through all four words (addic/addze).
 *
 * Remaining 1-15 bytes: the counter is encrypted once more and the
 * keystream is parked in the IV buffer. A byte loop then xors rLN
 * keystream bytes with the input and stores them. All three pointers
 * are moved one byte back before that loop (rIP by CTR_DEC) because
 * lbzu/stbu pre-increment. Afterwards rIP is set back to the start of
 * the IV buffer and the counter is incremented.
 *
 * In every case the current counter is finally written to the IV
 * buffer.
 */
_GLOBAL(ppc_crypt_ctr)
	INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 4)
	LOAD_IV(rI0, 0)
	LOAD_IV(rI1, 4)
	LOAD_IV(rI2, 8)
	cmpwi		rLN,16
	LOAD_IV(rI3, 12)
	START_IV
	bt		lt,ppc_crypt_ctr_partial	/* no whole block	*/
ppc_crypt_ctr_loop:
	mr		rKP,rKS		/* rewind key pointer		*/
	START_KEY(rI0, rI1, rI2, rI3)
	bl		ppc_encrypt_block
	xor		rW0,rD0,rW0	/* rW0-rW3 = keystream		*/
	xor		rW1,rD1,rW1
	xor		rW2,rD2,rW2
	xor		rW3,rD3,rW3
	LOAD_DATA(rD0, 0)
	subi		rLN,rLN,16
	LOAD_DATA(rD1, 4)
	LOAD_DATA(rD2, 8)
	LOAD_DATA(rD3, 12)
	xor		rD0,rD0,rW0
	SAVE_DATA(rD0, 0)
	xor		rD1,rD1,rW1
	SAVE_DATA(rD1, 4)
	xor		rD2,rD2,rW2
	SAVE_DATA(rD2, 8)
	xor		rD3,rD3,rW3
	SAVE_DATA(rD3, 12)
	addic		rI3,rI3,1	/* increase counter			*/
	addze		rI2,rI2
	addze		rI1,rI1
	addze		rI0,rI0
	NEXT_BLOCK
	cmpwi		rLN,15
	bt		gt,ppc_crypt_ctr_loop
ppc_crypt_ctr_partial:
	cmpwi		rLN,0
	bt		eq,ppc_crypt_ctr_end	/* nothing left		*/
	mr		rKP,rKS		/* rewind key pointer		*/
	START_KEY(rI0, rI1, rI2, rI3)
	bl		ppc_encrypt_block
	xor		rW0,rD0,rW0	/* park keystream in IV buffer	*/
	SAVE_IV(rW0, 0)
	xor		rW1,rD1,rW1
	SAVE_IV(rW1, 4)
	xor		rW2,rD2,rW2
	SAVE_IV(rW2, 8)
	xor		rW3,rD3,rW3
	SAVE_IV(rW3, 12)
	mtctr		rLN		/* loop over remaining bytes	*/
	subi		rIP,rIP,CTR_DEC	/* one byte in front of buffers	*/
	subi		rSP,rSP,1
	subi		rDP,rDP,1
ppc_crypt_ctr_xorbyte:
	lbzu		rW4,1(rIP)	/* bytewise xor for partial block	*/
	lbzu		rW5,1(rSP)
	xor		rW4,rW4,rW5
	stbu		rW4,1(rDP)
	bdnz		ppc_crypt_ctr_xorbyte
	subf		rIP,rLN,rIP	/* back to start of IV buffer	*/
	addi		rIP,rIP,1
	addic		rI3,rI3,1	/* increase counter		*/
	addze		rI2,rI2
	addze		rI1,rI1
	addze		rI0,rI0
ppc_crypt_ctr_end:
	SAVE_IV(rI0, 0)			/* hand back current counter	*/
	SAVE_IV(rI1, 4)
	SAVE_IV(rI2, 8)
	SAVE_IV(rI3, 12)
	FINALIZE_CRYPT(4)
	blr

/*
 * ppc_encrypt_xts(u8 *out, const u8 *in, u32 *key_enc,
 *		   u32 rounds, u32 bytes, u8 *iv, u32 *key_twk);
 *
 * Called from the glue layer to encrypt multiple blocks via XTS.
 * If key_twk is given, the initial IV encryption will be
 * processed too. Round values are AES128 = 4, AES192 = 5,
 * AES256 = 6.
 *
 * The tweak is kept twice: rI0-rI3 is the form that is xored into the
 * data before and after the block encryption, rG0-rG3 is the byte
 * swapped copy on which GF128_MUL operates. After every block the
 * tweak is multiplied in rG0-rG3 and swapped back into rI0-rI3. The
 * tweak that follows the last block is written to the IV buffer.
 * rI0-rI3 and rG0-rG3 are saved and restored through the 8 register
 * variant of INITIALIZE_CRYPT/FINALIZE_CRYPT.
 *
 * NOTE(review): the loop continues while rLN > 0 (not > 15 as in the
 * ECB/CBC routines), so a length that is not a multiple of 16 leads to
 * one more full 16 byte block being read and written. The caller
 * presumably passes whole blocks only - confirm.
 * NOTE(review): the initial tweak encryption uses the same rRR round
 * count as the data key - confirm that both keys have the same size.
 */
_GLOBAL(ppc_encrypt_xts)
	INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 8)
	LOAD_IV(rI0, 0)
	LOAD_IV(rI1, 4)
	LOAD_IV(rI2, 8)
	cmpwi		rKT,0
	LOAD_IV(rI3, 12)
	bt		eq,ppc_encrypt_xts_notweak	/* no tweak key	*/
	mr		rKP,rKT		/* encrypt IV with tweak key	*/
	START_KEY(rI0, rI1, rI2, rI3)
	bl		ppc_encrypt_block
	xor		rI0,rD0,rW0
	xor		rI1,rD1,rW1
	xor		rI2,rD2,rW2
	xor		rI3,rD3,rW3
ppc_encrypt_xts_notweak:
	ENDIAN_SWAP(rG0, rG1, rI0, rI1)
	ENDIAN_SWAP(rG2, rG3, rI2, rI3)
ppc_encrypt_xts_loop:
	LOAD_DATA(rD0, 0)
	mr		rKP,rKS		/* rewind key pointer		*/
	LOAD_DATA(rD1, 4)
	subi		rLN,rLN,16	/* one block less to do		*/
	LOAD_DATA(rD2, 8)
	LOAD_DATA(rD3, 12)
	xor		rD0,rD0,rI0	/* tweak before encryption	*/
	xor		rD1,rD1,rI1
	xor		rD2,rD2,rI2
	xor		rD3,rD3,rI3
	START_KEY(rD0, rD1, rD2, rD3)
	bl		ppc_encrypt_block
	xor		rD0,rD0,rW0	/* final key addition		*/
	xor		rD1,rD1,rW1
	xor		rD2,rD2,rW2
	xor		rD3,rD3,rW3
	xor		rD0,rD0,rI0	/* tweak after encryption	*/
	SAVE_DATA(rD0, 0)
	xor		rD1,rD1,rI1
	SAVE_DATA(rD1, 4)
	xor		rD2,rD2,rI2
	SAVE_DATA(rD2, 8)
	xor		rD3,rD3,rI3
	SAVE_DATA(rD3, 12)
	GF128_MUL(rG0, rG1, rG2, rG3, rW0)	/* next tweak	*/
	ENDIAN_SWAP(rI0, rI1, rG0, rG1)
	ENDIAN_SWAP(rI2, rI3, rG2, rG3)
	cmpwi		rLN,0
	NEXT_BLOCK
	bt		gt,ppc_encrypt_xts_loop
	START_IV
	SAVE_IV(rI0, 0)			/* hand back next tweak		*/
	SAVE_IV(rI1, 4)
	SAVE_IV(rI2, 8)
	SAVE_IV(rI3, 12)
	FINALIZE_CRYPT(8)
	blr

/*
 * ppc_decrypt_xts(u8 *out, const u8 *in, u32 *key_dec,
 *		   u32 rounds, u32 bytes, u8 *iv, u32 *key_twk);
 *
 * Called from the glue layer to decrypt multiple blocks via XTS.
 * If key_twk is given, the initial IV encryption will be
 * processed too. Round values are AES128 = 4, AES192 = 5,
 * AES256 = 6.
 *
 * Works like ppc_encrypt_xts with ppc_decrypt_block as worker for the
 * data blocks. The initial tweak is still produced by an encryption:
 * for that single call of ppc_encrypt_block rT0 is moved 4096 bytes
 * down and restored afterwards.
 *
 * NOTE(review): this assumes that the encryption table is located
 * exactly 4096 bytes in front of PPC_AES_4K_DECTAB - confirm against
 * the table definitions.
 * NOTE(review): the loop continues while rLN > 0 (not > 15 as in the
 * ECB/CBC routines), so a length that is not a multiple of 16 leads to
 * one more full 16 byte block being read and written. The caller
 * presumably passes whole blocks only - confirm.
 */
_GLOBAL(ppc_decrypt_xts)
	INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 8)
	LOAD_IV(rI0, 0)
	addi		rT1,rT0,4096	/* second table pointer		*/
	LOAD_IV(rI1, 4)
	LOAD_IV(rI2, 8)
	cmpwi		rKT,0
	LOAD_IV(rI3, 12)
	bt		eq,ppc_decrypt_xts_notweak	/* no tweak key	*/
	subi		rT0,rT0,4096	/* table for encryption		*/
	mr		rKP,rKT		/* encrypt IV with tweak key	*/
	START_KEY(rI0, rI1, rI2, rI3)
	bl		ppc_encrypt_block
	xor		rI0,rD0,rW0
	xor		rI1,rD1,rW1
	xor		rI2,rD2,rW2
	xor		rI3,rD3,rW3
	addi		rT0,rT0,4096	/* back to decryption table	*/
ppc_decrypt_xts_notweak:
	ENDIAN_SWAP(rG0, rG1, rI0, rI1)
	ENDIAN_SWAP(rG2, rG3, rI2, rI3)
ppc_decrypt_xts_loop:
	LOAD_DATA(rD0, 0)
	mr		rKP,rKS		/* rewind key pointer		*/
	LOAD_DATA(rD1, 4)
	subi		rLN,rLN,16	/* one block less to do		*/
	LOAD_DATA(rD2, 8)
	LOAD_DATA(rD3, 12)
	xor		rD0,rD0,rI0	/* tweak before decryption	*/
	xor		rD1,rD1,rI1
	xor		rD2,rD2,rI2
	xor		rD3,rD3,rI3
	START_KEY(rD0, rD1, rD2, rD3)
	bl		ppc_decrypt_block
	xor		rD0,rD0,rW0	/* final key addition		*/
	xor		rD1,rD1,rW1
	xor		rD2,rD2,rW2
	xor		rD3,rD3,rW3
	xor		rD0,rD0,rI0	/* tweak after decryption	*/
	SAVE_DATA(rD0, 0)
	xor		rD1,rD1,rI1
	SAVE_DATA(rD1, 4)
	xor		rD2,rD2,rI2
	SAVE_DATA(rD2, 8)
	xor		rD3,rD3,rI3
	SAVE_DATA(rD3, 12)
	GF128_MUL(rG0, rG1, rG2, rG3, rW0)	/* next tweak	*/
	ENDIAN_SWAP(rI0, rI1, rG0, rG1)
	ENDIAN_SWAP(rI2, rI3, rG2, rG3)
	cmpwi		rLN,0
	NEXT_BLOCK
	bt		gt,ppc_decrypt_xts_loop
	START_IV
	SAVE_IV(rI0, 0)			/* hand back next tweak		*/
	SAVE_IV(rI1, 4)
	SAVE_IV(rI2, 8)
	SAVE_IV(rI3, 12)
	FINALIZE_CRYPT(8)
	blr