xref: /OK3568_Linux_fs/kernel/arch/powerpc/crypto/sha1-powerpc-asm.S (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun/* SPDX-License-Identifier: GPL-2.0 */
2*4882a593Smuzhiyun/*
3*4882a593Smuzhiyun * SHA-1 implementation for PowerPC.
4*4882a593Smuzhiyun *
5*4882a593Smuzhiyun * Copyright (C) 2005 Paul Mackerras <paulus@samba.org>
6*4882a593Smuzhiyun */
7*4882a593Smuzhiyun
8*4882a593Smuzhiyun#include <asm/ppc_asm.h>
9*4882a593Smuzhiyun#include <asm/asm-offsets.h>
10*4882a593Smuzhiyun#include <asm/asm-compat.h>
11*4882a593Smuzhiyun
/*
 * LWZ(rt, d, ra): load the 32-bit word at byte offset d from the address
 * in ra into rt, interpreting the bytes in memory as big-endian.
 *
 * When __BIG_ENDIAN__ is defined this is a plain lwz.  Otherwise the offset
 * is first placed in rt with li and the word is then fetched with the
 * byte-reversing indexed load lwbrx.  In that variant rt doubles as the
 * index register, so rt must not be r0 (r0 in that operand position of
 * lwbrx reads as zero) and d must fit li's signed 16-bit immediate.
 */
12*4882a593Smuzhiyun#ifdef __BIG_ENDIAN__
13*4882a593Smuzhiyun#define LWZ(rt, d, ra)	\
14*4882a593Smuzhiyun	lwz	rt,d(ra)
15*4882a593Smuzhiyun#else
16*4882a593Smuzhiyun#define LWZ(rt, d, ra)	\
17*4882a593Smuzhiyun	li	rt,d;	\
18*4882a593Smuzhiyun	lwbrx	rt,rt,ra
19*4882a593Smuzhiyun#endif
20*4882a593Smuzhiyun
21*4882a593Smuzhiyun/*
22*4882a593Smuzhiyun * We roll the registers for T, A, B, C, D, E around on each
23*4882a593Smuzhiyun * iteration; T on iteration t is A on iteration t+1, and so on.
24*4882a593Smuzhiyun * We use registers 7 - 12 for this.
25*4882a593Smuzhiyun */
/*
 * Register number holding T, A, B, C, D or E during round t.  Each macro
 * evaluates to ((t + k) % 6) + 7, i.e. one of r7..r12.  For t = 0 this
 * gives E=r7, D=r8, C=r9, B=r10, A=r11, T=r12.  Incrementing t shifts every
 * name one register along: RA(t+1) == RT(t), RB(t+1) == RA(t), ...,
 * RE(t+1) == RD(t), and RT(t+1) reuses the register that held E in round t.
 */
26*4882a593Smuzhiyun#define RT(t)	((((t)+5)%6)+7)
27*4882a593Smuzhiyun#define RA(t)	((((t)+4)%6)+7)
28*4882a593Smuzhiyun#define RB(t)	((((t)+3)%6)+7)
29*4882a593Smuzhiyun#define RC(t)	((((t)+2)%6)+7)
30*4882a593Smuzhiyun#define RD(t)	((((t)+1)%6)+7)
31*4882a593Smuzhiyun#define RE(t)	((((t)+0)%6)+7)
32*4882a593Smuzhiyun
33*4882a593Smuzhiyun/* We use registers 16 - 31 for the W values */
/*
 * W(t) evaluates to (t % 16) + 16, so message-schedule word t lives in one
 * of r16..r31 and W(t) and W(t+16) name the same register.
 */
34*4882a593Smuzhiyun#define W(t)	(((t)%16)+16)
35*4882a593Smuzhiyun
/*
 * LOADW(t): load message word t, found at byte offset t*4 from the address
 * in r4, into register W(t) using the endian-aware LWZ macro.
 */
36*4882a593Smuzhiyun#define LOADW(t)				\
37*4882a593Smuzhiyun	LWZ(W(t),(t)*4,r4)
38*4882a593Smuzhiyun
/*
 * STEPD0_LOAD(t): one round using the selection function
 *     f = (B & C) | (D & ~B)
 * interleaved with the load of message word t+4 from r4 into W(t+4).
 *
 * Computes  T = rotl(A, 5) + f + E + r15 + W(t)  into RT(t) and replaces
 * B with rotl(B, 30) in place.  r15 must already hold the round constant.
 * Scratch registers overwritten: r0, r6, r14.
 */
39*4882a593Smuzhiyun#define STEPD0_LOAD(t)				\
40*4882a593Smuzhiyun	andc	r0,RD(t),RB(t);		\
41*4882a593Smuzhiyun	and	r6,RB(t),RC(t);		\
42*4882a593Smuzhiyun	rotlwi	RT(t),RA(t),5;			\
43*4882a593Smuzhiyun	or	r6,r6,r0;			\
44*4882a593Smuzhiyun	add	r0,RE(t),r15;			\
45*4882a593Smuzhiyun	add	RT(t),RT(t),r6;		\
46*4882a593Smuzhiyun	add	r14,r0,W(t);			\
47*4882a593Smuzhiyun	LWZ(W((t)+4),((t)+4)*4,r4);	\
48*4882a593Smuzhiyun	rotlwi	RB(t),RB(t),30;			\
49*4882a593Smuzhiyun	add	RT(t),RT(t),r14
50*4882a593Smuzhiyun
/*
 * STEPD0_UPDATE(t): one round using the selection function
 *     f = (B & C) | (D & ~B)
 * interleaved with the computation of schedule word t+4:
 *     W(t+4) = rotl(W(t+1) ^ W(t-4) ^ W(t-10) ^ W(t-12), 1)
 * W(t+4) and W(t-12) are the same register (indices differ by 16), so the
 * old word is consumed by the instruction that first writes the new one.
 *
 * Computes  T = rotl(A, 5) + f + E + r15 + W(t)  into RT(t) and replaces
 * B with rotl(B, 30) in place.  r15 must already hold the round constant.
 * Scratch registers overwritten: r0, r5, r6.
 */
51*4882a593Smuzhiyun#define STEPD0_UPDATE(t)			\
52*4882a593Smuzhiyun	and	r6,RB(t),RC(t);		\
53*4882a593Smuzhiyun	andc	r0,RD(t),RB(t);		\
54*4882a593Smuzhiyun	rotlwi	RT(t),RA(t),5;			\
55*4882a593Smuzhiyun	rotlwi	RB(t),RB(t),30;			\
56*4882a593Smuzhiyun	or	r6,r6,r0;			\
57*4882a593Smuzhiyun	add	r0,RE(t),r15;			\
58*4882a593Smuzhiyun	xor	r5,W((t)+4-3),W((t)+4-8);		\
59*4882a593Smuzhiyun	add	RT(t),RT(t),r6;		\
60*4882a593Smuzhiyun	xor	W((t)+4),W((t)+4-16),W((t)+4-14);	\
61*4882a593Smuzhiyun	add	r0,r0,W(t);			\
62*4882a593Smuzhiyun	xor	W((t)+4),W((t)+4),r5;			\
63*4882a593Smuzhiyun	add	RT(t),RT(t),r0;		\
64*4882a593Smuzhiyun	rotlwi	W((t)+4),W((t)+4),1
65*4882a593Smuzhiyun
/*
 * STEPD1(t): one round using the parity function  f = B ^ C ^ D, with no
 * message-schedule work.
 *
 * Computes  T = rotl(A, 5) + f + E + r15 + W(t)  into RT(t) and replaces
 * B with rotl(B, 30) in place.  r15 must already hold the round constant.
 * Scratch registers overwritten: r0, r6.
 */
66*4882a593Smuzhiyun#define STEPD1(t)				\
67*4882a593Smuzhiyun	xor	r6,RB(t),RC(t);		\
68*4882a593Smuzhiyun	rotlwi	RT(t),RA(t),5;			\
69*4882a593Smuzhiyun	rotlwi	RB(t),RB(t),30;			\
70*4882a593Smuzhiyun	xor	r6,r6,RD(t);			\
71*4882a593Smuzhiyun	add	r0,RE(t),r15;			\
72*4882a593Smuzhiyun	add	RT(t),RT(t),r6;		\
73*4882a593Smuzhiyun	add	r0,r0,W(t);			\
74*4882a593Smuzhiyun	add	RT(t),RT(t),r0
75*4882a593Smuzhiyun
/*
 * STEPD1_UPDATE(t): one round using the parity function  f = B ^ C ^ D,
 * interleaved with the computation of schedule word t+4:
 *     W(t+4) = rotl(W(t+1) ^ W(t-4) ^ W(t-10) ^ W(t-12), 1)
 * W(t+4) and W(t-12) are the same register (indices differ by 16).
 *
 * Computes  T = rotl(A, 5) + f + E + r15 + W(t)  into RT(t) and replaces
 * B with rotl(B, 30) in place.  r15 must already hold the round constant.
 * Scratch registers overwritten: r0, r5, r6.
 */
76*4882a593Smuzhiyun#define STEPD1_UPDATE(t)				\
77*4882a593Smuzhiyun	xor	r6,RB(t),RC(t);		\
78*4882a593Smuzhiyun	rotlwi	RT(t),RA(t),5;			\
79*4882a593Smuzhiyun	rotlwi	RB(t),RB(t),30;			\
80*4882a593Smuzhiyun	xor	r6,r6,RD(t);			\
81*4882a593Smuzhiyun	add	r0,RE(t),r15;			\
82*4882a593Smuzhiyun	xor	r5,W((t)+4-3),W((t)+4-8);		\
83*4882a593Smuzhiyun	add	RT(t),RT(t),r6;		\
84*4882a593Smuzhiyun	xor	W((t)+4),W((t)+4-16),W((t)+4-14);	\
85*4882a593Smuzhiyun	add	r0,r0,W(t);			\
86*4882a593Smuzhiyun	xor	W((t)+4),W((t)+4),r5;			\
87*4882a593Smuzhiyun	add	RT(t),RT(t),r0;		\
88*4882a593Smuzhiyun	rotlwi	W((t)+4),W((t)+4),1
89*4882a593Smuzhiyun
/*
 * STEPD2_UPDATE(t): one round using the majority function
 *     f = (B & C) | (B & D) | (C & D)
 * interleaved with the computation of schedule word t+4:
 *     W(t+4) = rotl(W(t+1) ^ W(t-4) ^ W(t-10) ^ W(t-12), 1)
 * W(t+4) and W(t-12) are the same register (indices differ by 16).
 *
 * B is rotated by 30 after the two terms that read the old B but before
 * the (C & D) term, which does not involve B.
 *
 * Computes  T = rotl(A, 5) + f + E + r15 + W(t)  into RT(t).  r15 must
 * already hold the round constant.
 * Scratch registers overwritten: r0, r5, r6.
 */
90*4882a593Smuzhiyun#define STEPD2_UPDATE(t)			\
91*4882a593Smuzhiyun	and	r6,RB(t),RC(t);		\
92*4882a593Smuzhiyun	and	r0,RB(t),RD(t);		\
93*4882a593Smuzhiyun	rotlwi	RT(t),RA(t),5;			\
94*4882a593Smuzhiyun	or	r6,r6,r0;			\
95*4882a593Smuzhiyun	rotlwi	RB(t),RB(t),30;			\
96*4882a593Smuzhiyun	and	r0,RC(t),RD(t);		\
97*4882a593Smuzhiyun	xor	r5,W((t)+4-3),W((t)+4-8);	\
98*4882a593Smuzhiyun	or	r6,r6,r0;			\
99*4882a593Smuzhiyun	xor	W((t)+4),W((t)+4-16),W((t)+4-14);	\
100*4882a593Smuzhiyun	add	r0,RE(t),r15;			\
101*4882a593Smuzhiyun	add	RT(t),RT(t),r6;		\
102*4882a593Smuzhiyun	add	r0,r0,W(t);			\
103*4882a593Smuzhiyun	xor	W((t)+4),W((t)+4),r5;		\
104*4882a593Smuzhiyun	add	RT(t),RT(t),r0;		\
105*4882a593Smuzhiyun	rotlwi	W((t)+4),W((t)+4),1
106*4882a593Smuzhiyun
/*
 * STEP0LD4(t): four consecutive STEPD0_LOAD rounds, t .. t+3, which also
 * load message words t+4 .. t+7.
 */
107*4882a593Smuzhiyun#define STEP0LD4(t)				\
108*4882a593Smuzhiyun	STEPD0_LOAD(t);				\
109*4882a593Smuzhiyun	STEPD0_LOAD((t)+1);			\
110*4882a593Smuzhiyun	STEPD0_LOAD((t)+2);			\
111*4882a593Smuzhiyun	STEPD0_LOAD((t)+3)
112*4882a593Smuzhiyun
/*
 * STEPUP4(t, fn): four consecutive rounds, t .. t+3, of the macro
 * STEP<fn>_UPDATE.  fn is token-pasted into the macro name, so it must be
 * one of D0, D1 or D2.
 */
113*4882a593Smuzhiyun#define STEPUP4(t, fn)				\
114*4882a593Smuzhiyun	STEP##fn##_UPDATE(t);			\
115*4882a593Smuzhiyun	STEP##fn##_UPDATE((t)+1);		\
116*4882a593Smuzhiyun	STEP##fn##_UPDATE((t)+2);		\
117*4882a593Smuzhiyun	STEP##fn##_UPDATE((t)+3)
118*4882a593Smuzhiyun
/*
 * STEPUP20(t, fn): twenty consecutive rounds, t .. t+19, built from five
 * STEPUP4 groups using the same round macro selector fn.
 */
119*4882a593Smuzhiyun#define STEPUP20(t, fn)				\
120*4882a593Smuzhiyun	STEPUP4(t, fn);				\
121*4882a593Smuzhiyun	STEPUP4((t)+4, fn);			\
122*4882a593Smuzhiyun	STEPUP4((t)+8, fn);			\
123*4882a593Smuzhiyun	STEPUP4((t)+12, fn);			\
124*4882a593Smuzhiyun	STEPUP4((t)+16, fn)
125*4882a593Smuzhiyun
/*
 * powerpc_sha_transform: run the 80 SHA-1 rounds over one message block and
 * add the result into the hash state.
 *
 * Register usage as visible in this routine:
 *   r3        address of the five 32-bit state words A..E (offsets 0..16);
 *             read with plain lwz and written back with stw at the end.
 *   r4        address of the message block; sixteen 32-bit words at offsets
 *             0..60 are read through LWZ, i.e. interpreted as big-endian.
 *   r7-r12    rotating T/A/B/C/D/E working registers.
 *   r16-r31   sixteen-entry window of the message schedule W.
 *   r15       current round constant K.
 *   r0, r5, r6, r14   scratch inside the round macros.
 *
 * A stack frame of INT_FRAME_SIZE bytes is allocated on entry and released
 * before returning.  SAVE_8GPRS(14)/SAVE_10GPRS(22) and the matching REST_
 * macros come from asm/ppc_asm.h; presumably they save and restore r14-r21
 * and r22-r31 in that frame (NOTE(review): confirm against the header).
 */
126*4882a593Smuzhiyun_GLOBAL(powerpc_sha_transform)
127*4882a593Smuzhiyun	PPC_STLU r1,-INT_FRAME_SIZE(r1)
128*4882a593Smuzhiyun	SAVE_8GPRS(14, r1)
129*4882a593Smuzhiyun	SAVE_10GPRS(22, r1)
130*4882a593Smuzhiyun
131*4882a593Smuzhiyun	/* Load up A - E */
132*4882a593Smuzhiyun	lwz	RA(0),0(r3)	/* A */
133*4882a593Smuzhiyun	lwz	RB(0),4(r3)	/* B */
134*4882a593Smuzhiyun	lwz	RC(0),8(r3)	/* C */
135*4882a593Smuzhiyun	lwz	RD(0),12(r3)	/* D */
136*4882a593Smuzhiyun	lwz	RE(0),16(r3)	/* E */
137*4882a593Smuzhiyun
	/*
	 * Preload message words 0-3; the STEPD0_LOAD rounds below fetch
	 * word t+4 while processing round t, covering words 4-15.
	 */
138*4882a593Smuzhiyun	LOADW(0)
139*4882a593Smuzhiyun	LOADW(1)
140*4882a593Smuzhiyun	LOADW(2)
141*4882a593Smuzhiyun	LOADW(3)
142*4882a593Smuzhiyun
	/*
	 * Rounds 0-11 load the remaining message words; from round 12 on,
	 * each round computes schedule word t+4 instead.
	 */
143*4882a593Smuzhiyun	lis	r15,0x5a82	/* K0-19 */
144*4882a593Smuzhiyun	ori	r15,r15,0x7999
145*4882a593Smuzhiyun	STEP0LD4(0)
146*4882a593Smuzhiyun	STEP0LD4(4)
147*4882a593Smuzhiyun	STEP0LD4(8)
148*4882a593Smuzhiyun	STEPUP4(12, D0)
149*4882a593Smuzhiyun	STEPUP4(16, D0)
150*4882a593Smuzhiyun
151*4882a593Smuzhiyun	lis	r15,0x6ed9	/* K20-39 */
152*4882a593Smuzhiyun	ori	r15,r15,0xeba1
153*4882a593Smuzhiyun	STEPUP20(20, D1)
154*4882a593Smuzhiyun
155*4882a593Smuzhiyun	lis	r15,0x8f1b	/* K40-59 */
156*4882a593Smuzhiyun	ori	r15,r15,0xbcdc
157*4882a593Smuzhiyun	STEPUP20(40, D2)
158*4882a593Smuzhiyun
	/*
	 * Rounds 60-75 still extend the schedule (up to word 79).  Rounds
	 * 76-79 use STEPD1, which does no schedule work, and read only
	 * W(76)..W(79) = r28..r31, so r16-r20 are free to receive the old
	 * state words, reloaded in between the final rounds.
	 */
159*4882a593Smuzhiyun	lis	r15,0xca62	/* K60-79 */
160*4882a593Smuzhiyun	ori	r15,r15,0xc1d6
161*4882a593Smuzhiyun	STEPUP4(60, D1)
162*4882a593Smuzhiyun	STEPUP4(64, D1)
163*4882a593Smuzhiyun	STEPUP4(68, D1)
164*4882a593Smuzhiyun	STEPUP4(72, D1)
165*4882a593Smuzhiyun	lwz	r20,16(r3)
166*4882a593Smuzhiyun	STEPD1(76)
167*4882a593Smuzhiyun	lwz	r19,12(r3)
168*4882a593Smuzhiyun	STEPD1(77)
169*4882a593Smuzhiyun	lwz	r18,8(r3)
170*4882a593Smuzhiyun	STEPD1(78)
171*4882a593Smuzhiyun	lwz	r17,4(r3)
172*4882a593Smuzhiyun	STEPD1(79)
173*4882a593Smuzhiyun
	/*
	 * Add the old state to the final working values.  After 80 rounds
	 * the values sit in RA(80)..RE(80) = r7, r12, r11, r10, r9, while
	 * the results are placed in RA(0)..RE(0) = r11, r10, r9, r8, r7.
	 * E is summed into r20 first and moved into RE(0) last, because
	 * RE(0) (r7) still holds the final A until the A addition and
	 * RE(80) (r9) is overwritten by the C addition.
	 */
174*4882a593Smuzhiyun	lwz	r16,0(r3)
175*4882a593Smuzhiyun	add	r20,RE(80),r20
176*4882a593Smuzhiyun	add	RD(0),RD(80),r19
177*4882a593Smuzhiyun	add	RC(0),RC(80),r18
178*4882a593Smuzhiyun	add	RB(0),RB(80),r17
179*4882a593Smuzhiyun	add	RA(0),RA(80),r16
180*4882a593Smuzhiyun	mr	RE(0),r20
181*4882a593Smuzhiyun	stw	RA(0),0(r3)
182*4882a593Smuzhiyun	stw	RB(0),4(r3)
183*4882a593Smuzhiyun	stw	RC(0),8(r3)
184*4882a593Smuzhiyun	stw	RD(0),12(r3)
185*4882a593Smuzhiyun	stw	RE(0),16(r3)
186*4882a593Smuzhiyun
187*4882a593Smuzhiyun	REST_8GPRS(14, r1)
188*4882a593Smuzhiyun	REST_10GPRS(22, r1)
189*4882a593Smuzhiyun	addi	r1,r1,INT_FRAME_SIZE
190*4882a593Smuzhiyun	blr
191