/* SPDX-License-Identifier: GPL-2.0 */
/*
 * SHA-1 implementation for PowerPC.
 *
 * Copyright (C) 2005 Paul Mackerras <paulus@samba.org>
 */

#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/asm-compat.h>

/*
 * LWZ(rt, d, ra): load the 32-bit word at d(ra) into rt, interpreting the
 * bytes in memory as big-endian regardless of the CPU's endianness.
 *
 * On big-endian builds this is a plain lwz.  Otherwise the displacement is
 * first placed in rt and a byte-reversed indexed load (lwbrx) is used, so
 * rt doubles as the index register: it must not be r0 (which reads as zero
 * in that operand position) and d must fit an li immediate.  Every use in
 * this file passes a W() register (r16 - r31) and d <= 60.
 */
#ifdef __BIG_ENDIAN__
#define LWZ(rt, d, ra)	\
	lwz	rt,d(ra)
#else
#define LWZ(rt, d, ra)	\
	li	rt,d;	\
	lwbrx	rt,rt,ra
#endif

/*
 * We roll the registers for T, A, B, C, D, E around on each
 * iteration; T on iteration t is A on iteration t+1, and so on.
 * We use registers 7 - 12 for this.
 *
 * Each macro below evaluates to the register number that holds the
 * named working variable during round t.
 */
#define RT(t)	((((t)+5)%6)+7)
#define RA(t)	((((t)+4)%6)+7)
#define RB(t)	((((t)+3)%6)+7)
#define RC(t)	((((t)+2)%6)+7)
#define RD(t)	((((t)+1)%6)+7)
#define RE(t)	((((t)+0)%6)+7)

/*
 * We use registers 16 - 31 for the W values.  The schedule is kept as a
 * 16-entry circular window: W(t) and W(t+16) name the same register.
 */
#define W(t)	(((t)%16)+16)

/* Load message word t from the input block addressed by r4 into W(t). */
#define LOADW(t)				\
	LWZ(W(t),(t)*4,r4)

/*
 * Round macros.
 *
 * Every STEP* macro performs one SHA-1 round t:
 *
 *	T = rotl(A, 5) + f(B, C, D) + E + K + W[t];   B = rotl(B, 30)
 *
 * with K taken from r15 and the result left in RT(t).  r0 and r6 are
 * scratch in all variants; the *_UPDATE variants also use r5, and
 * STEPD0_LOAD also uses r14.
 *
 * The *_UPDATE variants additionally advance the message schedule four
 * rounds ahead, in place:
 *
 *	W[t+4] = rotl(W[t+1] ^ W[t-4] ^ W[t-10] ^ W[t-12], 1)
 *
 * (W[t-12] lives in the same register as W[t+4]).  They are first used
 * at t = 12, so the index expressions never go negative.
 *
 * The instruction order inside each macro is hand-interleaved; keep it
 * as is when editing.
 */

/*
 * Rounds with f = (B & C) | (~B & D).  Instead of updating the schedule,
 * this variant fetches input word t+4 from the block at r4.
 */
#define STEPD0_LOAD(t)				\
	andc	r0,RD(t),RB(t);		\
	and	r6,RB(t),RC(t);		\
	rotlwi	RT(t),RA(t),5;			\
	or	r6,r6,r0;			\
	add	r0,RE(t),r15;			\
	add	RT(t),RT(t),r6;		\
	add	r14,r0,W(t);			\
	LWZ(W((t)+4),((t)+4)*4,r4);	\
	rotlwi	RB(t),RB(t),30;			\
	add	RT(t),RT(t),r14

/* Rounds with f = (B & C) | (~B & D), plus the schedule update for W[t+4]. */
#define STEPD0_UPDATE(t)			\
	and	r6,RB(t),RC(t);		\
	andc	r0,RD(t),RB(t);		\
	rotlwi	RT(t),RA(t),5;			\
	rotlwi	RB(t),RB(t),30;			\
	or	r6,r6,r0;			\
	add	r0,RE(t),r15;			\
	xor	r5,W((t)+4-3),W((t)+4-8);		\
	add	RT(t),RT(t),r6;		\
	xor	W((t)+4),W((t)+4-16),W((t)+4-14);	\
	add	r0,r0,W(t);			\
	xor	W((t)+4),W((t)+4),r5;			\
	add	RT(t),RT(t),r0;		\
	rotlwi	W((t)+4),W((t)+4),1

/*
 * Rounds with f = B ^ C ^ D and no schedule update.  Used for the last
 * four rounds (76 - 79), where no further W values are needed.
 */
#define STEPD1(t)				\
	xor	r6,RB(t),RC(t);		\
	rotlwi	RT(t),RA(t),5;			\
	rotlwi	RB(t),RB(t),30;			\
	xor	r6,r6,RD(t);			\
	add	r0,RE(t),r15;			\
	add	RT(t),RT(t),r6;		\
	add	r0,r0,W(t);			\
	add	RT(t),RT(t),r0

/* Rounds with f = B ^ C ^ D, plus the schedule update for W[t+4]. */
#define STEPD1_UPDATE(t)				\
	xor	r6,RB(t),RC(t);		\
	rotlwi	RT(t),RA(t),5;			\
	rotlwi	RB(t),RB(t),30;			\
	xor	r6,r6,RD(t);			\
	add	r0,RE(t),r15;			\
	xor	r5,W((t)+4-3),W((t)+4-8);		\
	add	RT(t),RT(t),r6;		\
	xor	W((t)+4),W((t)+4-16),W((t)+4-14);	\
	add	r0,r0,W(t);			\
	xor	W((t)+4),W((t)+4),r5;			\
	add	RT(t),RT(t),r0;		\
	rotlwi	W((t)+4),W((t)+4),1

/*
 * Rounds with f = (B & C) | (B & D) | (C & D), plus the schedule update
 * for W[t+4].  B is rotated before the (C & D) term is formed; that term
 * does not read B, so the early rotate is safe.
 */
#define STEPD2_UPDATE(t)			\
	and	r6,RB(t),RC(t);		\
	and	r0,RB(t),RD(t);		\
	rotlwi	RT(t),RA(t),5;			\
	or	r6,r6,r0;			\
	rotlwi	RB(t),RB(t),30;			\
	and	r0,RC(t),RD(t);		\
	xor	r5,W((t)+4-3),W((t)+4-8);	\
	or	r6,r6,r0;			\
	xor	W((t)+4),W((t)+4-16),W((t)+4-14);	\
	add	r0,RE(t),r15;			\
	add	RT(t),RT(t),r6;		\
	add	r0,r0,W(t);			\
	xor	W((t)+4),W((t)+4),r5;		\
	add	RT(t),RT(t),r0;		\
	rotlwi	W((t)+4),W((t)+4),1

/* Four consecutive STEPD0_LOAD rounds starting at round t. */
#define STEP0LD4(t)				\
	STEPD0_LOAD(t);				\
	STEPD0_LOAD((t)+1);			\
	STEPD0_LOAD((t)+2);			\
	STEPD0_LOAD((t)+3)

/* Four consecutive STEP<fn>_UPDATE rounds starting at round t. */
#define STEPUP4(t, fn)				\
	STEP##fn##_UPDATE(t);			\
	STEP##fn##_UPDATE((t)+1);		\
	STEP##fn##_UPDATE((t)+2);		\
	STEP##fn##_UPDATE((t)+3)

/* Twenty consecutive STEP<fn>_UPDATE rounds starting at round t. */
#define STEPUP20(t, fn)				\
	STEPUP4(t, fn);				\
	STEPUP4((t)+4, fn);			\
	STEPUP4((t)+8, fn);			\
	STEPUP4((t)+12, fn);			\
	STEPUP4((t)+16, fn)

/*
 * powerpc_sha_transform - run the 80 SHA-1 rounds over one input block.
 *
 * In:	r3 = address of the five 32-bit state words A..E (offsets 0..16),
 *	     read and written with plain lwz/stw, i.e. in native byte order
 *	r4 = address of the 64-byte input block; its sixteen words are read
 *	     through LWZ(), i.e. as big-endian values
 * Out:	the state at r3 is replaced by (old state + result of the rounds).
 *
 * Uses r0, r5 - r12 and r14 - r31.  A stack frame of INT_FRAME_SIZE bytes
 * is created and r14 - r31 are preserved through the SAVE_/REST_ macros
 * from ppc_asm.h (presumably r14 - r21 and r22 - r31 respectively; the
 * macro definitions are not part of this file).
 */
_GLOBAL(powerpc_sha_transform)
	PPC_STLU r1,-INT_FRAME_SIZE(r1)
	SAVE_8GPRS(14, r1)
	SAVE_10GPRS(22, r1)

	/* Load up A - E */
	lwz	RA(0),0(r3)	/* A */
	lwz	RB(0),4(r3)	/* B */
	lwz	RC(0),8(r3)	/* C */
	lwz	RD(0),12(r3)	/* D */
	lwz	RE(0),16(r3)	/* E */

	/*
	 * Prime the schedule with W[0..3]; the STEPD0_LOAD rounds below
	 * fetch W[4..15], each four rounds ahead of its use.
	 */
	LOADW(0)
	LOADW(1)
	LOADW(2)
	LOADW(3)

	lis	r15,0x5a82	/* K0-19 */
	ori	r15,r15,0x7999
	STEP0LD4(0)
	STEP0LD4(4)
	STEP0LD4(8)
	STEPUP4(12, D0)
	STEPUP4(16, D0)

	lis	r15,0x6ed9	/* K20-39 */
	ori	r15,r15,0xeba1
	STEPUP20(20, D1)

	lis	r15,0x8f1b	/* K40-59 */
	ori	r15,r15,0xbcdc
	STEPUP20(40, D2)

	lis	r15,0xca62	/* K60-79 */
	ori	r15,r15,0xc1d6
	STEPUP4(60, D1)
	STEPUP4(64, D1)
	STEPUP4(68, D1)
	STEPUP4(72, D1)
	/*
	 * Rounds 76 - 79 only read W(76..79) = r28 - r31, so r16 - r20 are
	 * free: reload the previous state into them, interleaved with the
	 * final rounds.
	 */
	lwz	r20,16(r3)
	STEPD1(76)
	lwz	r19,12(r3)
	STEPD1(77)
	lwz	r18,8(r3)
	STEPD1(78)
	lwz	r17,4(r3)
	STEPD1(79)

	/*
	 * Add the previous state to the new A..E and move the sums back to
	 * the round-0 register assignment.  E is summed into r20 first and
	 * copied last because its destination RE(0) is the register that
	 * still holds the new A, i.e. RA(80), until that has been consumed.
	 */
	lwz	r16,0(r3)
	add	r20,RE(80),r20
	add	RD(0),RD(80),r19
	add	RC(0),RC(80),r18
	add	RB(0),RB(80),r17
	add	RA(0),RA(80),r16
	mr	RE(0),r20
	stw	RA(0),0(r3)
	stw	RB(0),4(r3)
	stw	RC(0),8(r3)
	stw	RD(0),12(r3)
	stw	RE(0),16(r3)

	REST_8GPRS(14, r1)
	REST_10GPRS(22, r1)
	addi	r1,r1,INT_FRAME_SIZE
	blr