/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Fast SHA-1 implementation for SPE instruction set (PPC)
 *
 * This code makes use of the SPE SIMD instruction set as defined in
 * http://cache.freescale.com/files/32bit/doc/ref_manual/SPEPIM.pdf
 * Implementation is based on optimization guide notes from
 * http://cache.freescale.com/files/32bit/doc/app_note/AN2665.pdf
 *
 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
 */

#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>

/*
 * Register usage.
 *
 * r3/r4 hold the hash state and input pointers for the whole function.
 * r5 is first copied into CTR as the loop counter and afterwards reused
 * as the pointer to the round constant table (rKP).
 */
#define rHP	r3	/* pointer to hash value			*/
#define rWP	r4	/* pointer to input				*/
#define rKP	r5	/* pointer to constants				*/

#define rW0	r14	/* 64 bit round words				*/
#define rW1	r15
#define rW2	r16
#define rW3	r17
#define rW4	r18
#define rW5	r19
#define rW6	r20
#define rW7	r21

#define rH0	r6	/* 32 bit hash values				*/
#define rH1	r7
#define rH2	r8
#define rH3	r9
#define rH4	r10

#define rT0	r22	/* 64 bit temporary				*/
#define rT1	r0	/* 32 bit temporaries				*/
#define rT2	r11
#define rT3	r12

#define rK	r23	/* 64 bit round constant; r23 is saved and	*/
			/* restored by INITIALIZE/FINALIZE		*/

/*
 * LOAD_K<k>1: selected by the round macros through LOAD_K##k##1.
 * k = 0 expands to nothing (rK is left untouched); k = 1..4 splat the
 * matching 32 bit word of the constant table into both halves of rK.
 */
#define LOAD_K01

#define LOAD_K11 \
	evlwwsplat	rK,0(rKP);

#define LOAD_K21 \
	evlwwsplat	rK,4(rKP);

#define LOAD_K31 \
	evlwwsplat	rK,8(rKP);

#define LOAD_K41 \
	evlwwsplat	rK,12(rKP);

/*
 * INITIALIZE: open a 128 byte stack frame and store the full 64 bit
 * contents of r14-r23 into it. These are exactly the registers aliased
 * above as rW0-rW7, rT0 and rK.
 */
#define INITIALIZE \
	stwu		r1,-128(r1);	/* create stack frame		*/ \
	evstdw		r14,8(r1);	/* We must save non volatile	*/ \
	evstdw		r15,16(r1);	/* registers. Take the chance	*/ \
	evstdw		r16,24(r1);	/* and save the SPE part too	*/ \
	evstdw		r17,32(r1);					   \
	evstdw		r18,40(r1);					   \
	evstdw		r19,48(r1);					   \
	evstdw		r20,56(r1);					   \
	evstdw		r21,64(r1);					   \
	evstdw		r22,72(r1);					   \
	evstdw		r23,80(r1);

/*
 * FINALIZE: reload r14-r23 from the frame, overwrite the first word of
 * every save slot with zero and release the frame. r0 is used as the
 * zero source, so rT1 (r0) is clobbered here.
 */
#define FINALIZE \
	evldw		r14,8(r1);	/* restore SPE registers	*/ \
	evldw		r15,16(r1);					   \
	evldw		r16,24(r1);					   \
	evldw		r17,32(r1);					   \
	evldw		r18,40(r1);					   \
	evldw		r19,48(r1);					   \
	evldw		r20,56(r1);					   \
	evldw		r21,64(r1);					   \
	evldw		r22,72(r1);					   \
	evldw		r23,80(r1);					   \
	xor		r0,r0,r0;					   \
	stw		r0,8(r1);	/* Delete sensitive data	*/ \
	stw		r0,16(r1);	/* that we might have pushed	*/ \
	stw		r0,24(r1);	/* from other context that runs	*/ \
	stw		r0,32(r1);	/* the same code. Assume that	*/ \
	stw		r0,40(r1);	/* the lower part of the GPRs	*/ \
	stw		r0,48(r1);	/* were already overwritten on	*/ \
	stw		r0,56(r1);	/* the way down to here		*/ \
	stw		r0,64(r1);					   \
	stw		r0,72(r1);					   \
	stw		r0,80(r1);					   \
	addi		r1,r1,128;	/* cleanup stack frame		*/

/*
 * LOAD_DATA(reg, off): fetch one 32 bit message word into reg.
 * NEXT_BLOCK: advance rWP to the next 64 byte block.
 *
 * Big endian: plain loads at rWP + off, rWP is bumped once per block.
 * Otherwise: byte-reversed loads at rWP; "off" is ignored and rWP is
 * bumped after every word, so NEXT_BLOCK has nothing left to do.
 */
#ifdef __BIG_ENDIAN__
#define LOAD_DATA(reg, off) \
	lwz		reg,off(rWP);	/* load data			*/
#define NEXT_BLOCK \
	addi		rWP,rWP,64;	/* increment per block		*/
#else
#define LOAD_DATA(reg, off) \
	lwbrx		reg,0,rWP;	/* load data			*/ \
	addi		rWP,rWP,4;	/* increment per word		*/
#define NEXT_BLOCK			/* nothing to do		*/
#endif

/*
 * Round macros. Every invocation performs two SHA-1 rounds; the "1:" and
 * "2:" prefixes in the comments tell which of the two rounds a line
 * belongs to. Round 1 updates e and rotates b, round 2 updates d and
 * rotates a, which is why the callers permute the a..e arguments from
 * one invocation to the next instead of moving registers around.
 *
 * R_00_15 loads two message words (w0, w1) from the input, uses them in
 * the two rounds and finally merges both into the 64 bit register w1.
 * k selects an optional constant reload (see LOAD_K<k>1).
 */
#define	R_00_15(a, b, c, d, e, w0, w1, k, off) \
	LOAD_DATA(w0, off)		/* 1: W				*/ \
	and		rT2,b,c;	/* 1: F' = B and C 		*/ \
	LOAD_K##k##1							   \
	andc		rT1,d,b;	/* 1: F" = ~B and D 		*/ \
	rotrwi		rT0,a,27;	/* 1: A' = A rotl 5		*/ \
	or		rT2,rT2,rT1;	/* 1: F = F' or F"		*/ \
	add		e,e,rT0;	/* 1: E = E + A'		*/ \
	rotrwi		b,b,2;		/* 1: B = B rotl 30		*/ \
	add		e,e,w0;		/* 1: E = E + W			*/ \
	LOAD_DATA(w1, off+4)		/* 2: W				*/ \
	add		e,e,rT2;	/* 1: E = E + F			*/ \
	and		rT1,a,b;	/* 2: F' = B and C 		*/ \
	add		e,e,rK;		/* 1: E = E + K			*/ \
	andc		rT2,c,a;	/* 2: F" = ~B and D 		*/ \
	add		d,d,rK;		/* 2: E = E + K			*/ \
	or		rT2,rT2,rT1;	/* 2: F = F' or F"		*/ \
	rotrwi		rT0,e,27;	/* 2: A' = A rotl 5		*/ \
	add		d,d,w1;		/* 2: E = E + W			*/ \
	rotrwi		a,a,2;		/* 2: B = B rotl 30		*/ \
	add		d,d,rT0;	/* 2: E = E + A'		*/ \
	evmergelo	w1,w1,w0;	/* mix W[0]/W[1]		*/ \
	add		d,d,rT2		/* 2: E = E + F			*/

/*
 * R_16_19 .. R_60_79 compute the next two schedule words in place in the
 * 64 bit register w0 from w0, w1, w4 and the w7/w6 pair, add rK to both
 * halves at once (evaddw) and then run two rounds with the result.
 * LOAD_K##k##1 sits after the evaddw, so a constant selected through k
 * only takes effect from the following invocation on.
 *
 * R_16_19 uses F = (B and C) or (~B and D).
 */
#define	R_16_19(a, b, c, d, e, w0, w1, w4, w6, w7, k) \
	and		rT2,b,c;	/* 1: F' = B and C 		*/ \
	evmergelohi	rT0,w7,w6;	/*    W[-3]			*/ \
	andc		rT1,d,b;	/* 1: F" = ~B and D 		*/ \
	evxor		w0,w0,rT0;	/*    W = W[-16] xor W[-3]	*/ \
	or		rT1,rT1,rT2;	/* 1: F = F' or F"		*/ \
	evxor		w0,w0,w4;	/*    W = W xor W[-8]		*/ \
	add		e,e,rT1;	/* 1: E = E + F			*/ \
	evxor		w0,w0,w1;	/*    W = W xor W[-14]		*/ \
	rotrwi		rT2,a,27;	/* 1: A' = A rotl 5		*/ \
	evrlwi		w0,w0,1;	/*    W = W rotl 1		*/ \
	add		e,e,rT2;	/* 1: E = E + A'		*/ \
	evaddw		rT0,w0,rK;	/*    WK = W + K		*/ \
	rotrwi		b,b,2;		/* 1: B = B rotl 30		*/ \
	LOAD_K##k##1							   \
	evmergehi	rT1,rT1,rT0;	/*    WK1/WK2			*/ \
	add		e,e,rT0;	/* 1: E = E + WK		*/ \
	add		d,d,rT1;	/* 2: E = E + WK		*/ \
	and		rT2,a,b;	/* 2: F' = B and C 		*/ \
	andc		rT1,c,a;	/* 2: F" = ~B and D 		*/ \
	rotrwi		rT0,e,27;	/* 2: A' = A rotl 5		*/ \
	or		rT1,rT1,rT2;	/* 2: F = F' or F"		*/ \
	add		d,d,rT0;	/* 2: E = E + A'		*/ \
	rotrwi		a,a,2;		/* 2: B = B rotl 30		*/ \
	add		d,d,rT1		/* 2: E = E + F			*/

/* R_20_39: same schedule update as R_16_19, F = B xor C xor D. */
#define R_20_39(a, b, c, d, e, w0, w1, w4, w6, w7, k) \
	evmergelohi	rT0,w7,w6;	/*    W[-3]			*/ \
	xor		rT2,b,c;	/* 1: F' = B xor C		*/ \
	evxor		w0,w0,rT0;	/*    W = W[-16] xor W[-3]	*/ \
	xor		rT2,rT2,d;	/* 1: F = F' xor D		*/ \
	evxor		w0,w0,w4;	/*    W = W xor W[-8]		*/ \
	add		e,e,rT2;	/* 1: E = E + F			*/ \
	evxor		w0,w0,w1;	/*    W = W xor W[-14]		*/ \
	rotrwi		rT2,a,27;	/* 1: A' = A rotl 5		*/ \
	evrlwi		w0,w0,1;	/*    W = W rotl 1		*/ \
	add		e,e,rT2;	/* 1: E = E + A'		*/ \
	evaddw		rT0,w0,rK;	/*    WK = W + K		*/ \
	rotrwi		b,b,2;		/* 1: B = B rotl 30		*/ \
	LOAD_K##k##1							   \
	evmergehi	rT1,rT1,rT0;	/*    WK1/WK2			*/ \
	add		e,e,rT0;	/* 1: E = E + WK		*/ \
	xor		rT2,a,b;	/* 2: F' = B xor C		*/ \
	add		d,d,rT1;	/* 2: E = E + WK		*/ \
	xor		rT2,rT2,c;	/* 2: F = F' xor D		*/ \
	rotrwi		rT0,e,27;	/* 2: A' = A rotl 5		*/ \
	add		d,d,rT2;	/* 2: E = E + F			*/ \
	rotrwi		a,a,2;		/* 2: B = B rotl 30		*/ \
	add		d,d,rT0		/* 2: E = E + A'		*/

/*
 * R_40_59: same schedule update as R_16_19,
 * F = (B and C) or ((B or C) and D).
 */
#define R_40_59(a, b, c, d, e, w0, w1, w4, w6, w7, k) \
	and		rT2,b,c;	/* 1: F' = B and C		*/ \
	evmergelohi	rT0,w7,w6;	/*    W[-3]			*/ \
	or		rT1,b,c;	/* 1: F" = B or C		*/ \
	evxor		w0,w0,rT0;	/*    W = W[-16] xor W[-3]	*/ \
	and		rT1,d,rT1;	/* 1: F" = F" and D		*/ \
	evxor		w0,w0,w4;	/*    W = W xor W[-8]		*/ \
	or		rT2,rT2,rT1;	/* 1: F = F' or F"		*/ \
	evxor		w0,w0,w1;	/*    W = W xor W[-14]		*/ \
	add		e,e,rT2;	/* 1: E = E + F			*/ \
	evrlwi		w0,w0,1;	/*    W = W rotl 1		*/ \
	rotrwi		rT2,a,27;	/* 1: A' = A rotl 5		*/ \
	evaddw		rT0,w0,rK;	/*    WK = W + K		*/ \
	add		e,e,rT2;	/* 1: E = E + A'		*/ \
	LOAD_K##k##1							   \
	evmergehi	rT1,rT1,rT0;	/*    WK1/WK2			*/ \
	rotrwi		b,b,2;		/* 1: B = B rotl 30		*/ \
	add		e,e,rT0;	/* 1: E = E + WK		*/ \
	and		rT2,a,b;	/* 2: F' = B and C		*/ \
	or		rT0,a,b;	/* 2: F" = B or C		*/ \
	add		d,d,rT1;	/* 2: E = E + WK		*/ \
	and		rT0,c,rT0;	/* 2: F" = F" and D		*/ \
	rotrwi		a,a,2;		/* 2: B = B rotl 30		*/ \
	or		rT2,rT2,rT0;	/* 2: F = F' or F"		*/ \
	rotrwi		rT0,e,27;	/* 2: A' = A rotl 5		*/ \
	add		d,d,rT2;	/* 2: E = E + F			*/ \
	add		d,d,rT0		/* 2: E = E + A'		*/

/* R_60_79: identical round function to R_20_39. */
#define R_60_79(a, b, c, d, e, w0, w1, w4, w6, w7, k) \
	R_20_39(a, b, c, d, e, w0, w1, w4, w6, w7, k)

/*
 * ppc_spe_sha1_transform
 *
 * In:	r3 (rHP) - pointer to five 32 bit hash words, updated in place
 *	r4 (rWP) - pointer to the input, consumed in 64 byte blocks
 *	r5       - number of blocks; moved to CTR, then r5 becomes rKP
 *
 * The loop body runs before bdnz tests the counter, so the block count
 * is expected to be at least 1.
 *
 * Uses r0, r6-r12 and CTR as scratch. r14-r23 are saved and restored in
 * full (64 bit) through INITIALIZE/FINALIZE.
 */
_GLOBAL(ppc_spe_sha1_transform)
	INITIALIZE

	/* Hash word loads are interleaved with counter and table setup. */
	lwz		rH0,0(rHP)
	lwz		rH1,4(rHP)
	mtctr		r5		/* block count, before r5 is reused */
	lwz		rH2,8(rHP)
	lis		rKP,PPC_SPE_SHA1_K@h
	lwz		rH3,12(rHP)
	ori		rKP,rKP,PPC_SPE_SHA1_K@l
	lwz		rH4,16(rHP)

ppc_spe_sha1_main:
	/*
	 * Rounds 0-15, two per line. k = 1 on the first line (re)loads the
	 * first constant for every block. The last two lines take rT3 as the
	 * scratch register for the first word of each pair.
	 */
	R_00_15(rH0, rH1, rH2, rH3, rH4, rW1, rW0, 1, 0)
	R_00_15(rH3, rH4, rH0, rH1, rH2, rW2, rW1, 0, 8)
	R_00_15(rH1, rH2, rH3, rH4, rH0, rW3, rW2, 0, 16)
	R_00_15(rH4, rH0, rH1, rH2, rH3, rW4, rW3, 0, 24)
	R_00_15(rH2, rH3, rH4, rH0, rH1, rW5, rW4, 0, 32)
	R_00_15(rH0, rH1, rH2, rH3, rH4, rW6, rW5, 0, 40)
	R_00_15(rH3, rH4, rH0, rH1, rH2, rT3, rW6, 0, 48)
	R_00_15(rH1, rH2, rH3, rH4, rH0, rT3, rW7, 0, 56)

	/* Rounds 16-19; k = 2 switches to the second constant afterwards. */
	R_16_19(rH4, rH0, rH1, rH2, rH3, rW0, rW1, rW4, rW6, rW7, 0)
	R_16_19(rH2, rH3, rH4, rH0, rH1, rW1, rW2, rW5, rW7, rW0, 2)

	/* Rounds 20-39; k = 3 on the last line selects the third constant. */
	R_20_39(rH0, rH1, rH2, rH3, rH4, rW2, rW3, rW6, rW0, rW1, 0)
	R_20_39(rH3, rH4, rH0, rH1, rH2, rW3, rW4, rW7, rW1, rW2, 0)
	R_20_39(rH1, rH2, rH3, rH4, rH0, rW4, rW5, rW0, rW2, rW3, 0)
	R_20_39(rH4, rH0, rH1, rH2, rH3, rW5, rW6, rW1, rW3, rW4, 0)
	R_20_39(rH2, rH3, rH4, rH0, rH1, rW6, rW7, rW2, rW4, rW5, 0)
	R_20_39(rH0, rH1, rH2, rH3, rH4, rW7, rW0, rW3, rW5, rW6, 0)
	R_20_39(rH3, rH4, rH0, rH1, rH2, rW0, rW1, rW4, rW6, rW7, 0)
	R_20_39(rH1, rH2, rH3, rH4, rH0, rW1, rW2, rW5, rW7, rW0, 0)
	R_20_39(rH4, rH0, rH1, rH2, rH3, rW2, rW3, rW6, rW0, rW1, 0)
	R_20_39(rH2, rH3, rH4, rH0, rH1, rW3, rW4, rW7, rW1, rW2, 3)

	/* Rounds 40-59; k = 4 on the last line selects the fourth constant. */
	R_40_59(rH0, rH1, rH2, rH3, rH4, rW4, rW5, rW0, rW2, rW3, 0)
	R_40_59(rH3, rH4, rH0, rH1, rH2, rW5, rW6, rW1, rW3, rW4, 0)
	R_40_59(rH1, rH2, rH3, rH4, rH0, rW6, rW7, rW2, rW4, rW5, 0)
	R_40_59(rH4, rH0, rH1, rH2, rH3, rW7, rW0, rW3, rW5, rW6, 0)
	R_40_59(rH2, rH3, rH4, rH0, rH1, rW0, rW1, rW4, rW6, rW7, 0)
	R_40_59(rH0, rH1, rH2, rH3, rH4, rW1, rW2, rW5, rW7, rW0, 0)
	R_40_59(rH3, rH4, rH0, rH1, rH2, rW2, rW3, rW6, rW0, rW1, 0)
	R_40_59(rH1, rH2, rH3, rH4, rH0, rW3, rW4, rW7, rW1, rW2, 0)
	R_40_59(rH4, rH0, rH1, rH2, rH3, rW4, rW5, rW0, rW2, rW3, 0)
	R_40_59(rH2, rH3, rH4, rH0, rH1, rW5, rW6, rW1, rW3, rW4, 4)

	/*
	 * Rounds 60-79. The previous hash words are reloaded in between,
	 * into rT3 and rW1-rW4, each after the last round macro that still
	 * uses that register.
	 */
	R_60_79(rH0, rH1, rH2, rH3, rH4, rW6, rW7, rW2, rW4, rW5, 0)
	R_60_79(rH3, rH4, rH0, rH1, rH2, rW7, rW0, rW3, rW5, rW6, 0)
	R_60_79(rH1, rH2, rH3, rH4, rH0, rW0, rW1, rW4, rW6, rW7, 0)
	R_60_79(rH4, rH0, rH1, rH2, rH3, rW1, rW2, rW5, rW7, rW0, 0)
	R_60_79(rH2, rH3, rH4, rH0, rH1, rW2, rW3, rW6, rW0, rW1, 0)
	R_60_79(rH0, rH1, rH2, rH3, rH4, rW3, rW4, rW7, rW1, rW2, 0)
	R_60_79(rH3, rH4, rH0, rH1, rH2, rW4, rW5, rW0, rW2, rW3, 0)
	lwz		rT3,0(rHP)
	R_60_79(rH1, rH2, rH3, rH4, rH0, rW5, rW6, rW1, rW3, rW4, 0)
	lwz		rW1,4(rHP)
	R_60_79(rH4, rH0, rH1, rH2, rH3, rW6, rW7, rW2, rW4, rW5, 0)
	lwz		rW2,8(rHP)
	R_60_79(rH2, rH3, rH4, rH0, rH1, rW7, rW0, rW3, rW5, rW6, 0)
	lwz		rW3,12(rHP)
	NEXT_BLOCK
	lwz		rW4,16(rHP)

	/* Add the previous hash words and write the new state back. */
	add		rH0,rH0,rT3
	stw		rH0,0(rHP)
	add		rH1,rH1,rW1
	stw		rH1,4(rHP)
	add		rH2,rH2,rW2
	stw		rH2,8(rHP)
	add		rH3,rH3,rW3
	stw		rH3,12(rHP)
	add		rH4,rH4,rW4
	stw		rH4,16(rHP)

	bdnz		ppc_spe_sha1_main	/* next block while CTR != 0 */

	FINALIZE
	blr

/* The four 32 bit round constants, indexed by LOAD_K11..LOAD_K41. */
.data
.align 4
PPC_SPE_SHA1_K:
	.long 0x5A827999,0x6ED9EBA1,0x8F1BBCDC,0xCA62C1D6