/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Fast SHA-256 implementation for SPE instruction set (PPC)
 *
 * This code makes use of the SPE SIMD instruction set as defined in
 * http://cache.freescale.com/files/32bit/doc/ref_manual/SPEPIM.pdf
 * Implementation is based on optimization guide notes from
 * http://cache.freescale.com/files/32bit/doc/app_note/AN2665.pdf
 *
 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
 */

#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>

/*
 * Register allocation.
 *
 * r3/r4 arrive as arguments and stay untouched as pointers. r5 carries the
 * block count on entry; it is copied to CTR before being reused as rH0.
 * r14-r25 are non volatile and are saved/restored by INITIALIZE/FINALIZE.
 */
#define rHP	r3	/* pointer to hash values in memory		*/
#define rKP	r24	/* pointer to round constants			*/
#define rWP	r4	/* pointer to input data			*/

#define rH0	r5	/* 8 32 bit hash values in 8 registers		*/
#define rH1	r6
#define rH2	r7
#define rH3	r8
#define rH4	r9
#define rH5	r10
#define rH6	r11
#define rH7	r12

#define rW0	r14	/* 64 bit registers. 16 words in 8 registers	*/
#define rW1	r15
#define rW2	r16
#define rW3	r17
#define rW4	r18
#define rW5	r19
#define rW6	r20
#define rW7	r21

#define rT0	r22	/* 64 bit temporaries				*/
#define rT1	r23
#define rT2	r0	/* 32 bit temporaries				*/
#define rT3	r25

/*
 * Loop control hook pasted into R_CALC_W via CMP_K##k##_LOOP.
 *
 * k = N expands to nothing. k = C compares rT1 with zero right after rT1
 * was loaded with a pair of round constants; the condition is consumed by
 * the "bt gt" that follows the eighth R_CALC_W of a 16 round group.
 * The constants that close the groups are 0x14292967, 0x106aa070 (both
 * positive, keep looping) and 0xc67178f2 (negative as signed word, leave
 * the loop), so no separate round counter is needed.
 * NOTE(review): this relies on cmpwi looking at the word that evldw placed
 * in the low half of rT1 (the constant at off+4) - confirm against SPE PIM.
 */
#define CMP_KN_LOOP
#define CMP_KC_LOOP \
	cmpwi		rT1,0;

/*
 * Function prologue: open a 128 byte stack frame and save every non
 * volatile register this code clobbers. r14-r23 are stored with evstdw
 * (8 bytes per slot, offsets 8..80), r24/r25 as plain words.
 */
#define INITIALIZE \
	stwu		r1,-128(r1);	/* create stack frame		*/ \
	evstdw		r14,8(r1);	/* We must save non volatile	*/ \
	evstdw		r15,16(r1);	/* registers. Take the chance	*/ \
	evstdw		r16,24(r1);	/* and save the SPE part too	*/ \
	evstdw		r17,32(r1); \
	evstdw		r18,40(r1); \
	evstdw		r19,48(r1); \
	evstdw		r20,56(r1); \
	evstdw		r21,64(r1); \
	evstdw		r22,72(r1); \
	evstdw		r23,80(r1); \
	stw		r24,88(r1);	/* save normal registers	*/ \
	stw		r25,92(r1);

/*
 * Function epilogue: restore the saved registers, then overwrite the first
 * word of each 8 byte save slot with zero before releasing the frame.
 * The slots for r24/r25 (offsets 88/92) are not wiped. r0 is zero on exit.
 * NOTE(review): the wiped word is presumably the upper (SPE only) register
 * half on this big endian layout - confirm against SPE PIM.
 */
#define FINALIZE \
	evldw		r14,8(r1);	/* restore SPE registers	*/ \
	evldw		r15,16(r1); \
	evldw		r16,24(r1); \
	evldw		r17,32(r1); \
	evldw		r18,40(r1); \
	evldw		r19,48(r1); \
	evldw		r20,56(r1); \
	evldw		r21,64(r1); \
	evldw		r22,72(r1); \
	evldw		r23,80(r1); \
	lwz		r24,88(r1);	/* restore normal registers	*/ \
	lwz		r25,92(r1); \
	xor		r0,r0,r0; \
	stw		r0,8(r1);	/* Delete sensitive data	*/ \
	stw		r0,16(r1);	/* that we might have pushed	*/ \
	stw		r0,24(r1);	/* from other context that runs	*/ \
	stw		r0,32(r1);	/* the same code. Assume that	*/ \
	stw		r0,40(r1);	/* the lower part of the GPRs	*/ \
	stw		r0,48(r1);	/* was already overwritten on	*/ \
	stw		r0,56(r1);	/* the way down to here		*/ \
	stw		r0,64(r1); \
	stw		r0,72(r1); \
	stw		r0,80(r1); \
	addi		r1,r1,128;	/* cleanup stack frame		*/

/*
 * Message word access.
 *
 * Big endian: words are read at a fixed offset from rWP and rWP advances
 * once per block (NEXT_BLOCK). Otherwise: words are read byte reversed
 * with lwbrx, rWP advances after every word and NEXT_BLOCK is empty
 * ("off" is ignored in that variant).
 */
#ifdef __BIG_ENDIAN__
#define LOAD_DATA(reg, off) \
	lwz		reg,off(rWP);	/* load data			*/
#define NEXT_BLOCK \
	addi		rWP,rWP,64;	/* increment per block		*/
#else
#define LOAD_DATA(reg, off) \
	lwbrx		reg,0,rWP;	/* load data			*/ \
	addi		rWP,rWP,4;	/* increment per word		*/
#define NEXT_BLOCK			/* nothing to do		*/
#endif

/*
 * R_LOAD_W - two interleaved SHA-256 rounds that take their message words
 * directly from the input (used for rounds 0..15).
 *
 * a..h	working variables in their role for the first round ("1:" comments).
 *	The second round ("2:" comments) uses them shifted by one position
 *	(h acts as a, a as b, ... g as h), so its comments name the roles,
 *	not the macro arguments.
 * w	64 bit register that ends up holding both loaded words: the first
 *	word is copied to the upper half by evmergelo before the second
 *	word is loaded.
 * off	byte offset of the first word in the block and of its constant in
 *	the K table; the second round uses off+4.
 *
 * After the macro the variables have rotated by two positions, which is
 * why consecutive invocations pass the rH registers shifted by two.
 * Clobbers rT0-rT3.
 */
#define R_LOAD_W(a, b, c, d, e, f, g, h, w, off) \
	LOAD_DATA(w, off)		/* 1: W				*/ \
	rotrwi		rT0,e,6;	/* 1: S1 = e rotr 6		*/ \
	rotrwi		rT1,e,11;	/* 1: S1' = e rotr 11		*/ \
	rotrwi		rT2,e,25;	/* 1: S1" = e rotr 25		*/ \
	xor		rT0,rT0,rT1;	/* 1: S1 = S1 xor S1'		*/ \
	and		rT3,e,f;	/* 1: ch = e and f		*/ \
	xor		rT0,rT0,rT2;	/* 1: S1 = S1 xor S1"		*/ \
	andc		rT1,g,e;	/* 1: ch' = ~e and g		*/ \
	lwz		rT2,off(rKP);	/* 1: K				*/ \
	xor		rT3,rT3,rT1;	/* 1: ch = ch xor ch'		*/ \
	add		h,h,rT0;	/* 1: temp1 = h + S1		*/ \
	add		rT3,rT3,w;	/* 1: temp1' = ch + w		*/ \
	rotrwi		rT0,a,2;	/* 1: S0 = a rotr 2		*/ \
	add		h,h,rT3;	/* 1: temp1 = temp1 + temp1'	*/ \
	rotrwi		rT1,a,13;	/* 1: S0' = a rotr 13		*/ \
	add		h,h,rT2;	/* 1: temp1 = temp1 + K		*/ \
	rotrwi		rT3,a,22;	/* 1: S0" = a rotr 22		*/ \
	xor		rT0,rT0,rT1;	/* 1: S0 = S0 xor S0'		*/ \
	add		d,d,h;		/* 1: d = d + temp1		*/ \
	xor		rT3,rT0,rT3;	/* 1: S0 = S0 xor S0"		*/ \
	evmergelo	w,w,w;		/*    shift W			*/ \
	or		rT2,a,b;	/* 1: maj = a or b		*/ \
	and		rT1,a,b;	/* 1: maj' = a and b		*/ \
	and		rT2,rT2,c;	/* 1: maj = maj and c		*/ \
	LOAD_DATA(w, off+4)		/* 2: W				*/ \
	or		rT2,rT1,rT2;	/* 1: maj = maj or maj'		*/ \
	rotrwi		rT0,d,6;	/* 2: S1 = e rotr 6		*/ \
	add		rT3,rT3,rT2;	/* 1: temp2 = S0 + maj		*/ \
	rotrwi		rT1,d,11;	/* 2: S1' = e rotr 11		*/ \
	add		h,h,rT3;	/* 1: h = temp1 + temp2		*/ \
	rotrwi		rT2,d,25;	/* 2: S1" = e rotr 25		*/ \
	xor		rT0,rT0,rT1;	/* 2: S1 = S1 xor S1'		*/ \
	and		rT3,d,e;	/* 2: ch = e and f		*/ \
	xor		rT0,rT0,rT2;	/* 2: S1 = S1 xor S1"		*/ \
	andc		rT1,f,d;	/* 2: ch' = ~e and g		*/ \
	lwz		rT2,off+4(rKP);	/* 2: K				*/ \
	xor		rT3,rT3,rT1;	/* 2: ch = ch xor ch'		*/ \
	add		g,g,rT0;	/* 2: temp1 = h + S1		*/ \
	add		rT3,rT3,w;	/* 2: temp1' = ch + w		*/ \
	rotrwi		rT0,h,2;	/* 2: S0 = a rotr 2		*/ \
	add		g,g,rT3;	/* 2: temp1 = temp1 + temp1'	*/ \
	rotrwi		rT1,h,13;	/* 2: S0' = a rotr 13		*/ \
	add		g,g,rT2;	/* 2: temp1 = temp1 + K		*/ \
	rotrwi		rT3,h,22;	/* 2: S0" = a rotr 22		*/ \
	xor		rT0,rT0,rT1;	/* 2: S0 = S0 xor S0'		*/ \
	or		rT2,h,a;	/* 2: maj = a or b		*/ \
	xor		rT3,rT0,rT3;	/* 2: S0 = S0 xor S0"		*/ \
	and		rT1,h,a;	/* 2: maj' = a and b		*/ \
	and		rT2,rT2,b;	/* 2: maj = maj and c		*/ \
	add		c,c,g;		/* 2: d = d + temp1		*/ \
	or		rT2,rT1,rT2;	/* 2: maj = maj or maj'		*/ \
	add		rT3,rT3,rT2;	/* 2: temp2 = S0 + maj		*/ \
	add		g,g,rT3		/* 2: h = temp1 + temp2		*/

/*
 * R_CALC_W - two interleaved SHA-256 rounds whose message words are first
 * derived from the schedule (used for rounds 16..63).
 *
 * a..h	working variables, same convention as in R_LOAD_W.
 * w0	register pair holding w[-16]/w[-15+1]; overwritten with the two new
 *	schedule words.
 * w1, w4, w5, w7
 *	neighbouring schedule registers; w[-15] and w[-7] straddle two
 *	registers and are assembled with evmergelohi, w[-2] is w7.
 * k	N or C, selects CMP_KN_LOOP / CMP_KC_LOOP (see above).
 * off	byte offset of the constant pair relative to rKP.
 *
 * The ev* instructions work on both words of a pair at once while the
 * scalar instructions in between compute the round function. The unnamed
 * comment column ("   ") belongs to the schedule, "1:"/"2:" to the rounds.
 * Rotations are written as rotate left: rotr n == evrlwi 32-n. The second
 * evrlwi on rT0 rotates the already rotated value (25 + 21 == rotl 14 ==
 * rotr 18).
 * In the first round maj is built as (a and b) + ((a xor b) and c); the
 * two terms never share a set bit, so the addition equals the usual "or".
 * Clobbers rT0-rT3 and, for k = C, cr0.
 */
#define R_CALC_W(a, b, c, d, e, f, g, h, w0, w1, w4, w5, w7, k, off) \
	rotrwi		rT2,e,6;	/* 1: S1 = e rotr 6		*/ \
	evmergelohi	rT0,w0,w1;	/*    w[-15]			*/ \
	rotrwi		rT3,e,11;	/* 1: S1' = e rotr 11		*/ \
	evsrwiu		rT1,rT0,3;	/*    s0 = w[-15] >> 3		*/ \
	xor		rT2,rT2,rT3;	/* 1: S1 = S1 xor S1'		*/ \
	evrlwi		rT0,rT0,25;	/*    s0' = w[-15] rotr 7	*/ \
	rotrwi		rT3,e,25;	/* 1: S1" = e rotr 25		*/ \
	evxor		rT1,rT1,rT0;	/*    s0 = s0 xor s0'		*/ \
	xor		rT2,rT2,rT3;	/* 1: S1 = S1 xor S1"		*/ \
	evrlwi		rT0,rT0,21;	/*    s0' = w[-15] rotr 18	*/ \
	add		h,h,rT2;	/* 1: temp1 = h + S1		*/ \
	evxor		rT0,rT0,rT1;	/*    s0 = s0 xor s0'		*/ \
	and		rT2,e,f;	/* 1: ch = e and f		*/ \
	evaddw		w0,w0,rT0;	/*    w = w[-16] + s0		*/ \
	andc		rT3,g,e;	/* 1: ch' = ~e and g		*/ \
	evsrwiu		rT0,w7,10;	/*    s1 = w[-2] >> 10		*/ \
	xor		rT2,rT2,rT3;	/* 1: ch = ch xor ch'		*/ \
	evrlwi		rT1,w7,15;	/*    s1' = w[-2] rotr 17	*/ \
	add		h,h,rT2;	/* 1: temp1 = temp1 + ch	*/ \
	evxor		rT0,rT0,rT1;	/*    s1 = s1 xor s1'		*/ \
	rotrwi		rT2,a,2;	/* 1: S0 = a rotr 2		*/ \
	evrlwi		rT1,w7,13;	/*    s1' = w[-2] rotr 19	*/ \
	rotrwi		rT3,a,13;	/* 1: S0' = a rotr 13		*/ \
	evxor		rT0,rT0,rT1;	/*    s1 = s1 xor s1'		*/ \
	xor		rT2,rT2,rT3;	/* 1: S0 = S0 xor S0'		*/ \
	evldw		rT1,off(rKP);	/*    k				*/ \
	rotrwi		rT3,a,22;	/* 1: S0" = a rotr 22		*/ \
	evaddw		w0,w0,rT0;	/*    w = w + s1		*/ \
	xor		rT2,rT2,rT3;	/* 1: S0 = S0 xor S0"		*/ \
	evmergelohi	rT0,w4,w5;	/*    w[-7]			*/ \
	and		rT3,a,b;	/* 1: maj = a and b		*/ \
	evaddw		w0,w0,rT0;	/*    w = w + w[-7]		*/ \
	CMP_K##k##_LOOP \
	add		rT2,rT2,rT3;	/* 1: temp2 = S0 + maj		*/ \
	evaddw		rT1,rT1,w0;	/*    wk = w + k		*/ \
	xor		rT3,a,b;	/* 1: maj = a xor b		*/ \
	evmergehi	rT0,rT1,rT1;	/*    wk1/wk2			*/ \
	and		rT3,rT3,c;	/* 1: maj = maj and c		*/ \
	add		h,h,rT0;	/* 1: temp1 = temp1 + wk	*/ \
	add		rT2,rT2,rT3;	/* 1: temp2 = temp2 + maj	*/ \
	add		g,g,rT1;	/* 2: temp1 = temp1 + wk	*/ \
	add		d,d,h;		/* 1: d = d + temp1		*/ \
	rotrwi		rT0,d,6;	/* 2: S1 = e rotr 6		*/ \
	add		h,h,rT2;	/* 1: h = temp1 + temp2		*/ \
	rotrwi		rT1,d,11;	/* 2: S1' = e rotr 11		*/ \
	rotrwi		rT2,d,25;	/* 2: S1" = e rotr 25		*/ \
	xor		rT0,rT0,rT1;	/* 2: S1 = S1 xor S1'		*/ \
	and		rT3,d,e;	/* 2: ch = e and f		*/ \
	xor		rT0,rT0,rT2;	/* 2: S1 = S1 xor S1"		*/ \
	andc		rT1,f,d;	/* 2: ch' = ~e and g		*/ \
	add		g,g,rT0;	/* 2: temp1 = h + S1		*/ \
	xor		rT3,rT3,rT1;	/* 2: ch = ch xor ch'		*/ \
	rotrwi		rT0,h,2;	/* 2: S0 = a rotr 2		*/ \
	add		g,g,rT3;	/* 2: temp1 = temp1 + ch	*/ \
	rotrwi		rT1,h,13;	/* 2: S0' = a rotr 13		*/ \
	rotrwi		rT3,h,22;	/* 2: S0" = a rotr 22		*/ \
	xor		rT0,rT0,rT1;	/* 2: S0 = S0 xor S0'		*/ \
	or		rT2,h,a;	/* 2: maj = a or b		*/ \
	and		rT1,h,a;	/* 2: maj' = a and b		*/ \
	and		rT2,rT2,b;	/* 2: maj = maj and c		*/ \
	xor		rT3,rT0,rT3;	/* 2: S0 = S0 xor S0"		*/ \
	or		rT2,rT1,rT2;	/* 2: maj = maj or maj'		*/ \
	add		c,c,g;		/* 2: d = d + temp1		*/ \
	add		rT3,rT3,rT2;	/* 2: temp2 = S0 + maj		*/ \
	add		g,g,rT3		/* 2: h = temp1 + temp2		*/

/*
 * ppc_spe_sha256_transform - apply the SHA-256 compression function to a
 * run of consecutive 64 byte blocks.
 *
 * In:	r3 (rHP)  pointer to the eight 32 bit hash words; read at entry and
 *		  rewritten after every block
 *	r4 (rWP)  pointer to the input data, 64 bytes are consumed per block
 *	r5	  number of blocks; moved to CTR and counted down with bdnz
 *
 * A block count of zero returns immediately without touching the hash
 * words or the stack. Without that check CTR would start at zero and bdnz
 * would wrap it around, hashing about 2^32 blocks of unrelated memory.
 *
 * Clobbers r0, r4-r12, CTR and cr0; r14-r25 are preserved.
 * NOTE(review): presumably called from C as
 * (u32 *state, const u8 *src, u32 blocks) with SPE already enabled for
 * kernel use - confirm against the glue code.
 */
_GLOBAL(ppc_spe_sha256_transform)
	cmpwi		r5,0		/* nothing to hash?		*/
	beqlr				/* then leave right away	*/

	INITIALIZE

	mtctr		r5		/* block counter, frees r5 = rH0 */
	lwz		rH0,0(rHP)
	lwz		rH1,4(rHP)
	lwz		rH2,8(rHP)
	lwz		rH3,12(rHP)
	lwz		rH4,16(rHP)
	lwz		rH5,20(rHP)
	lwz		rH6,24(rHP)
	lwz		rH7,28(rHP)

	/* per block: rewind rKP to the start of the constant table */
ppc_spe_sha256_main:
	lis		rKP,PPC_SPE_SHA256_K@ha
	addi		rKP,rKP,PPC_SPE_SHA256_K@l

	/* rounds 0..15: message words come straight from the input */
	R_LOAD_W(rH0, rH1, rH2, rH3, rH4, rH5, rH6, rH7, rW0, 0)
	R_LOAD_W(rH6, rH7, rH0, rH1, rH2, rH3, rH4, rH5, rW1, 8)
	R_LOAD_W(rH4, rH5, rH6, rH7, rH0, rH1, rH2, rH3, rW2, 16)
	R_LOAD_W(rH2, rH3, rH4, rH5, rH6, rH7, rH0, rH1, rW3, 24)
	R_LOAD_W(rH0, rH1, rH2, rH3, rH4, rH5, rH6, rH7, rW4, 32)
	R_LOAD_W(rH6, rH7, rH0, rH1, rH2, rH3, rH4, rH5, rW5, 40)
	R_LOAD_W(rH4, rH5, rH6, rH7, rH0, rH1, rH2, rH3, rW6, 48)
	R_LOAD_W(rH2, rH3, rH4, rH5, rH6, rH7, rH0, rH1, rW7, 56)

	/*
	 * rounds 16..63 in groups of 16: step rKP to the next 16 constants
	 * and repeat until the last invocation (k = C) sees a constant that
	 * is not greater than zero.
	 */
ppc_spe_sha256_16_rounds:
	addi		rKP,rKP,64
	R_CALC_W(rH0, rH1, rH2, rH3, rH4, rH5, rH6, rH7,
		 rW0, rW1, rW4, rW5, rW7, N, 0)
	R_CALC_W(rH6, rH7, rH0, rH1, rH2, rH3, rH4, rH5,
		 rW1, rW2, rW5, rW6, rW0, N, 8)
	R_CALC_W(rH4, rH5, rH6, rH7, rH0, rH1, rH2, rH3,
		 rW2, rW3, rW6, rW7, rW1, N, 16)
	R_CALC_W(rH2, rH3, rH4, rH5, rH6, rH7, rH0, rH1,
		 rW3, rW4, rW7, rW0, rW2, N, 24)
	R_CALC_W(rH0, rH1, rH2, rH3, rH4, rH5, rH6, rH7,
		 rW4, rW5, rW0, rW1, rW3, N, 32)
	R_CALC_W(rH6, rH7, rH0, rH1, rH2, rH3, rH4, rH5,
		 rW5, rW6, rW1, rW2, rW4, N, 40)
	R_CALC_W(rH4, rH5, rH6, rH7, rH0, rH1, rH2, rH3,
		 rW6, rW7, rW2, rW3, rW5, N, 48)
	R_CALC_W(rH2, rH3, rH4, rH5, rH6, rH7, rH0, rH1,
		 rW7, rW0, rW3, rW4, rW6, C, 56)
	bt		gt,ppc_spe_sha256_16_rounds

	/*
	 * Add the previous hash words (reloaded into the now free rW
	 * registers) to the working variables and store the new hash.
	 */
	lwz		rW0,0(rHP)
	NEXT_BLOCK
	lwz		rW1,4(rHP)
	lwz		rW2,8(rHP)
	lwz		rW3,12(rHP)
	lwz		rW4,16(rHP)
	lwz		rW5,20(rHP)
	lwz		rW6,24(rHP)
	lwz		rW7,28(rHP)

	add		rH0,rH0,rW0
	stw		rH0,0(rHP)
	add		rH1,rH1,rW1
	stw		rH1,4(rHP)
	add		rH2,rH2,rW2
	stw		rH2,8(rHP)
	add		rH3,rH3,rW3
	stw		rH3,12(rHP)
	add		rH4,rH4,rW4
	stw		rH4,16(rHP)
	add		rH5,rH5,rW5
	stw		rH5,20(rHP)
	add		rH6,rH6,rW6
	stw		rH6,24(rHP)
	add		rH7,rH7,rW7
	stw		rH7,28(rHP)

	bdnz		ppc_spe_sha256_main	/* next block, if any	*/

	FINALIZE
	blr

/*
 * The 64 SHA-256 round constants, four per line. The sign of the last
 * constant of each 16 word group doubles as the loop condition of
 * ppc_spe_sha256_16_rounds (see CMP_KC_LOOP), so the order and the values
 * must not be changed.
 */
.data
.align 5
PPC_SPE_SHA256_K:
	.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
	.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
	.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
	.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
	.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
	.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
	.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
	.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
	.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
	.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
	.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
	.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
	.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
	.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
	.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
	.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2