/* SPDX-License-Identifier: GPL-2.0-or-later */
/* sha1-armv7-neon.S - ARM/NEON accelerated SHA-1 transform function
 *
 * Copyright © 2013-2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

.syntax unified
.fpu neon

.text


/* Context structure: byte offsets of the five 32-bit chaining words. */

#define state_h0 0
#define state_h1 4
#define state_h2 8
#define state_h3 12
#define state_h4 16


/* Constants */

/* The four SHA-1 round constants. */
#define K1  0x5A827999
#define K2  0x6ED9EBA1
#define K3  0x8F1BBCDC
#define K4  0xCA62C1D6

/*
 * Each constant is stored four times so that one 128-bit load yields a
 * vector with the constant in every 32-bit lane.  The table is kept in
 * .text so it can be addressed PC-relative with "adr".  On ARM, GNU as
 * treats the .align operand as a power of two, so this is a 16-byte
 * boundary.
 */
.align 4
.LK_VEC:
.LK1:	.long K1, K1, K1, K1
.LK2:	.long K2, K2, K2, K2
.LK3:	.long K3, K3, K3, K3
.LK4:	.long K4, K4, K4, K4


/* Register macros */

/* Arguments, saved stack pointer and the W+K store pointer. */
#define RSTATE r0
#define RDATA r1
#define RNBLKS r2
#define ROLDSTACK r3
#define RWK lr

/* The five SHA-1 working variables. */
#define _a r4
#define _b r5
#define _c r6
#define _d r7
#define _e r8

/* Scalar temporaries. */
#define RT0 r9
#define RT1 r10
#define RT2 r11
#define RT3 r12

/*
 * Message schedule vectors, four 32-bit words each.  The mapping is not
 * sequential: W0/W7 are q0/q1 and W6/W5 are q5/q6, which are the pairs
 * loaded with a single two-register vld1.32 in the rounds 0-15 precalc.
 */
#define W0 q0
#define W1 q7
#define W2 q2
#define W3 q3
#define W4 q4
#define W5 q6
#define W6 q5
#define W7 q1

/* NEON temporaries. */
#define tmp0 q8
#define tmp1 q9
#define tmp2 q10
#define tmp3 q11

/* Round constants, one per q register (K replicated in all four lanes). */
#define qK1 q12
#define qK2 q13
#define qK3 q14
#define qK4 q15

/* ARM_LE(code) emits "code" only when not building for big-endian. */
#ifdef CONFIG_CPU_BIG_ENDIAN
#define ARM_LE(code...)
#else
#define ARM_LE(code...)		code
#endif

/* Round function macros. */

/* Byte offset of round i's W+K word inside the 16-word stack window. */
#define WK_offs(i) (((i) & 15) * 4)

/*
 * Common shape of the _R_Fn macros:
 *   e += rol(a, 5) + Fn(b, c, d) + WK[i];  b = rol(b, 30);
 * WK[i] is read from the stack (the round constant has already been
 * added to it by the precalc code).  pre1/pre2/pre3 are hooks that are
 * expanded between the scalar instructions so that message-schedule
 * work is interleaved with the round; each hook is invoked with
 * (i16, W, W_m04, ..., W_m28).  RT0, RT1 and RT3 are clobbered.
 *
 * F1(b, c, d) = (b & c) | (~b & d).  The two terms can never have the
 * same bit set, so they are accumulated with add.
 */
#define _R_F1(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
	      W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	ldr RT3, [sp, WK_offs(i)]; \
		pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
	bic RT0, d, b; \
	add e, e, a, ror #(32 - 5); \
	and RT1, c, b; \
		pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
	add RT0, RT0, RT3; \
	add e, e, RT1; \
	ror b, #(32 - 30); \
		pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
	add e, e, RT0;

/* F2(b, c, d) = b ^ c ^ d. */
#define _R_F2(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
	      W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	ldr RT3, [sp, WK_offs(i)]; \
		pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
	eor RT0, d, b; \
	add e, e, a, ror #(32 - 5); \
	eor RT0, RT0, c; \
		pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
	add e, e, RT3; \
	ror b, #(32 - 30); \
		pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
	add e, e, RT0;

/*
 * F3(b, c, d) = majority(b, c, d), computed as ((b ^ c) & d) + (b & c);
 * the two terms are bitwise disjoint, so add behaves like or.
 */
#define _R_F3(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
	      W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	ldr RT3, [sp, WK_offs(i)]; \
		pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
	eor RT0, b, c; \
	and RT1, b, c; \
	add e, e, a, ror #(32 - 5); \
		pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
	and RT0, RT0, d; \
	add RT1, RT1, RT3; \
	add e, e, RT0; \
	ror b, #(32 - 30); \
		pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
	add e, e, RT1;

/* F4 is the same function as F2. */
#define _R_F4(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
	      W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	_R_F2(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
	      W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28)

/* One round with function f (F1..F4) and three interleaved precalc hooks. */
#define _R(a,b,c,d,e,f,i,pre1,pre2,pre3,i16,\
	   W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	_R_##f(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
	       W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28)

/*
 * One round with no interleaved precalc.  The hook arguments are the
 * no-op "dummy" macro, which discards the placeholder tokens passed
 * after it.
 */
#define R(a,b,c,d,e,f,i) \
	_R_##f(a,b,c,d,e,i,dummy,dummy,dummy,i16,\
	       W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28)

/* Placeholder hook: accepts any arguments and expands to nothing. */
#define dummy(...)


/* Input expansion macros. */

/********* Precalc macros for rounds 0-15 *************************************/

/*
 * Load one 64-byte block from RDATA (post-incremented), byte-swap each
 * 32-bit word on little-endian builds, add curK to every word and store
 * the 16 W+K words to the stack window at sp.  The words land in
 * W0 (0-3), W7 (4-7), W6 (8-11) and W5 (12-15).  Clobbers RWK and
 * tmp0-tmp3.  curK must be defined by the user of the macro.
 */
#define W_PRECALC_00_15() \
	add       RWK, sp, #(WK_offs(0)); \
	\
	vld1.32   {W0, W7}, [RDATA]!; \
	ARM_LE(vrev32.8  W0, W0;	)	/* big => little */ \
	vld1.32   {W6, W5}, [RDATA]!; \
	vadd.u32  tmp0, W0, curK; \
	ARM_LE(vrev32.8  W7, W7;	)	/* big => little */ \
	ARM_LE(vrev32.8  W6, W6;	)	/* big => little */ \
	vadd.u32  tmp1, W7, curK; \
	ARM_LE(vrev32.8  W5, W5;	)	/* big => little */ \
	vadd.u32  tmp2, W6, curK; \
	vst1.32   {tmp0, tmp1}, [RWK]!; \
	vadd.u32  tmp3, W5, curK; \
	vst1.32   {tmp2, tmp3}, [RWK];

/*
 * The same sequence as W_PRECALC_00_15, split into single steps
 * WPRECALC_00_15_0 .. WPRECALC_00_15_12 so that each step can be passed
 * as a pre1/pre2/pre3 hook of a round macro.  The steps take the usual
 * hook argument list but ignore it.
 */
#define WPRECALC_00_15_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	vld1.32   {W0, W7}, [RDATA]!;

#define WPRECALC_00_15_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	add       RWK, sp, #(WK_offs(0));

#define WPRECALC_00_15_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	ARM_LE(vrev32.8  W0, W0;	)	/* big => little */

#define WPRECALC_00_15_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	vld1.32   {W6, W5}, [RDATA]!;

#define WPRECALC_00_15_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	vadd.u32  tmp0, W0, curK;

/* Steps 5-12 of the split rounds 0-15 precalc (hook arguments unused). */
#define WPRECALC_00_15_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	ARM_LE(vrev32.8  W7, W7;	)	/* big => little */

#define WPRECALC_00_15_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	ARM_LE(vrev32.8  W6, W6;	)	/* big => little */

#define WPRECALC_00_15_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	vadd.u32  tmp1, W7, curK;

#define WPRECALC_00_15_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	ARM_LE(vrev32.8  W5, W5;	)	/* big => little */

#define WPRECALC_00_15_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	vadd.u32  tmp2, W6, curK;

#define WPRECALC_00_15_10(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	vst1.32   {tmp0, tmp1}, [RWK]!;

#define WPRECALC_00_15_11(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	vadd.u32  tmp3, W5, curK;

#define WPRECALC_00_15_12(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	vst1.32   {tmp2, tmp3}, [RWK];


/********* Precalc macros for rounds 16-31 ************************************/

/*
 * Steps 0 and 1 of the rounds 16-31 schedule.
 * Step 0 clears tmp0 and sets W to the upper half of W_m16 followed by
 * the lower half of W_m12 (the four words starting 14 back from i).
 * Step 1 points RWK at the stack slot of word i and sets tmp0 to the
 * upper three words of W_m04 followed by one zero word.
 */
#define WPRECALC_16_31_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	veor      tmp0, tmp0; \
	vext.8    W, W_m16, W_m12, #8;

#define WPRECALC_16_31_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	add       RWK, sp, #(WK_offs(i)); \
	vext.8    tmp0, W_m04, tmp0, #4;

/*
 * Steps 2-11 of the rounds 16-31 schedule.
 *
 * Steps 2-3 xor W with W_m08, W_m16 and tmp0 and clear tmp1.
 * Steps 4-6 form rol(W, 1) in tmp0.
 * Steps 5-9 additionally take word 0 of the un-rotated W, move it to the
 * top lane (tmp1), rotate it left by 2 and xor it into the top lane of
 * the result; this supplies the term the top lane could not see because
 * it depends on word 0 of the vector being computed.
 * Steps 10-11 add curK and store the four W+K words at RWK.
 */
#define WPRECALC_16_31_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	veor      tmp0, tmp0, W_m16; \
	veor.32   W, W, W_m08;

#define WPRECALC_16_31_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	veor      tmp1, tmp1; \
	veor      W, W, tmp0;

#define WPRECALC_16_31_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	vshl.u32  tmp0, W, #1;

#define WPRECALC_16_31_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	vext.8    tmp1, tmp1, W, #(16-12); \
	vshr.u32  W, W, #31;

#define WPRECALC_16_31_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	vorr      tmp0, tmp0, W; \
	vshr.u32  W, tmp1, #30;

#define WPRECALC_16_31_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	vshl.u32  tmp1, tmp1, #2;

#define WPRECALC_16_31_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	veor      tmp0, tmp0, W;

#define WPRECALC_16_31_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	veor      W, tmp0, tmp1;

#define WPRECALC_16_31_10(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	vadd.u32  tmp0, W, curK;

#define WPRECALC_16_31_11(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	vst1.32   {tmp0}, [RWK];


/********* Precalc macros for rounds 32-79 ************************************/

/*
 * Steps 0-9 of the rounds 32-79 schedule.  On entry the register named
 * W still holds the vector from 32 words back.  The steps compute
 *   W = rol(W ^ W_m28 ^ W_m16 ^ (upper half of W_m08 : lower half of W_m04), 2)
 * with no cross-lane fix-up, then add curK and store the four W+K words
 * at the stack slot of word (i & ~3).  Clobbers tmp0, tmp1 and RWK.
 */
#define WPRECALC_32_79_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	veor      W, W_m28;

#define WPRECALC_32_79_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	vext.8    tmp0, W_m08, W_m04, #8;

#define WPRECALC_32_79_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	veor      W, W_m16;

#define WPRECALC_32_79_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	veor      W, tmp0;

#define WPRECALC_32_79_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	add       RWK, sp, #(WK_offs(i&~3));

#define WPRECALC_32_79_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	vshl.u32  tmp1, W, #2;

#define WPRECALC_32_79_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	vshr.u32  tmp0, W, #30;

#define WPRECALC_32_79_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	vorr      W, tmp0, tmp1;

#define WPRECALC_32_79_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	vadd.u32  tmp0, W, curK;

#define WPRECALC_32_79_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
	vst1.32   {tmp0}, [RWK];


/*
 * Transform nblks*64 bytes (nblks*16 32-bit words) at DATA.
 *
 * unsigned int
 * sha1_transform_neon (void *ctx, const unsigned char *data,
 *                      unsigned int nblks)
 *
 * The five chaining words at ctx (offsets state_h0..state_h4) are read,
 * updated once per block and written back.  Returns immediately when
 * nblks is zero.
 *
 * NOTE(review): the prototype above says "unsigned int", but this
 * routine never writes r0, so r0 still holds ctx on return and no
 * meaningful value is produced.  Callers should treat it as void.
 *
 * Register and stack usage:
 *  - r4-r12 and lr are saved on entry and restored on exit.
 *  - r3 (ROLDSTACK) keeps the caller's sp while sp is moved down to a
 *    16-byte aligned, 64-byte scratch area holding 16 W+K words.
 *  - q0-q15 are all written.  q4-q7 are NOT preserved here (the
 *    vpush/vpop are commented out), so the caller is responsible for
 *    the NEON register state -- presumably via the kernel's NEON
 *    begin/end bracketing; confirm against the C glue code.
 */
.align 3
ENTRY(sha1_transform_neon)
	/* input:
	 *	r0: ctx, CTX
	 *	r1: data (64*nblks bytes)
	 *	r2: nblks
	 */

	cmp RNBLKS, #0;
	beq .Ldo_nothing;

	push {r4-r12, lr};
	/*vpush {q4-q7};*/

	adr RT3, .LK_VEC;

	mov ROLDSTACK, sp;

	/* Align stack. */
	sub RT0, sp, #(16*4);
	and RT0, #(~(16-1));
	mov sp, RT0;

	vld1.32 {qK1-qK2}, [RT3]!; /* Load K1,K2 */

	/* Get the values of the chaining variables. */
	ldm RSTATE, {_a-_e};

	vld1.32 {qK3-qK4}, [RT3]; /* Load K3,K4 */

#undef curK
#define curK qK1
	/* Precalc 0-15. */
	W_PRECALC_00_15();

.Loop:
	/* Transform 0-15 + Precalc 16-31. */
	_R( _a, _b, _c, _d, _e, F1,  0,
	    WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 16,
	    W4, W5, W6, W7, W0, _, _, _ );
	_R( _e, _a, _b, _c, _d, F1,  1,
	    WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 16,
	    W4, W5, W6, W7, W0, _, _, _ );
	_R( _d, _e, _a, _b, _c, F1,  2,
	    WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 16,
	    W4, W5, W6, W7, W0, _, _, _ );
	_R( _c, _d, _e, _a, _b, F1,  3,
	    WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,16,
	    W4, W5, W6, W7, W0, _, _, _ );

	/* Words 20-39 belong to the second round-constant group. */
#undef curK
#define curK qK2
	_R( _b, _c, _d, _e, _a, F1,  4,
	    WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 20,
	    W3, W4, W5, W6, W7, _, _, _ );
	_R( _a, _b, _c, _d, _e, F1,  5,
	    WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 20,
	    W3, W4, W5, W6, W7, _, _, _ );
	_R( _e, _a, _b, _c, _d, F1,  6,
	    WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 20,
	    W3, W4, W5, W6, W7, _, _, _ );
	_R( _d, _e, _a, _b, _c, F1,  7,
	    WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,20,
	    W3, W4, W5, W6, W7, _, _, _ );

	_R( _c, _d, _e, _a, _b, F1,  8,
	    WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 24,
	    W2, W3, W4, W5, W6, _, _, _ );
	_R( _b, _c, _d, _e, _a, F1,  9,
	    WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 24,
	    W2, W3, W4, W5, W6, _, _, _ );
	_R( _a, _b, _c, _d, _e, F1, 10,
	    WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 24,
	    W2, W3, W4, W5, W6, _, _, _ );
	_R( _e, _a, _b, _c, _d, F1, 11,
	    WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,24,
	    W2, W3, W4, W5, W6, _, _, _ );

	_R( _d, _e, _a, _b, _c, F1, 12,
	    WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 28,
	    W1, W2, W3, W4, W5, _, _, _ );
	_R( _c, _d, _e, _a, _b, F1, 13,
	    WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 28,
	    W1, W2, W3, W4, W5, _, _, _ );
	_R( _b, _c, _d, _e, _a, F1, 14,
	    WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 28,
	    W1, W2, W3, W4, W5, _, _, _ );
	_R( _a, _b, _c, _d, _e, F1, 15,
	    WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,28,
	    W1, W2, W3, W4, W5, _, _, _ );

	/* Transform 16-63 + Precalc 32-79. */
	_R( _e, _a, _b, _c, _d, F1, 16,
	    WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 32,
	    W0, W1, W2, W3, W4, W5, W6, W7);
	_R( _d, _e, _a, _b, _c, F1, 17,
	    WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 32,
	    W0, W1, W2, W3, W4, W5, W6, W7);
	_R( _c, _d, _e, _a, _b, F1, 18,
	    WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 32,
	    W0, W1, W2, W3, W4, W5, W6, W7);
	_R( _b, _c, _d, _e, _a, F1, 19,
	    WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 32,
	    W0, W1, W2, W3, W4, W5, W6, W7);

	_R( _a, _b, _c, _d, _e, F2, 20,
	    WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 36,
	    W7, W0, W1, W2, W3, W4, W5, W6);
	_R( _e, _a, _b, _c, _d, F2, 21,
	    WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 36,
	    W7, W0, W1, W2, W3, W4, W5, W6);
	_R( _d, _e, _a, _b, _c, F2, 22,
	    WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 36,
	    W7, W0, W1, W2, W3, W4, W5, W6);
	_R( _c, _d, _e, _a, _b, F2, 23,
	    WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 36,
	    W7, W0, W1, W2, W3, W4, W5, W6);

	/* Words 40-59 belong to the third round-constant group. */
#undef curK
#define curK qK3
	_R( _b, _c, _d, _e, _a, F2, 24,
	    WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 40,
	    W6, W7, W0, W1, W2, W3, W4, W5);
	_R( _a, _b, _c, _d, _e, F2, 25,
	    WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 40,
	    W6, W7, W0, W1, W2, W3, W4, W5);
	_R( _e, _a, _b, _c, _d, F2, 26,
	    WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 40,
	    W6, W7, W0, W1, W2, W3, W4, W5);
	_R( _d, _e, _a, _b, _c, F2, 27,
	    WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 40,
	    W6, W7, W0, W1, W2, W3, W4, W5);

	_R( _c, _d, _e, _a, _b, F2, 28,
	    WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 44,
	    W5, W6, W7, W0, W1, W2, W3, W4);
	_R( _b, _c, _d, _e, _a, F2, 29,
	    WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 44,
	    W5, W6, W7, W0, W1, W2, W3, W4);
	_R( _a, _b, _c, _d, _e, F2, 30,
	    WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 44,
	    W5, W6, W7, W0, W1, W2, W3, W4);
	_R( _e, _a, _b, _c, _d, F2, 31,
	    WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 44,
	    W5, W6, W7, W0, W1, W2, W3, W4);

	_R( _d, _e, _a, _b, _c, F2, 32,
	    WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 48,
	    W4, W5, W6, W7, W0, W1, W2, W3);
	_R( _c, _d, _e, _a, _b, F2, 33,
	    WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 48,
	    W4, W5, W6, W7, W0, W1, W2, W3);
	_R( _b, _c, _d, _e, _a, F2, 34,
	    WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 48,
	    W4, W5, W6, W7, W0, W1, W2, W3);
	_R( _a, _b, _c, _d, _e, F2, 35,
	    WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 48,
	    W4, W5, W6, W7, W0, W1, W2, W3);

	_R( _e, _a, _b, _c, _d, F2, 36,
	    WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 52,
	    W3, W4, W5, W6, W7, W0, W1, W2);
	_R( _d, _e, _a, _b, _c, F2, 37,
	    WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 52,
	    W3, W4, W5, W6, W7, W0, W1, W2);
	_R( _c, _d, _e, _a, _b, F2, 38,
	    WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 52,
	    W3, W4, W5, W6, W7, W0, W1, W2);
	_R( _b, _c, _d, _e, _a, F2, 39,
	    WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 52,
	    W3, W4, W5, W6, W7, W0, W1, W2);

	_R( _a, _b, _c, _d, _e, F3, 40,
	    WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 56,
	    W2, W3, W4, W5, W6, W7, W0, W1);
	_R( _e, _a, _b, _c, _d, F3, 41,
	    WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 56,
	    W2, W3, W4, W5, W6, W7, W0, W1);
	_R( _d, _e, _a, _b, _c, F3, 42,
	    WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 56,
	    W2, W3, W4, W5, W6, W7, W0, W1);
	_R( _c, _d, _e, _a, _b, F3, 43,
	    WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 56,
	    W2, W3, W4, W5, W6, W7, W0, W1);

	/* Words 60-79 belong to the fourth round-constant group. */
#undef curK
#define curK qK4
	_R( _b, _c, _d, _e, _a, F3, 44,
	    WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 60,
	    W1, W2, W3, W4, W5, W6, W7, W0);
	_R( _a, _b, _c, _d, _e, F3, 45,
	    WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 60,
	    W1, W2, W3, W4, W5, W6, W7, W0);
	_R( _e, _a, _b, _c, _d, F3, 46,
	    WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 60,
	    W1, W2, W3, W4, W5, W6, W7, W0);
	_R( _d, _e, _a, _b, _c, F3, 47,
	    WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 60,
	    W1, W2, W3, W4, W5, W6, W7, W0);

	_R( _c, _d, _e, _a, _b, F3, 48,
	    WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 64,
	    W0, W1, W2, W3, W4, W5, W6, W7);
	_R( _b, _c, _d, _e, _a, F3, 49,
	    WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 64,
	    W0, W1, W2, W3, W4, W5, W6, W7);
	_R( _a, _b, _c, _d, _e, F3, 50,
	    WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 64,
	    W0, W1, W2, W3, W4, W5, W6, W7);
	_R( _e, _a, _b, _c, _d, F3, 51,
	    WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 64,
	    W0, W1, W2, W3, W4, W5, W6, W7);

	_R( _d, _e, _a, _b, _c, F3, 52,
	    WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 68,
	    W7, W0, W1, W2, W3, W4, W5, W6);
	_R( _c, _d, _e, _a, _b, F3, 53,
	    WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 68,
	    W7, W0, W1, W2, W3, W4, W5, W6);
	_R( _b, _c, _d, _e, _a, F3, 54,
	    WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 68,
	    W7, W0, W1, W2, W3, W4, W5, W6);
	_R( _a, _b, _c, _d, _e, F3, 55,
	    WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 68,
	    W7, W0, W1, W2, W3, W4, W5, W6);

	_R( _e, _a, _b, _c, _d, F3, 56,
	    WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 72,
	    W6, W7, W0, W1, W2, W3, W4, W5);
	_R( _d, _e, _a, _b, _c, F3, 57,
	    WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 72,
	    W6, W7, W0, W1, W2, W3, W4, W5);
	_R( _c, _d, _e, _a, _b, F3, 58,
	    WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 72,
	    W6, W7, W0, W1, W2, W3, W4, W5);
	_R( _b, _c, _d, _e, _a, F3, 59,
	    WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 72,
	    W6, W7, W0, W1, W2, W3, W4, W5);

	/*
	 * Count the block down now; the flags are consumed by the beq
	 * after rounds 60-63.  The round and precalc macros in between
	 * contain no flag-setting instructions.
	 */
	subs RNBLKS, #1;

	_R( _a, _b, _c, _d, _e, F4, 60,
	    WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 76,
	    W5, W6, W7, W0, W1, W2, W3, W4);
	_R( _e, _a, _b, _c, _d, F4, 61,
	    WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 76,
	    W5, W6, W7, W0, W1, W2, W3, W4);
	_R( _d, _e, _a, _b, _c, F4, 62,
	    WPRECALC_32_79_6, dummy,            WPRECALC_32_79_7, 76,
	    W5, W6, W7, W0, W1, W2, W3, W4);
	_R( _c, _d, _e, _a, _b, F4, 63,
	    WPRECALC_32_79_8, dummy,            WPRECALC_32_79_9, 76,
	    W5, W6, W7, W0, W1, W2, W3, W4);

	/* Last block: finish without loading any further input. */
	beq .Lend;

	/* Transform 64-79 + Precalc 0-15 of next block. */
#undef curK
#define curK qK1
	_R( _b, _c, _d, _e, _a, F4, 64,
	    WPRECALC_00_15_0, dummy, dummy, _, _, _, _, _, _, _, _, _ );
	_R( _a, _b, _c, _d, _e, F4, 65,
	    WPRECALC_00_15_1, dummy, dummy, _, _, _, _, _, _, _, _, _ );
	_R( _e, _a, _b, _c, _d, F4, 66,
	    WPRECALC_00_15_2, dummy, dummy, _, _, _, _, _, _, _, _, _ );
	_R( _d, _e, _a, _b, _c, F4, 67,
	    WPRECALC_00_15_3, dummy, dummy, _, _, _, _, _, _, _, _, _ );

	_R( _c, _d, _e, _a, _b, F4, 68,
	    dummy,            dummy, dummy, _, _, _, _, _, _, _, _, _ );
	_R( _b, _c, _d, _e, _a, F4, 69,
	    dummy,            dummy, dummy, _, _, _, _, _, _, _, _, _ );
	_R( _a, _b, _c, _d, _e, F4, 70,
	    WPRECALC_00_15_4, dummy, dummy, _, _, _, _, _, _, _, _, _ );
	_R( _e, _a, _b, _c, _d, F4, 71,
	    WPRECALC_00_15_5, dummy, dummy, _, _, _, _, _, _, _, _, _ );

	_R( _d, _e, _a, _b, _c, F4, 72,
	    dummy,            dummy, dummy, _, _, _, _, _, _, _, _, _ );
	_R( _c, _d, _e, _a, _b, F4, 73,
	    dummy,            dummy, dummy, _, _, _, _, _, _, _, _, _ );
	_R( _b, _c, _d, _e, _a, F4, 74,
	    WPRECALC_00_15_6, dummy, dummy, _, _, _, _, _, _, _, _, _ );
	_R( _a, _b, _c, _d, _e, F4, 75,
	    WPRECALC_00_15_7, dummy, dummy, _, _, _, _, _, _, _, _, _ );

	_R( _e, _a, _b, _c, _d, F4, 76,
	    WPRECALC_00_15_8, dummy, dummy, _, _, _, _, _, _, _, _, _ );
	_R( _d, _e, _a, _b, _c, F4, 77,
	    WPRECALC_00_15_9, dummy, dummy, _, _, _, _, _, _, _, _, _ );
	_R( _c, _d, _e, _a, _b, F4, 78,
	    WPRECALC_00_15_10, dummy, dummy, _, _, _, _, _, _, _, _, _ );
	/*
	 * The stores that overwrite the W+K window are placed at the end
	 * of rounds 78 and 79 (the second one as the pre3 hook, after
	 * round 79 has loaded its own word).
	 */
	_R( _b, _c, _d, _e, _a, F4, 79,
	    WPRECALC_00_15_11, dummy, WPRECALC_00_15_12, _, _, _, _, _, _, _, _, _ );

	/* Update the chaining variables. */
	ldm RSTATE, {RT0-RT3};
	add _a, RT0;
	ldr RT0, [RSTATE, #state_h4];
	add _b, RT1;
	add _c, RT2;
	add _d, RT3;
	add _e, RT0;
	stm RSTATE, {_a-_e};

	b .Loop;

.Lend:
	/* Transform 64-79 */
	R( _b, _c, _d, _e, _a, F4, 64 );
	R( _a, _b, _c, _d, _e, F4, 65 );
	R( _e, _a, _b, _c, _d, F4, 66 );
	R( _d, _e, _a, _b, _c, F4, 67 );
	R( _c, _d, _e, _a, _b, F4, 68 );
	R( _b, _c, _d, _e, _a, F4, 69 );
	R( _a, _b, _c, _d, _e, F4, 70 );
	R( _e, _a, _b, _c, _d, F4, 71 );
	R( _d, _e, _a, _b, _c, F4, 72 );
	R( _c, _d, _e, _a, _b, F4, 73 );
	R( _b, _c, _d, _e, _a, F4, 74 );
	R( _a, _b, _c, _d, _e, F4, 75 );
	R( _e, _a, _b, _c, _d, F4, 76 );
	R( _d, _e, _a, _b, _c, F4, 77 );
	R( _c, _d, _e, _a, _b, F4, 78 );
	R( _b, _c, _d, _e, _a, F4, 79 );

	/* Drop the scratch area; the W+K words are no longer needed. */
	mov sp, ROLDSTACK;

	/* Update the chaining variables. */
	ldm RSTATE, {RT0-RT3};
	add _a, RT0;
	ldr RT0, [RSTATE, #state_h4];
	add _b, RT1;
	add _c, RT2;
	add _d, RT3;
	/*vpop {q4-q7};*/
	add _e, RT0;
	stm RSTATE, {_a-_e};

	pop {r4-r12, pc};

.Ldo_nothing:
	bx lr
ENDPROC(sha1_transform_neon)