1*4882a593Smuzhiyun/* SPDX-License-Identifier: GPL-2.0 OR MIT */ 2*4882a593Smuzhiyun/* 3*4882a593Smuzhiyun * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. 4*4882a593Smuzhiyun * 5*4882a593Smuzhiyun * Based on public domain code from Daniel J. Bernstein and Peter Schwabe. This 6*4882a593Smuzhiyun * began from SUPERCOP's curve25519/neon2/scalarmult.s, but has subsequently been 7*4882a593Smuzhiyun * manually reworked for use in kernel space. 8*4882a593Smuzhiyun */ 9*4882a593Smuzhiyun 10*4882a593Smuzhiyun#include <linux/linkage.h> 11*4882a593Smuzhiyun 12*4882a593Smuzhiyun.text 13*4882a593Smuzhiyun.arch armv7-a 14*4882a593Smuzhiyun.fpu neon 15*4882a593Smuzhiyun.align 4 16*4882a593Smuzhiyun 17*4882a593SmuzhiyunENTRY(curve25519_neon) 18*4882a593Smuzhiyun push {r4-r11, lr} 19*4882a593Smuzhiyun mov ip, sp 20*4882a593Smuzhiyun sub r3, sp, #704 21*4882a593Smuzhiyun and r3, r3, #0xfffffff0 22*4882a593Smuzhiyun mov sp, r3 23*4882a593Smuzhiyun movw r4, #0 24*4882a593Smuzhiyun movw r5, #254 25*4882a593Smuzhiyun vmov.i32 q0, #1 26*4882a593Smuzhiyun vshr.u64 q1, q0, #7 27*4882a593Smuzhiyun vshr.u64 q0, q0, #8 28*4882a593Smuzhiyun vmov.i32 d4, #19 29*4882a593Smuzhiyun vmov.i32 d5, #38 30*4882a593Smuzhiyun add r6, sp, #480 31*4882a593Smuzhiyun vst1.8 {d2-d3}, [r6, : 128]! 32*4882a593Smuzhiyun vst1.8 {d0-d1}, [r6, : 128]! 33*4882a593Smuzhiyun vst1.8 {d4-d5}, [r6, : 128] 34*4882a593Smuzhiyun add r6, r3, #0 35*4882a593Smuzhiyun vmov.i32 q2, #0 36*4882a593Smuzhiyun vst1.8 {d4-d5}, [r6, : 128]! 37*4882a593Smuzhiyun vst1.8 {d4-d5}, [r6, : 128]! 38*4882a593Smuzhiyun vst1.8 d4, [r6, : 64] 39*4882a593Smuzhiyun add r6, r3, #0 40*4882a593Smuzhiyun movw r7, #960 41*4882a593Smuzhiyun sub r7, r7, #2 42*4882a593Smuzhiyun neg r7, r7 43*4882a593Smuzhiyun sub r7, r7, r7, LSL #7 44*4882a593Smuzhiyun str r7, [r6] 45*4882a593Smuzhiyun add r6, sp, #672 46*4882a593Smuzhiyun vld1.8 {d4-d5}, [r1]! 47*4882a593Smuzhiyun vld1.8 {d6-d7}, [r1] 48*4882a593Smuzhiyun vst1.8 {d4-d5}, [r6, : 128]! 49*4882a593Smuzhiyun vst1.8 {d6-d7}, [r6, : 128] 50*4882a593Smuzhiyun sub r1, r6, #16 51*4882a593Smuzhiyun ldrb r6, [r1] 52*4882a593Smuzhiyun and r6, r6, #248 53*4882a593Smuzhiyun strb r6, [r1] 54*4882a593Smuzhiyun ldrb r6, [r1, #31] 55*4882a593Smuzhiyun and r6, r6, #127 56*4882a593Smuzhiyun orr r6, r6, #64 57*4882a593Smuzhiyun strb r6, [r1, #31] 58*4882a593Smuzhiyun vmov.i64 q2, #0xffffffff 59*4882a593Smuzhiyun vshr.u64 q3, q2, #7 60*4882a593Smuzhiyun vshr.u64 q2, q2, #6 61*4882a593Smuzhiyun vld1.8 {d8}, [r2] 62*4882a593Smuzhiyun vld1.8 {d10}, [r2] 63*4882a593Smuzhiyun add r2, r2, #6 64*4882a593Smuzhiyun vld1.8 {d12}, [r2] 65*4882a593Smuzhiyun vld1.8 {d14}, [r2] 66*4882a593Smuzhiyun add r2, r2, #6 67*4882a593Smuzhiyun vld1.8 {d16}, [r2] 68*4882a593Smuzhiyun add r2, r2, #4 69*4882a593Smuzhiyun vld1.8 {d18}, [r2] 70*4882a593Smuzhiyun vld1.8 {d20}, [r2] 71*4882a593Smuzhiyun add r2, r2, #6 72*4882a593Smuzhiyun vld1.8 {d22}, [r2] 73*4882a593Smuzhiyun add r2, r2, #2 74*4882a593Smuzhiyun vld1.8 {d24}, [r2] 75*4882a593Smuzhiyun vld1.8 {d26}, [r2] 76*4882a593Smuzhiyun vshr.u64 q5, q5, #26 77*4882a593Smuzhiyun vshr.u64 q6, q6, #3 78*4882a593Smuzhiyun vshr.u64 q7, q7, #29 79*4882a593Smuzhiyun vshr.u64 q8, q8, #6 80*4882a593Smuzhiyun vshr.u64 q10, q10, #25 81*4882a593Smuzhiyun vshr.u64 q11, q11, #3 82*4882a593Smuzhiyun vshr.u64 q12, q12, #12 83*4882a593Smuzhiyun vshr.u64 q13, q13, #38 84*4882a593Smuzhiyun vand q4, q4, q2 85*4882a593Smuzhiyun vand q6, q6, q2 86*4882a593Smuzhiyun vand q8, q8, q2 87*4882a593Smuzhiyun vand q10, q10, q2 88*4882a593Smuzhiyun vand q2, q12, q2 89*4882a593Smuzhiyun vand q5, q5, q3 90*4882a593Smuzhiyun vand q7, q7, q3 91*4882a593Smuzhiyun vand q9, q9, q3 92*4882a593Smuzhiyun vand q11, q11, q3 93*4882a593Smuzhiyun vand q3, q13, q3 94*4882a593Smuzhiyun add r2, r3, #48 95*4882a593Smuzhiyun vadd.i64 q12, q4, q1 96*4882a593Smuzhiyun vadd.i64 q13, q10, q1 97*4882a593Smuzhiyun vshr.s64 q12, q12, #26 98*4882a593Smuzhiyun vshr.s64 q13, q13, #26 99*4882a593Smuzhiyun vadd.i64 q5, q5, q12 100*4882a593Smuzhiyun vshl.i64 q12, q12, #26 101*4882a593Smuzhiyun vadd.i64 q14, q5, q0 102*4882a593Smuzhiyun vadd.i64 q11, q11, q13 103*4882a593Smuzhiyun vshl.i64 q13, q13, #26 104*4882a593Smuzhiyun vadd.i64 q15, q11, q0 105*4882a593Smuzhiyun vsub.i64 q4, q4, q12 106*4882a593Smuzhiyun vshr.s64 q12, q14, #25 107*4882a593Smuzhiyun vsub.i64 q10, q10, q13 108*4882a593Smuzhiyun vshr.s64 q13, q15, #25 109*4882a593Smuzhiyun vadd.i64 q6, q6, q12 110*4882a593Smuzhiyun vshl.i64 q12, q12, #25 111*4882a593Smuzhiyun vadd.i64 q14, q6, q1 112*4882a593Smuzhiyun vadd.i64 q2, q2, q13 113*4882a593Smuzhiyun vsub.i64 q5, q5, q12 114*4882a593Smuzhiyun vshr.s64 q12, q14, #26 115*4882a593Smuzhiyun vshl.i64 q13, q13, #25 116*4882a593Smuzhiyun vadd.i64 q14, q2, q1 117*4882a593Smuzhiyun vadd.i64 q7, q7, q12 118*4882a593Smuzhiyun vshl.i64 q12, q12, #26 119*4882a593Smuzhiyun vadd.i64 q15, q7, q0 120*4882a593Smuzhiyun vsub.i64 q11, q11, q13 121*4882a593Smuzhiyun vshr.s64 q13, q14, #26 122*4882a593Smuzhiyun vsub.i64 q6, q6, q12 123*4882a593Smuzhiyun vshr.s64 q12, q15, #25 124*4882a593Smuzhiyun vadd.i64 q3, q3, q13 125*4882a593Smuzhiyun vshl.i64 q13, q13, #26 126*4882a593Smuzhiyun vadd.i64 q14, q3, q0 127*4882a593Smuzhiyun vadd.i64 q8, q8, q12 128*4882a593Smuzhiyun vshl.i64 q12, q12, #25 129*4882a593Smuzhiyun vadd.i64 q15, q8, q1 130*4882a593Smuzhiyun add r2, r2, #8 131*4882a593Smuzhiyun vsub.i64 q2, q2, q13 132*4882a593Smuzhiyun vshr.s64 q13, q14, #25 133*4882a593Smuzhiyun vsub.i64 q7, q7, q12 134*4882a593Smuzhiyun vshr.s64 q12, q15, #26 135*4882a593Smuzhiyun vadd.i64 q14, q13, q13 136*4882a593Smuzhiyun vadd.i64 q9, q9, q12 137*4882a593Smuzhiyun vtrn.32 d12, d14 138*4882a593Smuzhiyun vshl.i64 q12, q12, #26 139*4882a593Smuzhiyun vtrn.32 d13, d15 140*4882a593Smuzhiyun vadd.i64 q0, q9, q0 141*4882a593Smuzhiyun vadd.i64 q4, q4, q14 142*4882a593Smuzhiyun vst1.8 d12, [r2, : 64]! 143*4882a593Smuzhiyun vshl.i64 q6, q13, #4 144*4882a593Smuzhiyun vsub.i64 q7, q8, q12 145*4882a593Smuzhiyun vshr.s64 q0, q0, #25 146*4882a593Smuzhiyun vadd.i64 q4, q4, q6 147*4882a593Smuzhiyun vadd.i64 q6, q10, q0 148*4882a593Smuzhiyun vshl.i64 q0, q0, #25 149*4882a593Smuzhiyun vadd.i64 q8, q6, q1 150*4882a593Smuzhiyun vadd.i64 q4, q4, q13 151*4882a593Smuzhiyun vshl.i64 q10, q13, #25 152*4882a593Smuzhiyun vadd.i64 q1, q4, q1 153*4882a593Smuzhiyun vsub.i64 q0, q9, q0 154*4882a593Smuzhiyun vshr.s64 q8, q8, #26 155*4882a593Smuzhiyun vsub.i64 q3, q3, q10 156*4882a593Smuzhiyun vtrn.32 d14, d0 157*4882a593Smuzhiyun vshr.s64 q1, q1, #26 158*4882a593Smuzhiyun vtrn.32 d15, d1 159*4882a593Smuzhiyun vadd.i64 q0, q11, q8 160*4882a593Smuzhiyun vst1.8 d14, [r2, : 64] 161*4882a593Smuzhiyun vshl.i64 q7, q8, #26 162*4882a593Smuzhiyun vadd.i64 q5, q5, q1 163*4882a593Smuzhiyun vtrn.32 d4, d6 164*4882a593Smuzhiyun vshl.i64 q1, q1, #26 165*4882a593Smuzhiyun vtrn.32 d5, d7 166*4882a593Smuzhiyun vsub.i64 q3, q6, q7 167*4882a593Smuzhiyun add r2, r2, #16 168*4882a593Smuzhiyun vsub.i64 q1, q4, q1 169*4882a593Smuzhiyun vst1.8 d4, [r2, : 64] 170*4882a593Smuzhiyun vtrn.32 d6, d0 171*4882a593Smuzhiyun vtrn.32 d7, d1 172*4882a593Smuzhiyun sub r2, r2, #8 173*4882a593Smuzhiyun vtrn.32 d2, d10 174*4882a593Smuzhiyun vtrn.32 d3, d11 175*4882a593Smuzhiyun vst1.8 d6, [r2, : 64] 176*4882a593Smuzhiyun sub r2, r2, #24 177*4882a593Smuzhiyun vst1.8 d2, [r2, : 64] 178*4882a593Smuzhiyun add r2, r3, #96 179*4882a593Smuzhiyun vmov.i32 q0, #0 180*4882a593Smuzhiyun vmov.i64 d2, #0xff 181*4882a593Smuzhiyun vmov.i64 d3, #0 182*4882a593Smuzhiyun vshr.u32 q1, q1, #7 183*4882a593Smuzhiyun vst1.8 {d2-d3}, [r2, : 128]! 184*4882a593Smuzhiyun vst1.8 {d0-d1}, [r2, : 128]! 185*4882a593Smuzhiyun vst1.8 d0, [r2, : 64] 186*4882a593Smuzhiyun add r2, r3, #144 187*4882a593Smuzhiyun vmov.i32 q0, #0 188*4882a593Smuzhiyun vst1.8 {d0-d1}, [r2, : 128]! 189*4882a593Smuzhiyun vst1.8 {d0-d1}, [r2, : 128]! 190*4882a593Smuzhiyun vst1.8 d0, [r2, : 64] 191*4882a593Smuzhiyun add r2, r3, #240 192*4882a593Smuzhiyun vmov.i32 q0, #0 193*4882a593Smuzhiyun vmov.i64 d2, #0xff 194*4882a593Smuzhiyun vmov.i64 d3, #0 195*4882a593Smuzhiyun vshr.u32 q1, q1, #7 196*4882a593Smuzhiyun vst1.8 {d2-d3}, [r2, : 128]! 197*4882a593Smuzhiyun vst1.8 {d0-d1}, [r2, : 128]! 198*4882a593Smuzhiyun vst1.8 d0, [r2, : 64] 199*4882a593Smuzhiyun add r2, r3, #48 200*4882a593Smuzhiyun add r6, r3, #192 201*4882a593Smuzhiyun vld1.8 {d0-d1}, [r2, : 128]! 202*4882a593Smuzhiyun vld1.8 {d2-d3}, [r2, : 128]! 203*4882a593Smuzhiyun vld1.8 {d4}, [r2, : 64] 204*4882a593Smuzhiyun vst1.8 {d0-d1}, [r6, : 128]! 205*4882a593Smuzhiyun vst1.8 {d2-d3}, [r6, : 128]! 206*4882a593Smuzhiyun vst1.8 d4, [r6, : 64] 207*4882a593Smuzhiyun.Lmainloop: 208*4882a593Smuzhiyun mov r2, r5, LSR #3 209*4882a593Smuzhiyun and r6, r5, #7 210*4882a593Smuzhiyun ldrb r2, [r1, r2] 211*4882a593Smuzhiyun mov r2, r2, LSR r6 212*4882a593Smuzhiyun and r2, r2, #1 213*4882a593Smuzhiyun str r5, [sp, #456] 214*4882a593Smuzhiyun eor r4, r4, r2 215*4882a593Smuzhiyun str r2, [sp, #460] 216*4882a593Smuzhiyun neg r2, r4 217*4882a593Smuzhiyun add r4, r3, #96 218*4882a593Smuzhiyun add r5, r3, #192 219*4882a593Smuzhiyun add r6, r3, #144 220*4882a593Smuzhiyun vld1.8 {d8-d9}, [r4, : 128]! 221*4882a593Smuzhiyun add r7, r3, #240 222*4882a593Smuzhiyun vld1.8 {d10-d11}, [r5, : 128]! 223*4882a593Smuzhiyun veor q6, q4, q5 224*4882a593Smuzhiyun vld1.8 {d14-d15}, [r6, : 128]! 225*4882a593Smuzhiyun vdup.i32 q8, r2 226*4882a593Smuzhiyun vld1.8 {d18-d19}, [r7, : 128]! 227*4882a593Smuzhiyun veor q10, q7, q9 228*4882a593Smuzhiyun vld1.8 {d22-d23}, [r4, : 128]! 229*4882a593Smuzhiyun vand q6, q6, q8 230*4882a593Smuzhiyun vld1.8 {d24-d25}, [r5, : 128]! 231*4882a593Smuzhiyun vand q10, q10, q8 232*4882a593Smuzhiyun vld1.8 {d26-d27}, [r6, : 128]! 233*4882a593Smuzhiyun veor q4, q4, q6 234*4882a593Smuzhiyun vld1.8 {d28-d29}, [r7, : 128]! 235*4882a593Smuzhiyun veor q5, q5, q6 236*4882a593Smuzhiyun vld1.8 {d0}, [r4, : 64] 237*4882a593Smuzhiyun veor q6, q7, q10 238*4882a593Smuzhiyun vld1.8 {d2}, [r5, : 64] 239*4882a593Smuzhiyun veor q7, q9, q10 240*4882a593Smuzhiyun vld1.8 {d4}, [r6, : 64] 241*4882a593Smuzhiyun veor q9, q11, q12 242*4882a593Smuzhiyun vld1.8 {d6}, [r7, : 64] 243*4882a593Smuzhiyun veor q10, q0, q1 244*4882a593Smuzhiyun sub r2, r4, #32 245*4882a593Smuzhiyun vand q9, q9, q8 246*4882a593Smuzhiyun sub r4, r5, #32 247*4882a593Smuzhiyun vand q10, q10, q8 248*4882a593Smuzhiyun sub r5, r6, #32 249*4882a593Smuzhiyun veor q11, q11, q9 250*4882a593Smuzhiyun sub r6, r7, #32 251*4882a593Smuzhiyun veor q0, q0, q10 252*4882a593Smuzhiyun veor q9, q12, q9 253*4882a593Smuzhiyun veor q1, q1, q10 254*4882a593Smuzhiyun veor q10, q13, q14 255*4882a593Smuzhiyun veor q12, q2, q3 256*4882a593Smuzhiyun vand q10, q10, q8 257*4882a593Smuzhiyun vand q8, q12, q8 258*4882a593Smuzhiyun veor q12, q13, q10 259*4882a593Smuzhiyun veor q2, q2, q8 260*4882a593Smuzhiyun veor q10, q14, q10 261*4882a593Smuzhiyun veor q3, q3, q8 262*4882a593Smuzhiyun vadd.i32 q8, q4, q6 263*4882a593Smuzhiyun vsub.i32 q4, q4, q6 264*4882a593Smuzhiyun vst1.8 {d16-d17}, [r2, : 128]! 265*4882a593Smuzhiyun vadd.i32 q6, q11, q12 266*4882a593Smuzhiyun vst1.8 {d8-d9}, [r5, : 128]! 267*4882a593Smuzhiyun vsub.i32 q4, q11, q12 268*4882a593Smuzhiyun vst1.8 {d12-d13}, [r2, : 128]! 269*4882a593Smuzhiyun vadd.i32 q6, q0, q2 270*4882a593Smuzhiyun vst1.8 {d8-d9}, [r5, : 128]! 271*4882a593Smuzhiyun vsub.i32 q0, q0, q2 272*4882a593Smuzhiyun vst1.8 d12, [r2, : 64] 273*4882a593Smuzhiyun vadd.i32 q2, q5, q7 274*4882a593Smuzhiyun vst1.8 d0, [r5, : 64] 275*4882a593Smuzhiyun vsub.i32 q0, q5, q7 276*4882a593Smuzhiyun vst1.8 {d4-d5}, [r4, : 128]! 277*4882a593Smuzhiyun vadd.i32 q2, q9, q10 278*4882a593Smuzhiyun vst1.8 {d0-d1}, [r6, : 128]! 279*4882a593Smuzhiyun vsub.i32 q0, q9, q10 280*4882a593Smuzhiyun vst1.8 {d4-d5}, [r4, : 128]! 281*4882a593Smuzhiyun vadd.i32 q2, q1, q3 282*4882a593Smuzhiyun vst1.8 {d0-d1}, [r6, : 128]! 283*4882a593Smuzhiyun vsub.i32 q0, q1, q3 284*4882a593Smuzhiyun vst1.8 d4, [r4, : 64] 285*4882a593Smuzhiyun vst1.8 d0, [r6, : 64] 286*4882a593Smuzhiyun add r2, sp, #512 287*4882a593Smuzhiyun add r4, r3, #96 288*4882a593Smuzhiyun add r5, r3, #144 289*4882a593Smuzhiyun vld1.8 {d0-d1}, [r2, : 128] 290*4882a593Smuzhiyun vld1.8 {d2-d3}, [r4, : 128]! 291*4882a593Smuzhiyun vld1.8 {d4-d5}, [r5, : 128]! 292*4882a593Smuzhiyun vzip.i32 q1, q2 293*4882a593Smuzhiyun vld1.8 {d6-d7}, [r4, : 128]! 294*4882a593Smuzhiyun vld1.8 {d8-d9}, [r5, : 128]! 295*4882a593Smuzhiyun vshl.i32 q5, q1, #1 296*4882a593Smuzhiyun vzip.i32 q3, q4 297*4882a593Smuzhiyun vshl.i32 q6, q2, #1 298*4882a593Smuzhiyun vld1.8 {d14}, [r4, : 64] 299*4882a593Smuzhiyun vshl.i32 q8, q3, #1 300*4882a593Smuzhiyun vld1.8 {d15}, [r5, : 64] 301*4882a593Smuzhiyun vshl.i32 q9, q4, #1 302*4882a593Smuzhiyun vmul.i32 d21, d7, d1 303*4882a593Smuzhiyun vtrn.32 d14, d15 304*4882a593Smuzhiyun vmul.i32 q11, q4, q0 305*4882a593Smuzhiyun vmul.i32 q0, q7, q0 306*4882a593Smuzhiyun vmull.s32 q12, d2, d2 307*4882a593Smuzhiyun vmlal.s32 q12, d11, d1 308*4882a593Smuzhiyun vmlal.s32 q12, d12, d0 309*4882a593Smuzhiyun vmlal.s32 q12, d13, d23 310*4882a593Smuzhiyun vmlal.s32 q12, d16, d22 311*4882a593Smuzhiyun vmlal.s32 q12, d7, d21 312*4882a593Smuzhiyun vmull.s32 q10, d2, d11 313*4882a593Smuzhiyun vmlal.s32 q10, d4, d1 314*4882a593Smuzhiyun vmlal.s32 q10, d13, d0 315*4882a593Smuzhiyun vmlal.s32 q10, d6, d23 316*4882a593Smuzhiyun vmlal.s32 q10, d17, d22 317*4882a593Smuzhiyun vmull.s32 q13, d10, d4 318*4882a593Smuzhiyun vmlal.s32 q13, d11, d3 319*4882a593Smuzhiyun vmlal.s32 q13, d13, d1 320*4882a593Smuzhiyun vmlal.s32 q13, d16, d0 321*4882a593Smuzhiyun vmlal.s32 q13, d17, d23 322*4882a593Smuzhiyun vmlal.s32 q13, d8, d22 323*4882a593Smuzhiyun vmull.s32 q1, d10, d5 324*4882a593Smuzhiyun vmlal.s32 q1, d11, d4 325*4882a593Smuzhiyun vmlal.s32 q1, d6, d1 326*4882a593Smuzhiyun vmlal.s32 q1, d17, d0 327*4882a593Smuzhiyun vmlal.s32 q1, d8, d23 328*4882a593Smuzhiyun vmull.s32 q14, d10, d6 329*4882a593Smuzhiyun vmlal.s32 q14, d11, d13 330*4882a593Smuzhiyun vmlal.s32 q14, d4, d4 331*4882a593Smuzhiyun vmlal.s32 q14, d17, d1 332*4882a593Smuzhiyun vmlal.s32 q14, d18, d0 333*4882a593Smuzhiyun vmlal.s32 q14, d9, d23 334*4882a593Smuzhiyun vmull.s32 q11, d10, d7 335*4882a593Smuzhiyun vmlal.s32 q11, d11, d6 336*4882a593Smuzhiyun vmlal.s32 q11, d12, d5 337*4882a593Smuzhiyun vmlal.s32 q11, d8, d1 338*4882a593Smuzhiyun vmlal.s32 q11, d19, d0 339*4882a593Smuzhiyun vmull.s32 q15, d10, d8 340*4882a593Smuzhiyun vmlal.s32 q15, d11, d17 341*4882a593Smuzhiyun vmlal.s32 q15, d12, d6 342*4882a593Smuzhiyun vmlal.s32 q15, d13, d5 343*4882a593Smuzhiyun vmlal.s32 q15, d19, d1 344*4882a593Smuzhiyun vmlal.s32 q15, d14, d0 345*4882a593Smuzhiyun vmull.s32 q2, d10, d9 346*4882a593Smuzhiyun vmlal.s32 q2, d11, d8 347*4882a593Smuzhiyun vmlal.s32 q2, d12, d7 348*4882a593Smuzhiyun vmlal.s32 q2, d13, d6 349*4882a593Smuzhiyun vmlal.s32 q2, d14, d1 350*4882a593Smuzhiyun vmull.s32 q0, d15, d1 351*4882a593Smuzhiyun vmlal.s32 q0, d10, d14 352*4882a593Smuzhiyun vmlal.s32 q0, d11, d19 353*4882a593Smuzhiyun vmlal.s32 q0, d12, d8 354*4882a593Smuzhiyun vmlal.s32 q0, d13, d17 355*4882a593Smuzhiyun vmlal.s32 q0, d6, d6 356*4882a593Smuzhiyun add r2, sp, #480 357*4882a593Smuzhiyun vld1.8 {d18-d19}, [r2, : 128]! 358*4882a593Smuzhiyun vmull.s32 q3, d16, d7 359*4882a593Smuzhiyun vmlal.s32 q3, d10, d15 360*4882a593Smuzhiyun vmlal.s32 q3, d11, d14 361*4882a593Smuzhiyun vmlal.s32 q3, d12, d9 362*4882a593Smuzhiyun vmlal.s32 q3, d13, d8 363*4882a593Smuzhiyun vld1.8 {d8-d9}, [r2, : 128] 364*4882a593Smuzhiyun vadd.i64 q5, q12, q9 365*4882a593Smuzhiyun vadd.i64 q6, q15, q9 366*4882a593Smuzhiyun vshr.s64 q5, q5, #26 367*4882a593Smuzhiyun vshr.s64 q6, q6, #26 368*4882a593Smuzhiyun vadd.i64 q7, q10, q5 369*4882a593Smuzhiyun vshl.i64 q5, q5, #26 370*4882a593Smuzhiyun vadd.i64 q8, q7, q4 371*4882a593Smuzhiyun vadd.i64 q2, q2, q6 372*4882a593Smuzhiyun vshl.i64 q6, q6, #26 373*4882a593Smuzhiyun vadd.i64 q10, q2, q4 374*4882a593Smuzhiyun vsub.i64 q5, q12, q5 375*4882a593Smuzhiyun vshr.s64 q8, q8, #25 376*4882a593Smuzhiyun vsub.i64 q6, q15, q6 377*4882a593Smuzhiyun vshr.s64 q10, q10, #25 378*4882a593Smuzhiyun vadd.i64 q12, q13, q8 379*4882a593Smuzhiyun vshl.i64 q8, q8, #25 380*4882a593Smuzhiyun vadd.i64 q13, q12, q9 381*4882a593Smuzhiyun vadd.i64 q0, q0, q10 382*4882a593Smuzhiyun vsub.i64 q7, q7, q8 383*4882a593Smuzhiyun vshr.s64 q8, q13, #26 384*4882a593Smuzhiyun vshl.i64 q10, q10, #25 385*4882a593Smuzhiyun vadd.i64 q13, q0, q9 386*4882a593Smuzhiyun vadd.i64 q1, q1, q8 387*4882a593Smuzhiyun vshl.i64 q8, q8, #26 388*4882a593Smuzhiyun vadd.i64 q15, q1, q4 389*4882a593Smuzhiyun vsub.i64 q2, q2, q10 390*4882a593Smuzhiyun vshr.s64 q10, q13, #26 391*4882a593Smuzhiyun vsub.i64 q8, q12, q8 392*4882a593Smuzhiyun vshr.s64 q12, q15, #25 393*4882a593Smuzhiyun vadd.i64 q3, q3, q10 394*4882a593Smuzhiyun vshl.i64 q10, q10, #26 395*4882a593Smuzhiyun vadd.i64 q13, q3, q4 396*4882a593Smuzhiyun vadd.i64 q14, q14, q12 397*4882a593Smuzhiyun add r2, r3, #288 398*4882a593Smuzhiyun vshl.i64 q12, q12, #25 399*4882a593Smuzhiyun add r4, r3, #336 400*4882a593Smuzhiyun vadd.i64 q15, q14, q9 401*4882a593Smuzhiyun add r2, r2, #8 402*4882a593Smuzhiyun vsub.i64 q0, q0, q10 403*4882a593Smuzhiyun add r4, r4, #8 404*4882a593Smuzhiyun vshr.s64 q10, q13, #25 405*4882a593Smuzhiyun vsub.i64 q1, q1, q12 406*4882a593Smuzhiyun vshr.s64 q12, q15, #26 407*4882a593Smuzhiyun vadd.i64 q13, q10, q10 408*4882a593Smuzhiyun vadd.i64 q11, q11, q12 409*4882a593Smuzhiyun vtrn.32 d16, d2 410*4882a593Smuzhiyun vshl.i64 q12, q12, #26 411*4882a593Smuzhiyun vtrn.32 d17, d3 412*4882a593Smuzhiyun vadd.i64 q1, q11, q4 413*4882a593Smuzhiyun vadd.i64 q4, q5, q13 414*4882a593Smuzhiyun vst1.8 d16, [r2, : 64]! 415*4882a593Smuzhiyun vshl.i64 q5, q10, #4 416*4882a593Smuzhiyun vst1.8 d17, [r4, : 64]! 417*4882a593Smuzhiyun vsub.i64 q8, q14, q12 418*4882a593Smuzhiyun vshr.s64 q1, q1, #25 419*4882a593Smuzhiyun vadd.i64 q4, q4, q5 420*4882a593Smuzhiyun vadd.i64 q5, q6, q1 421*4882a593Smuzhiyun vshl.i64 q1, q1, #25 422*4882a593Smuzhiyun vadd.i64 q6, q5, q9 423*4882a593Smuzhiyun vadd.i64 q4, q4, q10 424*4882a593Smuzhiyun vshl.i64 q10, q10, #25 425*4882a593Smuzhiyun vadd.i64 q9, q4, q9 426*4882a593Smuzhiyun vsub.i64 q1, q11, q1 427*4882a593Smuzhiyun vshr.s64 q6, q6, #26 428*4882a593Smuzhiyun vsub.i64 q3, q3, q10 429*4882a593Smuzhiyun vtrn.32 d16, d2 430*4882a593Smuzhiyun vshr.s64 q9, q9, #26 431*4882a593Smuzhiyun vtrn.32 d17, d3 432*4882a593Smuzhiyun vadd.i64 q1, q2, q6 433*4882a593Smuzhiyun vst1.8 d16, [r2, : 64] 434*4882a593Smuzhiyun vshl.i64 q2, q6, #26 435*4882a593Smuzhiyun vst1.8 d17, [r4, : 64] 436*4882a593Smuzhiyun vadd.i64 q6, q7, q9 437*4882a593Smuzhiyun vtrn.32 d0, d6 438*4882a593Smuzhiyun vshl.i64 q7, q9, #26 439*4882a593Smuzhiyun vtrn.32 d1, d7 440*4882a593Smuzhiyun vsub.i64 q2, q5, q2 441*4882a593Smuzhiyun add r2, r2, #16 442*4882a593Smuzhiyun vsub.i64 q3, q4, q7 443*4882a593Smuzhiyun vst1.8 d0, [r2, : 64] 444*4882a593Smuzhiyun add r4, r4, #16 445*4882a593Smuzhiyun vst1.8 d1, [r4, : 64] 446*4882a593Smuzhiyun vtrn.32 d4, d2 447*4882a593Smuzhiyun vtrn.32 d5, d3 448*4882a593Smuzhiyun sub r2, r2, #8 449*4882a593Smuzhiyun sub r4, r4, #8 450*4882a593Smuzhiyun vtrn.32 d6, d12 451*4882a593Smuzhiyun vtrn.32 d7, d13 452*4882a593Smuzhiyun vst1.8 d4, [r2, : 64] 453*4882a593Smuzhiyun vst1.8 d5, [r4, : 64] 454*4882a593Smuzhiyun sub r2, r2, #24 455*4882a593Smuzhiyun sub r4, r4, #24 456*4882a593Smuzhiyun vst1.8 d6, [r2, : 64] 457*4882a593Smuzhiyun vst1.8 d7, [r4, : 64] 458*4882a593Smuzhiyun add r2, r3, #240 459*4882a593Smuzhiyun add r4, r3, #96 460*4882a593Smuzhiyun vld1.8 {d0-d1}, [r4, : 128]! 461*4882a593Smuzhiyun vld1.8 {d2-d3}, [r4, : 128]! 462*4882a593Smuzhiyun vld1.8 {d4}, [r4, : 64] 463*4882a593Smuzhiyun add r4, r3, #144 464*4882a593Smuzhiyun vld1.8 {d6-d7}, [r4, : 128]! 465*4882a593Smuzhiyun vtrn.32 q0, q3 466*4882a593Smuzhiyun vld1.8 {d8-d9}, [r4, : 128]! 467*4882a593Smuzhiyun vshl.i32 q5, q0, #4 468*4882a593Smuzhiyun vtrn.32 q1, q4 469*4882a593Smuzhiyun vshl.i32 q6, q3, #4 470*4882a593Smuzhiyun vadd.i32 q5, q5, q0 471*4882a593Smuzhiyun vadd.i32 q6, q6, q3 472*4882a593Smuzhiyun vshl.i32 q7, q1, #4 473*4882a593Smuzhiyun vld1.8 {d5}, [r4, : 64] 474*4882a593Smuzhiyun vshl.i32 q8, q4, #4 475*4882a593Smuzhiyun vtrn.32 d4, d5 476*4882a593Smuzhiyun vadd.i32 q7, q7, q1 477*4882a593Smuzhiyun vadd.i32 q8, q8, q4 478*4882a593Smuzhiyun vld1.8 {d18-d19}, [r2, : 128]! 479*4882a593Smuzhiyun vshl.i32 q10, q2, #4 480*4882a593Smuzhiyun vld1.8 {d22-d23}, [r2, : 128]! 481*4882a593Smuzhiyun vadd.i32 q10, q10, q2 482*4882a593Smuzhiyun vld1.8 {d24}, [r2, : 64] 483*4882a593Smuzhiyun vadd.i32 q5, q5, q0 484*4882a593Smuzhiyun add r2, r3, #192 485*4882a593Smuzhiyun vld1.8 {d26-d27}, [r2, : 128]! 486*4882a593Smuzhiyun vadd.i32 q6, q6, q3 487*4882a593Smuzhiyun vld1.8 {d28-d29}, [r2, : 128]! 488*4882a593Smuzhiyun vadd.i32 q8, q8, q4 489*4882a593Smuzhiyun vld1.8 {d25}, [r2, : 64] 490*4882a593Smuzhiyun vadd.i32 q10, q10, q2 491*4882a593Smuzhiyun vtrn.32 q9, q13 492*4882a593Smuzhiyun vadd.i32 q7, q7, q1 493*4882a593Smuzhiyun vadd.i32 q5, q5, q0 494*4882a593Smuzhiyun vtrn.32 q11, q14 495*4882a593Smuzhiyun vadd.i32 q6, q6, q3 496*4882a593Smuzhiyun add r2, sp, #528 497*4882a593Smuzhiyun vadd.i32 q10, q10, q2 498*4882a593Smuzhiyun vtrn.32 d24, d25 499*4882a593Smuzhiyun vst1.8 {d12-d13}, [r2, : 128]! 500*4882a593Smuzhiyun vshl.i32 q6, q13, #1 501*4882a593Smuzhiyun vst1.8 {d20-d21}, [r2, : 128]! 502*4882a593Smuzhiyun vshl.i32 q10, q14, #1 503*4882a593Smuzhiyun vst1.8 {d12-d13}, [r2, : 128]! 504*4882a593Smuzhiyun vshl.i32 q15, q12, #1 505*4882a593Smuzhiyun vadd.i32 q8, q8, q4 506*4882a593Smuzhiyun vext.32 d10, d31, d30, #0 507*4882a593Smuzhiyun vadd.i32 q7, q7, q1 508*4882a593Smuzhiyun vst1.8 {d16-d17}, [r2, : 128]! 509*4882a593Smuzhiyun vmull.s32 q8, d18, d5 510*4882a593Smuzhiyun vmlal.s32 q8, d26, d4 511*4882a593Smuzhiyun vmlal.s32 q8, d19, d9 512*4882a593Smuzhiyun vmlal.s32 q8, d27, d3 513*4882a593Smuzhiyun vmlal.s32 q8, d22, d8 514*4882a593Smuzhiyun vmlal.s32 q8, d28, d2 515*4882a593Smuzhiyun vmlal.s32 q8, d23, d7 516*4882a593Smuzhiyun vmlal.s32 q8, d29, d1 517*4882a593Smuzhiyun vmlal.s32 q8, d24, d6 518*4882a593Smuzhiyun vmlal.s32 q8, d25, d0 519*4882a593Smuzhiyun vst1.8 {d14-d15}, [r2, : 128]! 520*4882a593Smuzhiyun vmull.s32 q2, d18, d4 521*4882a593Smuzhiyun vmlal.s32 q2, d12, d9 522*4882a593Smuzhiyun vmlal.s32 q2, d13, d8 523*4882a593Smuzhiyun vmlal.s32 q2, d19, d3 524*4882a593Smuzhiyun vmlal.s32 q2, d22, d2 525*4882a593Smuzhiyun vmlal.s32 q2, d23, d1 526*4882a593Smuzhiyun vmlal.s32 q2, d24, d0 527*4882a593Smuzhiyun vst1.8 {d20-d21}, [r2, : 128]! 528*4882a593Smuzhiyun vmull.s32 q7, d18, d9 529*4882a593Smuzhiyun vmlal.s32 q7, d26, d3 530*4882a593Smuzhiyun vmlal.s32 q7, d19, d8 531*4882a593Smuzhiyun vmlal.s32 q7, d27, d2 532*4882a593Smuzhiyun vmlal.s32 q7, d22, d7 533*4882a593Smuzhiyun vmlal.s32 q7, d28, d1 534*4882a593Smuzhiyun vmlal.s32 q7, d23, d6 535*4882a593Smuzhiyun vmlal.s32 q7, d29, d0 536*4882a593Smuzhiyun vst1.8 {d10-d11}, [r2, : 128]! 537*4882a593Smuzhiyun vmull.s32 q5, d18, d3 538*4882a593Smuzhiyun vmlal.s32 q5, d19, d2 539*4882a593Smuzhiyun vmlal.s32 q5, d22, d1 540*4882a593Smuzhiyun vmlal.s32 q5, d23, d0 541*4882a593Smuzhiyun vmlal.s32 q5, d12, d8 542*4882a593Smuzhiyun vst1.8 {d16-d17}, [r2, : 128] 543*4882a593Smuzhiyun vmull.s32 q4, d18, d8 544*4882a593Smuzhiyun vmlal.s32 q4, d26, d2 545*4882a593Smuzhiyun vmlal.s32 q4, d19, d7 546*4882a593Smuzhiyun vmlal.s32 q4, d27, d1 547*4882a593Smuzhiyun vmlal.s32 q4, d22, d6 548*4882a593Smuzhiyun vmlal.s32 q4, d28, d0 549*4882a593Smuzhiyun vmull.s32 q8, d18, d7 550*4882a593Smuzhiyun vmlal.s32 q8, d26, d1 551*4882a593Smuzhiyun vmlal.s32 q8, d19, d6 552*4882a593Smuzhiyun vmlal.s32 q8, d27, d0 553*4882a593Smuzhiyun add r2, sp, #544 554*4882a593Smuzhiyun vld1.8 {d20-d21}, [r2, : 128] 555*4882a593Smuzhiyun vmlal.s32 q7, d24, d21 556*4882a593Smuzhiyun vmlal.s32 q7, d25, d20 557*4882a593Smuzhiyun vmlal.s32 q4, d23, d21 558*4882a593Smuzhiyun vmlal.s32 q4, d29, d20 559*4882a593Smuzhiyun vmlal.s32 q8, d22, d21 560*4882a593Smuzhiyun vmlal.s32 q8, d28, d20 561*4882a593Smuzhiyun vmlal.s32 q5, d24, d20 562*4882a593Smuzhiyun vst1.8 {d14-d15}, [r2, : 128] 563*4882a593Smuzhiyun vmull.s32 q7, d18, d6 564*4882a593Smuzhiyun vmlal.s32 q7, d26, d0 565*4882a593Smuzhiyun add r2, sp, #624 566*4882a593Smuzhiyun vld1.8 {d30-d31}, [r2, : 128] 567*4882a593Smuzhiyun vmlal.s32 q2, d30, d21 568*4882a593Smuzhiyun vmlal.s32 q7, d19, d21 569*4882a593Smuzhiyun vmlal.s32 q7, d27, d20 570*4882a593Smuzhiyun add r2, sp, #592 571*4882a593Smuzhiyun vld1.8 {d26-d27}, [r2, : 128] 572*4882a593Smuzhiyun vmlal.s32 q4, d25, d27 573*4882a593Smuzhiyun vmlal.s32 q8, d29, d27 574*4882a593Smuzhiyun vmlal.s32 q8, d25, d26 575*4882a593Smuzhiyun vmlal.s32 q7, d28, d27 576*4882a593Smuzhiyun vmlal.s32 q7, d29, d26 577*4882a593Smuzhiyun add r2, sp, #576 578*4882a593Smuzhiyun vld1.8 {d28-d29}, [r2, : 128] 579*4882a593Smuzhiyun vmlal.s32 q4, d24, d29 580*4882a593Smuzhiyun vmlal.s32 q8, d23, d29 581*4882a593Smuzhiyun vmlal.s32 q8, d24, d28 582*4882a593Smuzhiyun vmlal.s32 q7, d22, d29 583*4882a593Smuzhiyun vmlal.s32 q7, d23, d28 584*4882a593Smuzhiyun vst1.8 {d8-d9}, [r2, : 128] 585*4882a593Smuzhiyun add r2, sp, #528 586*4882a593Smuzhiyun vld1.8 {d8-d9}, [r2, : 128] 587*4882a593Smuzhiyun vmlal.s32 q7, d24, d9 588*4882a593Smuzhiyun vmlal.s32 q7, d25, d31 589*4882a593Smuzhiyun vmull.s32 q1, d18, d2 590*4882a593Smuzhiyun vmlal.s32 q1, d19, d1 591*4882a593Smuzhiyun vmlal.s32 q1, d22, d0 592*4882a593Smuzhiyun vmlal.s32 q1, d24, d27 593*4882a593Smuzhiyun vmlal.s32 q1, d23, d20 594*4882a593Smuzhiyun vmlal.s32 q1, d12, d7 595*4882a593Smuzhiyun vmlal.s32 q1, d13, d6 596*4882a593Smuzhiyun vmull.s32 q6, d18, d1 597*4882a593Smuzhiyun vmlal.s32 q6, d19, d0 598*4882a593Smuzhiyun vmlal.s32 q6, d23, d27 599*4882a593Smuzhiyun vmlal.s32 q6, d22, d20 600*4882a593Smuzhiyun vmlal.s32 q6, d24, d26 601*4882a593Smuzhiyun vmull.s32 q0, d18, d0 602*4882a593Smuzhiyun vmlal.s32 q0, d22, d27 603*4882a593Smuzhiyun vmlal.s32 q0, d23, d26 604*4882a593Smuzhiyun vmlal.s32 q0, d24, d31 605*4882a593Smuzhiyun vmlal.s32 q0, d19, d20 606*4882a593Smuzhiyun add r2, sp, #608 607*4882a593Smuzhiyun vld1.8 {d18-d19}, [r2, : 128] 608*4882a593Smuzhiyun vmlal.s32 q2, d18, d7 609*4882a593Smuzhiyun vmlal.s32 q5, d18, d6 610*4882a593Smuzhiyun vmlal.s32 q1, d18, d21 611*4882a593Smuzhiyun vmlal.s32 q0, d18, d28 612*4882a593Smuzhiyun vmlal.s32 q6, d18, d29 613*4882a593Smuzhiyun vmlal.s32 q2, d19, d6 614*4882a593Smuzhiyun vmlal.s32 q5, d19, d21 615*4882a593Smuzhiyun vmlal.s32 q1, d19, d29 616*4882a593Smuzhiyun vmlal.s32 q0, d19, d9 617*4882a593Smuzhiyun vmlal.s32 q6, d19, d28 618*4882a593Smuzhiyun add r2, sp, #560 619*4882a593Smuzhiyun vld1.8 {d18-d19}, [r2, : 128] 620*4882a593Smuzhiyun add r2, sp, #480 621*4882a593Smuzhiyun vld1.8 {d22-d23}, [r2, : 128] 622*4882a593Smuzhiyun vmlal.s32 q5, d19, d7 623*4882a593Smuzhiyun vmlal.s32 q0, d18, d21 624*4882a593Smuzhiyun vmlal.s32 q0, d19, d29 625*4882a593Smuzhiyun vmlal.s32 q6, d18, d6 626*4882a593Smuzhiyun add r2, sp, #496 627*4882a593Smuzhiyun vld1.8 {d6-d7}, [r2, : 128] 628*4882a593Smuzhiyun vmlal.s32 q6, d19, d21 629*4882a593Smuzhiyun add r2, sp, #544 630*4882a593Smuzhiyun vld1.8 {d18-d19}, [r2, : 128] 631*4882a593Smuzhiyun vmlal.s32 q0, d30, d8 632*4882a593Smuzhiyun add r2, sp, #640 633*4882a593Smuzhiyun vld1.8 {d20-d21}, [r2, : 128] 634*4882a593Smuzhiyun vmlal.s32 q5, d30, d29 635*4882a593Smuzhiyun add r2, sp, #576 636*4882a593Smuzhiyun vld1.8 {d24-d25}, [r2, : 128] 637*4882a593Smuzhiyun vmlal.s32 q1, d30, d28 638*4882a593Smuzhiyun vadd.i64 q13, q0, q11 639*4882a593Smuzhiyun vadd.i64 q14, q5, q11 640*4882a593Smuzhiyun vmlal.s32 q6, d30, d9 641*4882a593Smuzhiyun vshr.s64 q4, q13, #26 642*4882a593Smuzhiyun vshr.s64 q13, q14, #26 643*4882a593Smuzhiyun vadd.i64 q7, q7, q4 644*4882a593Smuzhiyun vshl.i64 q4, q4, #26 645*4882a593Smuzhiyun vadd.i64 q14, q7, q3 646*4882a593Smuzhiyun vadd.i64 q9, q9, q13 647*4882a593Smuzhiyun vshl.i64 q13, q13, #26 648*4882a593Smuzhiyun vadd.i64 q15, q9, q3 649*4882a593Smuzhiyun vsub.i64 q0, q0, q4 650*4882a593Smuzhiyun vshr.s64 q4, q14, #25 651*4882a593Smuzhiyun vsub.i64 q5, q5, q13 652*4882a593Smuzhiyun vshr.s64 q13, q15, #25 653*4882a593Smuzhiyun vadd.i64 q6, q6, q4 654*4882a593Smuzhiyun vshl.i64 q4, q4, #25 655*4882a593Smuzhiyun vadd.i64 q14, q6, q11 656*4882a593Smuzhiyun vadd.i64 q2, q2, q13 657*4882a593Smuzhiyun vsub.i64 q4, q7, q4 658*4882a593Smuzhiyun vshr.s64 q7, q14, #26 659*4882a593Smuzhiyun vshl.i64 q13, q13, #25 660*4882a593Smuzhiyun vadd.i64 q14, q2, q11 661*4882a593Smuzhiyun vadd.i64 q8, q8, q7 662*4882a593Smuzhiyun vshl.i64 q7, q7, #26 663*4882a593Smuzhiyun vadd.i64 q15, q8, q3 664*4882a593Smuzhiyun vsub.i64 q9, q9, q13 665*4882a593Smuzhiyun vshr.s64 q13, q14, #26 666*4882a593Smuzhiyun vsub.i64 q6, q6, q7 667*4882a593Smuzhiyun vshr.s64 q7, q15, #25 668*4882a593Smuzhiyun vadd.i64 q10, q10, q13 669*4882a593Smuzhiyun vshl.i64 q13, q13, #26 670*4882a593Smuzhiyun vadd.i64 q14, q10, q3 671*4882a593Smuzhiyun vadd.i64 q1, q1, q7 672*4882a593Smuzhiyun add r2, r3, #144 673*4882a593Smuzhiyun vshl.i64 q7, q7, #25 674*4882a593Smuzhiyun add r4, r3, #96 675*4882a593Smuzhiyun vadd.i64 q15, q1, q11 676*4882a593Smuzhiyun add r2, r2, #8 677*4882a593Smuzhiyun vsub.i64 q2, q2, q13 678*4882a593Smuzhiyun add r4, r4, #8 679*4882a593Smuzhiyun vshr.s64 q13, q14, #25 680*4882a593Smuzhiyun vsub.i64 q7, q8, q7 681*4882a593Smuzhiyun vshr.s64 q8, q15, #26 682*4882a593Smuzhiyun vadd.i64 q14, q13, q13 683*4882a593Smuzhiyun vadd.i64 q12, q12, q8 684*4882a593Smuzhiyun vtrn.32 d12, d14 685*4882a593Smuzhiyun vshl.i64 q8, q8, #26 686*4882a593Smuzhiyun vtrn.32 d13, d15 687*4882a593Smuzhiyun vadd.i64 q3, q12, q3 688*4882a593Smuzhiyun vadd.i64 q0, q0, q14 689*4882a593Smuzhiyun vst1.8 d12, [r2, : 64]! 690*4882a593Smuzhiyun vshl.i64 q7, q13, #4 691*4882a593Smuzhiyun vst1.8 d13, [r4, : 64]! 692*4882a593Smuzhiyun vsub.i64 q1, q1, q8 693*4882a593Smuzhiyun vshr.s64 q3, q3, #25 694*4882a593Smuzhiyun vadd.i64 q0, q0, q7 695*4882a593Smuzhiyun vadd.i64 q5, q5, q3 696*4882a593Smuzhiyun vshl.i64 q3, q3, #25 697*4882a593Smuzhiyun vadd.i64 q6, q5, q11 698*4882a593Smuzhiyun vadd.i64 q0, q0, q13 699*4882a593Smuzhiyun vshl.i64 q7, q13, #25 700*4882a593Smuzhiyun vadd.i64 q8, q0, q11 701*4882a593Smuzhiyun vsub.i64 q3, q12, q3 702*4882a593Smuzhiyun vshr.s64 q6, q6, #26 703*4882a593Smuzhiyun vsub.i64 q7, q10, q7 704*4882a593Smuzhiyun vtrn.32 d2, d6 705*4882a593Smuzhiyun vshr.s64 q8, q8, #26 706*4882a593Smuzhiyun vtrn.32 d3, d7 707*4882a593Smuzhiyun vadd.i64 q3, q9, q6 708*4882a593Smuzhiyun vst1.8 d2, [r2, : 64] 709*4882a593Smuzhiyun vshl.i64 q6, q6, #26 710*4882a593Smuzhiyun vst1.8 d3, [r4, : 64] 711*4882a593Smuzhiyun vadd.i64 q1, q4, q8 712*4882a593Smuzhiyun vtrn.32 d4, d14 713*4882a593Smuzhiyun vshl.i64 q4, q8, #26 714*4882a593Smuzhiyun vtrn.32 d5, d15 715*4882a593Smuzhiyun vsub.i64 q5, q5, q6 716*4882a593Smuzhiyun add r2, r2, #16 717*4882a593Smuzhiyun vsub.i64 q0, q0, q4 718*4882a593Smuzhiyun vst1.8 d4, [r2, : 64] 719*4882a593Smuzhiyun add r4, r4, #16 720*4882a593Smuzhiyun vst1.8 d5, [r4, : 64] 721*4882a593Smuzhiyun vtrn.32 d10, d6 722*4882a593Smuzhiyun vtrn.32 d11, d7 723*4882a593Smuzhiyun sub r2, r2, #8 724*4882a593Smuzhiyun sub r4, r4, #8 725*4882a593Smuzhiyun vtrn.32 d0, d2 726*4882a593Smuzhiyun vtrn.32 d1, d3 727*4882a593Smuzhiyun vst1.8 d10, [r2, : 64] 728*4882a593Smuzhiyun vst1.8 d11, [r4, : 64] 729*4882a593Smuzhiyun sub r2, r2, #24 730*4882a593Smuzhiyun sub r4, r4, #24 731*4882a593Smuzhiyun vst1.8 d0, [r2, : 64] 732*4882a593Smuzhiyun vst1.8 d1, [r4, : 64] 733*4882a593Smuzhiyun add r2, r3, #288 734*4882a593Smuzhiyun add r4, r3, #336 735*4882a593Smuzhiyun vld1.8 {d0-d1}, [r2, : 128]! 736*4882a593Smuzhiyun vld1.8 {d2-d3}, [r4, : 128]! 737*4882a593Smuzhiyun vsub.i32 q0, q0, q1 738*4882a593Smuzhiyun vld1.8 {d2-d3}, [r2, : 128]! 739*4882a593Smuzhiyun vld1.8 {d4-d5}, [r4, : 128]! 740*4882a593Smuzhiyun vsub.i32 q1, q1, q2 741*4882a593Smuzhiyun add r5, r3, #240 742*4882a593Smuzhiyun vld1.8 {d4}, [r2, : 64] 743*4882a593Smuzhiyun vld1.8 {d6}, [r4, : 64] 744*4882a593Smuzhiyun vsub.i32 q2, q2, q3 745*4882a593Smuzhiyun vst1.8 {d0-d1}, [r5, : 128]! 746*4882a593Smuzhiyun vst1.8 {d2-d3}, [r5, : 128]! 747*4882a593Smuzhiyun vst1.8 d4, [r5, : 64] 748*4882a593Smuzhiyun add r2, r3, #144 749*4882a593Smuzhiyun add r4, r3, #96 750*4882a593Smuzhiyun add r5, r3, #144 751*4882a593Smuzhiyun add r6, r3, #192 752*4882a593Smuzhiyun vld1.8 {d0-d1}, [r2, : 128]! 753*4882a593Smuzhiyun vld1.8 {d2-d3}, [r4, : 128]! 754*4882a593Smuzhiyun vsub.i32 q2, q0, q1 755*4882a593Smuzhiyun vadd.i32 q0, q0, q1 756*4882a593Smuzhiyun vld1.8 {d2-d3}, [r2, : 128]! 757*4882a593Smuzhiyun vld1.8 {d6-d7}, [r4, : 128]! 758*4882a593Smuzhiyun vsub.i32 q4, q1, q3 759*4882a593Smuzhiyun vadd.i32 q1, q1, q3 760*4882a593Smuzhiyun vld1.8 {d6}, [r2, : 64] 761*4882a593Smuzhiyun vld1.8 {d10}, [r4, : 64] 762*4882a593Smuzhiyun vsub.i32 q6, q3, q5 763*4882a593Smuzhiyun vadd.i32 q3, q3, q5 764*4882a593Smuzhiyun vst1.8 {d4-d5}, [r5, : 128]! 765*4882a593Smuzhiyun vst1.8 {d0-d1}, [r6, : 128]! 766*4882a593Smuzhiyun vst1.8 {d8-d9}, [r5, : 128]! 767*4882a593Smuzhiyun vst1.8 {d2-d3}, [r6, : 128]! 768*4882a593Smuzhiyun vst1.8 d12, [r5, : 64] 769*4882a593Smuzhiyun vst1.8 d6, [r6, : 64] 770*4882a593Smuzhiyun add r2, r3, #0 771*4882a593Smuzhiyun add r4, r3, #240 772*4882a593Smuzhiyun vld1.8 {d0-d1}, [r4, : 128]! 773*4882a593Smuzhiyun vld1.8 {d2-d3}, [r4, : 128]! 774*4882a593Smuzhiyun vld1.8 {d4}, [r4, : 64] 775*4882a593Smuzhiyun add r4, r3, #336 776*4882a593Smuzhiyun vld1.8 {d6-d7}, [r4, : 128]! 777*4882a593Smuzhiyun vtrn.32 q0, q3 778*4882a593Smuzhiyun vld1.8 {d8-d9}, [r4, : 128]! 779*4882a593Smuzhiyun vshl.i32 q5, q0, #4 780*4882a593Smuzhiyun vtrn.32 q1, q4 781*4882a593Smuzhiyun vshl.i32 q6, q3, #4 782*4882a593Smuzhiyun vadd.i32 q5, q5, q0 783*4882a593Smuzhiyun vadd.i32 q6, q6, q3 784*4882a593Smuzhiyun vshl.i32 q7, q1, #4 785*4882a593Smuzhiyun vld1.8 {d5}, [r4, : 64] 786*4882a593Smuzhiyun vshl.i32 q8, q4, #4 787*4882a593Smuzhiyun vtrn.32 d4, d5 788*4882a593Smuzhiyun vadd.i32 q7, q7, q1 789*4882a593Smuzhiyun vadd.i32 q8, q8, q4 790*4882a593Smuzhiyun vld1.8 {d18-d19}, [r2, : 128]! 791*4882a593Smuzhiyun vshl.i32 q10, q2, #4 792*4882a593Smuzhiyun vld1.8 {d22-d23}, [r2, : 128]! 793*4882a593Smuzhiyun vadd.i32 q10, q10, q2 794*4882a593Smuzhiyun vld1.8 {d24}, [r2, : 64] 795*4882a593Smuzhiyun vadd.i32 q5, q5, q0 796*4882a593Smuzhiyun add r2, r3, #288 797*4882a593Smuzhiyun vld1.8 {d26-d27}, [r2, : 128]! 798*4882a593Smuzhiyun vadd.i32 q6, q6, q3 799*4882a593Smuzhiyun vld1.8 {d28-d29}, [r2, : 128]! 800*4882a593Smuzhiyun vadd.i32 q8, q8, q4 801*4882a593Smuzhiyun vld1.8 {d25}, [r2, : 64] 802*4882a593Smuzhiyun vadd.i32 q10, q10, q2 803*4882a593Smuzhiyun vtrn.32 q9, q13 804*4882a593Smuzhiyun vadd.i32 q7, q7, q1 805*4882a593Smuzhiyun vadd.i32 q5, q5, q0 806*4882a593Smuzhiyun vtrn.32 q11, q14 807*4882a593Smuzhiyun vadd.i32 q6, q6, q3 808*4882a593Smuzhiyun add r2, sp, #528 809*4882a593Smuzhiyun vadd.i32 q10, q10, q2 810*4882a593Smuzhiyun vtrn.32 d24, d25 811*4882a593Smuzhiyun vst1.8 {d12-d13}, [r2, : 128]! 812*4882a593Smuzhiyun vshl.i32 q6, q13, #1 813*4882a593Smuzhiyun vst1.8 {d20-d21}, [r2, : 128]! 814*4882a593Smuzhiyun vshl.i32 q10, q14, #1 815*4882a593Smuzhiyun vst1.8 {d12-d13}, [r2, : 128]! 816*4882a593Smuzhiyun vshl.i32 q15, q12, #1 817*4882a593Smuzhiyun vadd.i32 q8, q8, q4 818*4882a593Smuzhiyun vext.32 d10, d31, d30, #0 819*4882a593Smuzhiyun vadd.i32 q7, q7, q1 820*4882a593Smuzhiyun vst1.8 {d16-d17}, [r2, : 128]! 821*4882a593Smuzhiyun vmull.s32 q8, d18, d5 822*4882a593Smuzhiyun vmlal.s32 q8, d26, d4 823*4882a593Smuzhiyun vmlal.s32 q8, d19, d9 824*4882a593Smuzhiyun vmlal.s32 q8, d27, d3 825*4882a593Smuzhiyun vmlal.s32 q8, d22, d8 826*4882a593Smuzhiyun vmlal.s32 q8, d28, d2 827*4882a593Smuzhiyun vmlal.s32 q8, d23, d7 828*4882a593Smuzhiyun vmlal.s32 q8, d29, d1 829*4882a593Smuzhiyun vmlal.s32 q8, d24, d6 830*4882a593Smuzhiyun vmlal.s32 q8, d25, d0 831*4882a593Smuzhiyun vst1.8 {d14-d15}, [r2, : 128]! 832*4882a593Smuzhiyun vmull.s32 q2, d18, d4 833*4882a593Smuzhiyun vmlal.s32 q2, d12, d9 834*4882a593Smuzhiyun vmlal.s32 q2, d13, d8 835*4882a593Smuzhiyun vmlal.s32 q2, d19, d3 836*4882a593Smuzhiyun vmlal.s32 q2, d22, d2 837*4882a593Smuzhiyun vmlal.s32 q2, d23, d1 838*4882a593Smuzhiyun vmlal.s32 q2, d24, d0 839*4882a593Smuzhiyun vst1.8 {d20-d21}, [r2, : 128]! 840*4882a593Smuzhiyun vmull.s32 q7, d18, d9 841*4882a593Smuzhiyun vmlal.s32 q7, d26, d3 842*4882a593Smuzhiyun vmlal.s32 q7, d19, d8 843*4882a593Smuzhiyun vmlal.s32 q7, d27, d2 844*4882a593Smuzhiyun vmlal.s32 q7, d22, d7 845*4882a593Smuzhiyun vmlal.s32 q7, d28, d1 846*4882a593Smuzhiyun vmlal.s32 q7, d23, d6 847*4882a593Smuzhiyun vmlal.s32 q7, d29, d0 848*4882a593Smuzhiyun vst1.8 {d10-d11}, [r2, : 128]! 849*4882a593Smuzhiyun vmull.s32 q5, d18, d3 850*4882a593Smuzhiyun vmlal.s32 q5, d19, d2 851*4882a593Smuzhiyun vmlal.s32 q5, d22, d1 852*4882a593Smuzhiyun vmlal.s32 q5, d23, d0 853*4882a593Smuzhiyun vmlal.s32 q5, d12, d8 854*4882a593Smuzhiyun vst1.8 {d16-d17}, [r2, : 128]! 855*4882a593Smuzhiyun vmull.s32 q4, d18, d8 856*4882a593Smuzhiyun vmlal.s32 q4, d26, d2 857*4882a593Smuzhiyun vmlal.s32 q4, d19, d7 858*4882a593Smuzhiyun vmlal.s32 q4, d27, d1 859*4882a593Smuzhiyun vmlal.s32 q4, d22, d6 860*4882a593Smuzhiyun vmlal.s32 q4, d28, d0 861*4882a593Smuzhiyun vmull.s32 q8, d18, d7 862*4882a593Smuzhiyun vmlal.s32 q8, d26, d1 863*4882a593Smuzhiyun vmlal.s32 q8, d19, d6 864*4882a593Smuzhiyun vmlal.s32 q8, d27, d0 865*4882a593Smuzhiyun add r2, sp, #544 866*4882a593Smuzhiyun vld1.8 {d20-d21}, [r2, : 128] 867*4882a593Smuzhiyun vmlal.s32 q7, d24, d21 868*4882a593Smuzhiyun vmlal.s32 q7, d25, d20 869*4882a593Smuzhiyun vmlal.s32 q4, d23, d21 870*4882a593Smuzhiyun vmlal.s32 q4, d29, d20 871*4882a593Smuzhiyun vmlal.s32 q8, d22, d21 872*4882a593Smuzhiyun vmlal.s32 q8, d28, d20 873*4882a593Smuzhiyun vmlal.s32 q5, d24, d20 874*4882a593Smuzhiyun vst1.8 {d14-d15}, [r2, : 128] 875*4882a593Smuzhiyun vmull.s32 q7, d18, d6 876*4882a593Smuzhiyun vmlal.s32 q7, d26, d0 877*4882a593Smuzhiyun add r2, sp, #624 878*4882a593Smuzhiyun vld1.8 {d30-d31}, [r2, : 128] 879*4882a593Smuzhiyun vmlal.s32 q2, d30, d21 880*4882a593Smuzhiyun vmlal.s32 q7, d19, d21 881*4882a593Smuzhiyun vmlal.s32 q7, d27, d20 882*4882a593Smuzhiyun add r2, sp, #592 883*4882a593Smuzhiyun vld1.8 {d26-d27}, [r2, : 128] 884*4882a593Smuzhiyun vmlal.s32 q4, d25, d27 885*4882a593Smuzhiyun vmlal.s32 q8, d29, d27 886*4882a593Smuzhiyun vmlal.s32 q8, d25, d26 887*4882a593Smuzhiyun vmlal.s32 q7, d28, d27 888*4882a593Smuzhiyun vmlal.s32 q7, d29, d26 889*4882a593Smuzhiyun add r2, sp, #576 890*4882a593Smuzhiyun vld1.8 {d28-d29}, [r2, : 128] 891*4882a593Smuzhiyun vmlal.s32 q4, d24, d29 892*4882a593Smuzhiyun vmlal.s32 q8, d23, d29 893*4882a593Smuzhiyun vmlal.s32 q8, d24, d28 894*4882a593Smuzhiyun vmlal.s32 q7, d22, d29 895*4882a593Smuzhiyun vmlal.s32 q7, d23, d28 896*4882a593Smuzhiyun vst1.8 {d8-d9}, [r2, : 128] 897*4882a593Smuzhiyun add r2, sp, #528 898*4882a593Smuzhiyun vld1.8 {d8-d9}, [r2, : 128] 899*4882a593Smuzhiyun vmlal.s32 q7, d24, d9 900*4882a593Smuzhiyun vmlal.s32 q7, d25, d31 901*4882a593Smuzhiyun vmull.s32 q1, d18, d2 902*4882a593Smuzhiyun vmlal.s32 q1, d19, d1 903*4882a593Smuzhiyun vmlal.s32 q1, d22, d0 904*4882a593Smuzhiyun vmlal.s32 q1, d24, d27 905*4882a593Smuzhiyun vmlal.s32 q1, d23, d20 906*4882a593Smuzhiyun vmlal.s32 q1, d12, d7 907*4882a593Smuzhiyun vmlal.s32 q1, d13, d6 908*4882a593Smuzhiyun vmull.s32 q6, d18, d1 909*4882a593Smuzhiyun vmlal.s32 q6, d19, d0 910*4882a593Smuzhiyun vmlal.s32 q6, d23, d27 911*4882a593Smuzhiyun vmlal.s32 q6, d22, d20 912*4882a593Smuzhiyun vmlal.s32 q6, d24, d26 913*4882a593Smuzhiyun vmull.s32 q0, d18, d0 914*4882a593Smuzhiyun vmlal.s32 q0, d22, d27 915*4882a593Smuzhiyun vmlal.s32 q0, d23, d26 916*4882a593Smuzhiyun vmlal.s32 q0, d24, d31 917*4882a593Smuzhiyun vmlal.s32 q0, d19, d20 918*4882a593Smuzhiyun add r2, sp, #608 919*4882a593Smuzhiyun vld1.8 {d18-d19}, [r2, : 128] 920*4882a593Smuzhiyun vmlal.s32 q2, d18, d7 921*4882a593Smuzhiyun vmlal.s32 q5, d18, d6 922*4882a593Smuzhiyun vmlal.s32 q1, d18, d21 923*4882a593Smuzhiyun vmlal.s32 q0, d18, d28 924*4882a593Smuzhiyun vmlal.s32 q6, d18, d29 925*4882a593Smuzhiyun vmlal.s32 q2, d19, d6 926*4882a593Smuzhiyun vmlal.s32 q5, d19, d21 927*4882a593Smuzhiyun vmlal.s32 q1, d19, d29 928*4882a593Smuzhiyun vmlal.s32 q0, d19, d9 929*4882a593Smuzhiyun vmlal.s32 q6, d19, d28 930*4882a593Smuzhiyun add r2, sp, #560 931*4882a593Smuzhiyun vld1.8 {d18-d19}, [r2, : 128] 932*4882a593Smuzhiyun add r2, sp, #480 933*4882a593Smuzhiyun vld1.8 {d22-d23}, [r2, : 128] 934*4882a593Smuzhiyun vmlal.s32 q5, d19, d7 935*4882a593Smuzhiyun vmlal.s32 q0, d18, d21 936*4882a593Smuzhiyun vmlal.s32 q0, d19, d29 937*4882a593Smuzhiyun vmlal.s32 q6, d18, d6 938*4882a593Smuzhiyun add r2, sp, #496 939*4882a593Smuzhiyun vld1.8 {d6-d7}, [r2, : 128] 940*4882a593Smuzhiyun vmlal.s32 q6, d19, d21 941*4882a593Smuzhiyun add r2, sp, #544 942*4882a593Smuzhiyun vld1.8 {d18-d19}, [r2, : 128] 943*4882a593Smuzhiyun vmlal.s32 q0, d30, d8 944*4882a593Smuzhiyun add r2, sp, #640 945*4882a593Smuzhiyun vld1.8 {d20-d21}, [r2, : 128] 946*4882a593Smuzhiyun vmlal.s32 q5, d30, d29 947*4882a593Smuzhiyun add r2, sp, #576 948*4882a593Smuzhiyun vld1.8 {d24-d25}, [r2, : 128] 949*4882a593Smuzhiyun vmlal.s32 q1, d30, d28 950*4882a593Smuzhiyun vadd.i64 q13, q0, q11 951*4882a593Smuzhiyun vadd.i64 q14, q5, q11 952*4882a593Smuzhiyun vmlal.s32 q6, d30, d9 953*4882a593Smuzhiyun vshr.s64 q4, q13, #26 954*4882a593Smuzhiyun vshr.s64 q13, q14, #26 955*4882a593Smuzhiyun vadd.i64 q7, q7, q4 956*4882a593Smuzhiyun vshl.i64 q4, q4, #26 957*4882a593Smuzhiyun vadd.i64 q14, q7, q3 958*4882a593Smuzhiyun vadd.i64 q9, q9, q13 959*4882a593Smuzhiyun vshl.i64 q13, q13, #26 960*4882a593Smuzhiyun vadd.i64 q15, q9, q3 961*4882a593Smuzhiyun vsub.i64 q0, q0, q4 962*4882a593Smuzhiyun vshr.s64 q4, q14, #25 963*4882a593Smuzhiyun vsub.i64 q5, q5, q13 964*4882a593Smuzhiyun vshr.s64 q13, q15, #25 965*4882a593Smuzhiyun vadd.i64 q6, q6, q4 966*4882a593Smuzhiyun vshl.i64 q4, q4, #25 967*4882a593Smuzhiyun vadd.i64 q14, q6, q11 968*4882a593Smuzhiyun vadd.i64 q2, q2, q13 969*4882a593Smuzhiyun vsub.i64 q4, q7, q4 970*4882a593Smuzhiyun vshr.s64 q7, q14, #26 971*4882a593Smuzhiyun vshl.i64 q13, q13, #25 972*4882a593Smuzhiyun vadd.i64 q14, q2, q11 973*4882a593Smuzhiyun vadd.i64 q8, q8, q7 974*4882a593Smuzhiyun vshl.i64 q7, q7, #26 975*4882a593Smuzhiyun vadd.i64 q15, q8, q3 976*4882a593Smuzhiyun vsub.i64 q9, q9, q13 977*4882a593Smuzhiyun vshr.s64 q13, q14, #26 978*4882a593Smuzhiyun vsub.i64 q6, q6, q7 979*4882a593Smuzhiyun vshr.s64 q7, q15, #25 980*4882a593Smuzhiyun vadd.i64 q10, q10, q13 981*4882a593Smuzhiyun vshl.i64 q13, q13, #26 982*4882a593Smuzhiyun vadd.i64 q14, q10, q3 983*4882a593Smuzhiyun vadd.i64 q1, q1, q7 984*4882a593Smuzhiyun add r2, r3, #288 985*4882a593Smuzhiyun vshl.i64 q7, q7, #25 986*4882a593Smuzhiyun add r4, r3, #96 987*4882a593Smuzhiyun vadd.i64 q15, q1, q11 988*4882a593Smuzhiyun add r2, r2, #8 989*4882a593Smuzhiyun vsub.i64 q2, q2, q13 990*4882a593Smuzhiyun add r4, r4, #8 991*4882a593Smuzhiyun vshr.s64 q13, q14, #25 992*4882a593Smuzhiyun vsub.i64 q7, q8, q7 993*4882a593Smuzhiyun vshr.s64 q8, q15, #26 994*4882a593Smuzhiyun vadd.i64 q14, q13, q13 995*4882a593Smuzhiyun vadd.i64 q12, q12, q8 996*4882a593Smuzhiyun vtrn.32 d12, d14 997*4882a593Smuzhiyun vshl.i64 q8, q8, #26 998*4882a593Smuzhiyun vtrn.32 d13, d15 999*4882a593Smuzhiyun vadd.i64 q3, q12, q3 1000*4882a593Smuzhiyun vadd.i64 q0, q0, q14 1001*4882a593Smuzhiyun vst1.8 d12, [r2, : 64]! 1002*4882a593Smuzhiyun vshl.i64 q7, q13, #4 1003*4882a593Smuzhiyun vst1.8 d13, [r4, : 64]! 1004*4882a593Smuzhiyun vsub.i64 q1, q1, q8 1005*4882a593Smuzhiyun vshr.s64 q3, q3, #25 1006*4882a593Smuzhiyun vadd.i64 q0, q0, q7 1007*4882a593Smuzhiyun vadd.i64 q5, q5, q3 1008*4882a593Smuzhiyun vshl.i64 q3, q3, #25 1009*4882a593Smuzhiyun vadd.i64 q6, q5, q11 1010*4882a593Smuzhiyun vadd.i64 q0, q0, q13 1011*4882a593Smuzhiyun vshl.i64 q7, q13, #25 1012*4882a593Smuzhiyun vadd.i64 q8, q0, q11 1013*4882a593Smuzhiyun vsub.i64 q3, q12, q3 1014*4882a593Smuzhiyun vshr.s64 q6, q6, #26 1015*4882a593Smuzhiyun vsub.i64 q7, q10, q7 1016*4882a593Smuzhiyun vtrn.32 d2, d6 1017*4882a593Smuzhiyun vshr.s64 q8, q8, #26 1018*4882a593Smuzhiyun vtrn.32 d3, d7 1019*4882a593Smuzhiyun vadd.i64 q3, q9, q6 1020*4882a593Smuzhiyun vst1.8 d2, [r2, : 64] 1021*4882a593Smuzhiyun vshl.i64 q6, q6, #26 1022*4882a593Smuzhiyun vst1.8 d3, [r4, : 64] 1023*4882a593Smuzhiyun vadd.i64 q1, q4, q8 1024*4882a593Smuzhiyun vtrn.32 d4, d14 1025*4882a593Smuzhiyun vshl.i64 q4, q8, #26 1026*4882a593Smuzhiyun vtrn.32 d5, d15 1027*4882a593Smuzhiyun vsub.i64 q5, q5, q6 1028*4882a593Smuzhiyun add r2, r2, #16 1029*4882a593Smuzhiyun vsub.i64 q0, q0, q4 1030*4882a593Smuzhiyun vst1.8 d4, [r2, : 64] 1031*4882a593Smuzhiyun add r4, r4, #16 1032*4882a593Smuzhiyun vst1.8 d5, [r4, : 64] 1033*4882a593Smuzhiyun vtrn.32 d10, d6 1034*4882a593Smuzhiyun vtrn.32 d11, d7 1035*4882a593Smuzhiyun sub r2, r2, #8 1036*4882a593Smuzhiyun sub r4, r4, #8 1037*4882a593Smuzhiyun vtrn.32 d0, d2 1038*4882a593Smuzhiyun vtrn.32 d1, d3 1039*4882a593Smuzhiyun vst1.8 d10, [r2, : 64] 1040*4882a593Smuzhiyun vst1.8 d11, [r4, : 64] 1041*4882a593Smuzhiyun sub r2, r2, #24 1042*4882a593Smuzhiyun sub r4, r4, #24 1043*4882a593Smuzhiyun vst1.8 d0, [r2, : 64] 1044*4882a593Smuzhiyun vst1.8 d1, [r4, : 64] 1045*4882a593Smuzhiyun add r2, sp, #512 1046*4882a593Smuzhiyun add r4, r3, #144 1047*4882a593Smuzhiyun add r5, r3, #192 1048*4882a593Smuzhiyun vld1.8 {d0-d1}, [r2, : 128] 1049*4882a593Smuzhiyun vld1.8 {d2-d3}, [r4, : 128]! 1050*4882a593Smuzhiyun vld1.8 {d4-d5}, [r5, : 128]! 1051*4882a593Smuzhiyun vzip.i32 q1, q2 1052*4882a593Smuzhiyun vld1.8 {d6-d7}, [r4, : 128]! 1053*4882a593Smuzhiyun vld1.8 {d8-d9}, [r5, : 128]! 1054*4882a593Smuzhiyun vshl.i32 q5, q1, #1 1055*4882a593Smuzhiyun vzip.i32 q3, q4 1056*4882a593Smuzhiyun vshl.i32 q6, q2, #1 1057*4882a593Smuzhiyun vld1.8 {d14}, [r4, : 64] 1058*4882a593Smuzhiyun vshl.i32 q8, q3, #1 1059*4882a593Smuzhiyun vld1.8 {d15}, [r5, : 64] 1060*4882a593Smuzhiyun vshl.i32 q9, q4, #1 1061*4882a593Smuzhiyun vmul.i32 d21, d7, d1 1062*4882a593Smuzhiyun vtrn.32 d14, d15 1063*4882a593Smuzhiyun vmul.i32 q11, q4, q0 1064*4882a593Smuzhiyun vmul.i32 q0, q7, q0 1065*4882a593Smuzhiyun vmull.s32 q12, d2, d2 1066*4882a593Smuzhiyun vmlal.s32 q12, d11, d1 1067*4882a593Smuzhiyun vmlal.s32 q12, d12, d0 1068*4882a593Smuzhiyun vmlal.s32 q12, d13, d23 1069*4882a593Smuzhiyun vmlal.s32 q12, d16, d22 1070*4882a593Smuzhiyun vmlal.s32 q12, d7, d21 1071*4882a593Smuzhiyun vmull.s32 q10, d2, d11 1072*4882a593Smuzhiyun vmlal.s32 q10, d4, d1 1073*4882a593Smuzhiyun vmlal.s32 q10, d13, d0 1074*4882a593Smuzhiyun vmlal.s32 q10, d6, d23 1075*4882a593Smuzhiyun vmlal.s32 q10, d17, d22 1076*4882a593Smuzhiyun vmull.s32 q13, d10, d4 1077*4882a593Smuzhiyun vmlal.s32 q13, d11, d3 1078*4882a593Smuzhiyun vmlal.s32 q13, d13, d1 1079*4882a593Smuzhiyun vmlal.s32 q13, d16, d0 1080*4882a593Smuzhiyun vmlal.s32 q13, d17, d23 1081*4882a593Smuzhiyun vmlal.s32 q13, d8, d22 1082*4882a593Smuzhiyun vmull.s32 q1, d10, d5 1083*4882a593Smuzhiyun vmlal.s32 q1, d11, d4 1084*4882a593Smuzhiyun vmlal.s32 q1, d6, d1 1085*4882a593Smuzhiyun vmlal.s32 q1, d17, d0 1086*4882a593Smuzhiyun vmlal.s32 q1, d8, d23 1087*4882a593Smuzhiyun vmull.s32 q14, d10, d6 1088*4882a593Smuzhiyun vmlal.s32 q14, d11, d13 1089*4882a593Smuzhiyun vmlal.s32 q14, d4, d4 1090*4882a593Smuzhiyun vmlal.s32 q14, d17, d1 1091*4882a593Smuzhiyun vmlal.s32 q14, d18, d0 1092*4882a593Smuzhiyun vmlal.s32 q14, d9, d23 1093*4882a593Smuzhiyun vmull.s32 q11, d10, d7 1094*4882a593Smuzhiyun vmlal.s32 q11, d11, d6 1095*4882a593Smuzhiyun vmlal.s32 q11, d12, d5 1096*4882a593Smuzhiyun vmlal.s32 q11, d8, d1 1097*4882a593Smuzhiyun vmlal.s32 q11, d19, d0 1098*4882a593Smuzhiyun vmull.s32 q15, d10, d8 1099*4882a593Smuzhiyun vmlal.s32 q15, d11, d17 1100*4882a593Smuzhiyun vmlal.s32 q15, d12, d6 1101*4882a593Smuzhiyun vmlal.s32 q15, d13, d5 1102*4882a593Smuzhiyun vmlal.s32 q15, d19, d1 1103*4882a593Smuzhiyun vmlal.s32 q15, d14, d0 1104*4882a593Smuzhiyun vmull.s32 q2, d10, d9 1105*4882a593Smuzhiyun vmlal.s32 q2, d11, d8 1106*4882a593Smuzhiyun vmlal.s32 q2, d12, d7 1107*4882a593Smuzhiyun vmlal.s32 q2, d13, d6 1108*4882a593Smuzhiyun vmlal.s32 q2, d14, d1 1109*4882a593Smuzhiyun vmull.s32 q0, d15, d1 1110*4882a593Smuzhiyun vmlal.s32 q0, d10, d14 1111*4882a593Smuzhiyun vmlal.s32 q0, d11, d19 1112*4882a593Smuzhiyun vmlal.s32 q0, d12, d8 1113*4882a593Smuzhiyun vmlal.s32 q0, d13, d17 1114*4882a593Smuzhiyun vmlal.s32 q0, d6, d6 1115*4882a593Smuzhiyun add r2, sp, #480 1116*4882a593Smuzhiyun vld1.8 {d18-d19}, [r2, : 128]! 1117*4882a593Smuzhiyun vmull.s32 q3, d16, d7 1118*4882a593Smuzhiyun vmlal.s32 q3, d10, d15 1119*4882a593Smuzhiyun vmlal.s32 q3, d11, d14 1120*4882a593Smuzhiyun vmlal.s32 q3, d12, d9 1121*4882a593Smuzhiyun vmlal.s32 q3, d13, d8 1122*4882a593Smuzhiyun vld1.8 {d8-d9}, [r2, : 128] 1123*4882a593Smuzhiyun vadd.i64 q5, q12, q9 1124*4882a593Smuzhiyun vadd.i64 q6, q15, q9 1125*4882a593Smuzhiyun vshr.s64 q5, q5, #26 1126*4882a593Smuzhiyun vshr.s64 q6, q6, #26 1127*4882a593Smuzhiyun vadd.i64 q7, q10, q5 1128*4882a593Smuzhiyun vshl.i64 q5, q5, #26 1129*4882a593Smuzhiyun vadd.i64 q8, q7, q4 1130*4882a593Smuzhiyun vadd.i64 q2, q2, q6 1131*4882a593Smuzhiyun vshl.i64 q6, q6, #26 1132*4882a593Smuzhiyun vadd.i64 q10, q2, q4 1133*4882a593Smuzhiyun vsub.i64 q5, q12, q5 1134*4882a593Smuzhiyun vshr.s64 q8, q8, #25 1135*4882a593Smuzhiyun vsub.i64 q6, q15, q6 1136*4882a593Smuzhiyun vshr.s64 q10, q10, #25 1137*4882a593Smuzhiyun vadd.i64 q12, q13, q8 1138*4882a593Smuzhiyun vshl.i64 q8, q8, #25 1139*4882a593Smuzhiyun vadd.i64 q13, q12, q9 1140*4882a593Smuzhiyun vadd.i64 q0, q0, q10 1141*4882a593Smuzhiyun vsub.i64 q7, q7, q8 1142*4882a593Smuzhiyun vshr.s64 q8, q13, #26 1143*4882a593Smuzhiyun vshl.i64 q10, q10, #25 1144*4882a593Smuzhiyun vadd.i64 q13, q0, q9 1145*4882a593Smuzhiyun vadd.i64 q1, q1, q8 1146*4882a593Smuzhiyun vshl.i64 q8, q8, #26 1147*4882a593Smuzhiyun vadd.i64 q15, q1, q4 1148*4882a593Smuzhiyun vsub.i64 q2, q2, q10 1149*4882a593Smuzhiyun vshr.s64 q10, q13, #26 1150*4882a593Smuzhiyun vsub.i64 q8, q12, q8 1151*4882a593Smuzhiyun vshr.s64 q12, q15, #25 1152*4882a593Smuzhiyun vadd.i64 q3, q3, q10 1153*4882a593Smuzhiyun vshl.i64 q10, q10, #26 1154*4882a593Smuzhiyun vadd.i64 q13, q3, q4 1155*4882a593Smuzhiyun vadd.i64 q14, q14, q12 1156*4882a593Smuzhiyun add r2, r3, #144 1157*4882a593Smuzhiyun vshl.i64 q12, q12, #25 1158*4882a593Smuzhiyun add r4, r3, #192 1159*4882a593Smuzhiyun vadd.i64 q15, q14, q9 1160*4882a593Smuzhiyun add r2, r2, #8 1161*4882a593Smuzhiyun vsub.i64 q0, q0, q10 1162*4882a593Smuzhiyun add r4, r4, #8 1163*4882a593Smuzhiyun vshr.s64 q10, q13, #25 1164*4882a593Smuzhiyun vsub.i64 q1, q1, q12 1165*4882a593Smuzhiyun vshr.s64 q12, q15, #26 1166*4882a593Smuzhiyun vadd.i64 q13, q10, q10 1167*4882a593Smuzhiyun vadd.i64 q11, q11, q12 1168*4882a593Smuzhiyun vtrn.32 d16, d2 1169*4882a593Smuzhiyun vshl.i64 q12, q12, #26 1170*4882a593Smuzhiyun vtrn.32 d17, d3 1171*4882a593Smuzhiyun vadd.i64 q1, q11, q4 1172*4882a593Smuzhiyun vadd.i64 q4, q5, q13 1173*4882a593Smuzhiyun vst1.8 d16, [r2, : 64]! 1174*4882a593Smuzhiyun vshl.i64 q5, q10, #4 1175*4882a593Smuzhiyun vst1.8 d17, [r4, : 64]! 1176*4882a593Smuzhiyun vsub.i64 q8, q14, q12 1177*4882a593Smuzhiyun vshr.s64 q1, q1, #25 1178*4882a593Smuzhiyun vadd.i64 q4, q4, q5 1179*4882a593Smuzhiyun vadd.i64 q5, q6, q1 1180*4882a593Smuzhiyun vshl.i64 q1, q1, #25 1181*4882a593Smuzhiyun vadd.i64 q6, q5, q9 1182*4882a593Smuzhiyun vadd.i64 q4, q4, q10 1183*4882a593Smuzhiyun vshl.i64 q10, q10, #25 1184*4882a593Smuzhiyun vadd.i64 q9, q4, q9 1185*4882a593Smuzhiyun vsub.i64 q1, q11, q1 1186*4882a593Smuzhiyun vshr.s64 q6, q6, #26 1187*4882a593Smuzhiyun vsub.i64 q3, q3, q10 1188*4882a593Smuzhiyun vtrn.32 d16, d2 1189*4882a593Smuzhiyun vshr.s64 q9, q9, #26 1190*4882a593Smuzhiyun vtrn.32 d17, d3 1191*4882a593Smuzhiyun vadd.i64 q1, q2, q6 1192*4882a593Smuzhiyun vst1.8 d16, [r2, : 64] 1193*4882a593Smuzhiyun vshl.i64 q2, q6, #26 1194*4882a593Smuzhiyun vst1.8 d17, [r4, : 64] 1195*4882a593Smuzhiyun vadd.i64 q6, q7, q9 1196*4882a593Smuzhiyun vtrn.32 d0, d6 1197*4882a593Smuzhiyun vshl.i64 q7, q9, #26 1198*4882a593Smuzhiyun vtrn.32 d1, d7 1199*4882a593Smuzhiyun vsub.i64 q2, q5, q2 1200*4882a593Smuzhiyun add r2, r2, #16 1201*4882a593Smuzhiyun vsub.i64 q3, q4, q7 1202*4882a593Smuzhiyun vst1.8 d0, [r2, : 64] 1203*4882a593Smuzhiyun add r4, r4, #16 1204*4882a593Smuzhiyun vst1.8 d1, [r4, : 64] 1205*4882a593Smuzhiyun vtrn.32 d4, d2 1206*4882a593Smuzhiyun vtrn.32 d5, d3 1207*4882a593Smuzhiyun sub r2, r2, #8 1208*4882a593Smuzhiyun sub r4, r4, #8 1209*4882a593Smuzhiyun vtrn.32 d6, d12 1210*4882a593Smuzhiyun vtrn.32 d7, d13 1211*4882a593Smuzhiyun vst1.8 d4, [r2, : 64] 1212*4882a593Smuzhiyun vst1.8 d5, [r4, : 64] 1213*4882a593Smuzhiyun sub r2, r2, #24 1214*4882a593Smuzhiyun sub r4, r4, #24 1215*4882a593Smuzhiyun vst1.8 d6, [r2, : 64] 1216*4882a593Smuzhiyun vst1.8 d7, [r4, : 64] 1217*4882a593Smuzhiyun add r2, r3, #336 1218*4882a593Smuzhiyun add r4, r3, #288 1219*4882a593Smuzhiyun vld1.8 {d0-d1}, [r2, : 128]! 1220*4882a593Smuzhiyun vld1.8 {d2-d3}, [r4, : 128]! 1221*4882a593Smuzhiyun vadd.i32 q0, q0, q1 1222*4882a593Smuzhiyun vld1.8 {d2-d3}, [r2, : 128]! 1223*4882a593Smuzhiyun vld1.8 {d4-d5}, [r4, : 128]! 1224*4882a593Smuzhiyun vadd.i32 q1, q1, q2 1225*4882a593Smuzhiyun add r5, r3, #288 1226*4882a593Smuzhiyun vld1.8 {d4}, [r2, : 64] 1227*4882a593Smuzhiyun vld1.8 {d6}, [r4, : 64] 1228*4882a593Smuzhiyun vadd.i32 q2, q2, q3 1229*4882a593Smuzhiyun vst1.8 {d0-d1}, [r5, : 128]! 1230*4882a593Smuzhiyun vst1.8 {d2-d3}, [r5, : 128]! 1231*4882a593Smuzhiyun vst1.8 d4, [r5, : 64] 1232*4882a593Smuzhiyun add r2, r3, #48 1233*4882a593Smuzhiyun add r4, r3, #144 1234*4882a593Smuzhiyun vld1.8 {d0-d1}, [r4, : 128]! 1235*4882a593Smuzhiyun vld1.8 {d2-d3}, [r4, : 128]! 1236*4882a593Smuzhiyun vld1.8 {d4}, [r4, : 64] 1237*4882a593Smuzhiyun add r4, r3, #288 1238*4882a593Smuzhiyun vld1.8 {d6-d7}, [r4, : 128]! 1239*4882a593Smuzhiyun vtrn.32 q0, q3 1240*4882a593Smuzhiyun vld1.8 {d8-d9}, [r4, : 128]! 1241*4882a593Smuzhiyun vshl.i32 q5, q0, #4 1242*4882a593Smuzhiyun vtrn.32 q1, q4 1243*4882a593Smuzhiyun vshl.i32 q6, q3, #4 1244*4882a593Smuzhiyun vadd.i32 q5, q5, q0 1245*4882a593Smuzhiyun vadd.i32 q6, q6, q3 1246*4882a593Smuzhiyun vshl.i32 q7, q1, #4 1247*4882a593Smuzhiyun vld1.8 {d5}, [r4, : 64] 1248*4882a593Smuzhiyun vshl.i32 q8, q4, #4 1249*4882a593Smuzhiyun vtrn.32 d4, d5 1250*4882a593Smuzhiyun vadd.i32 q7, q7, q1 1251*4882a593Smuzhiyun vadd.i32 q8, q8, q4 1252*4882a593Smuzhiyun vld1.8 {d18-d19}, [r2, : 128]! 1253*4882a593Smuzhiyun vshl.i32 q10, q2, #4 1254*4882a593Smuzhiyun vld1.8 {d22-d23}, [r2, : 128]! 1255*4882a593Smuzhiyun vadd.i32 q10, q10, q2 1256*4882a593Smuzhiyun vld1.8 {d24}, [r2, : 64] 1257*4882a593Smuzhiyun vadd.i32 q5, q5, q0 1258*4882a593Smuzhiyun add r2, r3, #240 1259*4882a593Smuzhiyun vld1.8 {d26-d27}, [r2, : 128]! 1260*4882a593Smuzhiyun vadd.i32 q6, q6, q3 1261*4882a593Smuzhiyun vld1.8 {d28-d29}, [r2, : 128]! 1262*4882a593Smuzhiyun vadd.i32 q8, q8, q4 1263*4882a593Smuzhiyun vld1.8 {d25}, [r2, : 64] 1264*4882a593Smuzhiyun vadd.i32 q10, q10, q2 1265*4882a593Smuzhiyun vtrn.32 q9, q13 1266*4882a593Smuzhiyun vadd.i32 q7, q7, q1 1267*4882a593Smuzhiyun vadd.i32 q5, q5, q0 1268*4882a593Smuzhiyun vtrn.32 q11, q14 1269*4882a593Smuzhiyun vadd.i32 q6, q6, q3 1270*4882a593Smuzhiyun add r2, sp, #528 1271*4882a593Smuzhiyun vadd.i32 q10, q10, q2 1272*4882a593Smuzhiyun vtrn.32 d24, d25 1273*4882a593Smuzhiyun vst1.8 {d12-d13}, [r2, : 128]! 1274*4882a593Smuzhiyun vshl.i32 q6, q13, #1 1275*4882a593Smuzhiyun vst1.8 {d20-d21}, [r2, : 128]! 1276*4882a593Smuzhiyun vshl.i32 q10, q14, #1 1277*4882a593Smuzhiyun vst1.8 {d12-d13}, [r2, : 128]! 1278*4882a593Smuzhiyun vshl.i32 q15, q12, #1 1279*4882a593Smuzhiyun vadd.i32 q8, q8, q4 1280*4882a593Smuzhiyun vext.32 d10, d31, d30, #0 1281*4882a593Smuzhiyun vadd.i32 q7, q7, q1 1282*4882a593Smuzhiyun vst1.8 {d16-d17}, [r2, : 128]! 1283*4882a593Smuzhiyun vmull.s32 q8, d18, d5 1284*4882a593Smuzhiyun vmlal.s32 q8, d26, d4 1285*4882a593Smuzhiyun vmlal.s32 q8, d19, d9 1286*4882a593Smuzhiyun vmlal.s32 q8, d27, d3 1287*4882a593Smuzhiyun vmlal.s32 q8, d22, d8 1288*4882a593Smuzhiyun vmlal.s32 q8, d28, d2 1289*4882a593Smuzhiyun vmlal.s32 q8, d23, d7 1290*4882a593Smuzhiyun vmlal.s32 q8, d29, d1 1291*4882a593Smuzhiyun vmlal.s32 q8, d24, d6 1292*4882a593Smuzhiyun vmlal.s32 q8, d25, d0 1293*4882a593Smuzhiyun vst1.8 {d14-d15}, [r2, : 128]! 1294*4882a593Smuzhiyun vmull.s32 q2, d18, d4 1295*4882a593Smuzhiyun vmlal.s32 q2, d12, d9 1296*4882a593Smuzhiyun vmlal.s32 q2, d13, d8 1297*4882a593Smuzhiyun vmlal.s32 q2, d19, d3 1298*4882a593Smuzhiyun vmlal.s32 q2, d22, d2 1299*4882a593Smuzhiyun vmlal.s32 q2, d23, d1 1300*4882a593Smuzhiyun vmlal.s32 q2, d24, d0 1301*4882a593Smuzhiyun vst1.8 {d20-d21}, [r2, : 128]! 1302*4882a593Smuzhiyun vmull.s32 q7, d18, d9 1303*4882a593Smuzhiyun vmlal.s32 q7, d26, d3 1304*4882a593Smuzhiyun vmlal.s32 q7, d19, d8 1305*4882a593Smuzhiyun vmlal.s32 q7, d27, d2 1306*4882a593Smuzhiyun vmlal.s32 q7, d22, d7 1307*4882a593Smuzhiyun vmlal.s32 q7, d28, d1 1308*4882a593Smuzhiyun vmlal.s32 q7, d23, d6 1309*4882a593Smuzhiyun vmlal.s32 q7, d29, d0 1310*4882a593Smuzhiyun vst1.8 {d10-d11}, [r2, : 128]! 1311*4882a593Smuzhiyun vmull.s32 q5, d18, d3 1312*4882a593Smuzhiyun vmlal.s32 q5, d19, d2 1313*4882a593Smuzhiyun vmlal.s32 q5, d22, d1 1314*4882a593Smuzhiyun vmlal.s32 q5, d23, d0 1315*4882a593Smuzhiyun vmlal.s32 q5, d12, d8 1316*4882a593Smuzhiyun vst1.8 {d16-d17}, [r2, : 128]! 1317*4882a593Smuzhiyun vmull.s32 q4, d18, d8 1318*4882a593Smuzhiyun vmlal.s32 q4, d26, d2 1319*4882a593Smuzhiyun vmlal.s32 q4, d19, d7 1320*4882a593Smuzhiyun vmlal.s32 q4, d27, d1 1321*4882a593Smuzhiyun vmlal.s32 q4, d22, d6 1322*4882a593Smuzhiyun vmlal.s32 q4, d28, d0 1323*4882a593Smuzhiyun vmull.s32 q8, d18, d7 1324*4882a593Smuzhiyun vmlal.s32 q8, d26, d1 1325*4882a593Smuzhiyun vmlal.s32 q8, d19, d6 1326*4882a593Smuzhiyun vmlal.s32 q8, d27, d0 1327*4882a593Smuzhiyun add r2, sp, #544 1328*4882a593Smuzhiyun vld1.8 {d20-d21}, [r2, : 128] 1329*4882a593Smuzhiyun vmlal.s32 q7, d24, d21 1330*4882a593Smuzhiyun vmlal.s32 q7, d25, d20 1331*4882a593Smuzhiyun vmlal.s32 q4, d23, d21 1332*4882a593Smuzhiyun vmlal.s32 q4, d29, d20 1333*4882a593Smuzhiyun vmlal.s32 q8, d22, d21 1334*4882a593Smuzhiyun vmlal.s32 q8, d28, d20 1335*4882a593Smuzhiyun vmlal.s32 q5, d24, d20 1336*4882a593Smuzhiyun vst1.8 {d14-d15}, [r2, : 128] 1337*4882a593Smuzhiyun vmull.s32 q7, d18, d6 1338*4882a593Smuzhiyun vmlal.s32 q7, d26, d0 1339*4882a593Smuzhiyun add r2, sp, #624 1340*4882a593Smuzhiyun vld1.8 {d30-d31}, [r2, : 128] 1341*4882a593Smuzhiyun vmlal.s32 q2, d30, d21 1342*4882a593Smuzhiyun vmlal.s32 q7, d19, d21 1343*4882a593Smuzhiyun vmlal.s32 q7, d27, d20 1344*4882a593Smuzhiyun add r2, sp, #592 1345*4882a593Smuzhiyun vld1.8 {d26-d27}, [r2, : 128] 1346*4882a593Smuzhiyun vmlal.s32 q4, d25, d27 1347*4882a593Smuzhiyun vmlal.s32 q8, d29, d27 1348*4882a593Smuzhiyun vmlal.s32 q8, d25, d26 1349*4882a593Smuzhiyun vmlal.s32 q7, d28, d27 1350*4882a593Smuzhiyun vmlal.s32 q7, d29, d26 1351*4882a593Smuzhiyun add r2, sp, #576 1352*4882a593Smuzhiyun vld1.8 {d28-d29}, [r2, : 128] 1353*4882a593Smuzhiyun vmlal.s32 q4, d24, d29 1354*4882a593Smuzhiyun vmlal.s32 q8, d23, d29 1355*4882a593Smuzhiyun vmlal.s32 q8, d24, d28 1356*4882a593Smuzhiyun vmlal.s32 q7, d22, d29 1357*4882a593Smuzhiyun vmlal.s32 q7, d23, d28 1358*4882a593Smuzhiyun vst1.8 {d8-d9}, [r2, : 128] 1359*4882a593Smuzhiyun add r2, sp, #528 1360*4882a593Smuzhiyun vld1.8 {d8-d9}, [r2, : 128] 1361*4882a593Smuzhiyun vmlal.s32 q7, d24, d9 1362*4882a593Smuzhiyun vmlal.s32 q7, d25, d31 1363*4882a593Smuzhiyun vmull.s32 q1, d18, d2 1364*4882a593Smuzhiyun vmlal.s32 q1, d19, d1 1365*4882a593Smuzhiyun vmlal.s32 q1, d22, d0 1366*4882a593Smuzhiyun vmlal.s32 q1, d24, d27 1367*4882a593Smuzhiyun vmlal.s32 q1, d23, d20 1368*4882a593Smuzhiyun vmlal.s32 q1, d12, d7 1369*4882a593Smuzhiyun vmlal.s32 q1, d13, d6 1370*4882a593Smuzhiyun vmull.s32 q6, d18, d1 1371*4882a593Smuzhiyun vmlal.s32 q6, d19, d0 1372*4882a593Smuzhiyun vmlal.s32 q6, d23, d27 1373*4882a593Smuzhiyun vmlal.s32 q6, d22, d20 1374*4882a593Smuzhiyun vmlal.s32 q6, d24, d26 1375*4882a593Smuzhiyun vmull.s32 q0, d18, d0 1376*4882a593Smuzhiyun vmlal.s32 q0, d22, d27 1377*4882a593Smuzhiyun vmlal.s32 q0, d23, d26 1378*4882a593Smuzhiyun vmlal.s32 q0, d24, d31 1379*4882a593Smuzhiyun vmlal.s32 q0, d19, d20 1380*4882a593Smuzhiyun add r2, sp, #608 1381*4882a593Smuzhiyun vld1.8 {d18-d19}, [r2, : 128] 1382*4882a593Smuzhiyun vmlal.s32 q2, d18, d7 1383*4882a593Smuzhiyun vmlal.s32 q5, d18, d6 1384*4882a593Smuzhiyun vmlal.s32 q1, d18, d21 1385*4882a593Smuzhiyun vmlal.s32 q0, d18, d28 1386*4882a593Smuzhiyun vmlal.s32 q6, d18, d29 1387*4882a593Smuzhiyun vmlal.s32 q2, d19, d6 1388*4882a593Smuzhiyun vmlal.s32 q5, d19, d21 1389*4882a593Smuzhiyun vmlal.s32 q1, d19, d29 1390*4882a593Smuzhiyun vmlal.s32 q0, d19, d9 1391*4882a593Smuzhiyun vmlal.s32 q6, d19, d28 1392*4882a593Smuzhiyun add r2, sp, #560 1393*4882a593Smuzhiyun vld1.8 {d18-d19}, [r2, : 128] 1394*4882a593Smuzhiyun add r2, sp, #480 1395*4882a593Smuzhiyun vld1.8 {d22-d23}, [r2, : 128] 1396*4882a593Smuzhiyun vmlal.s32 q5, d19, d7 1397*4882a593Smuzhiyun vmlal.s32 q0, d18, d21 1398*4882a593Smuzhiyun vmlal.s32 q0, d19, d29 1399*4882a593Smuzhiyun vmlal.s32 q6, d18, d6 1400*4882a593Smuzhiyun add r2, sp, #496 1401*4882a593Smuzhiyun vld1.8 {d6-d7}, [r2, : 128] 1402*4882a593Smuzhiyun vmlal.s32 q6, d19, d21 1403*4882a593Smuzhiyun add r2, sp, #544 1404*4882a593Smuzhiyun vld1.8 {d18-d19}, [r2, : 128] 1405*4882a593Smuzhiyun vmlal.s32 q0, d30, d8 1406*4882a593Smuzhiyun add r2, sp, #640 1407*4882a593Smuzhiyun vld1.8 {d20-d21}, [r2, : 128] 1408*4882a593Smuzhiyun vmlal.s32 q5, d30, d29 1409*4882a593Smuzhiyun add r2, sp, #576 1410*4882a593Smuzhiyun vld1.8 {d24-d25}, [r2, : 128] 1411*4882a593Smuzhiyun vmlal.s32 q1, d30, d28 1412*4882a593Smuzhiyun vadd.i64 q13, q0, q11 1413*4882a593Smuzhiyun vadd.i64 q14, q5, q11 1414*4882a593Smuzhiyun vmlal.s32 q6, d30, d9 1415*4882a593Smuzhiyun vshr.s64 q4, q13, #26 1416*4882a593Smuzhiyun vshr.s64 q13, q14, #26 1417*4882a593Smuzhiyun vadd.i64 q7, q7, q4 1418*4882a593Smuzhiyun vshl.i64 q4, q4, #26 1419*4882a593Smuzhiyun vadd.i64 q14, q7, q3 1420*4882a593Smuzhiyun vadd.i64 q9, q9, q13 1421*4882a593Smuzhiyun vshl.i64 q13, q13, #26 1422*4882a593Smuzhiyun vadd.i64 q15, q9, q3 1423*4882a593Smuzhiyun vsub.i64 q0, q0, q4 1424*4882a593Smuzhiyun vshr.s64 q4, q14, #25 1425*4882a593Smuzhiyun vsub.i64 q5, q5, q13 1426*4882a593Smuzhiyun vshr.s64 q13, q15, #25 1427*4882a593Smuzhiyun vadd.i64 q6, q6, q4 1428*4882a593Smuzhiyun vshl.i64 q4, q4, #25 1429*4882a593Smuzhiyun vadd.i64 q14, q6, q11 1430*4882a593Smuzhiyun vadd.i64 q2, q2, q13 1431*4882a593Smuzhiyun vsub.i64 q4, q7, q4 1432*4882a593Smuzhiyun vshr.s64 q7, q14, #26 1433*4882a593Smuzhiyun vshl.i64 q13, q13, #25 1434*4882a593Smuzhiyun vadd.i64 q14, q2, q11 1435*4882a593Smuzhiyun vadd.i64 q8, q8, q7 1436*4882a593Smuzhiyun vshl.i64 q7, q7, #26 1437*4882a593Smuzhiyun vadd.i64 q15, q8, q3 1438*4882a593Smuzhiyun vsub.i64 q9, q9, q13 1439*4882a593Smuzhiyun vshr.s64 q13, q14, #26 1440*4882a593Smuzhiyun vsub.i64 q6, q6, q7 1441*4882a593Smuzhiyun vshr.s64 q7, q15, #25 1442*4882a593Smuzhiyun vadd.i64 q10, q10, q13 1443*4882a593Smuzhiyun vshl.i64 q13, q13, #26 1444*4882a593Smuzhiyun vadd.i64 q14, q10, q3 1445*4882a593Smuzhiyun vadd.i64 q1, q1, q7 1446*4882a593Smuzhiyun add r2, r3, #240 1447*4882a593Smuzhiyun vshl.i64 q7, q7, #25 1448*4882a593Smuzhiyun add r4, r3, #144 1449*4882a593Smuzhiyun vadd.i64 q15, q1, q11 1450*4882a593Smuzhiyun add r2, r2, #8 1451*4882a593Smuzhiyun vsub.i64 q2, q2, q13 1452*4882a593Smuzhiyun add r4, r4, #8 1453*4882a593Smuzhiyun vshr.s64 q13, q14, #25 1454*4882a593Smuzhiyun vsub.i64 q7, q8, q7 1455*4882a593Smuzhiyun vshr.s64 q8, q15, #26 1456*4882a593Smuzhiyun vadd.i64 q14, q13, q13 1457*4882a593Smuzhiyun vadd.i64 q12, q12, q8 1458*4882a593Smuzhiyun vtrn.32 d12, d14 1459*4882a593Smuzhiyun vshl.i64 q8, q8, #26 1460*4882a593Smuzhiyun vtrn.32 d13, d15 1461*4882a593Smuzhiyun vadd.i64 q3, q12, q3 1462*4882a593Smuzhiyun vadd.i64 q0, q0, q14 1463*4882a593Smuzhiyun vst1.8 d12, [r2, : 64]! 1464*4882a593Smuzhiyun vshl.i64 q7, q13, #4 1465*4882a593Smuzhiyun vst1.8 d13, [r4, : 64]! 1466*4882a593Smuzhiyun vsub.i64 q1, q1, q8 1467*4882a593Smuzhiyun vshr.s64 q3, q3, #25 1468*4882a593Smuzhiyun vadd.i64 q0, q0, q7 1469*4882a593Smuzhiyun vadd.i64 q5, q5, q3 1470*4882a593Smuzhiyun vshl.i64 q3, q3, #25 1471*4882a593Smuzhiyun vadd.i64 q6, q5, q11 1472*4882a593Smuzhiyun vadd.i64 q0, q0, q13 1473*4882a593Smuzhiyun vshl.i64 q7, q13, #25 1474*4882a593Smuzhiyun vadd.i64 q8, q0, q11 1475*4882a593Smuzhiyun vsub.i64 q3, q12, q3 1476*4882a593Smuzhiyun vshr.s64 q6, q6, #26 1477*4882a593Smuzhiyun vsub.i64 q7, q10, q7 1478*4882a593Smuzhiyun vtrn.32 d2, d6 1479*4882a593Smuzhiyun vshr.s64 q8, q8, #26 1480*4882a593Smuzhiyun vtrn.32 d3, d7 1481*4882a593Smuzhiyun vadd.i64 q3, q9, q6 1482*4882a593Smuzhiyun vst1.8 d2, [r2, : 64] 1483*4882a593Smuzhiyun vshl.i64 q6, q6, #26 1484*4882a593Smuzhiyun vst1.8 d3, [r4, : 64] 1485*4882a593Smuzhiyun vadd.i64 q1, q4, q8 1486*4882a593Smuzhiyun vtrn.32 d4, d14 1487*4882a593Smuzhiyun vshl.i64 q4, q8, #26 1488*4882a593Smuzhiyun vtrn.32 d5, d15 1489*4882a593Smuzhiyun vsub.i64 q5, q5, q6 1490*4882a593Smuzhiyun add r2, r2, #16 1491*4882a593Smuzhiyun vsub.i64 q0, q0, q4 1492*4882a593Smuzhiyun vst1.8 d4, [r2, : 64] 1493*4882a593Smuzhiyun add r4, r4, #16 1494*4882a593Smuzhiyun vst1.8 d5, [r4, : 64] 1495*4882a593Smuzhiyun vtrn.32 d10, d6 1496*4882a593Smuzhiyun vtrn.32 d11, d7 1497*4882a593Smuzhiyun sub r2, r2, #8 1498*4882a593Smuzhiyun sub r4, r4, #8 1499*4882a593Smuzhiyun vtrn.32 d0, d2 1500*4882a593Smuzhiyun vtrn.32 d1, d3 1501*4882a593Smuzhiyun vst1.8 d10, [r2, : 64] 1502*4882a593Smuzhiyun vst1.8 d11, [r4, : 64] 1503*4882a593Smuzhiyun sub r2, r2, #24 1504*4882a593Smuzhiyun sub r4, r4, #24 1505*4882a593Smuzhiyun vst1.8 d0, [r2, : 64] 1506*4882a593Smuzhiyun vst1.8 d1, [r4, : 64] 1507*4882a593Smuzhiyun ldr r2, [sp, #456] 1508*4882a593Smuzhiyun ldr r4, [sp, #460] 1509*4882a593Smuzhiyun subs r5, r2, #1 1510*4882a593Smuzhiyun bge .Lmainloop 1511*4882a593Smuzhiyun add r1, r3, #144 1512*4882a593Smuzhiyun add r2, r3, #336 1513*4882a593Smuzhiyun vld1.8 {d0-d1}, [r1, : 128]! 1514*4882a593Smuzhiyun vld1.8 {d2-d3}, [r1, : 128]! 1515*4882a593Smuzhiyun vld1.8 {d4}, [r1, : 64] 1516*4882a593Smuzhiyun vst1.8 {d0-d1}, [r2, : 128]! 1517*4882a593Smuzhiyun vst1.8 {d2-d3}, [r2, : 128]! 1518*4882a593Smuzhiyun vst1.8 d4, [r2, : 64] 1519*4882a593Smuzhiyun movw r1, #0 1520*4882a593Smuzhiyun.Linvertloop: 1521*4882a593Smuzhiyun add r2, r3, #144 1522*4882a593Smuzhiyun movw r4, #0 1523*4882a593Smuzhiyun movw r5, #2 1524*4882a593Smuzhiyun cmp r1, #1 1525*4882a593Smuzhiyun moveq r5, #1 1526*4882a593Smuzhiyun addeq r2, r3, #336 1527*4882a593Smuzhiyun addeq r4, r3, #48 1528*4882a593Smuzhiyun cmp r1, #2 1529*4882a593Smuzhiyun moveq r5, #1 1530*4882a593Smuzhiyun addeq r2, r3, #48 1531*4882a593Smuzhiyun cmp r1, #3 1532*4882a593Smuzhiyun moveq r5, #5 1533*4882a593Smuzhiyun addeq r4, r3, #336 1534*4882a593Smuzhiyun cmp r1, #4 1535*4882a593Smuzhiyun moveq r5, #10 1536*4882a593Smuzhiyun cmp r1, #5 1537*4882a593Smuzhiyun moveq r5, #20 1538*4882a593Smuzhiyun cmp r1, #6 1539*4882a593Smuzhiyun moveq r5, #10 1540*4882a593Smuzhiyun addeq r2, r3, #336 1541*4882a593Smuzhiyun addeq r4, r3, #336 1542*4882a593Smuzhiyun cmp r1, #7 1543*4882a593Smuzhiyun moveq r5, #50 1544*4882a593Smuzhiyun cmp r1, #8 1545*4882a593Smuzhiyun moveq r5, #100 1546*4882a593Smuzhiyun cmp r1, #9 1547*4882a593Smuzhiyun moveq r5, #50 1548*4882a593Smuzhiyun addeq r2, r3, #336 1549*4882a593Smuzhiyun cmp r1, #10 1550*4882a593Smuzhiyun moveq r5, #5 1551*4882a593Smuzhiyun addeq r2, r3, #48 1552*4882a593Smuzhiyun cmp r1, #11 1553*4882a593Smuzhiyun moveq r5, #0 1554*4882a593Smuzhiyun addeq r2, r3, #96 1555*4882a593Smuzhiyun add r6, r3, #144 1556*4882a593Smuzhiyun add r7, r3, #288 1557*4882a593Smuzhiyun vld1.8 {d0-d1}, [r6, : 128]! 1558*4882a593Smuzhiyun vld1.8 {d2-d3}, [r6, : 128]! 1559*4882a593Smuzhiyun vld1.8 {d4}, [r6, : 64] 1560*4882a593Smuzhiyun vst1.8 {d0-d1}, [r7, : 128]! 1561*4882a593Smuzhiyun vst1.8 {d2-d3}, [r7, : 128]! 1562*4882a593Smuzhiyun vst1.8 d4, [r7, : 64] 1563*4882a593Smuzhiyun cmp r5, #0 1564*4882a593Smuzhiyun beq .Lskipsquaringloop 1565*4882a593Smuzhiyun.Lsquaringloop: 1566*4882a593Smuzhiyun add r6, r3, #288 1567*4882a593Smuzhiyun add r7, r3, #288 1568*4882a593Smuzhiyun add r8, r3, #288 1569*4882a593Smuzhiyun vmov.i32 q0, #19 1570*4882a593Smuzhiyun vmov.i32 q1, #0 1571*4882a593Smuzhiyun vmov.i32 q2, #1 1572*4882a593Smuzhiyun vzip.i32 q1, q2 1573*4882a593Smuzhiyun vld1.8 {d4-d5}, [r7, : 128]! 1574*4882a593Smuzhiyun vld1.8 {d6-d7}, [r7, : 128]! 1575*4882a593Smuzhiyun vld1.8 {d9}, [r7, : 64] 1576*4882a593Smuzhiyun vld1.8 {d10-d11}, [r6, : 128]! 1577*4882a593Smuzhiyun add r7, sp, #384 1578*4882a593Smuzhiyun vld1.8 {d12-d13}, [r6, : 128]! 1579*4882a593Smuzhiyun vmul.i32 q7, q2, q0 1580*4882a593Smuzhiyun vld1.8 {d8}, [r6, : 64] 1581*4882a593Smuzhiyun vext.32 d17, d11, d10, #1 1582*4882a593Smuzhiyun vmul.i32 q9, q3, q0 1583*4882a593Smuzhiyun vext.32 d16, d10, d8, #1 1584*4882a593Smuzhiyun vshl.u32 q10, q5, q1 1585*4882a593Smuzhiyun vext.32 d22, d14, d4, #1 1586*4882a593Smuzhiyun vext.32 d24, d18, d6, #1 1587*4882a593Smuzhiyun vshl.u32 q13, q6, q1 1588*4882a593Smuzhiyun vshl.u32 d28, d8, d2 1589*4882a593Smuzhiyun vrev64.i32 d22, d22 1590*4882a593Smuzhiyun vmul.i32 d1, d9, d1 1591*4882a593Smuzhiyun vrev64.i32 d24, d24 1592*4882a593Smuzhiyun vext.32 d29, d8, d13, #1 1593*4882a593Smuzhiyun vext.32 d0, d1, d9, #1 1594*4882a593Smuzhiyun vrev64.i32 d0, d0 1595*4882a593Smuzhiyun vext.32 d2, d9, d1, #1 1596*4882a593Smuzhiyun vext.32 d23, d15, d5, #1 1597*4882a593Smuzhiyun vmull.s32 q4, d20, d4 1598*4882a593Smuzhiyun vrev64.i32 d23, d23 1599*4882a593Smuzhiyun vmlal.s32 q4, d21, d1 1600*4882a593Smuzhiyun vrev64.i32 d2, d2 1601*4882a593Smuzhiyun vmlal.s32 q4, d26, d19 1602*4882a593Smuzhiyun vext.32 d3, d5, d15, #1 1603*4882a593Smuzhiyun vmlal.s32 q4, d27, d18 1604*4882a593Smuzhiyun vrev64.i32 d3, d3 1605*4882a593Smuzhiyun vmlal.s32 q4, d28, d15 1606*4882a593Smuzhiyun vext.32 d14, d12, d11, #1 1607*4882a593Smuzhiyun vmull.s32 q5, d16, d23 1608*4882a593Smuzhiyun vext.32 d15, d13, d12, #1 1609*4882a593Smuzhiyun vmlal.s32 q5, d17, d4 1610*4882a593Smuzhiyun vst1.8 d8, [r7, : 64]! 1611*4882a593Smuzhiyun vmlal.s32 q5, d14, d1 1612*4882a593Smuzhiyun vext.32 d12, d9, d8, #0 1613*4882a593Smuzhiyun vmlal.s32 q5, d15, d19 1614*4882a593Smuzhiyun vmov.i64 d13, #0 1615*4882a593Smuzhiyun vmlal.s32 q5, d29, d18 1616*4882a593Smuzhiyun vext.32 d25, d19, d7, #1 1617*4882a593Smuzhiyun vmlal.s32 q6, d20, d5 1618*4882a593Smuzhiyun vrev64.i32 d25, d25 1619*4882a593Smuzhiyun vmlal.s32 q6, d21, d4 1620*4882a593Smuzhiyun vst1.8 d11, [r7, : 64]! 1621*4882a593Smuzhiyun vmlal.s32 q6, d26, d1 1622*4882a593Smuzhiyun vext.32 d9, d10, d10, #0 1623*4882a593Smuzhiyun vmlal.s32 q6, d27, d19 1624*4882a593Smuzhiyun vmov.i64 d8, #0 1625*4882a593Smuzhiyun vmlal.s32 q6, d28, d18 1626*4882a593Smuzhiyun vmlal.s32 q4, d16, d24 1627*4882a593Smuzhiyun vmlal.s32 q4, d17, d5 1628*4882a593Smuzhiyun vmlal.s32 q4, d14, d4 1629*4882a593Smuzhiyun vst1.8 d12, [r7, : 64]! 1630*4882a593Smuzhiyun vmlal.s32 q4, d15, d1 1631*4882a593Smuzhiyun vext.32 d10, d13, d12, #0 1632*4882a593Smuzhiyun vmlal.s32 q4, d29, d19 1633*4882a593Smuzhiyun vmov.i64 d11, #0 1634*4882a593Smuzhiyun vmlal.s32 q5, d20, d6 1635*4882a593Smuzhiyun vmlal.s32 q5, d21, d5 1636*4882a593Smuzhiyun vmlal.s32 q5, d26, d4 1637*4882a593Smuzhiyun vext.32 d13, d8, d8, #0 1638*4882a593Smuzhiyun vmlal.s32 q5, d27, d1 1639*4882a593Smuzhiyun vmov.i64 d12, #0 1640*4882a593Smuzhiyun vmlal.s32 q5, d28, d19 1641*4882a593Smuzhiyun vst1.8 d9, [r7, : 64]! 1642*4882a593Smuzhiyun vmlal.s32 q6, d16, d25 1643*4882a593Smuzhiyun vmlal.s32 q6, d17, d6 1644*4882a593Smuzhiyun vst1.8 d10, [r7, : 64] 1645*4882a593Smuzhiyun vmlal.s32 q6, d14, d5 1646*4882a593Smuzhiyun vext.32 d8, d11, d10, #0 1647*4882a593Smuzhiyun vmlal.s32 q6, d15, d4 1648*4882a593Smuzhiyun vmov.i64 d9, #0 1649*4882a593Smuzhiyun vmlal.s32 q6, d29, d1 1650*4882a593Smuzhiyun vmlal.s32 q4, d20, d7 1651*4882a593Smuzhiyun vmlal.s32 q4, d21, d6 1652*4882a593Smuzhiyun vmlal.s32 q4, d26, d5 1653*4882a593Smuzhiyun vext.32 d11, d12, d12, #0 1654*4882a593Smuzhiyun vmlal.s32 q4, d27, d4 1655*4882a593Smuzhiyun vmov.i64 d10, #0 1656*4882a593Smuzhiyun vmlal.s32 q4, d28, d1 1657*4882a593Smuzhiyun vmlal.s32 q5, d16, d0 1658*4882a593Smuzhiyun sub r6, r7, #32 1659*4882a593Smuzhiyun vmlal.s32 q5, d17, d7 1660*4882a593Smuzhiyun vmlal.s32 q5, d14, d6 1661*4882a593Smuzhiyun vext.32 d30, d9, d8, #0 1662*4882a593Smuzhiyun vmlal.s32 q5, d15, d5 1663*4882a593Smuzhiyun vld1.8 {d31}, [r6, : 64]! 1664*4882a593Smuzhiyun vmlal.s32 q5, d29, d4 1665*4882a593Smuzhiyun vmlal.s32 q15, d20, d0 1666*4882a593Smuzhiyun vext.32 d0, d6, d18, #1 1667*4882a593Smuzhiyun vmlal.s32 q15, d21, d25 1668*4882a593Smuzhiyun vrev64.i32 d0, d0 1669*4882a593Smuzhiyun vmlal.s32 q15, d26, d24 1670*4882a593Smuzhiyun vext.32 d1, d7, d19, #1 1671*4882a593Smuzhiyun vext.32 d7, d10, d10, #0 1672*4882a593Smuzhiyun vmlal.s32 q15, d27, d23 1673*4882a593Smuzhiyun vrev64.i32 d1, d1 1674*4882a593Smuzhiyun vld1.8 {d6}, [r6, : 64] 1675*4882a593Smuzhiyun vmlal.s32 q15, d28, d22 1676*4882a593Smuzhiyun vmlal.s32 q3, d16, d4 1677*4882a593Smuzhiyun add r6, r6, #24 1678*4882a593Smuzhiyun vmlal.s32 q3, d17, d2 1679*4882a593Smuzhiyun vext.32 d4, d31, d30, #0 1680*4882a593Smuzhiyun vmov d17, d11 1681*4882a593Smuzhiyun vmlal.s32 q3, d14, d1 1682*4882a593Smuzhiyun vext.32 d11, d13, d13, #0 1683*4882a593Smuzhiyun vext.32 d13, d30, d30, #0 1684*4882a593Smuzhiyun vmlal.s32 q3, d15, d0 1685*4882a593Smuzhiyun vext.32 d1, d8, d8, #0 1686*4882a593Smuzhiyun vmlal.s32 q3, d29, d3 1687*4882a593Smuzhiyun vld1.8 {d5}, [r6, : 64] 1688*4882a593Smuzhiyun sub r6, r6, #16 1689*4882a593Smuzhiyun vext.32 d10, d6, d6, #0 1690*4882a593Smuzhiyun vmov.i32 q1, #0xffffffff 1691*4882a593Smuzhiyun vshl.i64 q4, q1, #25 1692*4882a593Smuzhiyun add r7, sp, #480 1693*4882a593Smuzhiyun vld1.8 {d14-d15}, [r7, : 128] 1694*4882a593Smuzhiyun vadd.i64 q9, q2, q7 1695*4882a593Smuzhiyun vshl.i64 q1, q1, #26 1696*4882a593Smuzhiyun vshr.s64 q10, q9, #26 1697*4882a593Smuzhiyun vld1.8 {d0}, [r6, : 64]! 1698*4882a593Smuzhiyun vadd.i64 q5, q5, q10 1699*4882a593Smuzhiyun vand q9, q9, q1 1700*4882a593Smuzhiyun vld1.8 {d16}, [r6, : 64]! 1701*4882a593Smuzhiyun add r6, sp, #496 1702*4882a593Smuzhiyun vld1.8 {d20-d21}, [r6, : 128] 1703*4882a593Smuzhiyun vadd.i64 q11, q5, q10 1704*4882a593Smuzhiyun vsub.i64 q2, q2, q9 1705*4882a593Smuzhiyun vshr.s64 q9, q11, #25 1706*4882a593Smuzhiyun vext.32 d12, d5, d4, #0 1707*4882a593Smuzhiyun vand q11, q11, q4 1708*4882a593Smuzhiyun vadd.i64 q0, q0, q9 1709*4882a593Smuzhiyun vmov d19, d7 1710*4882a593Smuzhiyun vadd.i64 q3, q0, q7 1711*4882a593Smuzhiyun vsub.i64 q5, q5, q11 1712*4882a593Smuzhiyun vshr.s64 q11, q3, #26 1713*4882a593Smuzhiyun vext.32 d18, d11, d10, #0 1714*4882a593Smuzhiyun vand q3, q3, q1 1715*4882a593Smuzhiyun vadd.i64 q8, q8, q11 1716*4882a593Smuzhiyun vadd.i64 q11, q8, q10 1717*4882a593Smuzhiyun vsub.i64 q0, q0, q3 1718*4882a593Smuzhiyun vshr.s64 q3, q11, #25 1719*4882a593Smuzhiyun vand q11, q11, q4 1720*4882a593Smuzhiyun vadd.i64 q3, q6, q3 1721*4882a593Smuzhiyun vadd.i64 q6, q3, q7 1722*4882a593Smuzhiyun vsub.i64 q8, q8, q11 1723*4882a593Smuzhiyun vshr.s64 q11, q6, #26 1724*4882a593Smuzhiyun vand q6, q6, q1 1725*4882a593Smuzhiyun vadd.i64 q9, q9, q11 1726*4882a593Smuzhiyun vadd.i64 d25, d19, d21 1727*4882a593Smuzhiyun vsub.i64 q3, q3, q6 1728*4882a593Smuzhiyun vshr.s64 d23, d25, #25 1729*4882a593Smuzhiyun vand q4, q12, q4 1730*4882a593Smuzhiyun vadd.i64 d21, d23, d23 1731*4882a593Smuzhiyun vshl.i64 d25, d23, #4 1732*4882a593Smuzhiyun vadd.i64 d21, d21, d23 1733*4882a593Smuzhiyun vadd.i64 d25, d25, d21 1734*4882a593Smuzhiyun vadd.i64 d4, d4, d25 1735*4882a593Smuzhiyun vzip.i32 q0, q8 1736*4882a593Smuzhiyun vadd.i64 d12, d4, d14 1737*4882a593Smuzhiyun add r6, r8, #8 1738*4882a593Smuzhiyun vst1.8 d0, [r6, : 64] 1739*4882a593Smuzhiyun vsub.i64 d19, d19, d9 1740*4882a593Smuzhiyun add r6, r6, #16 1741*4882a593Smuzhiyun vst1.8 d16, [r6, : 64] 1742*4882a593Smuzhiyun vshr.s64 d22, d12, #26 1743*4882a593Smuzhiyun vand q0, q6, q1 1744*4882a593Smuzhiyun vadd.i64 d10, d10, d22 1745*4882a593Smuzhiyun vzip.i32 q3, q9 1746*4882a593Smuzhiyun vsub.i64 d4, d4, d0 1747*4882a593Smuzhiyun sub r6, r6, #8 1748*4882a593Smuzhiyun vst1.8 d6, [r6, : 64] 1749*4882a593Smuzhiyun add r6, r6, #16 1750*4882a593Smuzhiyun vst1.8 d18, [r6, : 64] 1751*4882a593Smuzhiyun vzip.i32 q2, q5 1752*4882a593Smuzhiyun sub r6, r6, #32 1753*4882a593Smuzhiyun vst1.8 d4, [r6, : 64] 1754*4882a593Smuzhiyun subs r5, r5, #1 1755*4882a593Smuzhiyun bhi .Lsquaringloop 1756*4882a593Smuzhiyun.Lskipsquaringloop: 1757*4882a593Smuzhiyun mov r2, r2 1758*4882a593Smuzhiyun add r5, r3, #288 1759*4882a593Smuzhiyun add r6, r3, #144 1760*4882a593Smuzhiyun vmov.i32 q0, #19 1761*4882a593Smuzhiyun vmov.i32 q1, #0 1762*4882a593Smuzhiyun vmov.i32 q2, #1 1763*4882a593Smuzhiyun vzip.i32 q1, q2 1764*4882a593Smuzhiyun vld1.8 {d4-d5}, [r5, : 128]! 1765*4882a593Smuzhiyun vld1.8 {d6-d7}, [r5, : 128]! 1766*4882a593Smuzhiyun vld1.8 {d9}, [r5, : 64] 1767*4882a593Smuzhiyun vld1.8 {d10-d11}, [r2, : 128]! 1768*4882a593Smuzhiyun add r5, sp, #384 1769*4882a593Smuzhiyun vld1.8 {d12-d13}, [r2, : 128]! 1770*4882a593Smuzhiyun vmul.i32 q7, q2, q0 1771*4882a593Smuzhiyun vld1.8 {d8}, [r2, : 64] 1772*4882a593Smuzhiyun vext.32 d17, d11, d10, #1 1773*4882a593Smuzhiyun vmul.i32 q9, q3, q0 1774*4882a593Smuzhiyun vext.32 d16, d10, d8, #1 1775*4882a593Smuzhiyun vshl.u32 q10, q5, q1 1776*4882a593Smuzhiyun vext.32 d22, d14, d4, #1 1777*4882a593Smuzhiyun vext.32 d24, d18, d6, #1 1778*4882a593Smuzhiyun vshl.u32 q13, q6, q1 1779*4882a593Smuzhiyun vshl.u32 d28, d8, d2 1780*4882a593Smuzhiyun vrev64.i32 d22, d22 1781*4882a593Smuzhiyun vmul.i32 d1, d9, d1 1782*4882a593Smuzhiyun vrev64.i32 d24, d24 1783*4882a593Smuzhiyun vext.32 d29, d8, d13, #1 1784*4882a593Smuzhiyun vext.32 d0, d1, d9, #1 1785*4882a593Smuzhiyun vrev64.i32 d0, d0 1786*4882a593Smuzhiyun vext.32 d2, d9, d1, #1 1787*4882a593Smuzhiyun vext.32 d23, d15, d5, #1 1788*4882a593Smuzhiyun vmull.s32 q4, d20, d4 1789*4882a593Smuzhiyun vrev64.i32 d23, d23 1790*4882a593Smuzhiyun vmlal.s32 q4, d21, d1 1791*4882a593Smuzhiyun vrev64.i32 d2, d2 1792*4882a593Smuzhiyun vmlal.s32 q4, d26, d19 1793*4882a593Smuzhiyun vext.32 d3, d5, d15, #1 1794*4882a593Smuzhiyun vmlal.s32 q4, d27, d18 1795*4882a593Smuzhiyun vrev64.i32 d3, d3 1796*4882a593Smuzhiyun vmlal.s32 q4, d28, d15 1797*4882a593Smuzhiyun vext.32 d14, d12, d11, #1 1798*4882a593Smuzhiyun vmull.s32 q5, d16, d23 1799*4882a593Smuzhiyun vext.32 d15, d13, d12, #1 1800*4882a593Smuzhiyun vmlal.s32 q5, d17, d4 1801*4882a593Smuzhiyun vst1.8 d8, [r5, : 64]! 1802*4882a593Smuzhiyun vmlal.s32 q5, d14, d1 1803*4882a593Smuzhiyun vext.32 d12, d9, d8, #0 1804*4882a593Smuzhiyun vmlal.s32 q5, d15, d19 1805*4882a593Smuzhiyun vmov.i64 d13, #0 1806*4882a593Smuzhiyun vmlal.s32 q5, d29, d18 1807*4882a593Smuzhiyun vext.32 d25, d19, d7, #1 1808*4882a593Smuzhiyun vmlal.s32 q6, d20, d5 1809*4882a593Smuzhiyun vrev64.i32 d25, d25 1810*4882a593Smuzhiyun vmlal.s32 q6, d21, d4 1811*4882a593Smuzhiyun vst1.8 d11, [r5, : 64]! 1812*4882a593Smuzhiyun vmlal.s32 q6, d26, d1 1813*4882a593Smuzhiyun vext.32 d9, d10, d10, #0 1814*4882a593Smuzhiyun vmlal.s32 q6, d27, d19 1815*4882a593Smuzhiyun vmov.i64 d8, #0 1816*4882a593Smuzhiyun vmlal.s32 q6, d28, d18 1817*4882a593Smuzhiyun vmlal.s32 q4, d16, d24 1818*4882a593Smuzhiyun vmlal.s32 q4, d17, d5 1819*4882a593Smuzhiyun vmlal.s32 q4, d14, d4 1820*4882a593Smuzhiyun vst1.8 d12, [r5, : 64]! 1821*4882a593Smuzhiyun vmlal.s32 q4, d15, d1 1822*4882a593Smuzhiyun vext.32 d10, d13, d12, #0 1823*4882a593Smuzhiyun vmlal.s32 q4, d29, d19 1824*4882a593Smuzhiyun vmov.i64 d11, #0 1825*4882a593Smuzhiyun vmlal.s32 q5, d20, d6 1826*4882a593Smuzhiyun vmlal.s32 q5, d21, d5 1827*4882a593Smuzhiyun vmlal.s32 q5, d26, d4 1828*4882a593Smuzhiyun vext.32 d13, d8, d8, #0 1829*4882a593Smuzhiyun vmlal.s32 q5, d27, d1 1830*4882a593Smuzhiyun vmov.i64 d12, #0 1831*4882a593Smuzhiyun vmlal.s32 q5, d28, d19 1832*4882a593Smuzhiyun vst1.8 d9, [r5, : 64]! 1833*4882a593Smuzhiyun vmlal.s32 q6, d16, d25 1834*4882a593Smuzhiyun vmlal.s32 q6, d17, d6 1835*4882a593Smuzhiyun vst1.8 d10, [r5, : 64] 1836*4882a593Smuzhiyun vmlal.s32 q6, d14, d5 1837*4882a593Smuzhiyun vext.32 d8, d11, d10, #0 1838*4882a593Smuzhiyun vmlal.s32 q6, d15, d4 1839*4882a593Smuzhiyun vmov.i64 d9, #0 1840*4882a593Smuzhiyun vmlal.s32 q6, d29, d1 1841*4882a593Smuzhiyun vmlal.s32 q4, d20, d7 1842*4882a593Smuzhiyun vmlal.s32 q4, d21, d6 1843*4882a593Smuzhiyun vmlal.s32 q4, d26, d5 1844*4882a593Smuzhiyun vext.32 d11, d12, d12, #0 1845*4882a593Smuzhiyun vmlal.s32 q4, d27, d4 1846*4882a593Smuzhiyun vmov.i64 d10, #0 1847*4882a593Smuzhiyun vmlal.s32 q4, d28, d1 1848*4882a593Smuzhiyun vmlal.s32 q5, d16, d0 1849*4882a593Smuzhiyun sub r2, r5, #32 1850*4882a593Smuzhiyun vmlal.s32 q5, d17, d7 1851*4882a593Smuzhiyun vmlal.s32 q5, d14, d6 1852*4882a593Smuzhiyun vext.32 d30, d9, d8, #0 1853*4882a593Smuzhiyun vmlal.s32 q5, d15, d5 1854*4882a593Smuzhiyun vld1.8 {d31}, [r2, : 64]! 1855*4882a593Smuzhiyun vmlal.s32 q5, d29, d4 1856*4882a593Smuzhiyun vmlal.s32 q15, d20, d0 1857*4882a593Smuzhiyun vext.32 d0, d6, d18, #1 1858*4882a593Smuzhiyun vmlal.s32 q15, d21, d25 1859*4882a593Smuzhiyun vrev64.i32 d0, d0 1860*4882a593Smuzhiyun vmlal.s32 q15, d26, d24 1861*4882a593Smuzhiyun vext.32 d1, d7, d19, #1 1862*4882a593Smuzhiyun vext.32 d7, d10, d10, #0 1863*4882a593Smuzhiyun vmlal.s32 q15, d27, d23 1864*4882a593Smuzhiyun vrev64.i32 d1, d1 1865*4882a593Smuzhiyun vld1.8 {d6}, [r2, : 64] 1866*4882a593Smuzhiyun vmlal.s32 q15, d28, d22 1867*4882a593Smuzhiyun vmlal.s32 q3, d16, d4 1868*4882a593Smuzhiyun add r2, r2, #24 1869*4882a593Smuzhiyun vmlal.s32 q3, d17, d2 1870*4882a593Smuzhiyun vext.32 d4, d31, d30, #0 1871*4882a593Smuzhiyun vmov d17, d11 1872*4882a593Smuzhiyun vmlal.s32 q3, d14, d1 1873*4882a593Smuzhiyun vext.32 d11, d13, d13, #0 1874*4882a593Smuzhiyun vext.32 d13, d30, d30, #0 1875*4882a593Smuzhiyun vmlal.s32 q3, d15, d0 1876*4882a593Smuzhiyun vext.32 d1, d8, d8, #0 1877*4882a593Smuzhiyun vmlal.s32 q3, d29, d3 1878*4882a593Smuzhiyun vld1.8 {d5}, [r2, : 64] 1879*4882a593Smuzhiyun sub r2, r2, #16 1880*4882a593Smuzhiyun vext.32 d10, d6, d6, #0 1881*4882a593Smuzhiyun vmov.i32 q1, #0xffffffff 1882*4882a593Smuzhiyun vshl.i64 q4, q1, #25 1883*4882a593Smuzhiyun add r5, sp, #480 1884*4882a593Smuzhiyun vld1.8 {d14-d15}, [r5, : 128] 1885*4882a593Smuzhiyun vadd.i64 q9, q2, q7 1886*4882a593Smuzhiyun vshl.i64 q1, q1, #26 1887*4882a593Smuzhiyun vshr.s64 q10, q9, #26 1888*4882a593Smuzhiyun vld1.8 {d0}, [r2, : 64]! 1889*4882a593Smuzhiyun vadd.i64 q5, q5, q10 1890*4882a593Smuzhiyun vand q9, q9, q1 1891*4882a593Smuzhiyun vld1.8 {d16}, [r2, : 64]! 1892*4882a593Smuzhiyun add r2, sp, #496 1893*4882a593Smuzhiyun vld1.8 {d20-d21}, [r2, : 128] 1894*4882a593Smuzhiyun vadd.i64 q11, q5, q10 1895*4882a593Smuzhiyun vsub.i64 q2, q2, q9 1896*4882a593Smuzhiyun vshr.s64 q9, q11, #25 1897*4882a593Smuzhiyun vext.32 d12, d5, d4, #0 1898*4882a593Smuzhiyun vand q11, q11, q4 1899*4882a593Smuzhiyun vadd.i64 q0, q0, q9 1900*4882a593Smuzhiyun vmov d19, d7 1901*4882a593Smuzhiyun vadd.i64 q3, q0, q7 1902*4882a593Smuzhiyun vsub.i64 q5, q5, q11 1903*4882a593Smuzhiyun vshr.s64 q11, q3, #26 1904*4882a593Smuzhiyun vext.32 d18, d11, d10, #0 1905*4882a593Smuzhiyun vand q3, q3, q1 1906*4882a593Smuzhiyun vadd.i64 q8, q8, q11 1907*4882a593Smuzhiyun vadd.i64 q11, q8, q10 1908*4882a593Smuzhiyun vsub.i64 q0, q0, q3 1909*4882a593Smuzhiyun vshr.s64 q3, q11, #25 1910*4882a593Smuzhiyun vand q11, q11, q4 1911*4882a593Smuzhiyun vadd.i64 q3, q6, q3 1912*4882a593Smuzhiyun vadd.i64 q6, q3, q7 1913*4882a593Smuzhiyun vsub.i64 q8, q8, q11 1914*4882a593Smuzhiyun vshr.s64 q11, q6, #26 1915*4882a593Smuzhiyun vand q6, q6, q1 1916*4882a593Smuzhiyun vadd.i64 q9, q9, q11 1917*4882a593Smuzhiyun vadd.i64 d25, d19, d21 1918*4882a593Smuzhiyun vsub.i64 q3, q3, q6 1919*4882a593Smuzhiyun vshr.s64 d23, d25, #25 1920*4882a593Smuzhiyun vand q4, q12, q4 1921*4882a593Smuzhiyun vadd.i64 d21, d23, d23 1922*4882a593Smuzhiyun vshl.i64 d25, d23, #4 1923*4882a593Smuzhiyun vadd.i64 d21, d21, d23 1924*4882a593Smuzhiyun vadd.i64 d25, d25, d21 1925*4882a593Smuzhiyun vadd.i64 d4, d4, d25 1926*4882a593Smuzhiyun vzip.i32 q0, q8 1927*4882a593Smuzhiyun vadd.i64 d12, d4, d14 1928*4882a593Smuzhiyun add r2, r6, #8 1929*4882a593Smuzhiyun vst1.8 d0, [r2, : 64] 1930*4882a593Smuzhiyun vsub.i64 d19, d19, d9 1931*4882a593Smuzhiyun add r2, r2, #16 1932*4882a593Smuzhiyun vst1.8 d16, [r2, : 64] 1933*4882a593Smuzhiyun vshr.s64 d22, d12, #26 1934*4882a593Smuzhiyun vand q0, q6, q1 1935*4882a593Smuzhiyun vadd.i64 d10, d10, d22 1936*4882a593Smuzhiyun vzip.i32 q3, q9 1937*4882a593Smuzhiyun vsub.i64 d4, d4, d0 1938*4882a593Smuzhiyun sub r2, r2, #8 1939*4882a593Smuzhiyun vst1.8 d6, [r2, : 64] 1940*4882a593Smuzhiyun add r2, r2, #16 1941*4882a593Smuzhiyun vst1.8 d18, [r2, : 64] 1942*4882a593Smuzhiyun vzip.i32 q2, q5 1943*4882a593Smuzhiyun sub r2, r2, #32 1944*4882a593Smuzhiyun vst1.8 d4, [r2, : 64] 1945*4882a593Smuzhiyun cmp r4, #0 1946*4882a593Smuzhiyun beq .Lskippostcopy 1947*4882a593Smuzhiyun add r2, r3, #144 1948*4882a593Smuzhiyun mov r4, r4 1949*4882a593Smuzhiyun vld1.8 {d0-d1}, [r2, : 128]! 1950*4882a593Smuzhiyun vld1.8 {d2-d3}, [r2, : 128]! 1951*4882a593Smuzhiyun vld1.8 {d4}, [r2, : 64] 1952*4882a593Smuzhiyun vst1.8 {d0-d1}, [r4, : 128]! 1953*4882a593Smuzhiyun vst1.8 {d2-d3}, [r4, : 128]! 1954*4882a593Smuzhiyun vst1.8 d4, [r4, : 64] 1955*4882a593Smuzhiyun.Lskippostcopy: 1956*4882a593Smuzhiyun cmp r1, #1 1957*4882a593Smuzhiyun bne .Lskipfinalcopy 1958*4882a593Smuzhiyun add r2, r3, #288 1959*4882a593Smuzhiyun add r4, r3, #144 1960*4882a593Smuzhiyun vld1.8 {d0-d1}, [r2, : 128]! 1961*4882a593Smuzhiyun vld1.8 {d2-d3}, [r2, : 128]! 1962*4882a593Smuzhiyun vld1.8 {d4}, [r2, : 64] 1963*4882a593Smuzhiyun vst1.8 {d0-d1}, [r4, : 128]! 1964*4882a593Smuzhiyun vst1.8 {d2-d3}, [r4, : 128]! 1965*4882a593Smuzhiyun vst1.8 d4, [r4, : 64] 1966*4882a593Smuzhiyun.Lskipfinalcopy: 1967*4882a593Smuzhiyun add r1, r1, #1 1968*4882a593Smuzhiyun cmp r1, #12 1969*4882a593Smuzhiyun blo .Linvertloop 1970*4882a593Smuzhiyun add r1, r3, #144 1971*4882a593Smuzhiyun ldr r2, [r1], #4 1972*4882a593Smuzhiyun ldr r3, [r1], #4 1973*4882a593Smuzhiyun ldr r4, [r1], #4 1974*4882a593Smuzhiyun ldr r5, [r1], #4 1975*4882a593Smuzhiyun ldr r6, [r1], #4 1976*4882a593Smuzhiyun ldr r7, [r1], #4 1977*4882a593Smuzhiyun ldr r8, [r1], #4 1978*4882a593Smuzhiyun ldr r9, [r1], #4 1979*4882a593Smuzhiyun ldr r10, [r1], #4 1980*4882a593Smuzhiyun ldr r1, [r1] 1981*4882a593Smuzhiyun add r11, r1, r1, LSL #4 1982*4882a593Smuzhiyun add r11, r11, r1, LSL #1 1983*4882a593Smuzhiyun add r11, r11, #16777216 1984*4882a593Smuzhiyun mov r11, r11, ASR #25 1985*4882a593Smuzhiyun add r11, r11, r2 1986*4882a593Smuzhiyun mov r11, r11, ASR #26 1987*4882a593Smuzhiyun add r11, r11, r3 1988*4882a593Smuzhiyun mov r11, r11, ASR #25 1989*4882a593Smuzhiyun add r11, r11, r4 1990*4882a593Smuzhiyun mov r11, r11, ASR #26 1991*4882a593Smuzhiyun add r11, r11, r5 1992*4882a593Smuzhiyun mov r11, r11, ASR #25 1993*4882a593Smuzhiyun add r11, r11, r6 1994*4882a593Smuzhiyun mov r11, r11, ASR #26 1995*4882a593Smuzhiyun add r11, r11, r7 1996*4882a593Smuzhiyun mov r11, r11, ASR #25 1997*4882a593Smuzhiyun add r11, r11, r8 1998*4882a593Smuzhiyun mov r11, r11, ASR #26 1999*4882a593Smuzhiyun add r11, r11, r9 2000*4882a593Smuzhiyun mov r11, r11, ASR #25 2001*4882a593Smuzhiyun add r11, r11, r10 2002*4882a593Smuzhiyun mov r11, r11, ASR #26 2003*4882a593Smuzhiyun add r11, r11, r1 2004*4882a593Smuzhiyun mov r11, r11, ASR #25 2005*4882a593Smuzhiyun add r2, r2, r11 2006*4882a593Smuzhiyun add r2, r2, r11, LSL #1 2007*4882a593Smuzhiyun add r2, r2, r11, LSL #4 2008*4882a593Smuzhiyun mov r11, r2, ASR #26 2009*4882a593Smuzhiyun add r3, r3, r11 2010*4882a593Smuzhiyun sub r2, r2, r11, LSL #26 2011*4882a593Smuzhiyun mov r11, r3, ASR #25 2012*4882a593Smuzhiyun add r4, r4, r11 2013*4882a593Smuzhiyun sub r3, r3, r11, LSL #25 2014*4882a593Smuzhiyun mov r11, r4, ASR #26 2015*4882a593Smuzhiyun add r5, r5, r11 2016*4882a593Smuzhiyun sub r4, r4, r11, LSL #26 2017*4882a593Smuzhiyun mov r11, r5, ASR #25 2018*4882a593Smuzhiyun add r6, r6, r11 2019*4882a593Smuzhiyun sub r5, r5, r11, LSL #25 2020*4882a593Smuzhiyun mov r11, r6, ASR #26 2021*4882a593Smuzhiyun add r7, r7, r11 2022*4882a593Smuzhiyun sub r6, r6, r11, LSL #26 2023*4882a593Smuzhiyun mov r11, r7, ASR #25 2024*4882a593Smuzhiyun add r8, r8, r11 2025*4882a593Smuzhiyun sub r7, r7, r11, LSL #25 2026*4882a593Smuzhiyun mov r11, r8, ASR #26 2027*4882a593Smuzhiyun add r9, r9, r11 2028*4882a593Smuzhiyun sub r8, r8, r11, LSL #26 2029*4882a593Smuzhiyun mov r11, r9, ASR #25 2030*4882a593Smuzhiyun add r10, r10, r11 2031*4882a593Smuzhiyun sub r9, r9, r11, LSL #25 2032*4882a593Smuzhiyun mov r11, r10, ASR #26 2033*4882a593Smuzhiyun add r1, r1, r11 2034*4882a593Smuzhiyun sub r10, r10, r11, LSL #26 2035*4882a593Smuzhiyun mov r11, r1, ASR #25 2036*4882a593Smuzhiyun sub r1, r1, r11, LSL #25 2037*4882a593Smuzhiyun add r2, r2, r3, LSL #26 2038*4882a593Smuzhiyun mov r3, r3, LSR #6 2039*4882a593Smuzhiyun add r3, r3, r4, LSL #19 2040*4882a593Smuzhiyun mov r4, r4, LSR #13 2041*4882a593Smuzhiyun add r4, r4, r5, LSL #13 2042*4882a593Smuzhiyun mov r5, r5, LSR #19 2043*4882a593Smuzhiyun add r5, r5, r6, LSL #6 2044*4882a593Smuzhiyun add r6, r7, r8, LSL #25 2045*4882a593Smuzhiyun mov r7, r8, LSR #7 2046*4882a593Smuzhiyun add r7, r7, r9, LSL #19 2047*4882a593Smuzhiyun mov r8, r9, LSR #13 2048*4882a593Smuzhiyun add r8, r8, r10, LSL #12 2049*4882a593Smuzhiyun mov r9, r10, LSR #20 2050*4882a593Smuzhiyun add r1, r9, r1, LSL #6 2051*4882a593Smuzhiyun str r2, [r0] 2052*4882a593Smuzhiyun str r3, [r0, #4] 2053*4882a593Smuzhiyun str r4, [r0, #8] 2054*4882a593Smuzhiyun str r5, [r0, #12] 2055*4882a593Smuzhiyun str r6, [r0, #16] 2056*4882a593Smuzhiyun str r7, [r0, #20] 2057*4882a593Smuzhiyun str r8, [r0, #24] 2058*4882a593Smuzhiyun str r1, [r0, #28] 2059*4882a593Smuzhiyun movw r0, #0 2060*4882a593Smuzhiyun mov sp, ip 2061*4882a593Smuzhiyun pop {r4-r11, pc} 2062*4882a593SmuzhiyunENDPROC(curve25519_neon) 2063