/* SPDX-License-Identifier: BSD-2-Clause */
/*
 * Copyright (c) Hisilicon Technologies Co., Ltd. 2023. All rights reserved.
 * Copyright (C) 2022, Alibaba Group.
 * Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
 *
 * SM4 optimization for ARMv8 by NEON and AES HW instruction, which is an
 * optional Cryptographic Extension for ARMv8-A.
 *
 * The NEON implementation refers to Linux kernel (sm4-neon-core.S contributed
 * by Tianjia Zhang <tianjia.zhang@linux.alibaba.com>).
 *
 * The AES trick refers to sm4ni (https://github.com/mjosaarinen/sm4ni). The
 * constants used in load_sbox_matrix are from this blog (https://www.cnblogs.
 * com/kentle/p/15826075.html). We've done some further optimizations so the
 * constants don't look the same.
 */

#include <asm.S>

.arch armv8-a+crypto

/* m0-m3: the four 32-bit SM4 state words used by the scalar round code */
#define m0 w9
#define m1 w10
#define m2 w11
#define m3 w12
/*
 * tw0-tw7: low/high 64-bit halves of up to eight XTS tweaks, kept in GP
 * registers across the bulk loops.
 * NOTE(review): tw1/tw2 (x9-x12) alias m0-m3 (w9-w12); the XTS paths only
 * invoke the scalar rounds (sm4_encrypt_block1x) where tw1/tw2 are dead --
 * confirm this invariant when modifying. tw5h is x18, the AAPCS64 platform
 * register; it is saved/restored by frame_push/frame_pop, but beware on
 * platforms that reserve x18.
 */
#define tw0l x7
#define tw0h x8
#define tw1l x9
#define tw1h x10
#define tw2l x11
#define tw2h x12
#define tw3l x13
#define tw3h x14
#define tw4l x15
#define tw4h x16
#define tw5l x17
#define tw5h x18
#define tw6l x19
#define tw6h x20
#define tw7l x21
#define tw7h x22
/* tmpw0-tmpw2 / tmpx0-tmpx1: scratch registers shared by the helper macros */
#define tmpw0 w23
#define tmpx0 x23
#define tmpw1 w24
#define tmpx1 x24
#define tmpw2 w25

/* round keys: v0-v7 */
#define RK0 v0
#define RK1 v1
#define RK2 v2
#define RK3 v3
#define RK4 v4
#define RK5 v5
#define RK6 v6
#define RK7 v7

/* plain blocks: v8-v15 */
#define BLK0 v8
#define BLK1 v9
#define BLK2 v10
#define BLK3 v11
#define BLK4 v12
#define BLK5 v13
#define BLK6 v14
#define BLK7 v15

/* scratch vectors */
#define TMP0 v16
#define TMP1 v17
#define TMP2 v18
#define TMP3 v19
#define TMP4 v20
#define TMP5 v21
#define TMP6 v22
#define TMP7 v23
#define TMP8 v24
/* IV: chaining value (CBC), or current tweak (XTS) */
#define IV v25
/* constants for the AESE-based S-box; loaded by load_sbox_matrix */
#define ANDMASKV v26
#define ANDMASKQ q26
#define ATALMaskV v27
#define ATALMaskQ q27
#define ATAHMaskV v28
#define ATAHMaskQ q28
#define TALMaskV v29
#define TALMaskQ q29
#define TAHMaskV v30
#define TAHMaskQ q30
#define MASKV v31
#define MASKQ q31

/*
 * Save the callee-saved GP pairs plus x15-x18 (also clobbered by this
 * file) and the callee-saved SIMD halves d8-d15. Paired with frame_pop.
 */
.macro frame_push
	stp	x15, x16, [sp, #-0x10]!
	stp	x17, x18, [sp, #-0x10]!
	stp	x19, x20, [sp, #-0x10]!
	stp	x21, x22, [sp, #-0x10]!
	stp	x23, x24, [sp, #-0x10]!
	stp	x25, x26, [sp, #-0x10]!
	stp	x27, x28, [sp, #-0x10]!
	stp	x29, x30, [sp, #-0x10]!
	stp	d8, d9, [sp, #-0x10]!
	stp	d10, d11, [sp, #-0x10]!
	stp	d12, d13, [sp, #-0x10]!
	stp	d14, d15, [sp, #-0x10]!
.endm

/* Restore everything saved by frame_push, in reverse order. */
.macro frame_pop
	ldp	d14, d15, [sp], #0x10
	ldp	d12, d13, [sp], #0x10
	ldp	d10, d11, [sp], #0x10
	ldp	d8, d9, [sp], #0x10
	ldp	x29, x30, [sp], #0x10
	ldp	x27, x28, [sp], #0x10
	ldp	x25, x26, [sp], #0x10
	ldp	x23, x24, [sp], #0x10
	ldp	x21, x22, [sp], #0x10
	ldp	x19, x20, [sp], #0x10
	ldp	x17, x18, [sp], #0x10
	ldp	x15, x16, [sp], #0x10
.endm

/*
 * Load the six 128-bit constants of the AESE-based SM4 S-box into their
 * dedicated vector registers (MASKV, TAH/TAL, ATAH/ATAL, ANDMASK).
 */
.macro load_sbox_matrix
	ldr	MASKQ, .Lsbox_magic
	ldr	TAHMaskQ, .Lsbox_magic+16
	ldr	TALMaskQ, .Lsbox_magic+32
	ldr	ATAHMaskQ, .Lsbox_magic+48
	ldr	ATALMaskQ, .Lsbox_magic+64
	ldr	ANDMASKQ, .Lsbox_magic+80
.endm

/*
 * Per-byte affine (matrix) transform: split every byte of \x into high
 * and low nibbles, look each nibble up in the \high/\low tables (tbl)
 * and XOR the two results back into \x. Clobbers \tmp.
 */
.macro multi_matrix, x, high, low, tmp
	ushr	\tmp\().16b, \x\().16b, 4
	and	\x\().16b, \x\().16b, ANDMASKV.16b
	tbl	\x\().16b, {\low\().16b}, \x\().16b
	tbl	\tmp\().16b, {\high\().16b}, \tmp\().16b
	eor	\x\().16b, \x\().16b, \tmp\().16b
.endm

/*
 * SM4 S-box applied to all 16 bytes of \src, result in \des (the sm4ni
 * technique): byte shuffle by MASKV (pre-compensates AESE's ShiftRows),
 * pre-affine transform, AESE with an all-zero round key (= AES
 * SubBytes + ShiftRows), then post-affine transform.
 * Clobbers \tmp1 (zeroed as the AESE round key) and \tmp2.
 */
.macro sbox, des, src, tmp1, tmp2
	tbl	\des\().16b, {\src\().16b}, MASKV.16b
	multi_matrix	\des, TAHMaskV, TALMaskV, \tmp2
	eor	\tmp1\().16b, \tmp1\().16b, \tmp1\().16b
	aese	\des\().16b, \tmp1\().16b
	multi_matrix	\des, ATAHMaskV, ATALMaskV, \tmp2
.endm

/*
 * Two parallel copies of the sbox macro (interleaved to let the two
 * dependency chains overlap). Same clobbers as sbox.
 */
.macro sbox_double, des0, src0, des1, src1, tmp1, tmp2
	tbl	\des0\().16b, {\src0\().16b}, MASKV.16b
	tbl	\des1\().16b, {\src1\().16b}, MASKV.16b
	multi_matrix	\des0, TAHMaskV, TALMaskV, \tmp2
	multi_matrix	\des1, TAHMaskV, TALMaskV, \tmp2
	eor	\tmp1\().16b, \tmp1\().16b, \tmp1\().16b
	aese	\des0\().16b, \tmp1\().16b
	multi_matrix	\des0, ATAHMaskV, ATALMaskV, \tmp2
	aese	\des1\().16b, \tmp1\().16b
	multi_matrix	\des1, ATAHMaskV, ATALMaskV, \tmp2
.endm

/*
 * One scalar SM4 round: \c0 ^= L(sbox(\c1 ^ \c2 ^ \c3 ^ \k)) where
 * L(B) = B ^ rol32(B,2) ^ rol32(B,10) ^ rol32(B,18) ^ rol32(B,24);
 * note ror #(32-n) == rol #n. Clobbers tmpw0-tmpw2 and TMP0-TMP3.
 */
.macro round, c0, c1, c2, c3, k
	mov	tmpw0, \k
	eor	tmpw1, \c1, \c2
	eor	tmpw0, \c3, tmpw0
	eor	tmpw2, tmpw1, tmpw0
	mov	TMP0.s[0], tmpw2
	/* nonlinear transformation */
	sbox	TMP1, TMP0, TMP2, TMP3
	/* linear transformation */
	mov	tmpw2, TMP1.s[0]
	ror	tmpw0, tmpw2, #(32-10)
	eor	tmpw0, tmpw0, tmpw2, ror #(32-2)
	ror	tmpw1, tmpw2, #(32-24)
	eor	tmpw1, tmpw1, tmpw2, ror #(32-18)
	eor	tmpw0, tmpw0, tmpw1
	eor	tmpw2, tmpw0, tmpw2
	eor	\c0, \c0, tmpw2
.endm

/* Four rounds using the four words of \k in forward order (encrypt). */
.macro round4_enc, k
	round	m0, m1, m2, m3, \k\().s[0]
	round	m1, m2, m3, m0, \k\().s[1]
	round	m2, m3, m0, m1, \k\().s[2]
	round	m3, m0, m1, m2, \k\().s[3]
.endm

/* Four rounds using the four words of \k in reverse order (decrypt). */
.macro round4_dec, k
	round	m0, m1, m2, m3, \k\().s[3]
	round	m1, m2, m3, m0, \k\().s[2]
	round	m2, m3, m0, m1, \k\().s[1]
	round	m3, m0, m1, m2, \k\().s[0]
.endm

/*
 * 32 SM4 encryption rounds on \in (words already in host order, i.e.
 * caller has done rev32), ending with the final word reversal R.
 */
.macro encrypt_block_no_rev, in
	mov	m0, \in\().s[0]
	mov	m1, \in\().s[1]
	mov	m2, \in\().s[2]
	mov	m3, \in\().s[3]
	round4_enc	RK0
	round4_enc	RK1
	round4_enc	RK2
	round4_enc	RK3
	round4_enc	RK4
	round4_enc	RK5
	round4_enc	RK6
	round4_enc	RK7
	mov	\in\().s[0], m3
	mov	\in\().s[1], m2
	mov	\in\().s[2], m1
	mov	\in\().s[3], m0
.endm

/* Encrypt one big-endian block in \in in place (keys in RK0-RK7). */
.macro encrypt_block, in
	rev32	\in\().16b, \in\().16b
	encrypt_block_no_rev	\in
	rev32	\in\().16b, \in\().16b
.endm

/* As encrypt_block_no_rev, but with the round keys in reverse order. */
.macro decrypt_block_no_rev, in
	mov	m0, \in\().s[0]
	mov	m1, \in\().s[1]
	mov	m2, \in\().s[2]
	mov	m3, \in\().s[3]
	round4_dec	RK7
	round4_dec	RK6
	round4_dec	RK5
	round4_dec	RK4
	round4_dec	RK3
	round4_dec	RK2
	round4_dec	RK1
	round4_dec	RK0
	mov	\in\().s[0], m3
	mov	\in\().s[1], m2
	mov	\in\().s[2], m1
	mov	\in\().s[3], m0
.endm

/* Decrypt one big-endian block in \in in place (keys in RK0-RK7). */
.macro decrypt_block, in
	rev32	\in\().16b, \in\().16b
	decrypt_block_no_rev	\in
	rev32	\in\().16b, \in\().16b
.endm

/*
 * Encrypt the single block in BLK0 in place. Round keys must be in
 * RK0-RK7 and the S-box constants loaded (load_sbox_matrix).
 */
LOCAL_FUNC sm4_encrypt_block1x , :
	encrypt_block	BLK0
	ret
END_FUNC sm4_encrypt_block1x

/* Decrypt the single block in BLK0 in place (keys in RK0-RK7). */
LOCAL_FUNC sm4_decrypt_block1x , :
	decrypt_block	BLK0
	ret
END_FUNC sm4_decrypt_block1x

/*
 * 4x4 transpose of 32-bit words across \s0-\s3: converts four blocks
 * into column form (one state word per vector). Clobbers TMP0-TMP3.
 */
.macro transpose_4x4, s0, s1, s2, s3
	zip1	TMP0.4s, \s0\().4s, \s1\().4s
	zip1	TMP1.4s, \s2\().4s, \s3\().4s
	zip2	TMP2.4s, \s0\().4s, \s1\().4s
	zip2	TMP3.4s, \s2\().4s, \s3\().4s
	zip1	\s0\().2d, TMP0.2d, TMP1.2d
	zip2	\s1\().2d, TMP0.2d, TMP1.2d
	zip1	\s2\().2d, TMP2.2d, TMP3.2d
	zip2	\s3\().2d, TMP2.2d, TMP3.2d
.endm

/*
 * Inverse rearrangement used after the rounds: transpose combined with
 * the SM4 final word reversal. Clobbers TMP0-TMP3.
 */
.macro rotate_clockwise_90, s0, s1, s2, s3
	zip1	TMP0.4s, \s1\().4s, \s0\().4s
	zip2	TMP1.4s, \s1\().4s, \s0\().4s
	zip1	TMP2.4s, \s3\().4s, \s2\().4s
	zip2	TMP3.4s, \s3\().4s, \s2\().4s
	zip1	\s0\().2d, TMP2.2d, TMP0.2d
	zip2	\s1\().2d, TMP2.2d, TMP0.2d
	zip1	\s2\().2d, TMP3.2d, TMP1.2d
	zip2	\s3\().2d, TMP3.2d, TMP1.2d
.endm


/*
 * One SM4 round on four blocks in parallel (column form: \s0-\s3 each
 * hold one state word of all four blocks):
 * \s0 ^= L(sbox(\s1 ^ \s2 ^ \s3 ^ \k)). Clobbers TMP0-TMP4, TMP8.
 */
.macro round_4x, s0, s1, s2, s3, k
	dup	TMP8.4s, \k
	eor	TMP1.16b, \s2\().16b, \s3\().16b
	eor	TMP8.16b, TMP8.16b, \s1\().16b
	eor	TMP8.16b, TMP8.16b, TMP1.16b

	/* nonlinear transformation */
	sbox	TMP0, TMP8, TMP2, TMP3

	/* linear transformation: B ^ rol2 ^ rol10 ^ rol18 ^ rol24 */
	shl	TMP1.4s, TMP0.4s, #2
	shl	TMP2.4s, TMP0.4s, #10
	shl	TMP3.4s, TMP0.4s, #18
	shl	TMP4.4s, TMP0.4s, #24
	sri	TMP1.4s, TMP0.4s, #(32-2)
	sri	TMP2.4s, TMP0.4s, #(32-10)
	sri	TMP3.4s, TMP0.4s, #(32-18)
	sri	TMP4.4s, TMP0.4s, #(32-24)
	eor	TMP0.16b, TMP0.16b, TMP1.16b
	eor	TMP2.16b, TMP2.16b, TMP3.16b
	eor	TMP4.16b, TMP4.16b, \s0\().16b
	eor	TMP0.16b, TMP0.16b, TMP2.16b
	eor	\s0\().16b, TMP0.16b, TMP4.16b
.endm

/* Four rounds (one key vector \k) on the four blocks BLK0-BLK3. */
.macro round4_4x, k
	round_4x	BLK0, BLK1, BLK2, BLK3, \k\().s[0]
	round_4x	BLK1, BLK2, BLK3, BLK0, \k\().s[1]
	round_4x	BLK2, BLK3, BLK0, BLK1, \k\().s[2]
	round_4x	BLK3, BLK0, BLK1, BLK2, \k\().s[3]
.endm

/*
 * Encrypt BLK0-BLK3 in place (keys in RK0-RK7). Callers with fewer
 * than four valid blocks still use this; the surplus lanes compute
 * values that are simply not stored.
 */
LOCAL_FUNC sm4_encrypt_block4x , :
	rev32	BLK0.16b, BLK0.16b
	rev32	BLK1.16b, BLK1.16b
	rev32	BLK2.16b, BLK2.16b
	rev32	BLK3.16b, BLK3.16b

	transpose_4x4	BLK0, BLK1, BLK2, BLK3

	round4_4x	RK0
	round4_4x	RK1
	round4_4x	RK2
	round4_4x	RK3
	round4_4x	RK4
	round4_4x	RK5
	round4_4x	RK6
	round4_4x	RK7

	rotate_clockwise_90	BLK0, BLK1, BLK2, BLK3
	rev32	BLK0.16b, BLK0.16b
	rev32	BLK1.16b, BLK1.16b
	rev32	BLK2.16b, BLK2.16b
	rev32	BLK3.16b, BLK3.16b
	ret
END_FUNC sm4_encrypt_block4x

/*
 * One SM4 round on eight blocks: two interleaved copies of round_4x,
 * operating on (\s0-\s3) and (\t0-\t3). Clobbers TMP0-TMP8.
 */
.macro round_8x, s0, s1, s2, s3, t0, t1, t2, t3, k
	dup	TMP8.4s, \k
	eor	TMP0.16b, \s2\().16b, \s3\().16b
	mov	TMP7.16b, TMP8.16b
	eor	TMP1.16b, \t2\().16b, \t3\().16b
	eor	TMP8.16b, TMP8.16b, \s1\().16b
	eor	TMP7.16b, TMP7.16b, \t1\().16b
	eor	TMP8.16b, TMP8.16b, TMP0.16b
	eor	TMP7.16b, TMP7.16b, TMP1.16b

	/* nonlinear transformation */
	sbox_double	TMP0, TMP8, TMP1, TMP7, TMP2, TMP3

	/* linear transformation (both halves, interleaved) */
	shl	TMP6.4s, TMP0.4s, #2
	shl	TMP8.4s, TMP1.4s, #2
	shl	TMP2.4s, TMP0.4s, #10
	shl	TMP5.4s, TMP1.4s, #10
	shl	TMP3.4s, TMP0.4s, #18
	shl	TMP4.4s, TMP0.4s, #24
	sri	TMP6.4s, TMP0.4s, #(32-2)
	sri	TMP2.4s, TMP0.4s, #(32-10)
	sri	TMP3.4s, TMP0.4s, #(32-18)
	sri	TMP4.4s, TMP0.4s, #(32-24)
	eor	TMP0.16b, TMP0.16b, TMP6.16b
	eor	TMP2.16b, TMP2.16b, TMP3.16b
	shl	TMP6.4s, TMP1.4s, #18
	shl	TMP7.4s, TMP1.4s, #24
	sri	TMP8.4s, TMP1.4s, #(32-2)
	sri	TMP5.4s, TMP1.4s, #(32-10)
	sri	TMP6.4s, TMP1.4s, #(32-18)
	sri	TMP7.4s, TMP1.4s, #(32-24)
	eor	TMP4.16b, TMP4.16b, \s0\().16b
	eor	TMP1.16b, TMP1.16b, TMP8.16b
	eor	\s0\().16b, TMP0.16b, TMP2.16b
	eor	\s0\().16b, \s0\().16b, TMP4.16b
	eor	TMP5.16b, TMP5.16b, TMP6.16b
	eor	TMP7.16b, TMP7.16b, \t0\().16b
	eor	TMP1.16b, TMP1.16b, TMP5.16b
	eor	\t0\().16b, TMP1.16b, TMP7.16b
.endm

/* Four rounds (one key vector \k) on the eight blocks BLK0-BLK7. */
.macro round4_8x, k
	round_8x	BLK0, BLK1, BLK2, BLK3, BLK4, BLK5, BLK6, BLK7, \k\().s[0]
	round_8x	BLK1, BLK2, BLK3, BLK0, BLK5, BLK6, BLK7, BLK4, \k\().s[1]
	round_8x	BLK2, BLK3, BLK0, BLK1, BLK6, BLK7, BLK4, BLK5, \k\().s[2]
	round_8x	BLK3, BLK0, BLK1, BLK2, BLK7, BLK4, BLK5, BLK6, \k\().s[3]
.endm

/* Encrypt BLK0-BLK7 in place (keys in RK0-RK7). */
LOCAL_FUNC sm4_encrypt_block8x , :
	rev32	BLK0.16b, BLK0.16b
	rev32	BLK1.16b, BLK1.16b
	rev32	BLK2.16b, BLK2.16b
	rev32	BLK3.16b, BLK3.16b
	rev32	BLK4.16b, BLK4.16b
	rev32	BLK5.16b, BLK5.16b
	rev32	BLK6.16b, BLK6.16b
	rev32	BLK7.16b, BLK7.16b

	transpose_4x4	BLK0, BLK1, BLK2, BLK3
	transpose_4x4	BLK4, BLK5, BLK6, BLK7

	round4_8x	RK0
	round4_8x	RK1
	round4_8x	RK2
	round4_8x	RK3
	round4_8x	RK4
	round4_8x	RK5
	round4_8x	RK6
	round4_8x	RK7

	rotate_clockwise_90	BLK0, BLK1, BLK2, BLK3
	rotate_clockwise_90	BLK4, BLK5, BLK6, BLK7

	rev32	BLK0.16b, BLK0.16b
	rev32	BLK1.16b, BLK1.16b
	rev32	BLK2.16b, BLK2.16b
	rev32	BLK3.16b, BLK3.16b
	rev32	BLK4.16b, BLK4.16b
	rev32	BLK5.16b, BLK5.16b
	rev32	BLK6.16b, BLK6.16b
	rev32	BLK7.16b, BLK7.16b
	ret
END_FUNC sm4_encrypt_block8x

/*
 * Build the big-endian CTR counter block in \vctr from the 128-bit
 * counter held in (\low:\high), then increment that counter.
 * NOTE(review): despite the parameter names, \high is the
 * least-significant half (incremented first, carry into \low); the
 * rev64 produces the big-endian byte order of the block.
 */
.macro inc_le128, vctr, low, high
	mov	\vctr\().d[1], \high
	mov	\vctr\().d[0], \low
	adds	\high, \high, #1
	adc	\low, \low, xzr
	rev64	\vctr\().16b, \vctr\().16b
.endm

/* Move a 128-bit value from the GP pair (\src0,\src1) into \desv. */
.macro mov_reg_to_vec, desv, src0, src1
	mov	\desv\().d[0], \src0
	mov	\desv\().d[1], \src1
.endm

/*
 * XTS tweak update: (\des0:\des1) = (\src0:\src1) * x in GF(2^128)
 * with reduction polynomial 0x87 (IEEE 1619). The 32-bit "and" write
 * zero-extends into tmpx1, which carries the conditional reduction.
 * Clobbers tmpx0, tmpx1, tmpw2. Does not modify flags.
 */
.macro next_tweak, des0, des1, src0, src1
	mov	tmpw2, 0x87
	extr	tmpx0, \src1, \src1, #32
	extr	\des1, \src1, \src0, #63
	and	tmpw1, tmpw2, tmpw0, asr#31
	eor	\des0, tmpx1, \src0, lsl#1
.endm

/* Vector form of next_tweak; goes through tw0/tw1 (clobbers them). */
.macro next_tweak_vec, desv, srcv
	mov	tw0l, \srcv\().d[0]
	mov	tw0h, \srcv\().d[1]
	next_tweak	tw1l, tw1h, tw0l, tw0h
	mov	\desv\().d[0], tw1l
	mov	\desv\().d[1], tw1h
.endm

/* SM4 key-schedule constants CK[0..31] */
LOCAL_DATA .Lck , :
	.long	0x00070E15, 0x1C232A31, 0x383F464D, 0x545B6269
	.long	0x70777E85, 0x8C939AA1, 0xA8AFB6BD, 0xC4CBD2D9
	.long	0xE0E7EEF5, 0xFC030A11, 0x181F262D, 0x343B4249
	.long	0x50575E65, 0x6C737A81, 0x888F969D, 0xA4ABB2B9
	.long	0xC0C7CED5, 0xDCE3EAF1, 0xF8FF060D, 0x141B2229
	.long	0x30373E45, 0x4C535A61, 0x686F767D, 0x848B9299
	.long	0xA0A7AEB5, 0xBCC3CAD1, 0xD8DFE6ED, 0xF4FB0209
	.long	0x10171E25, 0x2C333A41, 0x484F565D, 0x646B7279
END_DATA .Lck

/* SM4 system parameter FK (XORed with the user key before expansion) */
LOCAL_DATA .Lfk , :
	.long	0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc
END_DATA .Lfk

/* NOTE(review): .Lshuffles is not referenced in this file. */
LOCAL_DATA .Lshuffles , :
	.long	0x07060504, 0x0B0A0908, 0x0F0E0D0C, 0x03020100
END_DATA .Lshuffles

/*
 * Constants for the AESE-based S-box, in load_sbox_matrix order:
 * MASK, TAH, TAL, ATAH, ATAL, ANDMASK (nibble mask).
 */
LOCAL_DATA .Lsbox_magic , :
	.dword	0x0b0e0104070a0d00, 0x0306090c0f020508
	.dword	0x62185a2042387a00, 0x22581a6002783a40
	.dword	0x15df62a89e54e923, 0xc10bb67c4a803df7
	.dword	0xb9aa6b78c1d21300, 0x1407c6d56c7fbead
	.dword	0x6404462679195b3b, 0xe383c1a1fe9edcbc
	.dword	0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
END_DATA .Lsbox_magic

/*
 * Shared key-schedule loop body. x1 = user key.
 * K = rev32(key) ^ FK; then 32 iterations of
 *   rk[i] = K0 ^ T'(K1 ^ K2 ^ K3 ^ CK[i])
 * where T' uses L'(B) = B ^ rol32(B,13) ^ rol32(B,23)
 * (ror #19 == rol #13, ror #9 == rol #23).
 * Each iteration leaves the new round key in w8 and ends on "subs";
 * the caller stores w8 and loops back to the numeric label 1: with
 * b.ne while the 32-iteration counter (x6) is non-zero.
 * Clobbers v5, v6, TMP0-TMP3, w7/w8, x5, x6.
 */
.macro sm4_setkey
	ld1	{v5.4s}, [x1]
	load_sbox_matrix
	rev32	v5.16b, v5.16b
	adr	x5, .Lfk
	ld1	{v6.4s}, [x5]
	eor	v5.16b, v5.16b, v6.16b
	mov	x6, #32
	adr	x5, .Lck
1:
	mov	w7, v5.s[1]
	ldr	w8, [x5], #4
	eor	w8, w8, w7
	mov	w7, v5.s[2]
	eor	w8, w8, w7
	mov	w7, v5.s[3]
	eor	w8, w8, w7

	/* optimize sbox using AESE instruction */
	mov	TMP0.s[0], w8
	sbox	TMP1, TMP0, TMP2, TMP3
	mov	w7, TMP1.s[0]

	/* linear transformation */
	eor	w8, w7, w7, ror #19
	eor	w8, w8, w7, ror #9
	mov	w7, v5.s[0]
	eor	w8, w8, w7
	mov	v5.s[0], w8
	/* rotate the state window so lane 0 is the next K0 */
	ext	v5.16b, v5.16b, v5.16b, 4
	subs	x6, x6, #1
.endm

/*
 * void neon_sm4_setkey_enc(uint32_t sk[32], uint8_t const key[16]);
 * x0: round key
 * x1: user key
 * Stores the 32 round keys in forward (encryption) order.
 */
FUNC neon_sm4_setkey_enc , :
	sm4_setkey
	str	w8, [x0], #4
	b.ne	1b
	ret
END_FUNC neon_sm4_setkey_enc
/*
 * void neon_sm4_setkey_dec(uint32_t sk[32], uint8_t const key[16]);
 * x0: round key
 * x1: user key
 * Same expansion as setkey_enc, but stored back-to-front (start at
 * word 31, step -4) so the schedule comes out reversed for decryption.
 */
FUNC neon_sm4_setkey_dec , :
	add	x0, x0, 124
	sm4_setkey
	str	w8, [x0], #-4
	b.ne	1b
	ret
END_FUNC neon_sm4_setkey_dec

/*
 * void neon_sm4_ecb_encrypt(uint8_t out[], uint8_t const in[],
 *			     uint8_t const rk[], size_t len);
 * x0: output
 * x1: input
 * x2: round key
 * w3: length in bytes (only whole 16-byte blocks are processed)
 * Also used for ECB decryption by passing the reversed key schedule.
 */
FUNC neon_sm4_ecb_encrypt , :
	frame_push
	load_sbox_matrix
	ld1	{RK0.4s, RK1.4s, RK2.4s, RK3.4s}, [x2], #64
	ld1	{RK4.4s, RK5.4s, RK6.4s, RK7.4s}, [x2], #64
	lsr	w3, w3, 4			/* w3 = block count */

.Lecbloop8x:
	cmp	w3, 8
	b.lt	.Lecb4x
	ld1	{BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b}, [x1], #64
	ld1	{BLK4.16b, BLK5.16b, BLK6.16b, BLK7.16b}, [x1], #64
	bl	sm4_encrypt_block8x
	st1	{BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b}, [x0], #64
	st1	{BLK4.16b, BLK5.16b, BLK6.16b, BLK7.16b}, [x0], #64
	subs	w3, w3, #8
	b.gt	.Lecbloop8x

	/* dispatch on 0-4+ remaining blocks */
.Lecb4x:
	cmp	w3, 1
	b.lt	.Lecbout
	cmp	w3, 2
	b.lt	.Lecb1x
	cmp	w3, 3
	b.lt	.Lecb2x
	cmp	w3, 4
	b.lt	.Lecb3x
	ld1	{BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b}, [x1], #64
	bl	sm4_encrypt_block4x
	st1	{BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b}, [x0], #64
	sub	w3, w3, #4
	b	.Lecb4x

.Lecb3x:
	/* 4x path with only 3 valid blocks; lane 4 output is discarded */
	ld1	{BLK0.16b, BLK1.16b, BLK2.16b}, [x1], #48
	bl	sm4_encrypt_block4x
	st1	{BLK0.16b, BLK1.16b, BLK2.16b}, [x0], #48
	subs	w3, w3, #3
	b.le	.Lecbout

.Lecb2x:
	ld1	{BLK0.16b, BLK1.16b}, [x1], #32
	bl	sm4_encrypt_block4x
	st1	{BLK0.16b, BLK1.16b}, [x0], #32
	subs	w3, w3, #2
	b.le	.Lecbout

.Lecb1x:
	ld1	{BLK0.16b}, [x1], #16
	bl	sm4_encrypt_block1x
	st1	{BLK0.16b}, [x0], #16

.Lecbout:
	frame_pop
	ret

END_FUNC neon_sm4_ecb_encrypt

/*
 * void neon_sm4_cbc_encrypt(uint8_t out[], uint8_t const in[],
 *			     uint8_t const rk[], size_t len,
 *			     uint8_t iv[]);
 * x0: output
 * x1: input
 * x2: round key
 * w3: length in bytes
 * x4: iv (updated with the last ciphertext block on return)
 */
FUNC neon_sm4_cbc_encrypt , :
	frame_push
	load_sbox_matrix

	ld1	{RK0.4s, RK1.4s, RK2.4s, RK3.4s}, [x2], #64
	ld1	{RK4.4s, RK5.4s, RK6.4s, RK7.4s}, [x2], #64
	lsr	w3, w3, 4
	ld1	{IV.16b}, [x4]

	/*
	 * CBC encryption is inherently serial; this 4x unroll only
	 * interleaves the chaining XORs and byte swaps between the
	 * dependent block encryptions.
	 */
.Lcbcencloop4x:
	cmp	w3, 4
	b.lt	.Lcbcenc1x
	ld1	{BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b}, [x1], #64
	eor	BLK0.16b, BLK0.16b, IV.16b
	rev32	BLK0.16b, BLK0.16b
	rev32	BLK1.16b, BLK1.16b
	rev32	BLK2.16b, BLK2.16b
	rev32	BLK3.16b, BLK3.16b
	encrypt_block_no_rev	BLK0
	/* chain: next plaintext ^= previous ciphertext (both swapped) */
	eor	BLK1.16b, BLK1.16b, BLK0.16b
	encrypt_block_no_rev	BLK1
	rev32	BLK0.16b, BLK0.16b
	eor	BLK2.16b, BLK2.16b, BLK1.16b
	encrypt_block_no_rev	BLK2
	rev32	BLK1.16b, BLK1.16b
	eor	BLK3.16b, BLK3.16b, BLK2.16b
	encrypt_block_no_rev	BLK3
	rev32	BLK2.16b, BLK2.16b
	rev32	BLK3.16b, BLK3.16b
	mov	IV.16b, BLK3.16b
	st1	{BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b}, [x0], #64
	subs	w3, w3, #4
	b	.Lcbcencloop4x
.Lcbcenc1x:
	cmp	w3, 1
	b.lt	.Lcbcencout
.Lcbcencloop:
	ld1	{BLK0.16b}, [x1], #16
	eor	BLK0.16b, BLK0.16b, IV.16b
	bl	sm4_encrypt_block1x
	mov	IV.16b, BLK0.16b
	st1	{BLK0.16b}, [x0], #16
	subs	w3, w3, #1
	bne	.Lcbcencloop
.Lcbcencout:
	st1	{IV.16b}, [x4]
	frame_pop
	ret
END_FUNC neon_sm4_cbc_encrypt

/*
 * void neon_sm4_cbc_decrypt(uint8_t out[], uint8_t const in[],
 *			     uint8_t const rk[], size_t len,
 *			     uint8_t iv[]);
 * x0: output
 * x1: input
 * x2: round key (the REVERSED schedule from neon_sm4_setkey_dec:
 *     sm4_encrypt_block{1,4,8}x then performs SM4 decryption)
 * w3: length in bytes
 * x4: iv (updated with the last ciphertext block on return)
 */
FUNC neon_sm4_cbc_decrypt , :
	frame_push
	load_sbox_matrix

	ld1	{RK0.4s, RK1.4s, RK2.4s, RK3.4s}, [x2], #64
	ld1	{RK4.4s, RK5.4s, RK6.4s, RK7.4s}, [x2], #64
	lsr	w3, w3, 4
	ld1	{IV.16b}, [x4]

.Lcbcdecloop8x:
	cmp	w3, 8
	b.lt	.Lcbcdec4x

	ld1	{BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b}, [x1], #64
	ld1	{BLK4.16b, BLK5.16b, BLK6.16b, BLK7.16b}, [x1], #64
	bl	sm4_encrypt_block8x
	/* re-read the ciphertext (x5) for the chaining XORs */
	sub	x5, x1, #128
	eor	BLK0.16b, BLK0.16b, IV.16b
	ld1	{TMP0.16b, TMP1.16b, TMP2.16b, TMP3.16b}, [x5], #64
	eor	BLK1.16b, BLK1.16b, TMP0.16b
	eor	BLK2.16b, BLK2.16b, TMP1.16b
	eor	BLK3.16b, BLK3.16b, TMP2.16b
	st1	{BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b}, [x0], #64
	ld1	{TMP4.16b, TMP5.16b, TMP6.16b, TMP7.16b}, [x5], #64
	eor	BLK4.16b, BLK4.16b, TMP3.16b
	eor	BLK5.16b, BLK5.16b, TMP4.16b
	mov	IV.16b, TMP7.16b
	eor	BLK6.16b, BLK6.16b, TMP5.16b
	eor	BLK7.16b, BLK7.16b, TMP6.16b
	st1	{BLK4.16b, BLK5.16b, BLK6.16b, BLK7.16b}, [x0], #64
	subs	w3, w3, #8
	b.gt	.Lcbcdecloop8x

.Lcbcdec4x:
	cmp	w3, 1
	b.lt	.Lcbcdecout
	cmp	w3, 2
	b.lt	.Lcbcdec1x
	cmp	w3, 3
	b.lt	.Lcbcdec2x
	cmp	w3, 4
	b.lt	.Lcbcdec3x
	ld1	{BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b}, [x1], #64
	bl	sm4_encrypt_block4x
	sub	x5, x1, 64
	ld1	{TMP0.16b, TMP1.16b, TMP2.16b, TMP3.16b}, [x5], #64
	eor	BLK0.16b, BLK0.16b, IV.16b
	eor	BLK1.16b, BLK1.16b, TMP0.16b
	eor	BLK2.16b, BLK2.16b, TMP1.16b
	eor	BLK3.16b, BLK3.16b, TMP2.16b
	mov	IV.16b, TMP3.16b
	st1	{BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b}, [x0], #64
	sub	w3, w3, #4
	b	.Lcbcdec4x

.Lcbcdec3x:
	ld1	{BLK0.16b, BLK1.16b, BLK2.16b}, [x1], #48
	bl	sm4_encrypt_block4x
	sub	x5, x1, 48
	ld1	{TMP0.16b, TMP1.16b, TMP2.16b}, [x5], #48
	eor	BLK0.16b, BLK0.16b, IV.16b
	eor	BLK1.16b, BLK1.16b, TMP0.16b
	eor	BLK2.16b, BLK2.16b, TMP1.16b
	mov	IV.16b, TMP2.16b
	st1	{BLK0.16b, BLK1.16b, BLK2.16b}, [x0], #48
	subs	w3, w3, #3
	b.le	.Lcbcdecout

.Lcbcdec2x:
	ld1	{BLK0.16b, BLK1.16b}, [x1], #32
	bl	sm4_encrypt_block4x
	sub	x5, x1, 32
	ld1	{TMP0.16b, TMP1.16b}, [x5], #32
	eor	BLK0.16b, BLK0.16b, IV.16b
	eor	BLK1.16b, BLK1.16b, TMP0.16b
	mov	IV.16b, TMP1.16b
	st1	{BLK0.16b, BLK1.16b}, [x0], #32
	subs	w3, w3, #2
	b.le	.Lcbcdecout

.Lcbcdec1x:
	ld1	{BLK0.16b}, [x1], #16
	bl	sm4_encrypt_block1x
	sub	x5, x1, 16
	ld1	{TMP0.16b}, [x5], #16
	eor	BLK0.16b, BLK0.16b, IV.16b
	mov	IV.16b, TMP0.16b
	st1	{BLK0.16b}, [x0], #16

.Lcbcdecout:
	st1	{IV.16b}, [x4]
	frame_pop
	ret
END_FUNC neon_sm4_cbc_decrypt

/*
 * void neon_sm4_ctr_encrypt(uint8_t out[], uint8_t const in[],
 *			     uint8_t const rk[], size_t len,
 *			     uint8_t iv[]);
 * x0: output
 * x1: input
 * x2: round key
 * w3: length in bytes (whole blocks only)
 * x4: iv — big-endian 128-bit counter, written back incremented
 */
FUNC neon_sm4_ctr_encrypt , :
	frame_push
	load_sbox_matrix

	ld1	{RK0.4s, RK1.4s, RK2.4s, RK3.4s}, [x2], #64
	ld1	{RK4.4s, RK5.4s, RK6.4s, RK7.4s}, [x2], #64
	lsr	w3, w3, 4
	/* counter kept in x7 (MS half) : x8 (LS half), host order */
	ldp	x7, x8, [x4]
	rev	x7, x7
	rev	x8, x8

.Lctrloop8x:
	cmp	w3, 8
	b.lt	.Lctr4x

	/* construct CTRs */
	inc_le128	BLK0, x7, x8
	inc_le128	BLK1, x7, x8
	inc_le128	BLK2, x7, x8
	inc_le128	BLK3, x7, x8
	inc_le128	BLK4, x7, x8
	inc_le128	BLK5, x7, x8
	inc_le128	BLK6, x7, x8
	inc_le128	BLK7, x7, x8
	bl	sm4_encrypt_block8x
	ld1	{TMP0.16b, TMP1.16b, TMP2.16b, TMP3.16b}, [x1], #64
	ld1	{TMP4.16b, TMP5.16b, TMP6.16b, TMP7.16b}, [x1], #64
	eor	BLK0.16b, BLK0.16b, TMP0.16b
	eor	BLK1.16b, BLK1.16b, TMP1.16b
	eor	BLK2.16b, BLK2.16b, TMP2.16b
	eor	BLK3.16b, BLK3.16b, TMP3.16b
	eor	BLK4.16b, BLK4.16b, TMP4.16b
	eor	BLK5.16b, BLK5.16b, TMP5.16b
	eor	BLK6.16b, BLK6.16b, TMP6.16b
	eor	BLK7.16b, BLK7.16b, TMP7.16b
	st1	{BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b}, [x0], #64
	st1	{BLK4.16b, BLK5.16b, BLK6.16b, BLK7.16b}, [x0], #64
	subs	w3, w3, #8
	b.gt	.Lctrloop8x

.Lctr4x:
	cmp	w3, 1
	b.lt	.Lctrout
	cmp	w3, 2
	b.lt	.Lctr1x
	cmp	w3, 3
	b.lt	.Lctr2x
	cmp	w3, 4
	b.lt	.Lctr3x
	inc_le128	BLK0, x7, x8
	inc_le128	BLK1, x7, x8
	inc_le128	BLK2, x7, x8
	inc_le128	BLK3, x7, x8
	bl	sm4_encrypt_block4x
	ld1	{TMP0.16b, TMP1.16b, TMP2.16b, TMP3.16b}, [x1], #64
	eor	BLK0.16b, BLK0.16b, TMP0.16b
	eor	BLK1.16b, BLK1.16b, TMP1.16b
	eor	BLK2.16b, BLK2.16b, TMP2.16b
	eor	BLK3.16b, BLK3.16b, TMP3.16b
	st1	{BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b}, [x0], #64
	sub	w3, w3, #4
	b	.Lctr4x

.Lctr3x:
	inc_le128	BLK0, x7, x8
	inc_le128	BLK1, x7, x8
	inc_le128	BLK2, x7, x8
	bl	sm4_encrypt_block4x
	ld1	{TMP0.16b, TMP1.16b, TMP2.16b}, [x1], #48
	eor	BLK0.16b, BLK0.16b, TMP0.16b
	eor	BLK1.16b, BLK1.16b, TMP1.16b
	eor	BLK2.16b, BLK2.16b, TMP2.16b
	st1	{BLK0.16b, BLK1.16b, BLK2.16b}, [x0], #48
	subs	w3, w3, #3
	b.le	.Lctrout

.Lctr2x:
	inc_le128	BLK0, x7, x8
	inc_le128	BLK1, x7, x8
	bl	sm4_encrypt_block4x
	ld1	{TMP0.16b, TMP1.16b}, [x1], #32
	eor	BLK0.16b, BLK0.16b, TMP0.16b
	eor	BLK1.16b, BLK1.16b, TMP1.16b
	st1	{BLK0.16b, BLK1.16b}, [x0], #32
	subs	w3, w3, #2
	b.le	.Lctrout

.Lctr1x:
	inc_le128	BLK0, x7, x8
	bl	sm4_encrypt_block1x
	ld1	{TMP0.16b}, [x1], #16
	eor	BLK0.16b, BLK0.16b, TMP0.16b
	st1	{BLK0.16b}, [x0], #16

.Lctrout:
	/* write back the incremented counter in big-endian form */
	rev	x7, x7
	rev	x8, x8
	stp	x7, x8, [x4]
	frame_pop
	ret
END_FUNC neon_sm4_ctr_encrypt

/*
 * Common XTS worker (callers neon_sm4_xts_{en,de}crypt set x26 and
 * have already done frame_push).
 * x0: output
 * x1: input
 * x2: round key1 (data keys: reversed schedule for decryption)
 * x3: round key2 (tweak keys, always encryption order)
 * w4: length in bytes (NOTE(review): header used to say "blocks";
 *     the and/lsr below show it is a byte length)
 * x26: 1 = encrypt, 0 = decrypt (only affects the tweak order of the
 *      ciphertext-stealing tail)
 * x5: iv, updated on return for a subsequent call
 */
LOCAL_FUNC xts_do_cipher , :
	stp	x29, x30, [sp, #-16]!
	mov	x29, sp
	load_sbox_matrix
	ld1	{IV.16b}, [x5]
	/* load round key2 for first tweak */
	ld1	{RK0.4s, RK1.4s, RK2.4s, RK3.4s}, [x3], #64
	ld1	{RK4.4s, RK5.4s, RK6.4s, RK7.4s}, [x3], #64
	encrypt_block	IV
	/* load round key1 for block cipher */
	ld1	{RK0.4s, RK1.4s, RK2.4s, RK3.4s}, [x2], #64
	ld1	{RK4.4s, RK5.4s, RK6.4s, RK7.4s}, [x2], #64
	/* w6: remain */
	and	w6, w4, #0x0F
	/* w4: blocks */
	lsr	w4, w4, 4
	/* blocks == 0: ret */
	cmp	w4, #1
	b.lt	.Lxtsout
	cmp	w6, 0
	b.eq	.Lxtsblks
	/* partial tail: reserve the last full block for stealing */
	subs	w4, w4, #1
	b.eq	.Lxtstail
.Lxtsblks:
	/* precompute tweaks T0..T7 in GP register pairs */
	mov	tw0l, IV.d[0]
	mov	tw0h, IV.d[1]
	next_tweak	tw1l, tw1h, tw0l, tw0h
	next_tweak	tw2l, tw2h, tw1l, tw1h
	next_tweak	tw3l, tw3h, tw2l, tw2h
	next_tweak	tw4l, tw4h, tw3l, tw3h
	next_tweak	tw5l, tw5h, tw4l, tw4h
	next_tweak	tw6l, tw6h, tw5l, tw5h
	next_tweak	tw7l, tw7h, tw6l, tw6h
.Lxtsloop8x:
	cmp	w4, 8
	b.lt	.Lxts4x
	ld1	{BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b}, [x1], #64
	mov_reg_to_vec	TMP0, tw0l, tw0h
	mov_reg_to_vec	TMP1, tw1l, tw1h
	mov_reg_to_vec	TMP2, tw2l, tw2h
	mov_reg_to_vec	TMP3, tw3l, tw3h
	eor	BLK0.16b, BLK0.16b, TMP0.16b
	eor	BLK1.16b, BLK1.16b, TMP1.16b
	eor	BLK2.16b, BLK2.16b, TMP2.16b
	eor	BLK3.16b, BLK3.16b, TMP3.16b
	ld1	{BLK4.16b, BLK5.16b, BLK6.16b, BLK7.16b}, [x1], #64
	mov_reg_to_vec	TMP4, tw4l, tw4h
	mov_reg_to_vec	TMP5, tw5l, tw5h
	mov_reg_to_vec	TMP6, tw6l, tw6h
	mov_reg_to_vec	IV, tw7l, tw7h
	eor	BLK4.16b, BLK4.16b, TMP4.16b
	eor	BLK5.16b, BLK5.16b, TMP5.16b
	eor	BLK6.16b, BLK6.16b, TMP6.16b
	eor	BLK7.16b, BLK7.16b, IV.16b

	bl	sm4_encrypt_block8x

	/* post-whiten with the same tweaks while advancing T0..T7 */
	mov_reg_to_vec	TMP0, tw0l, tw0h
	next_tweak	tw0l, tw0h, tw7l, tw7h
	mov_reg_to_vec	TMP1, tw1l, tw1h
	next_tweak	tw1l, tw1h, tw0l, tw0h
	mov_reg_to_vec	TMP2, tw2l, tw2h
	next_tweak	tw2l, tw2h, tw1l, tw1h
	mov_reg_to_vec	TMP3, tw3l, tw3h
	next_tweak	tw3l, tw3h, tw2l, tw2h
	mov_reg_to_vec	TMP4, tw4l, tw4h
	next_tweak	tw4l, tw4h, tw3l, tw3h
	mov_reg_to_vec	TMP5, tw5l, tw5h
	next_tweak	tw5l, tw5h, tw4l, tw4h
	mov_reg_to_vec	TMP6, tw6l, tw6h
	next_tweak	tw6l, tw6h, tw5l, tw5h
	mov_reg_to_vec	IV, tw7l, tw7h
	next_tweak	tw7l, tw7h, tw6l, tw6h

	eor	BLK0.16b, BLK0.16b, TMP0.16b
	eor	BLK1.16b, BLK1.16b, TMP1.16b
	eor	BLK2.16b, BLK2.16b, TMP2.16b
	eor	BLK3.16b, BLK3.16b, TMP3.16b
	eor	BLK4.16b, BLK4.16b, TMP4.16b
	eor	BLK5.16b, BLK5.16b, TMP5.16b
	eor	BLK6.16b, BLK6.16b, TMP6.16b
	eor	BLK7.16b, BLK7.16b, IV.16b

	st1	{BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b}, [x0], #64
	st1	{BLK4.16b, BLK5.16b, BLK6.16b, BLK7.16b}, [x0], #64
	subs	w4, w4, #8
	b.gt	.Lxtsloop8x

.Lxts4x:
	cmp	w4, 1
	b.lt	.Lxtsblksout
	cmp	w4, 2
	b.lt	.Lxts1x
	cmp	w4, 3
	b.lt	.Lxts2x
	cmp	w4, 4
	b.lt	.Lxts3x
	/* BLK4-BLK6/IV double as tweak holders (survive the 4x call) */
	ld1	{BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b}, [x1], #64
	mov_reg_to_vec	BLK4, tw0l, tw0h
	mov_reg_to_vec	BLK5, tw1l, tw1h
	mov_reg_to_vec	BLK6, tw2l, tw2h
	mov_reg_to_vec	IV, tw3l, tw3h
	eor	BLK0.16b, BLK0.16b, BLK4.16b
	eor	BLK1.16b, BLK1.16b, BLK5.16b
	eor	BLK2.16b, BLK2.16b, BLK6.16b
	eor	BLK3.16b, BLK3.16b, IV.16b
	bl	sm4_encrypt_block4x
	eor	BLK0.16b, BLK0.16b, BLK4.16b
	eor	BLK1.16b, BLK1.16b, BLK5.16b
	eor	BLK2.16b, BLK2.16b, BLK6.16b
	eor	BLK3.16b, BLK3.16b, IV.16b
	st1	{BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b}, [x0], #64
	sub	w4, w4, #4

	/* slide T4..T6 down; at most 3 blocks can remain */
	mov	tw0l, tw4l
	mov	tw0h, tw4h
	mov	tw1l, tw5l
	mov	tw1h, tw5h
	mov	tw2l, tw6l
	mov	tw2h, tw6h
	b	.Lxts4x

.Lxts3x:
	ld1	{BLK0.16b, BLK1.16b, BLK2.16b}, [x1], #48
	mov_reg_to_vec	BLK4, tw0l, tw0h
	mov_reg_to_vec	BLK5, tw1l, tw1h
	mov_reg_to_vec	IV, tw2l, tw2h
	eor	BLK0.16b, BLK0.16b, BLK4.16b
	eor	BLK1.16b, BLK1.16b, BLK5.16b
	eor	BLK2.16b, BLK2.16b, IV.16b
	bl	sm4_encrypt_block4x
	eor	BLK0.16b, BLK0.16b, BLK4.16b
	eor	BLK1.16b, BLK1.16b, BLK5.16b
	eor	BLK2.16b, BLK2.16b, IV.16b
	st1	{BLK0.16b, BLK1.16b, BLK2.16b}, [x0], #48
	subs	w4, w4, #3
	b.le	.Lxtsblksout

.Lxts2x:
	ld1	{BLK0.16b, BLK1.16b}, [x1], #32
	mov_reg_to_vec	BLK4, tw0l, tw0h
	mov_reg_to_vec	IV, tw1l, tw1h
	eor	BLK0.16b, BLK0.16b, BLK4.16b
	eor	BLK1.16b, BLK1.16b, IV.16b
	bl	sm4_encrypt_block4x
	eor	BLK0.16b, BLK0.16b, BLK4.16b
	eor	BLK1.16b, BLK1.16b, IV.16b
	st1	{BLK0.16b, BLK1.16b}, [x0], #32
	subs	w4, w4, #2
	b.le	.Lxtsblksout

.Lxts1x:
	ld1	{BLK0.16b}, [x1], #16
	mov_reg_to_vec	IV, tw0l, tw0h
	eor	BLK0.16b, BLK0.16b, IV.16b
	bl	sm4_encrypt_block1x
	eor	BLK0.16b, BLK0.16b, IV.16b
	st1	{BLK0.16b}, [x0], #16
.Lxtsblksout:
	/*
	 * IV holds the last tweak used; advance it for the tail (or the
	 * next update). next_tweak_vec does not touch the flags, so the
	 * cmp result survives to b.eq.
	 */
	cmp	w6, 0
	/* if encrypt some blocks with a partial block */
	next_tweak_vec	IV, IV
	b.eq	.Lxtsout
.Lxtstail:
	/* ciphertext stealing: IV and TMP7 are the last two tweaks */
	next_tweak_vec	TMP7, IV
	cmp	x26, 1
	b.eq	1f
	/* The last two tweaks IV, TMP7 need to be swapped for decryption */
	mov	TMP8.16b, IV.16b
	mov	IV.16b, TMP7.16b
	mov	TMP7.16b, TMP8.16b
1:
	/* process the last full block with tweak IV */
	ld1	{BLK0.16b}, [x1], #16
	eor	BLK0.16b, BLK0.16b, IV.16b
	bl	sm4_encrypt_block1x
	eor	BLK0.16b, BLK0.16b, IV.16b
	st1	{BLK0.16b}, [x0], #16
	/*
	 * Steal: copy the first w6 bytes of that output to the partial
	 * tail position, replacing them with the w6 remaining input
	 * bytes; x7 points at the stolen block.
	 */
	sub	x7, x0, 16
10:
	subs	x6, x6, 1
	ldrb	tmpw0, [x7, x6]
	strb	tmpw0, [x0, x6]
	ldrb	tmpw0, [x1, x6]
	strb	tmpw0, [x7, x6]
	b.gt	10b
	/* re-encrypt the patched block with the final tweak TMP7 */
	ld1	{BLK0.16b}, [x7]
	eor	BLK0.16b, BLK0.16b, TMP7.16b
	bl	sm4_encrypt_block1x
	eor	BLK0.16b, BLK0.16b, TMP7.16b
	st1	{BLK0.16b}, [x7]

.Lxtsout:
	/* load round key2 for last tweak */
	sub	x3, x3, #128
	ld1	{RK0.4s, RK1.4s, RK2.4s, RK3.4s}, [x3], #64
	ld1	{RK4.4s, RK5.4s, RK6.4s, RK7.4s}, [x3], #64
	/* decrypt last tweak for next update */
	decrypt_block	IV
	st1	{IV.16b}, [x5]
	ldp	x29, x30, [sp], #16
	ret
END_FUNC xts_do_cipher

/*
 * void neon_sm4_xts_encrypt(uint8_t out[], uint8_t const in[],
 *			     uint8_t const rk1[], uint8_t const rk2[],
 *			     size_t len, uint8_t iv[])
 * x0: output
 * x1: input
 * x2: round key1
 * x3: round key2
 * w4: len
 * x5: iv
 */
FUNC neon_sm4_xts_encrypt , :
	frame_push
	mov	x26, 1
	bl	xts_do_cipher
	frame_pop
	ret
END_FUNC neon_sm4_xts_encrypt

/*
 * void neon_sm4_xts_decrypt(uint8_t out[], uint8_t const in[],
 *			     uint8_t const rk1[], uint8_t const rk2[],
 *			     size_t len, uint8_t iv[])
 * x0: output
 * x1: input
 * x2: round key1 (reversed/decryption schedule)
 * x3: round key2
 * w4: len
 * x5: iv
 */
FUNC neon_sm4_xts_decrypt , :
	frame_push
	mov	x26, 0
	bl	xts_do_cipher
	frame_pop
	ret
END_FUNC neon_sm4_xts_decrypt