/* SPDX-License-Identifier: BSD-2-Clause */
/*
 * Copyright (c) 2015, 2020 Linaro Limited
 * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * - AES cipher for ARMv8 with Crypto Extensions
 * - Chaining mode wrappers for AES
 */

#include <asm.S>

	.arch	armv8-a+crypto

	/* Preload all round keys */
	.macro	load_round_keys, rounds, rk
	cmp	\rounds, #12
	blo	2222f		/* 128 bits */
	beq	1111f		/* 192 bits */
	ld1	{v17.16b-v18.16b}, [\rk], #32
1111:	ld1	{v19.16b-v20.16b}, [\rk], #32
2222:	ld1	{v21.16b-v24.16b}, [\rk], #64
	ld1	{v25.16b-v28.16b}, [\rk], #64
	ld1	{v29.16b-v31.16b}, [\rk]
	.endm

	/* Prepare for encryption with key in rk[] */
	.macro	enc_prepare, rounds, rk, ignore
	load_round_keys	\rounds, \rk
	.endm

	/* Prepare for encryption (again) but with new key in rk[] */
	.macro	enc_switch_key, rounds, rk, ignore
	load_round_keys	\rounds, \rk
	.endm

	/* Prepare for decryption with key in rk[] */
	.macro	dec_prepare, rounds, rk, ignore
	load_round_keys	\rounds, \rk
	.endm

	.macro	do_enc_Nx, de, mc, k, i0, i1, i2, i3
	aes\de	\i0\().16b, \k\().16b
	aes\mc	\i0\().16b, \i0\().16b
	.ifnb	\i1
	aes\de	\i1\().16b, \k\().16b
	aes\mc	\i1\().16b, \i1\().16b
	.ifnb	\i3
	aes\de	\i2\().16b, \k\().16b
	aes\mc	\i2\().16b, \i2\().16b
	aes\de	\i3\().16b, \k\().16b
	aes\mc	\i3\().16b, \i3\().16b
	.endif
	.endif
	.endm

	/* Up to 4 interleaved encryption rounds with the same round key */
	.macro	round_Nx, enc, k, i0, i1, i2, i3
	.ifc	\enc, e
	do_enc_Nx	e, mc, \k, \i0, \i1, \i2, \i3
	.else
	do_enc_Nx	d, imc, \k, \i0, \i1, \i2, \i3
	.endif
	.endm

	/* Up to 4 interleaved final rounds */
	.macro	fin_round_Nx, de, k, k2, i0, i1, i2, i3
	aes\de	\i0\().16b, \k\().16b
	.ifnb	\i1
	aes\de	\i1\().16b, \k\().16b
	.ifnb	\i3
	aes\de	\i2\().16b, \k\().16b
	aes\de	\i3\().16b, \k\().16b
	.endif
	.endif
	eor	\i0\().16b, \i0\().16b, \k2\().16b
	.ifnb	\i1
	eor	\i1\().16b, \i1\().16b, \k2\().16b
	.ifnb	\i3
	eor	\i2\().16b, \i2\().16b, \k2\().16b
	eor	\i3\().16b, \i3\().16b, \k2\().16b
	.endif
	.endif
	.endm

	/* Up to 4 interleaved blocks */
	.macro	do_block_Nx, enc, rounds, i0, i1, i2, i3
	cmp	\rounds, #12
	blo	2222f		/* 128 bits */
	beq	1111f		/* 192 bits */
	round_Nx	\enc, v17, \i0, \i1, \i2, \i3
	round_Nx	\enc, v18, \i0, \i1, \i2, \i3
1111:	round_Nx	\enc, v19, \i0, \i1, \i2, \i3
	round_Nx	\enc, v20, \i0, \i1, \i2, \i3
2222:	.irp	key, v21, v22, v23, v24, v25, v26, v27, v28, v29
	round_Nx	\enc, \key, \i0, \i1, \i2, \i3
	.endr
	fin_round_Nx	\enc, v30, v31, \i0, \i1, \i2, \i3
	.endm

	.macro	encrypt_block, in, rounds, t0, t1, t2
	do_block_Nx	e, \rounds, \in
	.endm

	.macro	encrypt_block2x, i0, i1, rounds, t0, t1, t2
	do_block_Nx	e, \rounds, \i0, \i1
	.endm

	.macro	encrypt_block4x, i0, i1, i2, i3, rounds, t0, t1, t2
	do_block_Nx	e, \rounds, \i0, \i1, \i2, \i3
	.endm

	.macro	decrypt_block, in, rounds, t0, t1, t2
	do_block_Nx	d, \rounds, \in
	.endm

	.macro	decrypt_block2x, i0, i1, rounds, t0, t1, t2
	do_block_Nx	d, \rounds, \i0, \i1
	.endm

	.macro	decrypt_block4x, i0, i1, i2, i3, rounds, t0, t1, t2
	do_block_Nx	d, \rounds, \i0, \i1, \i2, \i3
	.endm
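	/*
	 * Register allocation for the block macros above: the expanded
	 * key schedule is kept resident in v17-v31 by load_round_keys,
	 * so the inner loops never touch memory for round keys.
	 * do_block_Nx enters the schedule at the offset matching the
	 * key size:
	 *
	 *	AES-256 (rounds == 14): round keys in v17-v31
	 *	AES-192 (rounds == 12): round keys in v19-v31
	 *	AES-128 (rounds == 10): round keys in v21-v31
	 *
	 * In all cases v30 is the key of the final AESE/AESD round
	 * (which skips MixColumns) and v31 is the value xor'ed in by
	 * fin_round_Nx as the last AddRoundKey.
	 */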
/*
 * There are several ways to instantiate this code:
 * - no interleave, all inline
 * - 2-way interleave, 2x calls out of line (-DINTERLEAVE=2)
 * - 2-way interleave, all inline (-DINTERLEAVE=2 -DINTERLEAVE_INLINE)
 * - 4-way interleave, 4x calls out of line (-DINTERLEAVE=4)
 * - 4-way interleave, all inline (-DINTERLEAVE=4 -DINTERLEAVE_INLINE)
 *
 * Macros imported by this code:
 * - enc_prepare	- setup NEON registers for encryption
 * - dec_prepare	- setup NEON registers for decryption
 * - enc_switch_key	- change to new key after having prepared for encryption
 * - encrypt_block	- encrypt a single block
 * - decrypt_block	- decrypt a single block
 * - encrypt_block2x	- encrypt 2 blocks in parallel (if INTERLEAVE == 2)
 * - decrypt_block2x	- decrypt 2 blocks in parallel (if INTERLEAVE == 2)
 * - encrypt_block4x	- encrypt 4 blocks in parallel (if INTERLEAVE == 4)
 * - decrypt_block4x	- decrypt 4 blocks in parallel (if INTERLEAVE == 4)
 */

#if defined(INTERLEAVE) && !defined(INTERLEAVE_INLINE)
#define FRAME_PUSH	stp x29, x30, [sp,#-16]! ; mov x29, sp
#define FRAME_POP	ldp x29, x30, [sp],#16

#if INTERLEAVE == 2

LOCAL_FUNC aes_encrypt_block2x , :
	encrypt_block2x	v0, v1, w3, x2, x6, w7
	ret
END_FUNC aes_encrypt_block2x

LOCAL_FUNC aes_decrypt_block2x , :
	decrypt_block2x	v0, v1, w3, x2, x6, w7
	ret
END_FUNC aes_decrypt_block2x

#elif INTERLEAVE == 4

LOCAL_FUNC aes_encrypt_block4x , :
	encrypt_block4x	v0, v1, v2, v3, w3, x2, x6, w7
	ret
END_FUNC aes_encrypt_block4x

LOCAL_FUNC aes_decrypt_block4x , :
	decrypt_block4x	v0, v1, v2, v3, w3, x2, x6, w7
	ret
END_FUNC aes_decrypt_block4x

#else
#error INTERLEAVE should equal 2 or 4
#endif

	.macro	do_encrypt_block2x
	bl	aes_encrypt_block2x
	.endm

	.macro	do_decrypt_block2x
	bl	aes_decrypt_block2x
	.endm

	.macro	do_encrypt_block4x
	bl	aes_encrypt_block4x
	.endm

	.macro	do_decrypt_block4x
	bl	aes_decrypt_block4x
	.endm

#else
#define FRAME_PUSH
#define FRAME_POP

	.macro	do_encrypt_block2x
	encrypt_block2x	v0, v1, w3, x2, x6, w7
	.endm

	.macro	do_decrypt_block2x
	decrypt_block2x	v0, v1, w3, x2, x6, w7
	.endm

	.macro	do_encrypt_block4x
	encrypt_block4x	v0, v1, v2, v3, w3, x2, x6, w7
	.endm

	.macro	do_decrypt_block4x
	decrypt_block4x	v0, v1, v2, v3, w3, x2, x6, w7
	.endm

#endif
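	/*
	 * Two helpers for key-schedule work follow. ce_aes_sub exploits
	 * the fact that AESE computes SubBytes(ShiftRows(state ^ key)):
	 * the input word is replicated into all four lanes and xor'ed
	 * into an all-zero state, so ShiftRows only permutes identical
	 * columns and lane 0 of the result is the plain bytewise S-box
	 * of the input; for instance ce_aes_sub(0x00000001) yields
	 * 0x6363637c, since S[0x00] = 0x63 and S[0x01] = 0x7c.
	 * ce_aes_invert applies AESIMC (InvMixColumns), which converts
	 * an encryption round key into the form required by the
	 * AESD-based equivalent inverse cipher.
	 */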
	/*
	 * uint32_t ce_aes_sub(uint32_t in) - use the aese instruction to
	 * perform the AES sbox substitution on each byte in 'in'
	 */
FUNC ce_aes_sub , :
	dup	v1.4s, w0
	movi	v0.16b, #0
	aese	v0.16b, v1.16b
	umov	w0, v0.s[0]
	ret
END_FUNC ce_aes_sub

	/*
	 * void ce_aes_invert(void *dst, const void *src);
	 */
FUNC ce_aes_invert , :
	ld1	{v0.16b}, [x1]
	aesimc	v1.16b, v0.16b
	st1	{v1.16b}, [x0]
	ret
END_FUNC ce_aes_invert

	/*
	 * void ce_aes_ecb_encrypt(uint8_t out[], uint8_t const in[],
	 *			   uint8_t const rk[], int rounds, int blocks,
	 *			   int first)
	 */
FUNC ce_aes_ecb_encrypt , :
	FRAME_PUSH
	cbz	w5, .LecbencloopNx

	enc_prepare	w3, x2, x5

.LecbencloopNx:
#if INTERLEAVE >= 2
	subs	w4, w4, #INTERLEAVE
	bmi	.Lecbenc1x
#if INTERLEAVE == 2
	ld1	{v0.16b-v1.16b}, [x1], #32	/* get 2 pt blocks */
	do_encrypt_block2x
	st1	{v0.16b-v1.16b}, [x0], #32
#else
	ld1	{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
	do_encrypt_block4x
	st1	{v0.16b-v3.16b}, [x0], #64
#endif
	b	.LecbencloopNx
.Lecbenc1x:
	adds	w4, w4, #INTERLEAVE
	beq	.Lecbencout
#endif
.Lecbencloop:
	ld1	{v0.16b}, [x1], #16		/* get next pt block */
	encrypt_block	v0, w3, x2, x5, w6
	st1	{v0.16b}, [x0], #16
	subs	w4, w4, #1
	bne	.Lecbencloop
.Lecbencout:
	FRAME_POP
	ret
END_FUNC ce_aes_ecb_encrypt

	/*
	 * void ce_aes_ecb_decrypt(uint8_t out[], uint8_t const in[],
	 *			   uint8_t const rk[], int rounds, int blocks,
	 *			   int first)
	 */
FUNC ce_aes_ecb_decrypt , :
	FRAME_PUSH
	cbz	w5, .LecbdecloopNx

	dec_prepare	w3, x2, x5

.LecbdecloopNx:
#if INTERLEAVE >= 2
	subs	w4, w4, #INTERLEAVE
	bmi	.Lecbdec1x
#if INTERLEAVE == 2
	ld1	{v0.16b-v1.16b}, [x1], #32	/* get 2 ct blocks */
	do_decrypt_block2x
	st1	{v0.16b-v1.16b}, [x0], #32
#else
	ld1	{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
	do_decrypt_block4x
	st1	{v0.16b-v3.16b}, [x0], #64
#endif
	b	.LecbdecloopNx
.Lecbdec1x:
	adds	w4, w4, #INTERLEAVE
	beq	.Lecbdecout
#endif
.Lecbdecloop:
	ld1	{v0.16b}, [x1], #16		/* get next ct block */
	decrypt_block	v0, w3, x2, x5, w6
	st1	{v0.16b}, [x0], #16
	subs	w4, w4, #1
	bne	.Lecbdecloop
.Lecbdecout:
	FRAME_POP
	ret
END_FUNC ce_aes_ecb_decrypt

	/*
	 * void ce_aes_cbc_encrypt(uint8_t out[], uint8_t const in[],
	 *			   uint8_t const rk[], int rounds, int blocks,
	 *			   uint8_t iv[])
	 */
FUNC ce_aes_cbc_encrypt , :
	ld1	{v4.16b}, [x5]			/* get iv */
	enc_prepare	w3, x2, x6

.Lcbcencloop4x:
	subs	w4, w4, #4
	bmi	.Lcbcenc1x
	ld1	{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
	eor	v0.16b, v0.16b, v4.16b		/* ..and xor with iv */
	encrypt_block	v0, w3, x2, x6, w7
	eor	v1.16b, v1.16b, v0.16b
	encrypt_block	v1, w3, x2, x6, w7
	eor	v2.16b, v2.16b, v1.16b
	encrypt_block	v2, w3, x2, x6, w7
	eor	v3.16b, v3.16b, v2.16b
	encrypt_block	v3, w3, x2, x6, w7
	st1	{v0.16b-v3.16b}, [x0], #64
	mov	v4.16b, v3.16b
	b	.Lcbcencloop4x
.Lcbcenc1x:
	adds	w4, w4, #4
	beq	.Lcbcencout
.Lcbcencloop:
	ld1	{v0.16b}, [x1], #16		/* get next pt block */
	eor	v4.16b, v4.16b, v0.16b		/* ..and xor with iv */
	encrypt_block	v4, w3, x2, x6, w7
	st1	{v4.16b}, [x0], #16
	subs	w4, w4, #1
	bne	.Lcbcencloop
.Lcbcencout:
	st1	{v4.16b}, [x5]			/* return iv */
	ret
END_FUNC ce_aes_cbc_encrypt
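	/*
	 * CBC encryption above is inherently serial: each block chains
	 * on the previous ciphertext, so even the 4x loop only
	 * amortizes the loads/stores and still runs encrypt_block once
	 * per block. CBC decryption below has no such dependency: the
	 * 4-way path keeps copies of ciphertext blocks 0-2 in v4-v6
	 * and rewinds x1 to refetch block 3 into v7, because v0-v3 are
	 * decrypted in place while the original ciphertexts are still
	 * needed as chaining values (and v7 becomes the next iv).
	 *
	 * A hypothetical C caller (sketch only; buffer and state names
	 * assumed, prototypes as in the comments in this file) can
	 * process a long message in chunks by letting iv[] carry the
	 * chain across calls:
	 *
	 *	uint8_t iv[16];		// initialized with the CBC iv
	 *	ce_aes_cbc_encrypt(out, in, rk, rounds, nblk, iv);
	 *	// iv[] now holds the last ct block, so a later call
	 *	// continues the same chain:
	 *	ce_aes_cbc_encrypt(out + 16 * nblk, in + 16 * nblk,
	 *			   rk, rounds, nblk2, iv);
	 */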
	/*
	 * void ce_aes_cbc_decrypt(uint8_t out[], uint8_t const in[],
	 *			   uint8_t const rk[], int rounds, int blocks,
	 *			   uint8_t iv[])
	 */
FUNC ce_aes_cbc_decrypt , :
	stp	x29, x30, [sp, #-16]!
	mov	x29, sp

	ld1	{v7.16b}, [x5]			/* get iv */
	dec_prepare	w3, x2, x6

.LcbcdecloopNx:
	subs	w4, w4, #4
	bmi	.Lcbcdec1x
	ld1	{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
	mov	v4.16b, v0.16b
	mov	v5.16b, v1.16b
	mov	v6.16b, v2.16b
	bl	aes_decrypt_block4x
	sub	x1, x1, #16
	eor	v0.16b, v0.16b, v7.16b
	eor	v1.16b, v1.16b, v4.16b
	ld1	{v7.16b}, [x1], #16		/* reload 1 ct block */
	eor	v2.16b, v2.16b, v5.16b
	eor	v3.16b, v3.16b, v6.16b
	st1	{v0.16b-v3.16b}, [x0], #64
	b	.LcbcdecloopNx
.Lcbcdec1x:
	adds	w4, w4, #4
	beq	.Lcbcdecout
.Lcbcdecloop:
	ld1	{v1.16b}, [x1], #16		/* get next ct block */
	mov	v0.16b, v1.16b			/* ...and copy to v0 */
	decrypt_block	v0, w3, x2, x6, w7
	eor	v0.16b, v0.16b, v7.16b		/* xor with iv => pt */
	mov	v7.16b, v1.16b			/* ct is next iv */
	st1	{v0.16b}, [x0], #16
	subs	w4, w4, #1
	bne	.Lcbcdecloop
.Lcbcdecout:
	st1	{v7.16b}, [x5]			/* return iv */
	ldp	x29, x30, [sp], #16
	ret
END_FUNC ce_aes_cbc_decrypt

	/*
	 * void ce_aes_ctr_encrypt(uint8_t out[], uint8_t const in[],
	 *			   uint8_t const rk[], int rounds, int blocks,
	 *			   uint8_t ctr[], int first)
	 */
FUNC ce_aes_ctr_encrypt , :
	stp	x29, x30, [sp, #-16]!
	mov	x29, sp

	enc_prepare	w3, x2, x6
	ld1	{v4.16b}, [x5]

	umov	x6, v4.d[1]		/* keep swabbed ctr in reg */
	rev	x6, x6
	cmn	w6, w4			/* 32 bit overflow? */
	bcs	.Lctrloop
.LctrloopNx:
	subs	w4, w4, #4
	bmi	.Lctr1x
	add	w7, w6, #1
	mov	v0.16b, v4.16b
	add	w8, w6, #2
	mov	v1.16b, v4.16b
	add	w9, w6, #3
	mov	v2.16b, v4.16b
	rev	w7, w7
	mov	v3.16b, v4.16b
	rev	w8, w8
	mov	v1.s[3], w7
	rev	w9, w9
	mov	v2.s[3], w8
	mov	v3.s[3], w9
	ld1	{v5.16b-v7.16b}, [x1], #48	/* get 3 input blocks */
	bl	aes_encrypt_block4x
	eor	v0.16b, v5.16b, v0.16b
	ld1	{v5.16b}, [x1], #16		/* get 1 input block */
	eor	v1.16b, v6.16b, v1.16b
	eor	v2.16b, v7.16b, v2.16b
	eor	v3.16b, v5.16b, v3.16b
	st1	{v0.16b-v3.16b}, [x0], #64
	add	x6, x6, #4
	rev	x7, x6
	ins	v4.d[1], x7
	cbz	w4, .Lctrout
	b	.LctrloopNx
.Lctr1x:
	adds	w4, w4, #4
	beq	.Lctrout
.Lctrloop:
	mov	v0.16b, v4.16b
	encrypt_block	v0, w3, x2, x8, w7

	adds	x6, x6, #1		/* increment BE ctr */
	rev	x7, x6
	ins	v4.d[1], x7
	bcs	.Lctrcarry		/* overflow? */

.Lctrcarrydone:
	subs	w4, w4, #1
	bmi	.Lctrtailblock		/* blocks <0 means tail block */
	ld1	{v3.16b}, [x1], #16
	eor	v3.16b, v0.16b, v3.16b
	st1	{v3.16b}, [x0], #16
	bne	.Lctrloop

.Lctrout:
	st1	{v4.16b}, [x5]		/* return next CTR value */
	ldp	x29, x30, [sp], #16
	ret

.Lctrtailblock:
	st1	{v0.16b}, [x0]
	ldp	x29, x30, [sp], #16
	ret

.Lctrcarry:
	umov	x7, v4.d[0]		/* load upper word of ctr */
	rev	x7, x7			/* ... to handle the carry */
	add	x7, x7, #1
	rev	x7, x7
	ins	v4.d[0], x7
	b	.Lctrcarrydone
END_FUNC ce_aes_ctr_encrypt
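	/*
	 * The XTS tweak for block N+1 is the tweak for block N
	 * multiplied by x in GF(2^128), reduced by the polynomial
	 * x^128 + x^7 + x^2 + x + 1. next_tweak below implements this
	 * with the constant at .Lxts_mul_x (d[0] = 1, d[1] = 0x87):
	 * the add doubles each 64-bit lane, sshr/and turn each lane's
	 * carried-out top bit into a mask of the matching constant,
	 * and ext swaps the halves so that a carry out of the low lane
	 * feeds a 1 into the high lane, while a carry out of bit 127
	 * wraps around as 0x87 into the low byte.
	 */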
	.macro	next_tweak, out, in, const, tmp
	sshr	\tmp\().2d, \in\().2d, #63
	and	\tmp\().16b, \tmp\().16b, \const\().16b
	add	\out\().2d, \in\().2d, \in\().2d
	ext	\tmp\().16b, \tmp\().16b, \tmp\().16b, #8
	eor	\out\().16b, \out\().16b, \tmp\().16b
	.endm

	/*
	 * void ce_aes_xts_encrypt(uint8_t out[], uint8_t const in[],
	 *			   uint8_t const rk1[], int rounds, int blocks,
	 *			   uint8_t const rk2[], uint8_t iv[])
	 */
FUNC ce_aes_xts_encrypt , :
	FRAME_PUSH

	ld1	{v4.16b}, [x6]
	enc_prepare	w3, x5, x6
	encrypt_block	v4, w3, x5, x6, w7	/* first tweak */
	enc_switch_key	w3, x2, x6
	ldr	q7, .Lxts_mul_x
	b	.LxtsencNx

.LxtsencloopNx:
	next_tweak	v4, v4, v7, v8
.LxtsencNx:
#if INTERLEAVE >= 2
	subs	w4, w4, #INTERLEAVE
	bmi	.Lxtsenc1x
#if INTERLEAVE == 2
	ld1	{v0.16b-v1.16b}, [x1], #32	/* get 2 pt blocks */
	next_tweak	v5, v4, v7, v8
	eor	v0.16b, v0.16b, v4.16b
	eor	v1.16b, v1.16b, v5.16b
	do_encrypt_block2x
	eor	v0.16b, v0.16b, v4.16b
	eor	v1.16b, v1.16b, v5.16b
	st1	{v0.16b-v1.16b}, [x0], #32
	cbz	w4, .LxtsencoutNx
	next_tweak	v4, v5, v7, v8
	b	.LxtsencNx
.LxtsencoutNx:
	mov	v4.16b, v5.16b
	b	.Lxtsencout
#else
	ld1	{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
	next_tweak	v5, v4, v7, v8
	eor	v0.16b, v0.16b, v4.16b
	next_tweak	v6, v5, v7, v8
	eor	v1.16b, v1.16b, v5.16b
	eor	v2.16b, v2.16b, v6.16b
	next_tweak	v7, v6, v7, v8
	eor	v3.16b, v3.16b, v7.16b
	do_encrypt_block4x
	eor	v3.16b, v3.16b, v7.16b
	eor	v0.16b, v0.16b, v4.16b
	eor	v1.16b, v1.16b, v5.16b
	eor	v2.16b, v2.16b, v6.16b
	st1	{v0.16b-v3.16b}, [x0], #64
	mov	v4.16b, v7.16b
	ldr	q7, .Lxts_mul_x
	cbz	w4, .Lxtsencout
	b	.LxtsencloopNx
#endif
.Lxtsenc1x:
	adds	w4, w4, #INTERLEAVE
	beq	.Lxtsencout
#endif
.Lxtsencloop:
	ld1	{v1.16b}, [x1], #16
	eor	v0.16b, v1.16b, v4.16b
	encrypt_block	v0, w3, x2, x6, w7
	eor	v0.16b, v0.16b, v4.16b
	st1	{v0.16b}, [x0], #16
	subs	w4, w4, #1
	beq	.Lxtsencout
	next_tweak	v4, v4, v7, v8
	b	.Lxtsencloop
.Lxtsencout:
	next_tweak	v4, v4, v7, v8
	st1	{v4.16b}, [x6], #16
	FRAME_POP
	ret

	.align	4
.Lxts_mul_x:
	.word	1, 0, 0x87, 0
END_FUNC ce_aes_xts_encrypt
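	/*
	 * XTS decryption still encrypts the first tweak: in XTS the
	 * tweak is always the AES encryption of the iv under the
	 * second key (rk2), regardless of the direction of the data
	 * transform. Hence the enc_prepare/encrypt_block pair on x5
	 * below, before dec_prepare switches the round key registers
	 * over to rk1 for the actual block decryption.
	 */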
	/*
	 * void ce_aes_xts_decrypt(uint8_t out[], uint8_t const in[],
	 *			   uint8_t const rk1[], int rounds, int blocks,
	 *			   uint8_t const rk2[], uint8_t iv[])
	 */
FUNC ce_aes_xts_decrypt , :
	FRAME_PUSH

	ld1	{v4.16b}, [x6]
	enc_prepare	w3, x5, x6
	encrypt_block	v4, w3, x5, x6, w7	/* first tweak */
	dec_prepare	w3, x2, x6
	ldr	q7, .Lxts_mul_x
	b	.LxtsdecNx

.LxtsdecloopNx:
	next_tweak	v4, v4, v7, v8
.LxtsdecNx:
#if INTERLEAVE >= 2
	subs	w4, w4, #INTERLEAVE
	bmi	.Lxtsdec1x
#if INTERLEAVE == 2
	ld1	{v0.16b-v1.16b}, [x1], #32	/* get 2 ct blocks */
	next_tweak	v5, v4, v7, v8
	eor	v0.16b, v0.16b, v4.16b
	eor	v1.16b, v1.16b, v5.16b
	do_decrypt_block2x
	eor	v0.16b, v0.16b, v4.16b
	eor	v1.16b, v1.16b, v5.16b
	st1	{v0.16b-v1.16b}, [x0], #32
	cbz	w4, .LxtsdecoutNx
	next_tweak	v4, v5, v7, v8
	b	.LxtsdecNx
.LxtsdecoutNx:
	mov	v4.16b, v5.16b
	b	.Lxtsdecout
#else
	ld1	{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
	next_tweak	v5, v4, v7, v8
	eor	v0.16b, v0.16b, v4.16b
	next_tweak	v6, v5, v7, v8
	eor	v1.16b, v1.16b, v5.16b
	eor	v2.16b, v2.16b, v6.16b
	next_tweak	v7, v6, v7, v8
	eor	v3.16b, v3.16b, v7.16b
	do_decrypt_block4x
	eor	v3.16b, v3.16b, v7.16b
	eor	v0.16b, v0.16b, v4.16b
	eor	v1.16b, v1.16b, v5.16b
	eor	v2.16b, v2.16b, v6.16b
	st1	{v0.16b-v3.16b}, [x0], #64
	mov	v4.16b, v7.16b
	ldr	q7, .Lxts_mul_x
	cbz	w4, .Lxtsdecout
	b	.LxtsdecloopNx
#endif
.Lxtsdec1x:
	adds	w4, w4, #INTERLEAVE
	beq	.Lxtsdecout
#endif
.Lxtsdecloop:
	ld1	{v1.16b}, [x1], #16
	eor	v0.16b, v1.16b, v4.16b
	decrypt_block	v0, w3, x2, x6, w7
	eor	v0.16b, v0.16b, v4.16b
	st1	{v0.16b}, [x0], #16
	subs	w4, w4, #1
	beq	.Lxtsdecout
	next_tweak	v4, v4, v7, v8
	b	.Lxtsdecloop
.Lxtsdecout:
	FRAME_POP
	next_tweak	v4, v4, v7, v8
	st1	{v4.16b}, [x6], #16
	ret
END_FUNC ce_aes_xts_decrypt

	/*
	 * void ce_aes_xor_block(uint8_t out[], uint8_t const op1[],
	 *			 uint8_t const op2[]);
	 */
FUNC ce_aes_xor_block , :
	ld1	{v0.16b}, [x1]
	ld1	{v1.16b}, [x2]
	eor	v0.16b, v0.16b, v1.16b
	st1	{v0.16b}, [x0]
	ret
END_FUNC ce_aes_xor_block

BTI(emit_aarch64_feature_1_and	GNU_PROPERTY_AARCH64_FEATURE_1_BTI)