/* SPDX-License-Identifier: BSD-2-Clause */
/*
 * Copyright (c) 2020, 2024 Linaro Limited
 * Copyright (C) 2014 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
 *
 * Accelerated GHASH implementation with ARMv8 PMULL instructions.
 */

#include <arm64_macros.S>
#include <asm.S>
#define CPU_LE(x...)	x

/*
 * Only increment the lowest quarter, that is, the low 32 bits, of the
 * counter. If it wraps it must not propagate into the upper bits.
 */
#define INC_QUART_CTR	1

	SHASH		.req	v0
	SHASH2		.req	v1
	T1		.req	v2
	T2		.req	v3
	MASK		.req	v4
	XL		.req	v5
	XM		.req	v6
	XH		.req	v7
	IN1		.req	v7

	k00_16		.req	v8
	k32_48		.req	v9

	t3		.req	v10
	t4		.req	v11
	t5		.req	v12
	t6		.req	v13
	t7		.req	v14
	t8		.req	v15
	t9		.req	v16

	perm1		.req	v17
	perm2		.req	v18
	perm3		.req	v19

	sh1		.req	v20
	sh2		.req	v21
	sh3		.req	v22
	sh4		.req	v23

	ss1		.req	v24
	ss2		.req	v25
	ss3		.req	v26
	ss4		.req	v27

	XL2		.req	v8
	XM2		.req	v9
	XH2		.req	v10
	XL3		.req	v11
	XM3		.req	v12
	XH3		.req	v13
	TT3		.req	v14
	TT4		.req	v15
	HH		.req	v16
	HH3		.req	v17
	HH4		.req	v18
	HH34		.req	v19

	.arch		armv8-a+crypto

	.macro		__pmull_p64, rd, rn, rm
	pmull		\rd\().1q, \rn\().1d, \rm\().1d
	.endm

	.macro		__pmull2_p64, rd, rn, rm
	pmull2		\rd\().1q, \rn\().2d, \rm\().2d
	.endm

	.macro		__pmull_p8, rq, ad, bd
	ext		t3.8b, \ad\().8b, \ad\().8b, #1		// A1
	ext		t5.8b, \ad\().8b, \ad\().8b, #2		// A2
	ext		t7.8b, \ad\().8b, \ad\().8b, #3		// A3

	__pmull_p8_\bd	\rq, \ad
	.endm

	.macro		__pmull2_p8, rq, ad, bd
	tbl		t3.16b, {\ad\().16b}, perm1.16b		// A1
	tbl		t5.16b, {\ad\().16b}, perm2.16b		// A2
	tbl		t7.16b, {\ad\().16b}, perm3.16b		// A3

	__pmull2_p8_\bd	\rq, \ad
	.endm

	.macro		__pmull_p8_SHASH, rq, ad
	__pmull_p8_tail	\rq, \ad\().8b, SHASH.8b, 8b,, sh1, sh2, sh3, sh4
	.endm

	.macro		__pmull_p8_SHASH2, rq, ad
	__pmull_p8_tail	\rq, \ad\().8b, SHASH2.8b, 8b,, ss1, ss2, ss3, ss4
	.endm

	.macro		__pmull2_p8_SHASH, rq, ad
	__pmull_p8_tail	\rq, \ad\().16b, SHASH.16b, 16b, 2, sh1, sh2, sh3, sh4
	.endm

	.macro		__pmull_p8_tail, rq, ad, bd, nb, t, b1, b2, b3, b4
	pmull\t		t3.8h, t3.\nb, \bd			// F = A1*B
	pmull\t		t4.8h, \ad, \b1\().\nb			// E = A*B1
	pmull\t		t5.8h, t5.\nb, \bd			// H = A2*B
	pmull\t		t6.8h, \ad, \b2\().\nb			// G = A*B2
	pmull\t		t7.8h, t7.\nb, \bd			// J = A3*B
	pmull\t		t8.8h, \ad, \b3\().\nb			// I = A*B3
	pmull\t		t9.8h, \ad, \b4\().\nb			// K = A*B4
	pmull\t		\rq\().8h, \ad, \bd			// D = A*B

	eor		t3.16b, t3.16b, t4.16b			// L = E + F
	eor		t5.16b, t5.16b, t6.16b			// M = G + H
	eor		t7.16b, t7.16b, t8.16b			// N = I + J

	uzp1		t4.2d, t3.2d, t5.2d
	uzp2		t3.2d, t3.2d, t5.2d
	uzp1		t6.2d, t7.2d, t9.2d
	uzp2		t7.2d, t7.2d, t9.2d

	// t3 = (L) (P0 + P1) << 8
	// t5 = (M) (P2 + P3) << 16
	eor		t4.16b, t4.16b, t3.16b
	and		t3.16b, t3.16b, k32_48.16b

	// t7 = (N) (P4 + P5) << 24
	// t9 = (K) (P6 + P7) << 32
	eor		t6.16b, t6.16b, t7.16b
	and		t7.16b, t7.16b, k00_16.16b

	eor		t4.16b, t4.16b, t3.16b
	eor		t6.16b, t6.16b, t7.16b

	zip2		t5.2d, t4.2d, t3.2d
	zip1		t3.2d, t4.2d, t3.2d
	zip2		t9.2d, t6.2d, t7.2d
	zip1		t7.2d, t6.2d, t7.2d

	ext		t3.16b, t3.16b, t3.16b, #15
	ext		t5.16b, t5.16b, t5.16b, #14
	ext		t7.16b, t7.16b, t7.16b, #13
	ext		t9.16b, t9.16b, t9.16b, #12

	eor		t3.16b, t3.16b, t5.16b
	eor		t7.16b, t7.16b, t9.16b
	eor		\rq\().16b, \rq\().16b, t3.16b
	eor		\rq\().16b, \rq\().16b, t7.16b
	.endm
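
	//
	// Note: the __pmull*_p8 helpers above emulate a 64x64->128 bit
	// carryless multiply using only the 8x8->16 bit form of PMULL.
	// The multiplicand is byte-rotated (A1..A3), partial products
	// against pre-rotated copies of the hash key (B1..B4, prepared in
	// __pmull_pre_p8) are formed, and the results are masked, shifted
	// back into place byte-wise and XORed together so that every
	// partial product lands at its proper offset in the final product.
	//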

	.macro		__pmull_pre_p64
	add		x8, x3, #16
	ld1		{HH.2d-HH4.2d}, [x8]

	trn1		SHASH2.2d, SHASH.2d, HH.2d
	trn2		T1.2d, SHASH.2d, HH.2d
	eor		SHASH2.16b, SHASH2.16b, T1.16b

	trn1		HH34.2d, HH3.2d, HH4.2d
	trn2		T1.2d, HH3.2d, HH4.2d
	eor		HH34.16b, HH34.16b, T1.16b

	movi		MASK.16b, #0xe1
	shl		MASK.2d, MASK.2d, #57
	.endm

	.macro		__pmull_pre_p8
	ext		SHASH2.16b, SHASH.16b, SHASH.16b, #8
	eor		SHASH2.16b, SHASH2.16b, SHASH.16b

	// k00_16 := 0x0000000000000000_000000000000ffff
	// k32_48 := 0x00000000ffffffff_0000ffffffffffff
	movi		k32_48.2d, #0xffffffff
	mov		k32_48.h[2], k32_48.h[0]
	ushr		k00_16.2d, k32_48.2d, #32

	// prepare the permutation vectors
	mov_imm		x5, 0x080f0e0d0c0b0a09
	movi		T1.8b, #8
	dup		perm1.2d, x5
	eor		perm1.16b, perm1.16b, T1.16b
	ushr		perm2.2d, perm1.2d, #8
	ushr		perm3.2d, perm1.2d, #16
	ushr		T1.2d, perm1.2d, #24
	sli		perm2.2d, perm1.2d, #56
	sli		perm3.2d, perm1.2d, #48
	sli		T1.2d, perm1.2d, #40

	// precompute loop invariants
	tbl		sh1.16b, {SHASH.16b}, perm1.16b
	tbl		sh2.16b, {SHASH.16b}, perm2.16b
	tbl		sh3.16b, {SHASH.16b}, perm3.16b
	tbl		sh4.16b, {SHASH.16b}, T1.16b
	ext		ss1.8b, SHASH2.8b, SHASH2.8b, #1
	ext		ss2.8b, SHASH2.8b, SHASH2.8b, #2
	ext		ss3.8b, SHASH2.8b, SHASH2.8b, #3
	ext		ss4.8b, SHASH2.8b, SHASH2.8b, #4
	.endm

	//
	// PMULL (64x64->128) based reduction for CPUs that can do
	// it in a single instruction.
	//
	.macro		__pmull_reduce_p64
	pmull		T2.1q, XL.1d, MASK.1d
	eor		XM.16b, XM.16b, T1.16b

	mov		XH.d[0], XM.d[1]
	mov		XM.d[1], XL.d[0]

	eor		XL.16b, XM.16b, T2.16b
	ext		T2.16b, XL.16b, XL.16b, #8
	pmull		XL.1q, XL.1d, MASK.1d
	.endm

	//
	// Alternative reduction for CPUs that lack support for the
	// 64x64->128 PMULL instruction
	//
	.macro		__pmull_reduce_p8
	eor		XM.16b, XM.16b, T1.16b

	mov		XL.d[1], XM.d[0]
	mov		XH.d[0], XM.d[1]

	shl		T1.2d, XL.2d, #57
	shl		T2.2d, XL.2d, #62
	eor		T2.16b, T2.16b, T1.16b
	shl		T1.2d, XL.2d, #63
	eor		T2.16b, T2.16b, T1.16b
	ext		T1.16b, XL.16b, XH.16b, #8
	eor		T2.16b, T2.16b, T1.16b

	mov		XL.d[1], T2.d[0]
	mov		XH.d[0], T2.d[1]

	ushr		T2.2d, XL.2d, #1
	eor		XH.16b, XH.16b, XL.16b
	eor		XL.16b, XL.16b, T2.16b
	ushr		T2.2d, T2.2d, #6
	ushr		XL.2d, XL.2d, #1
	.endm
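
	//
	// Both reduction helpers fold the double-width product held in
	// XL/XM/XH back to 128 bits modulo the GHASH field polynomial
	// x^128 + x^7 + x^2 + x + 1. MASK (0xe1 shifted left by 57 in each
	// 64-bit lane) is the usual bit-reflected encoding of that
	// polynomial for the PMULL-based fold; the p8 variant performs the
	// equivalent fold with plain shifts and XORs for cores that lack
	// the 64-bit PMULL instruction.
	//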

	.macro		__pmull_ghash, pn
	ld1		{SHASH.2d}, [x3]
	ld1		{XL.2d}, [x1]

	__pmull_pre_\pn

	/* do the head block first, if supplied */
	cbz		x4, 0f
	ld1		{T1.16b}, [x4]
	mov		x4, xzr
	b		3f

0:	.ifc		\pn, p64
	tbnz		w0, #0, 2f		// skip until #blocks is a
	tbnz		w0, #1, 2f		// round multiple of 4

1:	ld1		{XM3.16b-TT4.16b}, [x2], #64

	sub		w0, w0, #4

	rev64		T1.16b, XM3.16b
	rev64		T2.16b, XH3.16b
	rev64		TT4.16b, TT4.16b
	rev64		TT3.16b, TT3.16b

	ext		IN1.16b, TT4.16b, TT4.16b, #8
	ext		XL3.16b, TT3.16b, TT3.16b, #8

	eor		TT4.16b, TT4.16b, IN1.16b
	pmull2		XH2.1q, SHASH.2d, IN1.2d	// a1 * b1
	pmull		XL2.1q, SHASH.1d, IN1.1d	// a0 * b0
	pmull		XM2.1q, SHASH2.1d, TT4.1d	// (a1 + a0)(b1 + b0)

	eor		TT3.16b, TT3.16b, XL3.16b
	pmull2		XH3.1q, HH.2d, XL3.2d		// a1 * b1
	pmull		XL3.1q, HH.1d, XL3.1d		// a0 * b0
	pmull2		XM3.1q, SHASH2.2d, TT3.2d	// (a1 + a0)(b1 + b0)

	ext		IN1.16b, T2.16b, T2.16b, #8
	eor		XL2.16b, XL2.16b, XL3.16b
	eor		XH2.16b, XH2.16b, XH3.16b
	eor		XM2.16b, XM2.16b, XM3.16b

	eor		T2.16b, T2.16b, IN1.16b
	pmull2		XH3.1q, HH3.2d, IN1.2d		// a1 * b1
	pmull		XL3.1q, HH3.1d, IN1.1d		// a0 * b0
	pmull		XM3.1q, HH34.1d, T2.1d		// (a1 + a0)(b1 + b0)

	eor		XL2.16b, XL2.16b, XL3.16b
	eor		XH2.16b, XH2.16b, XH3.16b
	eor		XM2.16b, XM2.16b, XM3.16b

	ext		IN1.16b, T1.16b, T1.16b, #8
	ext		TT3.16b, XL.16b, XL.16b, #8
	eor		XL.16b, XL.16b, IN1.16b
	eor		T1.16b, T1.16b, TT3.16b

	pmull2		XH.1q, HH4.2d, XL.2d		// a1 * b1
	eor		T1.16b, T1.16b, XL.16b
	pmull		XL.1q, HH4.1d, XL.1d		// a0 * b0
	pmull2		XM.1q, HH34.2d, T1.2d		// (a1 + a0)(b1 + b0)

	eor		XL.16b, XL.16b, XL2.16b
	eor		XH.16b, XH.16b, XH2.16b
	eor		XM.16b, XM.16b, XM2.16b

	eor		T2.16b, XL.16b, XH.16b
	ext		T1.16b, XL.16b, XH.16b, #8
	eor		XM.16b, XM.16b, T2.16b

	__pmull_reduce_p64

	eor		T2.16b, T2.16b, XH.16b
	eor		XL.16b, XL.16b, T2.16b

	cbz		w0, 5f
	b		1b
	.endif

2:	ld1		{T1.16b}, [x2], #16
	sub		w0, w0, #1

3:	/* multiply XL by SHASH in GF(2^128) */
CPU_LE(	rev64		T1.16b, T1.16b	)

	ext		T2.16b, XL.16b, XL.16b, #8
	ext		IN1.16b, T1.16b, T1.16b, #8
	eor		T1.16b, T1.16b, T2.16b
	eor		XL.16b, XL.16b, IN1.16b

	__pmull2_\pn	XH, XL, SHASH			// a1 * b1
	eor		T1.16b, T1.16b, XL.16b
	__pmull_\pn	XL, XL, SHASH			// a0 * b0
	__pmull_\pn	XM, T1, SHASH2			// (a1 + a0)(b1 + b0)

4:	eor		T2.16b, XL.16b, XH.16b
	ext		T1.16b, XL.16b, XH.16b, #8
	eor		XM.16b, XM.16b, T2.16b

	__pmull_reduce_\pn

	eor		T2.16b, T2.16b, XH.16b
	eor		XL.16b, XL.16b, T2.16b

	cbnz		w0, 0b

5:	st1		{XL.2d}, [x1]
	ret
	.endm

/*
 * void pmull_ghash_update_p64(int blocks, uint64_t dg[2], const uint8_t *src,
 *			       const struct internal_ghash_key *ghash_key,
 *			       const uint8_t *head);
 */
FUNC pmull_ghash_update_p64 , :
	__pmull_ghash	p64
END_FUNC pmull_ghash_update_p64

/*
 * void pmull_ghash_update_p8(int blocks, uint64_t dg[2], const uint8_t *src,
 *			      const struct internal_ghash_key *ghash_key,
 *			      const uint8_t *head);
 */
FUNC pmull_ghash_update_p8 , :
	__pmull_ghash	p8
END_FUNC pmull_ghash_update_p8

	KS0		.req	v12
	KS1		.req	v13
	INP0		.req	v14
	INP1		.req	v15

	.macro		load_round_keys, rounds, rk
	cmp		\rounds, #12
	blo		2222f		/* 128 bits */
	beq		1111f		/* 192 bits */
	ld1		{v17.4s-v18.4s}, [\rk], #32
1111:	ld1		{v19.4s-v20.4s}, [\rk], #32
2222:	ld1		{v21.4s-v24.4s}, [\rk], #64
	ld1		{v25.4s-v28.4s}, [\rk], #64
	ld1		{v29.4s-v31.4s}, [\rk]
	.endm

	.macro		enc_round, state, key
	aese		\state\().16b, \key\().16b
	aesmc		\state\().16b, \state\().16b
	.endm

	.macro		enc_block, state, rounds
	cmp		\rounds, #12
	b.lo		2222f		/* 128 bits */
	b.eq		1111f		/* 192 bits */
	enc_round	\state, v17
	enc_round	\state, v18
1111:	enc_round	\state, v19
	enc_round	\state, v20
2222:	.irp		key, v21, v22, v23, v24, v25, v26, v27, v28, v29
	enc_round	\state, \key
	.endr
	aese		\state\().16b, v30.16b
	eor		\state\().16b, \state\().16b, v31.16b
	.endm
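
	//
	// pmull_gcm_do_crypt below processes two blocks per iteration: the
	// AES rounds that produce the next pair of keystream blocks
	// (KS0/KS1) are interleaved instruction by instruction with the
	// GHASH multiply of the current pair (INP0/INP1), presumably so
	// the AES and PMULL latencies overlap. With INC_QUART_CTR only the
	// low 32 bits of the big-endian counter are incremented, matching
	// GCM's 32-bit counter increment; otherwise the full counter is
	// carried.
	//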

	.macro		pmull_gcm_do_crypt, enc
	ld1		{SHASH.2d}, [x4], #16
	ld1		{HH.2d}, [x4]
	ld1		{XL.2d}, [x1]
#if INC_QUART_CTR
	ldr		x8, [x5, #8]			// load lower counter
#else
	ldp		x9, x8, [x5]			// load counter
#endif

	movi		MASK.16b, #0xe1
	trn1		SHASH2.2d, SHASH.2d, HH.2d
	trn2		T1.2d, SHASH.2d, HH.2d
CPU_LE(	rev		x8, x8		)
#if !INC_QUART_CTR
CPU_LE(	rev		x9, x9		)
#endif
	shl		MASK.2d, MASK.2d, #57
	eor		SHASH2.16b, SHASH2.16b, T1.16b

	.if		\enc == 1
	ldr		x10, [sp]
	ld1		{KS0.16b-KS1.16b}, [x10]
	.endif

	cbnz		x6, 4f

0:	ld1		{INP0.16b-INP1.16b}, [x3], #32

#if INC_QUART_CTR
	lsr		x12, x8, #32		// Save the upper 32 bits
	rev		x9, x8
	add		w11, w8, #1
	add		w8, w8, #2
	add		x11, x11, x12, lsl #32	// Restore the upper 32 bits
	add		x8, x8, x12, lsl #32
#endif

	.if		\enc == 1
	eor		INP0.16b, INP0.16b, KS0.16b	// encrypt input
	eor		INP1.16b, INP1.16b, KS1.16b
	.endif

	sub		w0, w0, #2

#if INC_QUART_CTR
	ld1		{KS0.8b}, [x5]			// load upper counter
	rev		x11, x11
	mov		KS1.8b, KS0.8b
	ins		KS0.d[1], x9			// set lower counter
	ins		KS1.d[1], x11
#else
	ins		KS0.d[1], x8
	ins		KS0.d[0], x9
	rev64		KS0.16b, KS0.16b

	add		x8, x8, #1
	cbnz		x8, 10f
	add		x9, x9, #1
10:
	ins		KS1.d[1], x8
	ins		KS1.d[0], x9
	rev64		KS1.16b, KS1.16b

	add		x8, x8, #1
	cbnz		x8, 11f
	add		x9, x9, #1
11:
#endif

	rev64		T1.16b, INP1.16b

	cmp		w7, #12
	b.ge		2f				// AES-192/256?

1:	enc_round	KS0, v21
	ext		IN1.16b, T1.16b, T1.16b, #8

	enc_round	KS1, v21
	pmull2		XH2.1q, SHASH.2d, IN1.2d	// a1 * b1

	enc_round	KS0, v22
	eor		T1.16b, T1.16b, IN1.16b

	enc_round	KS1, v22
	pmull		XL2.1q, SHASH.1d, IN1.1d	// a0 * b0

	enc_round	KS0, v23
	pmull		XM2.1q, SHASH2.1d, T1.1d	// (a1 + a0)(b1 + b0)

	enc_round	KS1, v23
	rev64		T1.16b, INP0.16b
	ext		T2.16b, XL.16b, XL.16b, #8

	enc_round	KS0, v24
	ext		IN1.16b, T1.16b, T1.16b, #8
	eor		T1.16b, T1.16b, T2.16b

	enc_round	KS1, v24
	eor		XL.16b, XL.16b, IN1.16b

	enc_round	KS0, v25
	eor		T1.16b, T1.16b, XL.16b

	enc_round	KS1, v25
	pmull2		XH.1q, HH.2d, XL.2d		// a1 * b1

	enc_round	KS0, v26
	pmull		XL.1q, HH.1d, XL.1d		// a0 * b0

	enc_round	KS1, v26
	pmull2		XM.1q, SHASH2.2d, T1.2d		// (a1 + a0)(b1 + b0)

	enc_round	KS0, v27
	eor		XL.16b, XL.16b, XL2.16b
	eor		XH.16b, XH.16b, XH2.16b

	enc_round	KS1, v27
	eor		XM.16b, XM.16b, XM2.16b
	ext		T1.16b, XL.16b, XH.16b, #8

	enc_round	KS0, v28
	eor		T2.16b, XL.16b, XH.16b
	eor		XM.16b, XM.16b, T1.16b

	enc_round	KS1, v28
	eor		XM.16b, XM.16b, T2.16b

	enc_round	KS0, v29
	pmull		T2.1q, XL.1d, MASK.1d

	enc_round	KS1, v29
	mov		XH.d[0], XM.d[1]
	mov		XM.d[1], XL.d[0]

	aese		KS0.16b, v30.16b
	eor		XL.16b, XM.16b, T2.16b

	aese		KS1.16b, v30.16b
	ext		T2.16b, XL.16b, XL.16b, #8

	eor		KS0.16b, KS0.16b, v31.16b
	pmull		XL.1q, XL.1d, MASK.1d
	eor		T2.16b, T2.16b, XH.16b

	eor		KS1.16b, KS1.16b, v31.16b
	eor		XL.16b, XL.16b, T2.16b

	.if		\enc == 0
	eor		INP0.16b, INP0.16b, KS0.16b
	eor		INP1.16b, INP1.16b, KS1.16b
	.endif

	st1		{INP0.16b-INP1.16b}, [x2], #32

	cbnz		w0, 0b

CPU_LE(	rev		x8, x8		)
#if !INC_QUART_CTR
CPU_LE(	rev		x9, x9		)
#endif
	st1		{XL.2d}, [x1]
#if INC_QUART_CTR
	str		x8, [x5, #8]			// store lower counter
#else
	stp		x9, x8, [x5]			// store counter
#endif

	.if		\enc == 1
	st1		{KS0.16b-KS1.16b}, [x10]
	.endif

	ret

2:	b.eq		3f				// AES-192?
	enc_round	KS0, v17
	enc_round	KS1, v17
	enc_round	KS0, v18
	enc_round	KS1, v18
3:	enc_round	KS0, v19
	enc_round	KS1, v19
	enc_round	KS0, v20
	enc_round	KS1, v20
	b		1b

4:	load_round_keys	w7, x6
	b		0b
	.endm
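
/*
 * Keystream hand-off (summary of the macro above): on the encrypt path the
 * trailing ks[] argument points at two pre-computed keystream blocks that
 * are consumed for the first pair of input blocks; the pair generated
 * during the last iteration is written back to ks[] on return, presumably
 * for the caller to use on any trailing partial block. Passing a non-NULL
 * rk[] (re)loads the AES round keys into v17-v31 first; a NULL rk[]
 * assumes they were already loaded with pmull_gcm_load_round_keys().
 */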

/*
 * void pmull_gcm_encrypt(int blocks, uint64_t dg[2], uint8_t dst[],
 *			  const uint8_t src[],
 *			  const struct internal_ghash_key *ghash_key,
 *			  uint64_t ctr[], const uint64_t rk[], int rounds,
 *			  uint8_t ks[]);
 */
FUNC pmull_gcm_encrypt , :
	pmull_gcm_do_crypt	1
END_FUNC pmull_gcm_encrypt

/*
 * void pmull_gcm_decrypt(int blocks, uint64_t dg[2], uint8_t dst[],
 *			  const uint8_t src[],
 *			  const struct internal_ghash_key *ghash_key,
 *			  uint64_t ctr[], const uint64_t rk[], int rounds);
 */
FUNC pmull_gcm_decrypt , :
	pmull_gcm_do_crypt	0
END_FUNC pmull_gcm_decrypt

/*
 * void pmull_gcm_encrypt_block(uint8_t dst[], const uint8_t src[], int rounds)
 */
FUNC pmull_gcm_encrypt_block , :
	ld1		{v0.16b}, [x1]
	enc_block	v0, w2
	st1		{v0.16b}, [x0]
	ret
END_FUNC pmull_gcm_encrypt_block

/*
 * void pmull_gcm_load_round_keys(const uint64_t rk[30], int rounds)
 */
FUNC pmull_gcm_load_round_keys , :
	load_round_keys	w1, x0
	ret
END_FUNC pmull_gcm_load_round_keys

/*
 * uint32_t pmull_gcm_aes_sub(uint32_t input)
 *
 * use the aese instruction to perform the AES sbox substitution
 * on each byte in 'input'
 */
FUNC pmull_gcm_aes_sub , :
	dup		v1.4s, w0
	movi		v0.16b, #0
	aese		v0.16b, v1.16b
	umov		w0, v0.s[0]
	ret
END_FUNC pmull_gcm_aes_sub

BTI(emit_aarch64_feature_1_and	GNU_PROPERTY_AARCH64_FEATURE_1_BTI)