/* SPDX-License-Identifier: BSD-2-Clause */
/*
 * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
 * Copyright (c) 2023 Linaro Limited
 */

/* Core SHA-3 transform using v8 Crypto Extensions */

#include <asm.S>
#include <arm64_macros.S>

	/*
	 * Define one assembler symbol per vector register and arrangement
	 * (.Lv<N>.2d and .Lv<N>.16b, both equal to N) so that the encoding
	 * macros below can turn an operand such as "v29.16b" into its
	 * register number by prefixing it with ".L".
	 */
	.irp	b,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,\
		21,22,23,24,25,26,27,28,29,30,31
	.set	.Lv\b\().2d, \b
	.set	.Lv\b\().16b, \b
	.endr

	/*
	 * ARMv8.2 Crypto Extensions instructions
	 *
	 * These are emitted as raw opcodes with .inst, so the assembler
	 * does not need to know the SHA3 mnemonics.
	 */

	/* eor3: rd = rn ^ rm ^ ra */
	.macro	eor3, rd, rn, rm, ra
	.inst	0xce000000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | \
		(.L\rm << 16)
	.endm

	/* rax1: rd = rn ^ rol64(rm, 1), per 64-bit element */
	.macro	rax1, rd, rn, rm
	.inst	0xce608c00 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
	.endm

	/* bcax: rd = rn ^ (rm & ~ra) */
	.macro	bcax, rd, rn, rm, ra
	.inst	0xce200000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | \
		(.L\rm << 16)
	.endm

	/* xar: rd = ror64(rn ^ rm, imm6), per 64-bit element */
	.macro	xar, rd, rn, rm, imm6
	.inst	0xce800000 | .L\rd | (.L\rn << 5) | ((\imm6) << 10) | \
		(.L\rm << 16)
	.endm

	/*
	 * int sha3_ce_transform(u64 *st, const u8 *data, int blocks,
	 *			 int dg_size)
	 *
	 * Absorb 'blocks' input blocks from 'data' into the 25-lane
	 * (200 byte) state at 'st', running 24 permutation rounds per
	 * block.
	 *
	 * x0: st      - state, loaded into the low 64 bits of v0-v24 and
	 *               written back on exit
	 * x1: data    - input, advanced by one block per iteration
	 * w2: blocks  - number of blocks to absorb; 0 returns immediately
	 *               without touching the state
	 * w3: dg_size - digest size in bytes, which selects the block size:
	 *               64 -> 72, 48 -> 104, 32 -> 136, 28 -> 144, 16 -> 168
	 *               (only bits 6, 4, 5 and 2 are inspected, in that order)
	 *
	 * Returns the remaining block count in w0, which is always 0.
	 *
	 * Clobbers x8, x9 and v0-v31.
	 * NOTE(review): v8-v15 are written without being saved although
	 * AAPCS64 makes their low 64 bits callee-saved; presumably the caller
	 * saves and restores the vector state around this call - confirm.
	 */
	.text
FUNC sha3_ce_transform , :
	/*
	 * Nothing to absorb. Without this check the "sub w2, w2, #1" at
	 * the top of the block loop would wrap around and the loop would
	 * read far beyond the end of the input buffer.
	 */
	cbz	w2, 5f

	/* load state */
	add	x8, x0, #32
	ld1	{ v0.1d- v3.1d}, [x0]
	ld1	{ v4.1d- v7.1d}, [x8], #32
	ld1	{ v8.1d-v11.1d}, [x8], #32
	ld1	{v12.1d-v15.1d}, [x8], #32
	ld1	{v16.1d-v19.1d}, [x8], #32
	ld1	{v20.1d-v23.1d}, [x8], #32
	ld1	{v24.1d}, [x8]

	/* per block: w8 counts the 24 rounds, x9 walks the round constants */
0:	sub	w2, w2, #1
	mov	w8, #24
	adr_l	x9, .Lsha3_rcon

	/* load input: the first 56 bytes (lanes 0-6) are common to all sizes */
	ld1	{v25.8b-v28.8b}, [x1], #32
	ld1	{v29.8b-v31.8b}, [x1], #24
	eor	v0.8b, v0.8b, v25.8b
	eor	v1.8b, v1.8b, v26.8b
	eor	v2.8b, v2.8b, v27.8b
	eor	v3.8b, v3.8b, v28.8b
	eor	v4.8b, v4.8b, v29.8b
	eor	v5.8b, v5.8b, v30.8b
	eor	v6.8b, v6.8b, v31.8b

	/* Bit 6 set? -> SHA3-512 */
	tbnz	x3, #6, 3f

	/* SHA3-384, SHA3-256, SHA3-224 or SHA3-128: lanes 7-12 */
	ld1	{v25.8b-v28.8b}, [x1], #32
	ld1	{v29.8b-v30.8b}, [x1], #16
	eor	v7.8b, v7.8b, v25.8b
	eor	v8.8b, v8.8b, v26.8b
	eor	v9.8b, v9.8b, v27.8b
	eor	v10.8b, v10.8b, v28.8b
	eor	v11.8b, v11.8b, v29.8b
	eor	v12.8b, v12.8b, v30.8b

	/* bit 4 set? -> SHA3-384, SHA3-224 or SHA3-128 */
	tbnz	x3, #4, 1f

	/* SHA3-256: digest size 32 bytes, block size 136 bytes */
	ld1	{v25.8b-v28.8b}, [x1], #32
	eor	v13.8b, v13.8b, v25.8b
	eor	v14.8b, v14.8b, v26.8b
	eor	v15.8b, v15.8b, v27.8b
	eor	v16.8b, v16.8b, v28.8b
	b	4f

	/*
	 * bit 5 set? -> SHA3-384: digest size 48 bytes, block size 104 bytes,
	 * all of which has been absorbed already
	 */
1:	tbnz	x3, #5, 4f

	/* SHA3-224 or SHA3-128: lanes 13-16 */
	ld1	{v25.8b-v28.8b}, [x1], #32
	eor	v13.8b, v13.8b, v25.8b
	eor	v14.8b, v14.8b, v26.8b
	eor	v15.8b, v15.8b, v27.8b
	eor	v16.8b, v16.8b, v28.8b

	/* bit 2 set? -> SHA3-224 */
	tbnz	x3, #2, 2f

	/*
	 * SHA3-128: digest size 16 bytes, block size 168 bytes
	 * NOTE(review): 168 bytes is the SHAKE128 rate, so this is presumably
	 * the SHAKE128 path - confirm against the caller.
	 */
	ld1	{v25.8b-v28.8b}, [x1], #32
	eor	v17.8b, v17.8b, v25.8b
	eor	v18.8b, v18.8b, v26.8b
	eor	v19.8b, v19.8b, v27.8b
	eor	v20.8b, v20.8b, v28.8b
	b	4f

	/* SHA3-224: digest size 28 bytes, block size 144 bytes */
2:	ld1	{v29.8b}, [x1], #8
	eor	v17.8b, v17.8b, v29.8b
	b	4f

	/* SHA3-512: digest size 64 bytes, block size 72 bytes */
3:	ld1	{v25.8b-v26.8b}, [x1], #16
	eor	v7.8b, v7.8b, v25.8b
	eor	v8.8b, v8.8b, v26.8b

	/* one permutation round per iteration */
4:	sub	w8, w8, #1

	/* theta: XOR the five lanes of every column into v25-v29 */
	eor3	v29.16b, v4.16b, v9.16b, v14.16b
	eor3	v26.16b, v1.16b, v6.16b, v11.16b
	eor3	v28.16b, v3.16b, v8.16b, v13.16b
	eor3	v25.16b, v0.16b, v5.16b, v10.16b
	eor3	v27.16b, v2.16b, v7.16b, v12.16b
	eor3	v29.16b, v29.16b, v19.16b, v24.16b
	eor3	v26.16b, v26.16b, v16.16b, v21.16b
	eor3	v28.16b, v28.16b, v18.16b, v23.16b
	eor3	v25.16b, v25.16b, v15.16b, v20.16b
	eor3	v27.16b, v27.16b, v17.16b, v22.16b

	/* theta: combine each column parity with a rotated neighbour */
	rax1	v30.2d, v29.2d, v26.2d			// bc[0]
	rax1	v26.2d, v26.2d, v28.2d			// bc[2]
	rax1	v28.2d, v28.2d, v25.2d			// bc[4]
	rax1	v25.2d, v25.2d, v27.2d			// bc[1]
	rax1	v27.2d, v27.2d, v29.2d			// bc[3]

	/*
	 * Apply the theta values and perform rho and pi in one go: every
	 * xar XORs a lane with its column value, rotates it left by n
	 * (written as a rotate right by 64 - n) and stores it in its new
	 * position. The order matters as lanes are overwritten in place.
	 */
	eor	v0.16b, v0.16b, v30.16b
	xar	v29.2d, v1.2d, v25.2d, (64 - 1)
	xar	v1.2d, v6.2d, v25.2d, (64 - 44)
	xar	v6.2d, v9.2d, v28.2d, (64 - 20)
	xar	v9.2d, v22.2d, v26.2d, (64 - 61)
	xar	v22.2d, v14.2d, v28.2d, (64 - 39)
	xar	v14.2d, v20.2d, v30.2d, (64 - 18)
	xar	v31.2d, v2.2d, v26.2d, (64 - 62)
	xar	v2.2d, v12.2d, v26.2d, (64 - 43)
	xar	v12.2d, v13.2d, v27.2d, (64 - 25)
	xar	v13.2d, v19.2d, v28.2d, (64 - 8)
	xar	v19.2d, v23.2d, v27.2d, (64 - 56)
	xar	v23.2d, v15.2d, v30.2d, (64 - 41)
	xar	v15.2d, v4.2d, v28.2d, (64 - 27)
	xar	v28.2d, v24.2d, v28.2d, (64 - 14)
	xar	v24.2d, v21.2d, v25.2d, (64 - 2)
	xar	v8.2d, v8.2d, v27.2d, (64 - 55)
	xar	v4.2d, v16.2d, v25.2d, (64 - 45)
	xar	v16.2d, v5.2d, v30.2d, (64 - 36)
	xar	v5.2d, v3.2d, v27.2d, (64 - 28)
	xar	v27.2d, v18.2d, v27.2d, (64 - 21)
	xar	v3.2d, v17.2d, v26.2d, (64 - 15)
	xar	v25.2d, v11.2d, v25.2d, (64 - 10)
	xar	v26.2d, v7.2d, v26.2d, (64 - 6)
	xar	v30.2d, v10.2d, v30.2d, (64 - 3)

	/* chi, one row of five lanes at a time */
	bcax	v20.16b, v31.16b, v22.16b, v8.16b
	bcax	v21.16b, v8.16b, v23.16b, v22.16b
	bcax	v22.16b, v22.16b, v24.16b, v23.16b
	bcax	v23.16b, v23.16b, v31.16b, v24.16b
	bcax	v24.16b, v24.16b, v8.16b, v31.16b

	/* v31 is free from here on: fetch the round constant for iota */
	ld1r	{v31.2d}, [x9], #8

	bcax	v17.16b, v25.16b, v19.16b, v3.16b
	bcax	v18.16b, v3.16b, v15.16b, v19.16b
	bcax	v19.16b, v19.16b, v16.16b, v15.16b
	bcax	v15.16b, v15.16b, v25.16b, v16.16b
	bcax	v16.16b, v16.16b, v3.16b, v25.16b

	bcax	v10.16b, v29.16b, v12.16b, v26.16b
	bcax	v11.16b, v26.16b, v13.16b, v12.16b
	bcax	v12.16b, v12.16b, v14.16b, v13.16b
	bcax	v13.16b, v13.16b, v29.16b, v14.16b
	bcax	v14.16b, v14.16b, v26.16b, v29.16b

	bcax	v7.16b, v30.16b, v9.16b, v4.16b
	bcax	v8.16b, v4.16b, v5.16b, v9.16b
	bcax	v9.16b, v9.16b, v6.16b, v5.16b
	bcax	v5.16b, v5.16b, v30.16b, v6.16b
	bcax	v6.16b, v6.16b, v4.16b, v30.16b

	bcax	v3.16b, v27.16b, v0.16b, v28.16b
	bcax	v4.16b, v28.16b, v1.16b, v0.16b
	bcax	v0.16b, v0.16b, v2.16b, v1.16b
	bcax	v1.16b, v1.16b, v27.16b, v2.16b
	bcax	v2.16b, v2.16b, v28.16b, v27.16b

	/* iota: fold the round constant into lane 0 */
	eor	v0.16b, v0.16b, v31.16b

	cbnz	w8, 4b			/* more rounds for this block */
	cbnz	w2, 0b			/* more blocks to absorb */

	/* save state */
	st1	{ v0.1d- v3.1d}, [x0], #32
	st1	{ v4.1d- v7.1d}, [x0], #32
	st1	{ v8.1d-v11.1d}, [x0], #32
	st1	{v12.1d-v15.1d}, [x0], #32
	st1	{v16.1d-v19.1d}, [x0], #32
	st1	{v20.1d-v23.1d}, [x0], #32
	st1	{v24.1d}, [x0]

	/* w2 is 0 on both paths leading here */
5:	mov	w0, w2
	ret
END_FUNC sha3_ce_transform

	/* Round constants, one 64-bit value per round, in round order */
	.section	".rodata", "a"
	.align	8
LOCAL_DATA .Lsha3_rcon , :
	.quad	0x0000000000000001, 0x0000000000008082, 0x800000000000808a
	.quad	0x8000000080008000, 0x000000000000808b, 0x0000000080000001
	.quad	0x8000000080008081, 0x8000000000008009, 0x000000000000008a
	.quad	0x0000000000000088, 0x0000000080008009, 0x000000008000000a
	.quad	0x000000008000808b, 0x800000000000008b, 0x8000000000008089
	.quad	0x8000000000008003, 0x8000000000008002, 0x8000000000000080
	.quad	0x000000000000800a, 0x800000008000000a, 0x8000000080008081
	.quad	0x8000000000008080, 0x0000000080000001, 0x8000000080008008

BTI(emit_aarch64_feature_1_and GNU_PROPERTY_AARCH64_FEATURE_1_BTI)