/* SPDX-License-Identifier: GPL-2.0 */
/*
 * sha3-ce-core.S - core SHA-3 transform using v8.2 Crypto Extensions
 *
 * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	/*
	 * Define one assembler symbol per vector register spelling used in
	 * this file (.Lv0.2d, .Lv0.16b, ... .Lv31.2d, .Lv31.16b), each equal
	 * to the register number. The macros below prepend ".L" to an operand
	 * such as "v7.16b" to obtain the value for its register field.
	 */
	.irp	b,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
	.set	.Lv\b\().2d, \b
	.set	.Lv\b\().16b, \b
	.endr

	/*
	 * ARMv8.2 Crypto Extensions instructions
	 *
	 * Each macro builds the instruction word by hand with .inst: a fixed
	 * opcode pattern ORed with
	 *   Rd   at bit 0
	 *   Rn   at bit 5
	 *   Ra   at bit 10 (eor3, bcax) or the 6-bit rotate count (xar)
	 *   Rm   at bit 16
	 * NOTE(review): presumably hand-encoded so that the file still builds
	 * with assemblers that do not know these mnemonics - confirm.
	 *
	 * Operand semantics, per the Arm architecture definition of the
	 * instructions (operands must be spelled vN.2d or vN.16b, the only
	 * forms for which symbols are defined above):
	 *   eor3 rd, rn, rm, ra	rd = rn ^ rm ^ ra
	 *   rax1 rd, rn, rm		rd = rn ^ rol64(rm, 1)
	 *   bcax rd, rn, rm, ra	rd = rn ^ (rm & ~ra)
	 *   xar  rd, rn, rm, imm6	rd = ror64(rn ^ rm, imm6)
	 */
	.macro	eor3, rd, rn, rm, ra
	.inst	0xce000000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
	.endm

	.macro	rax1, rd, rn, rm
	.inst	0xce608c00 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
	.endm

	.macro	bcax, rd, rn, rm, ra
	.inst	0xce200000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
	.endm

	.macro	xar, rd, rn, rm, imm6
	.inst	0xce800000 | .L\rd | (.L\rn << 5) | ((\imm6) << 10) | (.L\rm << 16)
	.endm

	/*
	 * int sha3_ce_transform(u64 *st, const u8 *data, int blocks, int dg_size)
	 */
	.text
SYM_FUNC_START(sha3_ce_transform)
	/*
	 * int sha3_ce_transform(u64 *st, const u8 *data, int blocks, int dg_size)
	 *
	 * Absorbs whole input blocks into the state and runs the 24-round
	 * permutation after each one.
	 *
	 * In (AAPCS64 argument registers):
	 *   x0 = st       25 x 64-bit state lanes; read on entry, written on exit
	 *   x1 = data     input; advanced by one block per outer iteration
	 *   w2 = blocks   number of blocks; must be >= 1, because the counter
	 *                 is decremented before it is first tested
	 *   w3 = dg_size  only bits 6, 4 and 2 are examined, to select how
	 *                 many bytes make up one block
	 * Out:
	 *   w0 = blocks left unprocessed (0 when all input was consumed;
	 *        may be non-zero if cond_yield ended the loop early)
	 *
	 * Register use:
	 *   v0-v24   state lanes 0-24 (only the low 64 bits are loaded/stored)
	 *   v25-v31  temporaries: input words, theta values, displaced lanes,
	 *            round constant
	 *   w8       rounds left for the current block
	 *   x9       cursor into .Lsha3_rcon
	 * NOTE(review): v8-v15 are overwritten without being saved; callers
	 * presumably run this inside a kernel-mode NEON section - confirm.
	 */

	/* load state */
	add	x8, x0, #32			// x8 walks the state, x0 is kept for the store
	ld1	{ v0.1d- v3.1d}, [x0]
	ld1	{ v4.1d- v7.1d}, [x8], #32
	ld1	{ v8.1d-v11.1d}, [x8], #32
	ld1	{v12.1d-v15.1d}, [x8], #32
	ld1	{v16.1d-v19.1d}, [x8], #32
	ld1	{v20.1d-v23.1d}, [x8], #32
	ld1	{v24.1d}, [x8]

	/* per-block loop: after the sub, w2 = blocks remaining after this one */
0:	sub	w2, w2, #1
	mov	w8, #24				// 24 rounds per block
	adr_l	x9, .Lsha3_rcon			// restart at the first round constant

	/*
	 * load input
	 *
	 * The first 56 bytes (lanes 0-6) are absorbed for every variant; the
	 * dg_size bit tests below decide how many more lanes follow.
	 */
	ld1	{v25.8b-v28.8b}, [x1], #32
	ld1	{v29.8b-v31.8b}, [x1], #24
	eor	 v0.8b,  v0.8b, v25.8b
	eor	 v1.8b,  v1.8b, v26.8b
	eor	 v2.8b,  v2.8b, v27.8b
	eor	 v3.8b,  v3.8b, v28.8b
	eor	 v4.8b,  v4.8b, v29.8b
	eor	 v5.8b,  v5.8b, v30.8b
	eor	 v6.8b,  v6.8b, v31.8b

	tbnz	x3, #6, 2f			// SHA3-512

	/* lanes 7-12: 104 bytes absorbed so far */
	ld1	{v25.8b-v28.8b}, [x1], #32
	ld1	{v29.8b-v30.8b}, [x1], #16
	eor	 v7.8b,  v7.8b, v25.8b
	eor	 v8.8b,  v8.8b, v26.8b
	eor	 v9.8b,  v9.8b, v27.8b
	eor	v10.8b, v10.8b, v28.8b
	eor	v11.8b, v11.8b, v29.8b
	eor	v12.8b, v12.8b, v30.8b

	tbnz	x3, #4, 1f			// SHA3-384 or SHA3-224

	// SHA3-256: lanes 13-16, 136 bytes per block
	ld1	{v25.8b-v28.8b}, [x1], #32
	eor	v13.8b, v13.8b, v25.8b
	eor	v14.8b, v14.8b, v26.8b
	eor	v15.8b, v15.8b, v27.8b
	eor	v16.8b, v16.8b, v28.8b
	b	3f

1:	tbz	x3, #2, 3f			// bit 2 cleared? SHA3-384 (104 bytes per block)

	// SHA3-224: lanes 13-17, 144 bytes per block
	ld1	{v25.8b-v28.8b}, [x1], #32
	ld1	{v29.8b}, [x1], #8
	eor	v13.8b, v13.8b, v25.8b
	eor	v14.8b, v14.8b, v26.8b
	eor	v15.8b, v15.8b, v27.8b
	eor	v16.8b, v16.8b, v28.8b
	eor	v17.8b, v17.8b, v29.8b
	b	3f

	// SHA3-512: lanes 7-8, 72 bytes per block
2:	ld1	{v25.8b-v26.8b}, [x1], #16
	eor	 v7.8b,  v7.8b, v25.8b
	eor	 v8.8b,  v8.8b, v26.8b

	/* one round per pass through this label */
3:	sub	w8, w8, #1

	/*
	 * theta, step 1: XOR together the five lanes of each column
	 * (lanes x, x+5, x+10, x+15, x+20):
	 *   v25 = column 0, v26 = column 1, v27 = column 2,
	 *   v28 = column 3, v29 = column 4
	 */
	eor3	v29.16b,  v4.16b,  v9.16b, v14.16b
	eor3	v26.16b,  v1.16b,  v6.16b, v11.16b
	eor3	v28.16b,  v3.16b,  v8.16b, v13.16b
	eor3	v25.16b,  v0.16b,  v5.16b, v10.16b
	eor3	v27.16b,  v2.16b,  v7.16b, v12.16b
	eor3	v29.16b, v29.16b, v19.16b, v24.16b
	eor3	v26.16b, v26.16b, v16.16b, v21.16b
	eor3	v28.16b, v28.16b, v18.16b, v23.16b
	eor3	v25.16b, v25.16b, v15.16b, v20.16b
	eor3	v27.16b, v27.16b, v17.16b, v22.16b

	/*
	 * theta, step 2: bc[x] = column[x - 1] ^ rol64(column[x + 1], 1).
	 * The results replace the column sums in place, so the order below
	 * matters: each source is read before it is overwritten.
	 */
	rax1	v30.2d, v29.2d, v26.2d		// bc[0]
	rax1	v26.2d, v26.2d, v28.2d		// bc[2]
	rax1	v28.2d, v28.2d, v25.2d		// bc[4]
	rax1	v25.2d, v25.2d, v27.2d		// bc[1]
	rax1	v27.2d, v27.2d, v29.2d		// bc[3]

	/*
	 * theta step 3 fused with rho and pi: every xar XORs a lane with the
	 * bc[] value of its column, rotates it left by r (xar rotates right,
	 * hence 64 - r) and writes it to the register of its new position.
	 * Lane 0 has no rotation and stays in place, so a plain eor is used.
	 * v29 and v31, and v25-v28/v30 once their bc[] value has been used
	 * for the last time, hold lanes whose target register is still live.
	 * The sequence is order-dependent: a register is only overwritten
	 * after its previous contents have been consumed.
	 */
	eor	 v0.16b,  v0.16b, v30.16b
	xar	 v29.2d,   v1.2d,  v25.2d, (64 - 1)
	xar	  v1.2d,   v6.2d,  v25.2d, (64 - 44)
	xar	  v6.2d,   v9.2d,  v28.2d, (64 - 20)
	xar	  v9.2d,  v22.2d,  v26.2d, (64 - 61)
	xar	 v22.2d,  v14.2d,  v28.2d, (64 - 39)
	xar	 v14.2d,  v20.2d,  v30.2d, (64 - 18)
	xar	 v31.2d,   v2.2d,  v26.2d, (64 - 62)
	xar	  v2.2d,  v12.2d,  v26.2d, (64 - 43)
	xar	 v12.2d,  v13.2d,  v27.2d, (64 - 25)
	xar	 v13.2d,  v19.2d,  v28.2d, (64 - 8)
	xar	 v19.2d,  v23.2d,  v27.2d, (64 - 56)
	xar	 v23.2d,  v15.2d,  v30.2d, (64 - 41)
	xar	 v15.2d,   v4.2d,  v28.2d, (64 - 27)
	xar	 v28.2d,  v24.2d,  v28.2d, (64 - 14)
	xar	 v24.2d,  v21.2d,  v25.2d, (64 - 2)
	xar	  v8.2d,   v8.2d,  v27.2d, (64 - 55)
	xar	  v4.2d,  v16.2d,  v25.2d, (64 - 45)
	xar	 v16.2d,   v5.2d,  v30.2d, (64 - 36)
	xar	  v5.2d,   v3.2d,  v27.2d, (64 - 28)
	xar	 v27.2d,  v18.2d,  v27.2d, (64 - 21)
	xar	  v3.2d,  v17.2d,  v26.2d, (64 - 15)
	xar	 v25.2d,  v11.2d,  v25.2d, (64 - 10)
	xar	 v26.2d,   v7.2d,  v26.2d, (64 - 6)
	xar	 v30.2d,  v10.2d,  v30.2d, (64 - 3)

	/*
	 * chi: out = a ^ (c & ~b) on each group of five lanes, one group per
	 * row of the state (lanes 20-24, 15-19, 10-14, 5-9, 0-4).
	 */
	bcax	v20.16b, v31.16b, v22.16b,  v8.16b
	bcax	v21.16b,  v8.16b, v23.16b, v22.16b
	bcax	v22.16b, v22.16b, v24.16b, v23.16b
	bcax	v23.16b, v23.16b, v31.16b, v24.16b
	bcax	v24.16b, v24.16b,  v8.16b, v31.16b

	/*
	 * v31 is not read again by the chi groups below, so fetch the round
	 * constant for this round into it now (replicated to both 64-bit
	 * halves) and step x9 to the next constant.
	 */
	ld1r	{v31.2d}, [x9], #8

	bcax	v17.16b, v25.16b, v19.16b,  v3.16b
	bcax	v18.16b,  v3.16b, v15.16b, v19.16b
	bcax	v19.16b, v19.16b, v16.16b, v15.16b
	bcax	v15.16b, v15.16b, v25.16b, v16.16b
	bcax	v16.16b, v16.16b,  v3.16b, v25.16b

	bcax	v10.16b, v29.16b, v12.16b, v26.16b
	bcax	v11.16b, v26.16b, v13.16b, v12.16b
	bcax	v12.16b, v12.16b, v14.16b, v13.16b
	bcax	v13.16b, v13.16b, v29.16b, v14.16b
	bcax	v14.16b, v14.16b, v26.16b, v29.16b

	bcax	 v7.16b, v30.16b,  v9.16b,  v4.16b
	bcax	 v8.16b,  v4.16b,  v5.16b,  v9.16b
	bcax	 v9.16b,  v9.16b,  v6.16b,  v5.16b
	bcax	 v5.16b,  v5.16b, v30.16b,  v6.16b
	bcax	 v6.16b,  v6.16b,  v4.16b, v30.16b

	bcax	 v3.16b, v27.16b,  v0.16b, v28.16b
	bcax	 v4.16b, v28.16b,  v1.16b,  v0.16b
	bcax	 v0.16b,  v0.16b,  v2.16b,  v1.16b
	bcax	 v1.16b,  v1.16b, v27.16b,  v2.16b
	bcax	 v2.16b,  v2.16b, v28.16b, v27.16b

	/* iota: fold the round constant into lane 0 */
	eor	 v0.16b,  v0.16b, v31.16b

	cbnz	w8, 3b				// more rounds for this block
	/*
	 * NOTE(review): cond_yield comes from <asm/assembler.h>; presumably it
	 * branches to 4f when the CPU should be yielded, using x8 and x9 as
	 * scratch. Both are reinitialised at 0: before their next use.
	 */
	cond_yield 4f, x8, x9
	cbnz	w2, 0b				// more blocks to absorb

	/* save state */
4:	st1	{ v0.1d- v3.1d}, [x0], #32
	st1	{ v4.1d- v7.1d}, [x0], #32
	st1	{ v8.1d-v11.1d}, [x0], #32
	st1	{v12.1d-v15.1d}, [x0], #32
	st1	{v16.1d-v19.1d}, [x0], #32
	st1	{v20.1d-v23.1d}, [x0], #32
	st1	{v24.1d}, [x0]
	mov	w0, w2				// return the number of blocks not processed
	ret
SYM_FUNC_END(sha3_ce_transform)

	/*
	 * Round constants: 24 64-bit words, read in order by the ld1r in the
	 * round loop (one per round).
	 * NOTE(review): the GNU assembler treats the .align operand as a
	 * power-of-two exponent on this target, so this appears to request
	 * 256-byte alignment rather than 8 bytes - confirm whether that is
	 * intended before changing it.
	 */
	.section	".rodata", "a"
	.align		8
.Lsha3_rcon:
	.quad	0x0000000000000001, 0x0000000000008082, 0x800000000000808a
	.quad	0x8000000080008000, 0x000000000000808b, 0x0000000080000001
	.quad	0x8000000080008081, 0x8000000000008009, 0x000000000000008a
	.quad	0x0000000000000088, 0x0000000080008009, 0x000000008000000a
	.quad	0x000000008000808b, 0x800000000000008b, 0x8000000000008089
	.quad	0x8000000000008003, 0x8000000000008002, 0x8000000000000080
	.quad	0x000000000000800a, 0x800000008000000a, 0x8000000080008081
	.quad	0x8000000000008080, 0x0000000080000001, 0x8000000080008008