/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * aesce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.arch	armv8-a+crypto

	/*
	 * Conventions shared by every routine in this file
	 *
	 * - Syntax is GNU as for AArch64; the file is run through the C
	 *   preprocessor first (SYM_FUNC_START/SYM_FUNC_END and CPU_LE are
	 *   macros supplied by the two headers included above).
	 * - Round keys are 16 bytes each and are streamed through v3/v4/v5
	 *   so that the load of an upcoming key overlaps the aese/aesmc
	 *   pair of the current one.
	 * - 'rounds' is compared against 12: values below, equal to and
	 *   above 12 enter the three-rounds-per-iteration loop at different
	 *   points so that the loop always leaves with the last two round
	 *   keys still to be applied.  (Presumably 10/12/14 rounds for
	 *   128/192/256-bit keys - confirm against the caller.)
	 * - The MAC is held in memory in its "not yet encrypted" form: each
	 *   routine encrypts it first and then XORs the next data into it.
	 */

	/*
	 * void ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
	 *			     u32 *macp, u8 const rk[], u32 rounds);
	 *
	 * Fold 'abytes' bytes of input into the 16-byte MAC.
	 *
	 * In:	x0 = mac	MAC state, updated in place
	 *	x1 = in		input bytes
	 *	w2 = abytes	number of input bytes
	 *	x3 = macp	in/out: how many bytes of an incomplete block
	 *			have already been XORed into mac[] (0 if none)
	 *	x4 = rk		round keys
	 *	w5 = rounds	number of AES rounds
	 *
	 * Clobbers: x0-x2, x6-x8, v0, v1, v3-v5 and the condition flags.
	 *
	 * NOTE(review): abytes is not tested for zero before the first byte
	 * load at 0: or before the first encryption at 1:, so callers are
	 * expected to pass abytes > 0 - confirm against the glue code.
	 */
SYM_FUNC_START(ce_aes_ccm_auth_data)
	ldr	w8, [x3]			/* leftover from prev round? */
	ld1	{v0.16b}, [x0]			/* load mac */
	cbz	w8, 1f
	sub	w8, w8, #16			/* w8 = -(bytes missing from block) */
	eor	v1.16b, v1.16b, v1.16b		/* v1 = 0: staging for new bytes */
0:	ldrb	w7, [x1], #1			/* get 1 byte of input */
	subs	w2, w2, #1			/* sets Z for the beq below */
	add	w8, w8, #1
	ins	v1.b[0], w7
	ext	v1.16b, v1.16b, v1.16b, #1	/* rotate in the input bytes */
	beq	8f				/* out of input? */
	cbnz	w8, 0b				/* block still incomplete? */
	eor	v0.16b, v0.16b, v1.16b		/* block complete: fold into mac */

	/* encrypt the mac, then absorb the next full block (if any) */
1:	ld1	{v3.4s}, [x4]			/* load first round key */
	prfm	pldl1strm, [x1]
	cmp	w5, #12				/* which key size? */
	add	x6, x4, #16			/* x6 = running round key pointer */
	sub	w7, w5, #2			/* modified # of rounds */
	bmi	2f				/* rounds < 12 */
	bne	5f				/* rounds > 12 */
	mov	v5.16b, v3.16b			/* rounds == 12 */
	b	4f
2:	mov	v4.16b, v3.16b
	ld1	{v5.4s}, [x6], #16		/* load 2nd round key */
3:	aese	v0.16b, v4.16b
	aesmc	v0.16b, v0.16b
4:	ld1	{v3.4s}, [x6], #16		/* load next round key */
	aese	v0.16b, v5.16b
	aesmc	v0.16b, v0.16b
5:	ld1	{v4.4s}, [x6], #16		/* load next round key */
	subs	w7, w7, #3			/* three rounds per iteration */
	aese	v0.16b, v3.16b
	aesmc	v0.16b, v0.16b
	ld1	{v5.4s}, [x6], #16		/* load next round key */
	bpl	3b
	aese	v0.16b, v4.16b			/* last aese: no aesmc */
	subs	w2, w2, #16			/* last data? */
	eor	v0.16b, v0.16b, v5.16b		/* final round */
	bmi	6f				/* fewer than 16 bytes left */
	ld1	{v1.16b}, [x1], #16		/* load next input block */
	eor	v0.16b, v0.16b, v1.16b		/* xor with mac */
	bne	1b				/* flags still from subs w2 */

	/*
	 * The flags from 'subs w2, w2, #16' are still live here: Z means
	 * the input ended exactly on a block boundary.
	 */
6:	st1	{v0.16b}, [x0]			/* store mac */
	beq	10f
	adds	w2, w2, #16			/* w2 = number of tail bytes */
	beq	10f
	mov	w8, w2				/* tail length becomes *macp */
7:	ldrb	w7, [x1], #1			/* XOR tail bytes into mac[] ... */
	umov	w6, v0.b[0]
	eor	w6, w6, w7
	strb	w6, [x0], #1			/* ... one byte at a time */
	subs	w2, w2, #1
	beq	10f
	ext	v0.16b, v0.16b, v0.16b, #1	/* rotate out the mac bytes */
	b	7b

	/*
	 * Input ran out while topping up a partial block.  w8 is zero if
	 * the block happened to be completed by the last byte, otherwise
	 * it is minus the number of bytes still missing.
	 */
8:	cbz	w8, 91f
	mov	w7, w8
	add	w8, w8, #16			/* new *macp = bytes now buffered */
9:	ext	v1.16b, v1.16b, v1.16b, #1	/* rotate once per missing byte so */
	adds	w7, w7, #1			/* the new bytes line up with their */
	bne	9b				/* positions in the block */
91:	eor	v0.16b, v0.16b, v1.16b
	st1	{v0.16b}, [x0]
10:	str	w8, [x3]			/* write back *macp */
	ret
SYM_FUNC_END(ce_aes_ccm_auth_data)

	/*
	 * void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u8 const rk[],
	 *			 u32 rounds);
	 *
	 * Encrypt both the MAC and the counter block and store their XOR
	 * back into mac[].
	 *
	 * In:	x0 = mac	16-byte MAC, replaced by the result
	 *	x1 = ctr	16-byte counter block (read only)
	 *	x2 = rk		round keys
	 *	w3 = rounds	number of AES rounds
	 *
	 * Clobbers: x2, w3, v0, v1, v3-v5 and the condition flags.
	 */
SYM_FUNC_START(ce_aes_ccm_final)
	ld1	{v3.4s}, [x2], #16		/* load first round key */
	ld1	{v0.16b}, [x0]			/* load mac */
	cmp	w3, #12				/* which key size? */
	sub	w3, w3, #2			/* modified # of rounds */
	ld1	{v1.16b}, [x1]			/* load 1st ctriv */
	bmi	0f				/* rounds < 12 */
	bne	3f				/* rounds > 12 */
	mov	v5.16b, v3.16b			/* rounds == 12 */
	b	2f
0:	mov	v4.16b, v3.16b
1:	ld1	{v5.4s}, [x2], #16		/* load next round key */
	aese	v0.16b, v4.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v4.16b
	aesmc	v1.16b, v1.16b
2:	ld1	{v3.4s}, [x2], #16		/* load next round key */
	aese	v0.16b, v5.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v5.16b
	aesmc	v1.16b, v1.16b
3:	ld1	{v4.4s}, [x2], #16		/* load next round key */
	subs	w3, w3, #3			/* three rounds per iteration */
	aese	v0.16b, v3.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v3.16b
	aesmc	v1.16b, v1.16b
	bpl	1b
	aese	v0.16b, v4.16b			/* last aese: no aesmc */
	aese	v1.16b, v4.16b
	/* final round key cancels out */
	eor	v0.16b, v0.16b, v1.16b		/* en-/decrypt the mac */
	st1	{v0.16b}, [x0]			/* store result */
	ret
SYM_FUNC_END(ce_aes_ccm_final)

	/*
	 * aes_ccm_do_crypt enc
	 *
	 * Body shared by ce_aes_ccm_encrypt (enc == 1) and
	 * ce_aes_ccm_decrypt (enc == 0).  For every 16-byte block the MAC
	 * (v0) and the incremented counter block (v1) are encrypted side
	 * by side; the counter keystream is XORed with the input to form
	 * the output and the plaintext is XORed into the MAC.
	 *
	 * In:	x0 = out	output bytes
	 *	x1 = in		input bytes
	 *	w2 = cbytes	number of bytes to process
	 *	x3 = rk		round keys
	 *	w4 = rounds	number of AES rounds
	 *	x5 = mac	16-byte MAC state, updated in place
	 *	x6 = ctr	16-byte counter block; its low 8 bytes are
	 *			incremented per block and written back only
	 *			when cbytes ends on a block boundary
	 *
	 * Clobbers: x0-x2, x5-x10, v0-v5 and the condition flags.
	 *
	 * On the partial-tail path (6: onwards) w6 is reused as scratch, so
	 * the counter is not written back to ctr[] on that path.
	 *
	 * NOTE(review): cbytes is not tested for zero; a zero length would
	 * reach the byte loop at 7: with w2 == 0 - presumably callers never
	 * pass 0, confirm against the glue code.
	 *
	 * NOTE(review): CPU_LE() comes from <asm/assembler.h>; it presumably
	 * emits its argument only on little-endian builds - confirm there.
	 */
	.macro	aes_ccm_do_crypt,enc
	ldr	x8, [x6, #8]			/* load lower ctr */
	ld1	{v0.16b}, [x5]			/* load mac */
CPU_LE(	rev	x8, x8			)	/* keep swabbed ctr in reg */
0:	/* outer loop */
	ld1	{v1.8b}, [x6]			/* load upper ctr */
	prfm	pldl1strm, [x1]
	add	x8, x8, #1			/* next counter value */
	rev	x9, x8
	cmp	w4, #12				/* which key size? */
	sub	w7, w4, #2			/* get modified # of rounds */
	ins	v1.d[1], x9			/* no carry in lower ctr */
	ld1	{v3.4s}, [x3]			/* load first round key */
	add	x10, x3, #16			/* x10 = running round key pointer */
	bmi	1f				/* rounds < 12 */
	bne	4f				/* rounds > 12 */
	mov	v5.16b, v3.16b			/* rounds == 12 */
	b	3f
1:	mov	v4.16b, v3.16b
	ld1	{v5.4s}, [x10], #16		/* load 2nd round key */
2:	/* inner loop: 3 rounds, 2x interleaved */
	aese	v0.16b, v4.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v4.16b
	aesmc	v1.16b, v1.16b
3:	ld1	{v3.4s}, [x10], #16		/* load next round key */
	aese	v0.16b, v5.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v5.16b
	aesmc	v1.16b, v1.16b
4:	ld1	{v4.4s}, [x10], #16		/* load next round key */
	subs	w7, w7, #3
	aese	v0.16b, v3.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v3.16b
	aesmc	v1.16b, v1.16b
	ld1	{v5.4s}, [x10], #16		/* load next round key */
	bpl	2b
	aese	v0.16b, v4.16b			/* last aese: no aesmc; the final */
	aese	v1.16b, v4.16b			/* round key (v5) is applied below */
	subs	w2, w2, #16
	bmi	6f				/* partial block? */
	ld1	{v2.16b}, [x1], #16		/* load next input block */
	.if	\enc == 1
	eor	v2.16b, v2.16b, v5.16b		/* final round enc+mac */
	eor	v1.16b, v1.16b, v2.16b		/* xor with crypted ctr */
	.else
	eor	v2.16b, v2.16b, v1.16b		/* xor with crypted ctr */
	eor	v1.16b, v2.16b, v5.16b		/* final round enc */
	.endif
	eor	v0.16b, v0.16b, v2.16b		/* xor mac with pt ^ rk[last] */
	st1	{v1.16b}, [x0], #16		/* write output block */
	bne	0b				/* flags still from subs w2 */
CPU_LE(	rev	x8, x8			)
	st1	{v0.16b}, [x5]			/* store mac */
	str	x8, [x6, #8]			/* store lsb end of ctr (BE) */
5:	ret

6:	eor	v0.16b, v0.16b, v5.16b		/* final round mac */
	eor	v1.16b, v1.16b, v5.16b		/* final round enc */
	st1	{v0.16b}, [x5]			/* store mac */
	add	w2, w2, #16			/* process partial tail block */
7:	ldrb	w9, [x1], #1			/* get 1 byte of input */
	umov	w6, v1.b[0]			/* get top crypted ctr byte */
	umov	w7, v0.b[0]			/* get top mac byte */
	.if	\enc == 1
	eor	w7, w7, w9			/* mac ^= plaintext byte */
	eor	w9, w9, w6			/* ciphertext byte */
	.else
	eor	w9, w9, w6			/* plaintext byte */
	eor	w7, w7, w9			/* mac ^= plaintext byte */
	.endif
	strb	w9, [x0], #1			/* store out byte */
	strb	w7, [x5], #1			/* store mac byte */
	subs	w2, w2, #1
	beq	5b
	ext	v0.16b, v0.16b, v0.16b, #1	/* shift out mac byte */
	ext	v1.16b, v1.16b, v1.16b, #1	/* shift out ctr byte */
	b	7b
	.endm

	/*
	 * void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
	 *			   u8 const rk[], u32 rounds, u8 mac[],
	 *			   u8 ctr[]);
	 * void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
	 *			   u8 const rk[], u32 rounds, u8 mac[],
	 *			   u8 ctr[]);
	 *
	 * Both entry points are plain expansions of aes_ccm_do_crypt; see
	 * the macro for register usage and clobbers.
	 */
SYM_FUNC_START(ce_aes_ccm_encrypt)
	aes_ccm_do_crypt	1
SYM_FUNC_END(ce_aes_ccm_encrypt)

SYM_FUNC_START(ce_aes_ccm_decrypt)
	aes_ccm_do_crypt	0
SYM_FUNC_END(ce_aes_ccm_decrypt)