/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Key handling functions for PPC AES implementation
 *
 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
 */

#include <asm/ppc_asm.h>

/*
 * LOAD_KEY(d, s, off): load the 32 bit word at address s + off into d.
 *
 * On big endian builds this is a plain lwz. Otherwise the word is fetched
 * with a byte-reversed load (lwbrx), so d receives the same value a big
 * endian lwz would produce. The non big endian variant clobbers r0, which
 * it uses to hold the offset.
 */
#ifdef __BIG_ENDIAN__
#define LOAD_KEY(d, s, off) \
	lwz		d,off(s);
#else
#define LOAD_KEY(d, s, off) \
	li		r0,off; \
	lwbrx		d,s,r0;
#endif

/*
 * INITIALIZE_KEY: open a 32 byte stack frame and save r14, r15 and r16 in
 * it (at offsets 8, 12 and 16). The key expansion routines below use these
 * three registers as scratch / loop counter.
 */
#define INITIALIZE_KEY \
	stwu		r1,-32(r1);	/* create stack frame		*/ \
	stw		r14,8(r1);	/* save registers		*/ \
	stw		r15,12(r1);				   \
	stw		r16,16(r1);

/*
 * FINALIZE_KEY: counterpart of INITIALIZE_KEY. Restores r14-r16, zeroes
 * r5-r12 (the registers that held round key words) and releases the
 * 32 byte stack frame.
 *
 * NOTE(review): r0 and r4, which the expansion loops also use as scratch,
 * are not zeroed here.
 */
#define FINALIZE_KEY \
	lwz		r14,8(r1);	/* restore registers		*/ \
	lwz		r15,12(r1);				   \
	lwz		r16,16(r1);				   \
	xor		r5,r5,r5;	/* clear sensitive data		*/ \
	xor		r6,r6,r6;				   \
	xor		r7,r7,r7;				   \
	xor		r8,r8,r8;				   \
	xor		r9,r9,r9;				   \
	xor		r10,r10,r10;				   \
	xor		r11,r11,r11;				   \
	xor		r12,r12,r12;				   \
	addi		r1,r1,32;	/* cleanup stack		*/

/*
 * LS_BOX(r, t1, t2): replace each of the four bytes of r, in place, by a
 * byte looked up in the table at PPC_AES_4K_ENCTAB.
 *
 * t2 is loaded with the table address. For every byte of r (least
 * significant byte first) the byte value is inserted into bits 20-27 of
 * t2, i.e. it selects a 16 byte wide table entry, the byte at offset 8 of
 * that entry is loaded into t1 and written back over the source byte of r.
 * Bytes that have not been processed yet are still the original ones when
 * they are used as index. Clobbers t1 and t2.
 *
 * NOTE(review): the table is defined outside this file. The indexing
 * presumably relies on it being 4K aligned and on offset 8 of each entry
 * holding the plain S-box value - confirm against the table definition.
 */
#define LS_BOX(r, t1, t2) \
	lis		t2,PPC_AES_4K_ENCTAB@h;			   \
	ori		t2,t2,PPC_AES_4K_ENCTAB@l;		   \
	rlwimi		t2,r,4,20,27;	/* index = byte 0 (LSB)	*/ \
	lbz		t1,8(t2);				   \
	rlwimi		r,t1,0,24,31;				   \
	rlwimi		t2,r,28,20,27;	/* index = byte 1		*/ \
	lbz		t1,8(t2);				   \
	rlwimi		r,t1,8,16,23;				   \
	rlwimi		t2,r,20,20,27;	/* index = byte 2		*/ \
	lbz		t1,8(t2);				   \
	rlwimi		r,t1,16,8,15;				   \
	rlwimi		t2,r,12,20,27;	/* index = byte 3 (MSB)	*/ \
	lbz		t1,8(t2);				   \
	rlwimi		r,t1,24,0,7;

/*
 * GF8_MUL(out, in, t1, t2): multiply each of the four bytes packed in
 * "in" by 2 in GF(2^8) and place the packed result in "out".
 *
 *   t1  = ((in & 0x80808080) >> 7) * 0x1b   reduction for bytes with the
 *                                            top bit set
 *   t2  = (in & 0x7f7f7f7f) << 1            per byte shift, no carry into
 *                                            the neighbouring byte
 *   out = t1 ^ t2
 *
 * "in" is read after t1 and t2 have been written, so it must not be the
 * same register as t1 or t2. "out" is written by the last instruction only
 * and may therefore alias "in", t1 or t2 (the callers below rely on this).
 */
#define GF8_MUL(out, in, t1, t2) \
	lis t1,0x8080;			/* multiplication in GF8	*/ \
	ori t1,t1,0x8080;					   \
	and t1,t1,in;						   \
	srwi t1,t1,7;						   \
	mulli t1,t1,0x1b;					   \
	lis t2,0x7f7f;						   \
	ori t2,t2,0x7f7f;					   \
	and t2,t2,in;						   \
	slwi t2,t2,1;						   \
	xor out,t1,t2;

/*
 * ppc_expand_key_128(u32 *key_enc, const u8 *key)
 *
 * Expand 128 bit key into 176 bytes encryption key. It consists of
 * key itself plus 10 rounds with 16 bytes each
 *
 * Register usage:
 *   r3      key_enc, advanced by 16 bytes per round
 *   r4      key on entry, scratch once the key words are loaded
 *   r5-r8   the four words of the current round key
 *   r0      round constant (also clobbered by LOAD_KEY on non big
 *           endian builds, hence it is initialised after the loads)
 *   r14,r15 scratch, r16 remaining rounds (saved by INITIALIZE_KEY)
 */
_GLOBAL(ppc_expand_key_128)
	INITIALIZE_KEY
	LOAD_KEY(r5,r4,0)
	LOAD_KEY(r6,r4,4)
	LOAD_KEY(r7,r4,8)
	LOAD_KEY(r8,r4,12)
	stw		r5,0(r3)	/* key[0..3] = input data	*/
	stw		r6,4(r3)
	stw		r7,8(r3)
	stw		r8,12(r3)
	li		r16,10		/* 10 expansion rounds		*/
	lis		r0,0x0100	/* round constant = 0x01000000	*/
ppc_expand_128_loop:
	addi		r3,r3,16	/* next 16 byte output slot	*/
	mr		r14,r8		/* apply LS_BOX to 4th temp	*/
	rotlwi		r14,r14,8	/* rotate word left by a byte	*/
	LS_BOX(r14, r15, r4)
	xor		r14,r14,r0	/* add round constant		*/
	xor		r5,r5,r14	/* xor next 4 keys		*/
	xor		r6,r6,r5
	xor		r7,r7,r6
	xor		r8,r8,r7
	stw		r5,0(r3)	/* store next 4 keys		*/
	stw		r6,4(r3)
	stw		r7,8(r3)
	stw		r8,12(r3)
	GF8_MUL(r0, r0, r4, r14)	/* multiply RCO by 2 in GF	*/
	subi		r16,r16,1
	cmpwi		r16,0
	bt		eq,ppc_expand_128_end	/* all rounds done	*/
	b		ppc_expand_128_loop
ppc_expand_128_end:
	FINALIZE_KEY
	blr

/*
 * ppc_expand_key_192(u32 *key_enc, const u8 *key)
 *
 * Expand 192 bit key into 208 bytes encryption key. It consists of key
 * itself plus 12 rounds with 16 bytes each
 *
 * The loop runs 8 times and produces 6 words (24 bytes) per iteration.
 * In the last iteration only the first 4 words are stored before the
 * loop is left, which gives 24 + 7 * 24 + 16 = 208 bytes in total.
 *
 * Register usage is the same as in ppc_expand_key_128, with r5-r10
 * holding the six words of the current step.
 */
_GLOBAL(ppc_expand_key_192)
	INITIALIZE_KEY
	LOAD_KEY(r5,r4,0)
	LOAD_KEY(r6,r4,4)
	LOAD_KEY(r7,r4,8)
	LOAD_KEY(r8,r4,12)
	LOAD_KEY(r9,r4,16)
	LOAD_KEY(r10,r4,20)
	stw		r5,0(r3)	/* key[0..5] = input data	*/
	stw		r6,4(r3)
	stw		r7,8(r3)
	stw		r8,12(r3)
	stw		r9,16(r3)
	stw		r10,20(r3)
	li		r16,8		/* 8 expansion rounds		*/
	lis		r0,0x0100	/* round constant = 0x01000000	*/
ppc_expand_192_loop:
	addi		r3,r3,24	/* next 24 byte output slot	*/
	mr		r14,r10		/* apply LS_BOX to 6th temp	*/
	rotlwi		r14,r14,8	/* rotate word left by a byte	*/
	LS_BOX(r14, r15, r4)
	xor		r14,r14,r0	/* add round constant		*/
	xor		r5,r5,r14	/* xor next 6 keys		*/
	xor		r6,r6,r5
	xor		r7,r7,r6
	xor		r8,r8,r7
	xor		r9,r9,r8
	xor		r10,r10,r9
	stw		r5,0(r3)	/* first 4 words always stored	*/
	stw		r6,4(r3)
	stw		r7,8(r3)
	stw		r8,12(r3)
	subi		r16,r16,1
	cmpwi		r16,0		/* last round early kick out	*/
	bt		eq,ppc_expand_192_end
	stw		r9,16(r3)	/* words 5 and 6, not in the	*/
	stw		r10,20(r3)	/* last round			*/
	GF8_MUL(r0, r0, r4, r14)	/* multiply RCO GF8		*/
	b		ppc_expand_192_loop
ppc_expand_192_end:
	FINALIZE_KEY
	blr

/*
 * ppc_expand_key_256(u32 *key_enc, const u8 *key)
 *
 * Expand 256 bit key into 240 bytes encryption key. It consists of key
 * itself plus 14 rounds with 16 bytes each
 *
 * The loop runs 7 times and produces 8 words (32 bytes) per iteration.
 * In the last iteration only the first 4 words are stored before the
 * loop is left, which gives 32 + 6 * 32 + 16 = 240 bytes in total.
 *
 * Register usage is the same as in ppc_expand_key_128, with r5-r12
 * holding the eight words of the current step.
 */
_GLOBAL(ppc_expand_key_256)
	INITIALIZE_KEY
	LOAD_KEY(r5,r4,0)
	LOAD_KEY(r6,r4,4)
	LOAD_KEY(r7,r4,8)
	LOAD_KEY(r8,r4,12)
	LOAD_KEY(r9,r4,16)
	LOAD_KEY(r10,r4,20)
	LOAD_KEY(r11,r4,24)
	LOAD_KEY(r12,r4,28)
	stw		r5,0(r3)	/* key[0..7] = input data	*/
	stw		r6,4(r3)
	stw		r7,8(r3)
	stw		r8,12(r3)
	stw		r9,16(r3)
	stw		r10,20(r3)
	stw		r11,24(r3)
	stw		r12,28(r3)
	li		r16,7		/* 7 expansion rounds		*/
	lis		r0,0x0100	/* round constant = 0x01000000	*/
ppc_expand_256_loop:
	addi		r3,r3,32	/* next 32 byte output slot	*/
	mr		r14,r12		/* apply LS_BOX to 8th temp	*/
	rotlwi		r14,r14,8	/* rotate word left by a byte	*/
	LS_BOX(r14, r15, r4)
	xor		r14,r14,r0	/* add round constant		*/
	xor		r5,r5,r14	/* xor 4 keys			*/
	xor		r6,r6,r5
	xor		r7,r7,r6
	xor		r8,r8,r7
	/* second half: no rotation and no round constant here */
	mr		r14,r8
	LS_BOX(r14, r15, r4)		/* apply LS_BOX to 4th temp	*/
	xor		r9,r9,r14	/* xor 4 keys			*/
	xor		r10,r10,r9
	xor		r11,r11,r10
	xor		r12,r12,r11
	stw		r5,0(r3)	/* first 4 words always stored	*/
	stw		r6,4(r3)
	stw		r7,8(r3)
	stw		r8,12(r3)
	subi		r16,r16,1
	cmpwi		r16,0		/* last round early kick out	*/
	bt		eq,ppc_expand_256_end
	stw		r9,16(r3)	/* words 5 to 8, not in the	*/
	stw		r10,20(r3)	/* last round			*/
	stw		r11,24(r3)
	stw		r12,28(r3)
	GF8_MUL(r0, r0, r4, r14)	/* multiply round constant by 2	*/
	b		ppc_expand_256_loop
ppc_expand_256_end:
	FINALIZE_KEY
	blr

/*
 * ppc_generate_decrypt_key: derive decryption key from encryption key
 * number of bytes to handle are calculated from length of key (16/24/32)
 *
 * Register usage as visible from the code:
 *   r3  output buffer for the decryption key (written)
 *   r4  expanded encryption key (read)
 *   r5  key length in bytes
 * NOTE(review): the C prototype is not part of this file - confirm the
 * argument order against the caller.
 *
 * The round keys are written in reverse order. The first and the last
 * 16 byte round key are copied unchanged (swapping places). Every word of
 * the (key length + 20) / 4 = 9/11/13 round keys in between is
 * transformed as
 *
 *   out = 14*w ^ rotr(11*w, 24) ^ rotr(13*w, 16) ^ rotr(9*w, 8)
 *
 * where n*w is the bytewise GF(2^8) product built from GF8_MUL, i.e. the
 * AES InvMixColumns operation applied to the word.
 *
 * Clobbers r0, r6-r12 and ctr. No stack frame is used and, unlike in the
 * expansion routines, the working registers are not zeroed on exit.
 */
_GLOBAL(ppc_generate_decrypt_key)
	addi		r6,r5,24
	slwi		r6,r6,2		/* offset of last round key:	*/
					/* 160/192/224 for 16/24/32	*/
	lwzx		r7,r4,r6	/* first/last 4 words are same	*/
	stw		r7,0(r3)
	lwz		r7,0(r4)
	stwx		r7,r3,r6
	addi		r6,r6,4
	lwzx		r7,r4,r6
	stw		r7,4(r3)
	lwz		r7,4(r4)
	stwx		r7,r3,r6
	addi		r6,r6,4
	lwzx		r7,r4,r6
	stw		r7,8(r3)
	lwz		r7,8(r4)
	stwx		r7,r3,r6
	addi		r6,r6,4
	lwzx		r7,r4,r6
	stw		r7,12(r3)
	lwz		r7,12(r4)
	stwx		r7,r3,r6
	addi		r3,r3,16	/* output: second round key	*/
	add		r4,r4,r6	/* r6 is last offset + 12 here,	*/
	subi		r4,r4,28	/* so input: last but one key	*/
	addi		r5,r5,20
	srwi		r5,r5,2		/* 9/11/13 round keys to do	*/
ppc_generate_decrypt_block:
	li		r6,4		/* 4 words per round key	*/
	mtctr		r6
ppc_generate_decrypt_word:
	lwz		r6,0(r4)	/* w				*/
	GF8_MUL(r7, r6, r0, r7)		/* r7 = 2*w			*/
	GF8_MUL(r8, r7, r0, r8)		/* r8 = 4*w			*/
	GF8_MUL(r9, r8, r0, r9)		/* r9 = 8*w			*/
	xor		r10,r9,r6	/* r10 = 9*w			*/
	xor		r11,r7,r8
	xor		r11,r11,r9	/* r11 = 14*w			*/
	xor		r12,r7,r10	/* 11*w				*/
	rotrwi		r12,r12,24
	xor		r11,r11,r12
	xor		r12,r8,r10	/* 13*w				*/
	rotrwi		r12,r12,16
	xor		r11,r11,r12
	rotrwi		r12,r10,8	/* 9*w				*/
	xor		r11,r11,r12
	stw		r11,0(r3)
	addi		r3,r3,4
	addi		r4,r4,4
	bdnz		ppc_generate_decrypt_word
	subi		r4,r4,32	/* 16 consumed, so step back to	*/
					/* the previous round key	*/
	subi		r5,r5,1
	cmpwi		r5,0
	bt		gt,ppc_generate_decrypt_block
	blr