/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * AES modes (ECB/CBC/CTR/XTS) for PPC AES implementation
 *
 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
 */

#include <asm/ppc_asm.h>
#include "aes-spe-regs.h"

/*
 * Data and IV access helpers.
 *
 * Big endian: words are addressed through the "off" argument and the
 * pointers stay untouched, so NEXT_BLOCK has to advance rSP/rDP by 16.
 *
 * Little endian: "off" is ignored. Every byte reversed access bumps its
 * pointer by 4, so four accesses already advance one whole block. For
 * that reason NEXT_BLOCK is empty and START_IV must rewind rIP by 16
 * before the IV buffer is accessed a second time.
 *
 * CBC_DEC and CTR_DEC are the pointer rewinds used by CBC decryption and
 * by the CTR partial block path. They are larger on little endian to
 * compensate for the automatic increments.
 */
#ifdef __BIG_ENDIAN__			/* Macros for big endian builds	*/

#define LOAD_DATA(reg, off) \
	lwz	reg,off(rSP);	/* load with offset */
#define SAVE_DATA(reg, off) \
	stw	reg,off(rDP);	/* save with offset */
#define NEXT_BLOCK \
	addi	rSP,rSP,16;	/* increment pointers per block */ \
	addi	rDP,rDP,16;
#define LOAD_IV(reg, off) \
	lwz	reg,off(rIP);	/* IV loading with offset */
#define SAVE_IV(reg, off) \
	stw	reg,off(rIP);	/* IV saving with offset */
#define START_IV			/* nothing to reset */
#define CBC_DEC 16			/* CBC decrement per block */
#define CTR_DEC 1			/* CTR decrement one byte */

#else					/* Macros for little endian */

#define LOAD_DATA(reg, off) \
	lwbrx	reg,0,rSP;	/* load reversed */ \
	addi	rSP,rSP,4;	/* and increment pointer */
#define SAVE_DATA(reg, off) \
	stwbrx	reg,0,rDP;	/* save reversed */ \
	addi	rDP,rDP,4;	/* and increment pointer */
#define NEXT_BLOCK			/* nothing to do */
#define LOAD_IV(reg, off) \
	lwbrx	reg,0,rIP;	/* load reversed */ \
	addi	rIP,rIP,4;	/* and increment pointer */
#define SAVE_IV(reg, off) \
	stwbrx	reg,0,rIP;	/* save reversed */ \
	addi	rIP,rIP,4;	/* and increment pointer */
#define START_IV \
	subi	rIP,rIP,16;	/* must reset pointer */
#define CBC_DEC 32			/* 2 blocks because of incs */
#define CTR_DEC 17			/* 1 block because of incs */

#endif

/*
 * Save/restore of additional 32 bit registers inside the stack frame:
 * rI0-rI3 live at 96..108(r1), rG0-rG3 at 112..124(r1). The register
 * count is pasted into the macro name by INITIALIZE_CRYPT and
 * FINALIZE_CRYPT, hence the empty _0_ variants.
 */
#define SAVE_0_REGS
#define LOAD_0_REGS

#define SAVE_4_REGS \
	stw	rI0,96(r1);	/* save 32 bit registers */ \
	stw	rI1,100(r1); \
	stw	rI2,104(r1); \
	stw	rI3,108(r1);

#define LOAD_4_REGS \
	lwz	rI0,96(r1);	/* restore 32 bit registers */ \
	lwz	rI1,100(r1); \
	lwz	rI2,104(r1); \
	lwz	rI3,108(r1);

#define SAVE_8_REGS \
	SAVE_4_REGS \
	stw	rG0,112(r1);	/* save 32 bit registers */ \
	stw	rG1,116(r1); \
	stw	rG2,120(r1); \
	stw	rG3,124(r1);

#define LOAD_8_REGS \
	LOAD_4_REGS \
	lwz	rG0,112(r1);	/* restore 32 bit registers */ \
	lwz	rG1,116(r1); \
	lwz	rG2,120(r1); \
	lwz	rG3,124(r1);

/*
 * Common prologue. Creates a 160 byte stack frame, stores the link
 * register at 8(r1), saves r14-r23 as 64 bit values at 16..88(r1),
 * loads the address of "tab" into rT0, keeps a copy of the key pointer
 * rKP in rKS and finally saves nr32bitregs (0, 4 or 8) extra registers.
 */
#define INITIALIZE_CRYPT(tab,nr32bitregs) \
	mflr	r0; \
	stwu	r1,-160(r1);	/* create stack frame */ \
	lis	rT0,tab@h;	/* en-/decryption table pointer */ \
	stw	r0,8(r1);	/* save link register */ \
	ori	rT0,rT0,tab@l; \
	evstdw	r14,16(r1); \
	mr	rKS,rKP; \
	evstdw	r15,24(r1);	/* We must save non volatile */ \
	evstdw	r16,32(r1);	/* registers. Take the chance */ \
	evstdw	r17,40(r1);	/* and save the SPE part too */ \
	evstdw	r18,48(r1); \
	evstdw	r19,56(r1); \
	evstdw	r20,64(r1); \
	evstdw	r21,72(r1); \
	evstdw	r22,80(r1); \
	evstdw	r23,88(r1); \
	SAVE_##nr32bitregs##_REGS

/*
 * Common epilogue. Restores everything INITIALIZE_CRYPT saved, then
 * overwrites the word at offsets 16, 24, ..., 88 of the frame (the first
 * word of each 8 byte save slot) with zero before the frame is released.
 */
#define FINALIZE_CRYPT(nr32bitregs) \
	lwz	r0,8(r1); \
	evldw	r14,16(r1);	/* restore SPE registers */ \
	evldw	r15,24(r1); \
	evldw	r16,32(r1); \
	evldw	r17,40(r1); \
	evldw	r18,48(r1); \
	evldw	r19,56(r1); \
	evldw	r20,64(r1); \
	evldw	r21,72(r1); \
	evldw	r22,80(r1); \
	evldw	r23,88(r1); \
	LOAD_##nr32bitregs##_REGS \
	mtlr	r0;		/* restore link register */ \
	xor	r0,r0,r0; \
	stw	r0,16(r1);	/* delete sensitive data */ \
	stw	r0,24(r1);	/* that we might have pushed */ \
	stw	r0,32(r1);	/* from other context that runs */ \
	stw	r0,40(r1);	/* the same code */ \
	stw	r0,48(r1); \
	stw	r0,56(r1); \
	stw	r0,64(r1); \
	stw	r0,72(r1); \
	stw	r0,80(r1); \
	stw	r0,88(r1); \
	addi	r1,r1,160;	/* cleanup stack frame */

/*
 * Byte reverse s0 into t0 and s1 into t1 (two registers per invocation,
 * instructions interleaved). The sources are still read after the
 * targets have been written, so t0/t1 must not alias s0/s1.
 */
#define ENDIAN_SWAP(t0, t1, s0, s1) \
	rotrwi	t0,s0,8;	/* swap endianness for 2 GPRs */ \
	rotrwi	t1,s1,8; \
	rlwimi	t0,s0,8,8,15; \
	rlwimi	t1,s1,8,8,15; \
	rlwimi	t0,s0,8,24,31; \
	rlwimi	t1,s1,8,24,31;

/*
 * Multiply the 128 bit value d3:d2:d1:d0 (d3 most significant word) by x:
 * shift it left by one bit across all four words and xor 0x87 into d0
 * when the top bit of d3 was set before the shift. t0 is clobbered.
 */
#define GF128_MUL(d0, d1, d2, d3, t0) \
	li	t0,0x87;	/* multiplication in GF128 */ \
	cmpwi	d3,-1; \
	iselgt	t0,0,t0; \
	rlwimi	d3,d2,0,0,0;	/* propagate "carry" bits */ \
	rotlwi	d3,d3,1; \
	rlwimi	d2,d1,0,0,0; \
	rotlwi	d2,d2,1; \
	rlwimi	d1,d0,0,0,0; \
	slwi	d0,d0,1;	/* shift left 128 bit */ \
	rotlwi	d1,d1,1; \
	xor	d0,d0,t0;

/*
 * Load the first four key words from rKP into rW0-rW3, move rRR into
 * the count register and xor the key words with d0-d3 into rD0-rD3.
 */
#define START_KEY(d0, d1, d2, d3) \
	lwz	rW0,0(rKP); \
	mtctr	rRR; \
	lwz	rW1,4(rKP); \
	lwz	rW2,8(rKP); \
	lwz	rW3,12(rKP); \
	xor	rD0,d0,rW0; \
	xor	rD1,d1,rW1; \
	xor	rD2,d2,rW2; \
	xor	rD3,d3,rW3;

/*
 * ppc_encrypt_aes(u8 *out, const u8 *in, u32 *key_enc,
 *		   u32 rounds)
 *
 * called from glue layer to encrypt a single 16 byte block
 * round values are AES128 = 4, AES192 = 5, AES256 = 6
 *
 */
_GLOBAL(ppc_encrypt_aes)
	INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 0)
	LOAD_DATA(rD0, 0)
	LOAD_DATA(rD1, 4)
	LOAD_DATA(rD2, 8)
	LOAD_DATA(rD3, 12)
	START_KEY(rD0, rD1, rD2, rD3)
	bl	ppc_encrypt_block
	xor	rD0,rD0,rW0
	SAVE_DATA(rD0, 0)
	xor	rD1,rD1,rW1
	SAVE_DATA(rD1, 4)
	xor	rD2,rD2,rW2
	SAVE_DATA(rD2, 8)
	xor	rD3,rD3,rW3
	SAVE_DATA(rD3, 12)
	FINALIZE_CRYPT(0)
	blr

/*
 * ppc_decrypt_aes(u8 *out, const u8 *in, u32 *key_dec,
 *		   u32 rounds)
 *
 * called from glue layer to decrypt a single 16 byte block
 * round values are AES128 = 4, AES192 = 5, AES256 = 6
 *
 */
_GLOBAL(ppc_decrypt_aes)
	INITIALIZE_CRYPT(PPC_AES_4K_DECTAB,0)
	LOAD_DATA(rD0, 0)
	addi	rT1,rT0,4096		/* rT1 = decryption table + 4096 */
	LOAD_DATA(rD1, 4)
	LOAD_DATA(rD2, 8)
	LOAD_DATA(rD3, 12)
	START_KEY(rD0, rD1, rD2, rD3)
	bl	ppc_decrypt_block
	xor	rD0,rD0,rW0
	SAVE_DATA(rD0, 0)
	xor	rD1,rD1,rW1
	SAVE_DATA(rD1, 4)
	xor	rD2,rD2,rW2
	SAVE_DATA(rD2, 8)
	xor	rD3,rD3,rW3
	SAVE_DATA(rD3, 12)
	FINALIZE_CRYPT(0)
	blr

/*
 * ppc_encrypt_ecb(u8 *out, const u8 *in, u32 *key_enc,
 *		   u32 rounds, u32 bytes);
 *
 * called from glue layer to encrypt multiple blocks via ECB
 * Bytes must be larger or equal 16 and only whole blocks are
 * processed. round values are AES128 = 4, AES192 = 5 and
 * AES256 = 6
 *
 */
_GLOBAL(ppc_encrypt_ecb)
	INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 0)
ppc_encrypt_ecb_loop:
	LOAD_DATA(rD0, 0)
	mr	rKP,rKS			/* key pointer copy from prologue */
	LOAD_DATA(rD1, 4)
	subi	rLN,rLN,16
	LOAD_DATA(rD2, 8)
	cmpwi	rLN,15			/* consumed by "bt gt" below */
	LOAD_DATA(rD3, 12)
	START_KEY(rD0, rD1, rD2, rD3)
	bl	ppc_encrypt_block
	xor	rD0,rD0,rW0
	SAVE_DATA(rD0, 0)
	xor	rD1,rD1,rW1
	SAVE_DATA(rD1, 4)
	xor	rD2,rD2,rW2
	SAVE_DATA(rD2, 8)
	xor	rD3,rD3,rW3
	SAVE_DATA(rD3, 12)
	NEXT_BLOCK
	bt	gt,ppc_encrypt_ecb_loop
	FINALIZE_CRYPT(0)
	blr

/*
 * ppc_decrypt_ecb(u8 *out, const u8 *in, u32 *key_dec,
 *		   u32 rounds, u32 bytes);
 *
 * called from glue layer to decrypt multiple blocks via ECB
 * Bytes must be larger or equal 16 and only whole blocks are
 * processed. round values are AES128 = 4, AES192 = 5 and
 * AES256 = 6
 *
 */
_GLOBAL(ppc_decrypt_ecb)
	INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 0)
	addi	rT1,rT0,4096		/* rT1 = decryption table + 4096 */
ppc_decrypt_ecb_loop:
	LOAD_DATA(rD0, 0)
	mr	rKP,rKS			/* key pointer copy from prologue */
	LOAD_DATA(rD1, 4)
	subi	rLN,rLN,16
	LOAD_DATA(rD2, 8)
	cmpwi	rLN,15			/* consumed by "bt gt" below */
	LOAD_DATA(rD3, 12)
	START_KEY(rD0, rD1, rD2, rD3)
	bl	ppc_decrypt_block
	xor	rD0,rD0,rW0
	SAVE_DATA(rD0, 0)
	xor	rD1,rD1,rW1
	SAVE_DATA(rD1, 4)
	xor	rD2,rD2,rW2
	SAVE_DATA(rD2, 8)
	xor	rD3,rD3,rW3
	SAVE_DATA(rD3, 12)
	NEXT_BLOCK
	bt	gt,ppc_decrypt_ecb_loop
	FINALIZE_CRYPT(0)
	blr

/*
 * ppc_encrypt_cbc(u8 *out, const u8 *in, u32 *key_enc,
 *		   u32 rounds, u32 bytes, u8 *iv);
 *
 * called from glue layer to encrypt multiple blocks via CBC
 * Bytes must be larger or equal 16 and only whole blocks are
 * processed. round values are AES128 = 4, AES192 = 5 and
 * AES256 = 6
 *
 * rI0-rI3 carry the chaining value: they start as the IV, are replaced
 * by every ciphertext block produced and are written back to the IV
 * buffer when the loop ends.
 *
 */
_GLOBAL(ppc_encrypt_cbc)
	INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 4)
	LOAD_IV(rI0, 0)
	LOAD_IV(rI1, 4)
	LOAD_IV(rI2, 8)
	LOAD_IV(rI3, 12)
ppc_encrypt_cbc_loop:
	LOAD_DATA(rD0, 0)
	mr	rKP,rKS			/* key pointer copy from prologue */
	LOAD_DATA(rD1, 4)
	subi	rLN,rLN,16
	LOAD_DATA(rD2, 8)
	cmpwi	rLN,15			/* consumed by "bt gt" below */
	LOAD_DATA(rD3, 12)
	xor	rD0,rD0,rI0		/* chain with previous block / IV */
	xor	rD1,rD1,rI1
	xor	rD2,rD2,rI2
	xor	rD3,rD3,rI3
	START_KEY(rD0, rD1, rD2, rD3)
	bl	ppc_encrypt_block
	xor	rI0,rD0,rW0
	SAVE_DATA(rI0, 0)
	xor	rI1,rD1,rW1
	SAVE_DATA(rI1, 4)
	xor	rI2,rD2,rW2
	SAVE_DATA(rI2, 8)
	xor	rI3,rD3,rW3
	SAVE_DATA(rI3, 12)
	NEXT_BLOCK
	bt	gt,ppc_encrypt_cbc_loop
	START_IV
	SAVE_IV(rI0, 0)
	SAVE_IV(rI1, 4)
	SAVE_IV(rI2, 8)
	SAVE_IV(rI3, 12)
	FINALIZE_CRYPT(4)
	blr

/*
 * ppc_decrypt_cbc(u8 *out, const u8 *in, u32 *key_dec,
 *		   u32 rounds, u32 bytes, u8 *iv);
 *
 * called from glue layer to decrypt multiple blocks via CBC
 * round values are AES128 = 4, AES192 = 5, AES256 = 6
 *
 */
_GLOBAL(ppc_decrypt_cbc)
	/*
	 * Blocks are processed from the last one down to the first. The
	 * length is rounded down to whole blocks, the last ciphertext
	 * block is stored into the IV buffer up front, and every decrypted
	 * block is xored with the ciphertext block in front of it. The
	 * first block is xored with the IV loaded into rI0-rI3 on entry.
	 */
	INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 4)
	li	rT1,15
	LOAD_IV(rI0, 0)
	andc	rLN,rLN,rT1		/* drop partial block bytes */
	LOAD_IV(rI1, 4)
	subi	rLN,rLN,16		/* offset of the last block */
	LOAD_IV(rI2, 8)
	add	rSP,rSP,rLN		/* reverse processing */
	LOAD_IV(rI3, 12)
	add	rDP,rDP,rLN
	LOAD_DATA(rD0, 0)
	addi	rT1,rT0,4096		/* rT1 = decryption table + 4096 */
	LOAD_DATA(rD1, 4)
	LOAD_DATA(rD2, 8)
	LOAD_DATA(rD3, 12)
	START_IV
	SAVE_IV(rD0, 0)			/* last ciphertext block -> IV buffer */
	SAVE_IV(rD1, 4)
	SAVE_IV(rD2, 8)
	cmpwi	rLN,16
	SAVE_IV(rD3, 12)
	bt	lt,ppc_decrypt_cbc_end
ppc_decrypt_cbc_loop:
	mr	rKP,rKS			/* key pointer copy from prologue */
	START_KEY(rD0, rD1, rD2, rD3)
	bl	ppc_decrypt_block
	subi	rLN,rLN,16
	subi	rSP,rSP,CBC_DEC		/* step back to the preceding block */
	xor	rW0,rD0,rW0
	LOAD_DATA(rD0, 0)
	xor	rW1,rD1,rW1
	LOAD_DATA(rD1, 4)
	xor	rW2,rD2,rW2
	LOAD_DATA(rD2, 8)
	xor	rW3,rD3,rW3
	LOAD_DATA(rD3, 12)
	xor	rW0,rW0,rD0		/* chain with preceding ciphertext */
	SAVE_DATA(rW0, 0)
	xor	rW1,rW1,rD1
	SAVE_DATA(rW1, 4)
	xor	rW2,rW2,rD2
	SAVE_DATA(rW2, 8)
	xor	rW3,rW3,rD3
	SAVE_DATA(rW3, 12)
	cmpwi	rLN,15
	subi	rDP,rDP,CBC_DEC
	bt	gt,ppc_decrypt_cbc_loop
ppc_decrypt_cbc_end:
	mr	rKP,rKS
	START_KEY(rD0, rD1, rD2, rD3)
	bl	ppc_decrypt_block
	xor	rW0,rW0,rD0
	xor	rW1,rW1,rD1
	xor	rW2,rW2,rD2
	xor	rW3,rW3,rD3
	xor	rW0,rW0,rI0		/* decrypt with initial IV */
	SAVE_DATA(rW0, 0)
	xor	rW1,rW1,rI1
	SAVE_DATA(rW1, 4)
	xor	rW2,rW2,rI2
	SAVE_DATA(rW2, 8)
	xor	rW3,rW3,rI3
	SAVE_DATA(rW3, 12)
	FINALIZE_CRYPT(4)
	blr

/*
 * ppc_crypt_ctr(u8 *out, const u8 *in, u32 *key_enc,
 *		 u32 rounds, u32 bytes, u8 *iv);
 *
 * called from glue layer to encrypt/decrypt multiple blocks
 * via CTR. Number of bytes does not need to be a multiple of
 * 16. Round values are AES128 = 4, AES192 = 5, AES256 = 6
 *
 * The counter lives in rI0-rI3 (rI3 least significant word) and is
 * written back to the IV buffer on exit. For a trailing partial block
 * the key stream is parked in the IV buffer, xored bytewise with the
 * input and then overwritten by the incremented counter.
 *
 */
_GLOBAL(ppc_crypt_ctr)
	INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 4)
	LOAD_IV(rI0, 0)
	LOAD_IV(rI1, 4)
	LOAD_IV(rI2, 8)
	cmpwi	rLN,16
	LOAD_IV(rI3, 12)
	START_IV
	bt	lt,ppc_crypt_ctr_partial
ppc_crypt_ctr_loop:
	mr	rKP,rKS			/* key pointer copy from prologue */
	START_KEY(rI0, rI1, rI2, rI3)
	bl	ppc_encrypt_block
	xor	rW0,rD0,rW0		/* rW0-rW3 = key stream */
	xor	rW1,rD1,rW1
	xor	rW2,rD2,rW2
	xor	rW3,rD3,rW3
	LOAD_DATA(rD0, 0)
	subi	rLN,rLN,16
	LOAD_DATA(rD1, 4)
	LOAD_DATA(rD2, 8)
	LOAD_DATA(rD3, 12)
	xor	rD0,rD0,rW0
	SAVE_DATA(rD0, 0)
	xor	rD1,rD1,rW1
	SAVE_DATA(rD1, 4)
	xor	rD2,rD2,rW2
	SAVE_DATA(rD2, 8)
	xor	rD3,rD3,rW3
	SAVE_DATA(rD3, 12)
	addic	rI3,rI3,1		/* increase counter */
	addze	rI2,rI2			/* ripple the carry upwards */
	addze	rI1,rI1
	addze	rI0,rI0
	NEXT_BLOCK
	cmpwi	rLN,15
	bt	gt,ppc_crypt_ctr_loop
ppc_crypt_ctr_partial:
	cmpwi	rLN,0
	bt	eq,ppc_crypt_ctr_end
	mr	rKP,rKS
	START_KEY(rI0, rI1, rI2, rI3)
	bl	ppc_encrypt_block
	xor	rW0,rD0,rW0
	SAVE_IV(rW0, 0)			/* park key stream in IV buffer */
	xor	rW1,rD1,rW1
	SAVE_IV(rW1, 4)
	xor	rW2,rD2,rW2
	SAVE_IV(rW2, 8)
	xor	rW3,rD3,rW3
	SAVE_IV(rW3, 12)
	mtctr	rLN			/* one iteration per remaining byte */
	subi	rIP,rIP,CTR_DEC		/* pointers - 1 for update form access */
	subi	rSP,rSP,1
	subi	rDP,rDP,1
ppc_crypt_ctr_xorbyte:
	lbzu	rW4,1(rIP)		/* bytewise xor for partial block */
	lbzu	rW5,1(rSP)
	xor	rW4,rW4,rW5
	stbu	rW4,1(rDP)
	bdnz	ppc_crypt_ctr_xorbyte
	subf	rIP,rLN,rIP		/* rewind rIP to start of IV buffer */
	addi	rIP,rIP,1
	addic	rI3,rI3,1		/* increase counter */
	addze	rI2,rI2
	addze	rI1,rI1
	addze	rI0,rI0
ppc_crypt_ctr_end:
	SAVE_IV(rI0, 0)
	SAVE_IV(rI1, 4)
	SAVE_IV(rI2, 8)
	SAVE_IV(rI3, 12)
	FINALIZE_CRYPT(4)
	blr

/*
 * ppc_encrypt_xts(u8 *out, const u8 *in, u32 *key_enc,
 *		   u32 rounds, u32 bytes, u8 *iv, u32 *key_twk);
 *
 * called from glue layer to encrypt multiple blocks via XTS
 * If key_twk is given, the initial IV encryption will be
 * processed too. Round values are AES128 = 4, AES192 = 5,
 * AES256 = 6
 *
 * The tweak is kept twice: rI0-rI3 hold the form that is xored with
 * the data, rG0-rG3 hold the byte swapped form that GF128_MUL works on.
 * The tweak for the next call is written back to the IV buffer on exit.
 *
 */
_GLOBAL(ppc_encrypt_xts)
	INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 8)
	LOAD_IV(rI0, 0)
	LOAD_IV(rI1, 4)
	LOAD_IV(rI2, 8)
	cmpwi	rKT,0			/* tweak key given? */
	LOAD_IV(rI3, 12)
	bt	eq,ppc_encrypt_xts_notweak
	mr	rKP,rKT
	START_KEY(rI0, rI1, rI2, rI3)
	bl	ppc_encrypt_block
	xor	rI0,rD0,rW0
	xor	rI1,rD1,rW1
	xor	rI2,rD2,rW2
	xor	rI3,rD3,rW3
ppc_encrypt_xts_notweak:
	ENDIAN_SWAP(rG0, rG1, rI0, rI1)
	ENDIAN_SWAP(rG2, rG3, rI2, rI3)
ppc_encrypt_xts_loop:
	LOAD_DATA(rD0, 0)
	mr	rKP,rKS			/* key pointer copy from prologue */
	LOAD_DATA(rD1, 4)
	subi	rLN,rLN,16
	LOAD_DATA(rD2, 8)
	LOAD_DATA(rD3, 12)
	xor	rD0,rD0,rI0		/* xor tweak before encryption */
	xor	rD1,rD1,rI1
	xor	rD2,rD2,rI2
	xor	rD3,rD3,rI3
	START_KEY(rD0, rD1, rD2, rD3)
	bl	ppc_encrypt_block
	xor	rD0,rD0,rW0
	xor	rD1,rD1,rW1
	xor	rD2,rD2,rW2
	xor	rD3,rD3,rW3
	xor	rD0,rD0,rI0		/* xor tweak after encryption */
	SAVE_DATA(rD0, 0)
	xor	rD1,rD1,rI1
	SAVE_DATA(rD1, 4)
	xor	rD2,rD2,rI2
	SAVE_DATA(rD2, 8)
	xor	rD3,rD3,rI3
	SAVE_DATA(rD3, 12)
	GF128_MUL(rG0, rG1, rG2, rG3, rW0)
	ENDIAN_SWAP(rI0, rI1, rG0, rG1)
	ENDIAN_SWAP(rI2, rI3, rG2, rG3)
	cmpwi	rLN,0
	NEXT_BLOCK
	bt	gt,ppc_encrypt_xts_loop
	START_IV
	SAVE_IV(rI0, 0)
	SAVE_IV(rI1, 4)
	SAVE_IV(rI2, 8)
	SAVE_IV(rI3, 12)
	FINALIZE_CRYPT(8)
	blr

/*
 * ppc_decrypt_xts(u8 *out, const u8 *in, u32 *key_dec,
 *		   u32 rounds, u32 bytes, u8 *iv, u32 *key_twk);
 *
 * called from glue layer to decrypt multiple blocks via XTS
 * If key_twk is given, the initial IV encryption will be
 * processed too. Round values are AES128 = 4, AES192 = 5,
 * AES256 = 6
 *
 * The length is counted in bytes: the loop subtracts 16 per block.
 * The tweak handling matches ppc_encrypt_xts; the initial tweak is
 * always produced with ppc_encrypt_block, also when decrypting.
 *
 */
_GLOBAL(ppc_decrypt_xts)
	INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 8)
	LOAD_IV(rI0, 0)
	addi	rT1,rT0,4096		/* rT1 = decryption table + 4096 */
	LOAD_IV(rI1, 4)
	LOAD_IV(rI2, 8)
	cmpwi	rKT,0			/* tweak key given? */
	LOAD_IV(rI3, 12)
	bt	eq,ppc_decrypt_xts_notweak
	/*
	 * NOTE(review): rT0 is moved 4096 bytes down around the tweak
	 * encryption, presumably onto the encryption table - confirm
	 * against the table layout.
	 */
	subi	rT0,rT0,4096
	mr	rKP,rKT
	START_KEY(rI0, rI1, rI2, rI3)
	bl	ppc_encrypt_block
	xor	rI0,rD0,rW0
	xor	rI1,rD1,rW1
	xor	rI2,rD2,rW2
	xor	rI3,rD3,rW3
	addi	rT0,rT0,4096		/* back to the decryption table */
ppc_decrypt_xts_notweak:
	ENDIAN_SWAP(rG0, rG1, rI0, rI1)
	ENDIAN_SWAP(rG2, rG3, rI2, rI3)
ppc_decrypt_xts_loop:
	LOAD_DATA(rD0, 0)
	mr	rKP,rKS			/* key pointer copy from prologue */
	LOAD_DATA(rD1, 4)
	subi	rLN,rLN,16
	LOAD_DATA(rD2, 8)
	LOAD_DATA(rD3, 12)
	xor	rD0,rD0,rI0		/* xor tweak before decryption */
	xor	rD1,rD1,rI1
	xor	rD2,rD2,rI2
	xor	rD3,rD3,rI3
	START_KEY(rD0, rD1, rD2, rD3)
	bl	ppc_decrypt_block
	xor	rD0,rD0,rW0
	xor	rD1,rD1,rW1
	xor	rD2,rD2,rW2
	xor	rD3,rD3,rW3
	xor	rD0,rD0,rI0		/* xor tweak after decryption */
	SAVE_DATA(rD0, 0)
	xor	rD1,rD1,rI1
	SAVE_DATA(rD1, 4)
	xor	rD2,rD2,rI2
	SAVE_DATA(rD2, 8)
	xor	rD3,rD3,rI3
	SAVE_DATA(rD3, 12)
	GF128_MUL(rG0, rG1, rG2, rG3, rW0)
	ENDIAN_SWAP(rI0, rI1, rG0, rG1)
	ENDIAN_SWAP(rI2, rI3, rG2, rG3)
	cmpwi	rLN,0
	NEXT_BLOCK
	bt	gt,ppc_decrypt_xts_loop
	START_IV
	SAVE_IV(rI0, 0)
	SAVE_IV(rI1, 4)
	SAVE_IV(rI2, 8)
	SAVE_IV(rI3, 12)
	FINALIZE_CRYPT(8)
	blr