1*4882a593Smuzhiyun#! /usr/bin/env perl 2*4882a593Smuzhiyun# SPDX-License-Identifier: GPL-2.0 3*4882a593Smuzhiyun 4*4882a593Smuzhiyun# This code is taken from CRYPTOGAMs[1] and is included here using the option 5*4882a593Smuzhiyun# in the license to distribute the code under the GPL. Therefore this program 6*4882a593Smuzhiyun# is free software; you can redistribute it and/or modify it under the terms of 7*4882a593Smuzhiyun# the GNU General Public License version 2 as published by the Free Software 8*4882a593Smuzhiyun# Foundation. 9*4882a593Smuzhiyun# 10*4882a593Smuzhiyun# [1] https://www.openssl.org/~appro/cryptogams/ 11*4882a593Smuzhiyun 12*4882a593Smuzhiyun# Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org> 13*4882a593Smuzhiyun# All rights reserved. 14*4882a593Smuzhiyun# 15*4882a593Smuzhiyun# Redistribution and use in source and binary forms, with or without 16*4882a593Smuzhiyun# modification, are permitted provided that the following conditions 17*4882a593Smuzhiyun# are met: 18*4882a593Smuzhiyun# 19*4882a593Smuzhiyun# * Redistributions of source code must retain copyright notices, 20*4882a593Smuzhiyun# this list of conditions and the following disclaimer. 21*4882a593Smuzhiyun# 22*4882a593Smuzhiyun# * Redistributions in binary form must reproduce the above 23*4882a593Smuzhiyun# copyright notice, this list of conditions and the following 24*4882a593Smuzhiyun# disclaimer in the documentation and/or other materials 25*4882a593Smuzhiyun# provided with the distribution. 26*4882a593Smuzhiyun# 27*4882a593Smuzhiyun# * Neither the name of the CRYPTOGAMS nor the names of its 28*4882a593Smuzhiyun# copyright holder and contributors may be used to endorse or 29*4882a593Smuzhiyun# promote products derived from this software without specific 30*4882a593Smuzhiyun# prior written permission. 
31*4882a593Smuzhiyun# 32*4882a593Smuzhiyun# ALTERNATIVELY, provided that this notice is retained in full, this 33*4882a593Smuzhiyun# product may be distributed under the terms of the GNU General Public 34*4882a593Smuzhiyun# License (GPL), in which case the provisions of the GPL apply INSTEAD OF 35*4882a593Smuzhiyun# those given above. 36*4882a593Smuzhiyun# 37*4882a593Smuzhiyun# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS 38*4882a593Smuzhiyun# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 39*4882a593Smuzhiyun# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 40*4882a593Smuzhiyun# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 41*4882a593Smuzhiyun# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 42*4882a593Smuzhiyun# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 43*4882a593Smuzhiyun# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 44*4882a593Smuzhiyun# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 45*4882a593Smuzhiyun# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 46*4882a593Smuzhiyun# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 47*4882a593Smuzhiyun# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 48*4882a593Smuzhiyun 49*4882a593Smuzhiyun# ==================================================================== 50*4882a593Smuzhiyun# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL 51*4882a593Smuzhiyun# project. The module is, however, dual licensed under OpenSSL and 52*4882a593Smuzhiyun# CRYPTOGAMS licenses depending on where you obtain it. For further 53*4882a593Smuzhiyun# details see https://www.openssl.org/~appro/cryptogams/. 
54*4882a593Smuzhiyun# ==================================================================== 55*4882a593Smuzhiyun# 56*4882a593Smuzhiyun# This module implements support for AES instructions as per PowerISA 57*4882a593Smuzhiyun# specification version 2.07, first implemented by POWER8 processor. 58*4882a593Smuzhiyun# The module is endian-agnostic in sense that it supports both big- 59*4882a593Smuzhiyun# and little-endian cases. Data alignment in parallelizable modes is 60*4882a593Smuzhiyun# handled with VSX loads and stores, which implies MSR.VSX flag being 61*4882a593Smuzhiyun# set. It should also be noted that ISA specification doesn't prohibit 62*4882a593Smuzhiyun# alignment exceptions for these instructions on page boundaries. 63*4882a593Smuzhiyun# Initially alignment was handled in pure AltiVec/VMX way [when data 64*4882a593Smuzhiyun# is aligned programmatically, which in turn guarantees exception- 65*4882a593Smuzhiyun# free execution], but it turned to hamper performance when vcipher 66*4882a593Smuzhiyun# instructions are interleaved. It's reckoned that eventual 67*4882a593Smuzhiyun# misalignment penalties at page boundaries are in average lower 68*4882a593Smuzhiyun# than additional overhead in pure AltiVec approach. 69*4882a593Smuzhiyun# 70*4882a593Smuzhiyun# May 2016 71*4882a593Smuzhiyun# 72*4882a593Smuzhiyun# Add XTS subroutine, 9x on little- and 12x improvement on big-endian 73*4882a593Smuzhiyun# systems were measured. 74*4882a593Smuzhiyun# 75*4882a593Smuzhiyun###################################################################### 76*4882a593Smuzhiyun# Current large-block performance in cycles per byte processed with 77*4882a593Smuzhiyun# 128-bit key (less is better). 
78*4882a593Smuzhiyun# 79*4882a593Smuzhiyun# CBC en-/decrypt CTR XTS 80*4882a593Smuzhiyun# POWER8[le] 3.96/0.72 0.74 1.1 81*4882a593Smuzhiyun# POWER8[be] 3.75/0.65 0.66 1.0 82*4882a593Smuzhiyun 83*4882a593Smuzhiyun$flavour = shift; 84*4882a593Smuzhiyun 85*4882a593Smuzhiyunif ($flavour =~ /64/) { 86*4882a593Smuzhiyun $SIZE_T =8; 87*4882a593Smuzhiyun $LRSAVE =2*$SIZE_T; 88*4882a593Smuzhiyun $STU ="stdu"; 89*4882a593Smuzhiyun $POP ="ld"; 90*4882a593Smuzhiyun $PUSH ="std"; 91*4882a593Smuzhiyun $UCMP ="cmpld"; 92*4882a593Smuzhiyun $SHL ="sldi"; 93*4882a593Smuzhiyun} elsif ($flavour =~ /32/) { 94*4882a593Smuzhiyun $SIZE_T =4; 95*4882a593Smuzhiyun $LRSAVE =$SIZE_T; 96*4882a593Smuzhiyun $STU ="stwu"; 97*4882a593Smuzhiyun $POP ="lwz"; 98*4882a593Smuzhiyun $PUSH ="stw"; 99*4882a593Smuzhiyun $UCMP ="cmplw"; 100*4882a593Smuzhiyun $SHL ="slwi"; 101*4882a593Smuzhiyun} else { die "nonsense $flavour"; } 102*4882a593Smuzhiyun 103*4882a593Smuzhiyun$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0; 104*4882a593Smuzhiyun 105*4882a593Smuzhiyun$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; 106*4882a593Smuzhiyun( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or 107*4882a593Smuzhiyun( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or 108*4882a593Smuzhiyundie "can't locate ppc-xlate.pl"; 109*4882a593Smuzhiyun 110*4882a593Smuzhiyunopen STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!"; 111*4882a593Smuzhiyun 112*4882a593Smuzhiyun$FRAME=8*$SIZE_T; 113*4882a593Smuzhiyun$prefix="aes_p8"; 114*4882a593Smuzhiyun 115*4882a593Smuzhiyun$sp="r1"; 116*4882a593Smuzhiyun$vrsave="r12"; 117*4882a593Smuzhiyun 118*4882a593Smuzhiyun######################################################################### 119*4882a593Smuzhiyun{{{ # Key setup procedures # 120*4882a593Smuzhiyunmy ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8)); 121*4882a593Smuzhiyunmy ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6)); 122*4882a593Smuzhiyunmy 
($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11)); 123*4882a593Smuzhiyun 124*4882a593Smuzhiyun$code.=<<___; 125*4882a593Smuzhiyun.machine "any" 126*4882a593Smuzhiyun 127*4882a593Smuzhiyun.text 128*4882a593Smuzhiyun 129*4882a593Smuzhiyun.align 7 130*4882a593Smuzhiyunrcon: 131*4882a593Smuzhiyun.long 0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev 132*4882a593Smuzhiyun.long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev 133*4882a593Smuzhiyun.long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev 134*4882a593Smuzhiyun.long 0,0,0,0 ?asis 135*4882a593SmuzhiyunLconsts: 136*4882a593Smuzhiyun mflr r0 137*4882a593Smuzhiyun bcl 20,31,\$+4 138*4882a593Smuzhiyun mflr $ptr #vvvvv "distance between . and rcon 139*4882a593Smuzhiyun addi $ptr,$ptr,-0x48 140*4882a593Smuzhiyun mtlr r0 141*4882a593Smuzhiyun blr 142*4882a593Smuzhiyun .long 0 143*4882a593Smuzhiyun .byte 0,12,0x14,0,0,0,0,0 144*4882a593Smuzhiyun.asciz "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>" 145*4882a593Smuzhiyun 146*4882a593Smuzhiyun.globl .${prefix}_set_encrypt_key 147*4882a593SmuzhiyunLset_encrypt_key: 148*4882a593Smuzhiyun mflr r11 149*4882a593Smuzhiyun $PUSH r11,$LRSAVE($sp) 150*4882a593Smuzhiyun 151*4882a593Smuzhiyun li $ptr,-1 152*4882a593Smuzhiyun ${UCMP}i $inp,0 153*4882a593Smuzhiyun beq- Lenc_key_abort # if ($inp==0) return -1; 154*4882a593Smuzhiyun ${UCMP}i $out,0 155*4882a593Smuzhiyun beq- Lenc_key_abort # if ($out==0) return -1; 156*4882a593Smuzhiyun li $ptr,-2 157*4882a593Smuzhiyun cmpwi $bits,128 158*4882a593Smuzhiyun blt- Lenc_key_abort 159*4882a593Smuzhiyun cmpwi $bits,256 160*4882a593Smuzhiyun bgt- Lenc_key_abort 161*4882a593Smuzhiyun andi. 
r0,$bits,0x3f 162*4882a593Smuzhiyun bne- Lenc_key_abort 163*4882a593Smuzhiyun 164*4882a593Smuzhiyun lis r0,0xfff0 165*4882a593Smuzhiyun mfspr $vrsave,256 166*4882a593Smuzhiyun mtspr 256,r0 167*4882a593Smuzhiyun 168*4882a593Smuzhiyun bl Lconsts 169*4882a593Smuzhiyun mtlr r11 170*4882a593Smuzhiyun 171*4882a593Smuzhiyun neg r9,$inp 172*4882a593Smuzhiyun lvx $in0,0,$inp 173*4882a593Smuzhiyun addi $inp,$inp,15 # 15 is not typo 174*4882a593Smuzhiyun lvsr $key,0,r9 # borrow $key 175*4882a593Smuzhiyun li r8,0x20 176*4882a593Smuzhiyun cmpwi $bits,192 177*4882a593Smuzhiyun lvx $in1,0,$inp 178*4882a593Smuzhiyun le?vspltisb $mask,0x0f # borrow $mask 179*4882a593Smuzhiyun lvx $rcon,0,$ptr 180*4882a593Smuzhiyun le?vxor $key,$key,$mask # adjust for byte swap 181*4882a593Smuzhiyun lvx $mask,r8,$ptr 182*4882a593Smuzhiyun addi $ptr,$ptr,0x10 183*4882a593Smuzhiyun vperm $in0,$in0,$in1,$key # align [and byte swap in LE] 184*4882a593Smuzhiyun li $cnt,8 185*4882a593Smuzhiyun vxor $zero,$zero,$zero 186*4882a593Smuzhiyun mtctr $cnt 187*4882a593Smuzhiyun 188*4882a593Smuzhiyun ?lvsr $outperm,0,$out 189*4882a593Smuzhiyun vspltisb $outmask,-1 190*4882a593Smuzhiyun lvx $outhead,0,$out 191*4882a593Smuzhiyun ?vperm $outmask,$zero,$outmask,$outperm 192*4882a593Smuzhiyun 193*4882a593Smuzhiyun blt Loop128 194*4882a593Smuzhiyun addi $inp,$inp,8 195*4882a593Smuzhiyun beq L192 196*4882a593Smuzhiyun addi $inp,$inp,8 197*4882a593Smuzhiyun b L256 198*4882a593Smuzhiyun 199*4882a593Smuzhiyun.align 4 200*4882a593SmuzhiyunLoop128: 201*4882a593Smuzhiyun vperm $key,$in0,$in0,$mask # rotate-n-splat 202*4882a593Smuzhiyun vsldoi $tmp,$zero,$in0,12 # >>32 203*4882a593Smuzhiyun vperm $outtail,$in0,$in0,$outperm # rotate 204*4882a593Smuzhiyun vsel $stage,$outhead,$outtail,$outmask 205*4882a593Smuzhiyun vmr $outhead,$outtail 206*4882a593Smuzhiyun vcipherlast $key,$key,$rcon 207*4882a593Smuzhiyun stvx $stage,0,$out 208*4882a593Smuzhiyun addi $out,$out,16 209*4882a593Smuzhiyun 210*4882a593Smuzhiyun vxor $in0,$in0,$tmp 
211*4882a593Smuzhiyun vsldoi $tmp,$zero,$tmp,12 # >>32 212*4882a593Smuzhiyun vxor $in0,$in0,$tmp 213*4882a593Smuzhiyun vsldoi $tmp,$zero,$tmp,12 # >>32 214*4882a593Smuzhiyun vxor $in0,$in0,$tmp 215*4882a593Smuzhiyun vadduwm $rcon,$rcon,$rcon 216*4882a593Smuzhiyun vxor $in0,$in0,$key 217*4882a593Smuzhiyun bdnz Loop128 218*4882a593Smuzhiyun 219*4882a593Smuzhiyun lvx $rcon,0,$ptr # last two round keys 220*4882a593Smuzhiyun 221*4882a593Smuzhiyun vperm $key,$in0,$in0,$mask # rotate-n-splat 222*4882a593Smuzhiyun vsldoi $tmp,$zero,$in0,12 # >>32 223*4882a593Smuzhiyun vperm $outtail,$in0,$in0,$outperm # rotate 224*4882a593Smuzhiyun vsel $stage,$outhead,$outtail,$outmask 225*4882a593Smuzhiyun vmr $outhead,$outtail 226*4882a593Smuzhiyun vcipherlast $key,$key,$rcon 227*4882a593Smuzhiyun stvx $stage,0,$out 228*4882a593Smuzhiyun addi $out,$out,16 229*4882a593Smuzhiyun 230*4882a593Smuzhiyun vxor $in0,$in0,$tmp 231*4882a593Smuzhiyun vsldoi $tmp,$zero,$tmp,12 # >>32 232*4882a593Smuzhiyun vxor $in0,$in0,$tmp 233*4882a593Smuzhiyun vsldoi $tmp,$zero,$tmp,12 # >>32 234*4882a593Smuzhiyun vxor $in0,$in0,$tmp 235*4882a593Smuzhiyun vadduwm $rcon,$rcon,$rcon 236*4882a593Smuzhiyun vxor $in0,$in0,$key 237*4882a593Smuzhiyun 238*4882a593Smuzhiyun vperm $key,$in0,$in0,$mask # rotate-n-splat 239*4882a593Smuzhiyun vsldoi $tmp,$zero,$in0,12 # >>32 240*4882a593Smuzhiyun vperm $outtail,$in0,$in0,$outperm # rotate 241*4882a593Smuzhiyun vsel $stage,$outhead,$outtail,$outmask 242*4882a593Smuzhiyun vmr $outhead,$outtail 243*4882a593Smuzhiyun vcipherlast $key,$key,$rcon 244*4882a593Smuzhiyun stvx $stage,0,$out 245*4882a593Smuzhiyun addi $out,$out,16 246*4882a593Smuzhiyun 247*4882a593Smuzhiyun vxor $in0,$in0,$tmp 248*4882a593Smuzhiyun vsldoi $tmp,$zero,$tmp,12 # >>32 249*4882a593Smuzhiyun vxor $in0,$in0,$tmp 250*4882a593Smuzhiyun vsldoi $tmp,$zero,$tmp,12 # >>32 251*4882a593Smuzhiyun vxor $in0,$in0,$tmp 252*4882a593Smuzhiyun vxor $in0,$in0,$key 253*4882a593Smuzhiyun vperm $outtail,$in0,$in0,$outperm # 
rotate 254*4882a593Smuzhiyun vsel $stage,$outhead,$outtail,$outmask 255*4882a593Smuzhiyun vmr $outhead,$outtail 256*4882a593Smuzhiyun stvx $stage,0,$out 257*4882a593Smuzhiyun 258*4882a593Smuzhiyun addi $inp,$out,15 # 15 is not typo 259*4882a593Smuzhiyun addi $out,$out,0x50 260*4882a593Smuzhiyun 261*4882a593Smuzhiyun li $rounds,10 262*4882a593Smuzhiyun b Ldone 263*4882a593Smuzhiyun 264*4882a593Smuzhiyun.align 4 265*4882a593SmuzhiyunL192: 266*4882a593Smuzhiyun lvx $tmp,0,$inp 267*4882a593Smuzhiyun li $cnt,4 268*4882a593Smuzhiyun vperm $outtail,$in0,$in0,$outperm # rotate 269*4882a593Smuzhiyun vsel $stage,$outhead,$outtail,$outmask 270*4882a593Smuzhiyun vmr $outhead,$outtail 271*4882a593Smuzhiyun stvx $stage,0,$out 272*4882a593Smuzhiyun addi $out,$out,16 273*4882a593Smuzhiyun vperm $in1,$in1,$tmp,$key # align [and byte swap in LE] 274*4882a593Smuzhiyun vspltisb $key,8 # borrow $key 275*4882a593Smuzhiyun mtctr $cnt 276*4882a593Smuzhiyun vsububm $mask,$mask,$key # adjust the mask 277*4882a593Smuzhiyun 278*4882a593SmuzhiyunLoop192: 279*4882a593Smuzhiyun vperm $key,$in1,$in1,$mask # roate-n-splat 280*4882a593Smuzhiyun vsldoi $tmp,$zero,$in0,12 # >>32 281*4882a593Smuzhiyun vcipherlast $key,$key,$rcon 282*4882a593Smuzhiyun 283*4882a593Smuzhiyun vxor $in0,$in0,$tmp 284*4882a593Smuzhiyun vsldoi $tmp,$zero,$tmp,12 # >>32 285*4882a593Smuzhiyun vxor $in0,$in0,$tmp 286*4882a593Smuzhiyun vsldoi $tmp,$zero,$tmp,12 # >>32 287*4882a593Smuzhiyun vxor $in0,$in0,$tmp 288*4882a593Smuzhiyun 289*4882a593Smuzhiyun vsldoi $stage,$zero,$in1,8 290*4882a593Smuzhiyun vspltw $tmp,$in0,3 291*4882a593Smuzhiyun vxor $tmp,$tmp,$in1 292*4882a593Smuzhiyun vsldoi $in1,$zero,$in1,12 # >>32 293*4882a593Smuzhiyun vadduwm $rcon,$rcon,$rcon 294*4882a593Smuzhiyun vxor $in1,$in1,$tmp 295*4882a593Smuzhiyun vxor $in0,$in0,$key 296*4882a593Smuzhiyun vxor $in1,$in1,$key 297*4882a593Smuzhiyun vsldoi $stage,$stage,$in0,8 298*4882a593Smuzhiyun 299*4882a593Smuzhiyun vperm $key,$in1,$in1,$mask # rotate-n-splat 
300*4882a593Smuzhiyun vsldoi $tmp,$zero,$in0,12 # >>32 301*4882a593Smuzhiyun vperm $outtail,$stage,$stage,$outperm # rotate 302*4882a593Smuzhiyun vsel $stage,$outhead,$outtail,$outmask 303*4882a593Smuzhiyun vmr $outhead,$outtail 304*4882a593Smuzhiyun vcipherlast $key,$key,$rcon 305*4882a593Smuzhiyun stvx $stage,0,$out 306*4882a593Smuzhiyun addi $out,$out,16 307*4882a593Smuzhiyun 308*4882a593Smuzhiyun vsldoi $stage,$in0,$in1,8 309*4882a593Smuzhiyun vxor $in0,$in0,$tmp 310*4882a593Smuzhiyun vsldoi $tmp,$zero,$tmp,12 # >>32 311*4882a593Smuzhiyun vperm $outtail,$stage,$stage,$outperm # rotate 312*4882a593Smuzhiyun vsel $stage,$outhead,$outtail,$outmask 313*4882a593Smuzhiyun vmr $outhead,$outtail 314*4882a593Smuzhiyun vxor $in0,$in0,$tmp 315*4882a593Smuzhiyun vsldoi $tmp,$zero,$tmp,12 # >>32 316*4882a593Smuzhiyun vxor $in0,$in0,$tmp 317*4882a593Smuzhiyun stvx $stage,0,$out 318*4882a593Smuzhiyun addi $out,$out,16 319*4882a593Smuzhiyun 320*4882a593Smuzhiyun vspltw $tmp,$in0,3 321*4882a593Smuzhiyun vxor $tmp,$tmp,$in1 322*4882a593Smuzhiyun vsldoi $in1,$zero,$in1,12 # >>32 323*4882a593Smuzhiyun vadduwm $rcon,$rcon,$rcon 324*4882a593Smuzhiyun vxor $in1,$in1,$tmp 325*4882a593Smuzhiyun vxor $in0,$in0,$key 326*4882a593Smuzhiyun vxor $in1,$in1,$key 327*4882a593Smuzhiyun vperm $outtail,$in0,$in0,$outperm # rotate 328*4882a593Smuzhiyun vsel $stage,$outhead,$outtail,$outmask 329*4882a593Smuzhiyun vmr $outhead,$outtail 330*4882a593Smuzhiyun stvx $stage,0,$out 331*4882a593Smuzhiyun addi $inp,$out,15 # 15 is not typo 332*4882a593Smuzhiyun addi $out,$out,16 333*4882a593Smuzhiyun bdnz Loop192 334*4882a593Smuzhiyun 335*4882a593Smuzhiyun li $rounds,12 336*4882a593Smuzhiyun addi $out,$out,0x20 337*4882a593Smuzhiyun b Ldone 338*4882a593Smuzhiyun 339*4882a593Smuzhiyun.align 4 340*4882a593SmuzhiyunL256: 341*4882a593Smuzhiyun lvx $tmp,0,$inp 342*4882a593Smuzhiyun li $cnt,7 343*4882a593Smuzhiyun li $rounds,14 344*4882a593Smuzhiyun vperm $outtail,$in0,$in0,$outperm # rotate 345*4882a593Smuzhiyun 
vsel $stage,$outhead,$outtail,$outmask 346*4882a593Smuzhiyun vmr $outhead,$outtail 347*4882a593Smuzhiyun stvx $stage,0,$out 348*4882a593Smuzhiyun addi $out,$out,16 349*4882a593Smuzhiyun vperm $in1,$in1,$tmp,$key # align [and byte swap in LE] 350*4882a593Smuzhiyun mtctr $cnt 351*4882a593Smuzhiyun 352*4882a593SmuzhiyunLoop256: 353*4882a593Smuzhiyun vperm $key,$in1,$in1,$mask # rotate-n-splat 354*4882a593Smuzhiyun vsldoi $tmp,$zero,$in0,12 # >>32 355*4882a593Smuzhiyun vperm $outtail,$in1,$in1,$outperm # rotate 356*4882a593Smuzhiyun vsel $stage,$outhead,$outtail,$outmask 357*4882a593Smuzhiyun vmr $outhead,$outtail 358*4882a593Smuzhiyun vcipherlast $key,$key,$rcon 359*4882a593Smuzhiyun stvx $stage,0,$out 360*4882a593Smuzhiyun addi $out,$out,16 361*4882a593Smuzhiyun 362*4882a593Smuzhiyun vxor $in0,$in0,$tmp 363*4882a593Smuzhiyun vsldoi $tmp,$zero,$tmp,12 # >>32 364*4882a593Smuzhiyun vxor $in0,$in0,$tmp 365*4882a593Smuzhiyun vsldoi $tmp,$zero,$tmp,12 # >>32 366*4882a593Smuzhiyun vxor $in0,$in0,$tmp 367*4882a593Smuzhiyun vadduwm $rcon,$rcon,$rcon 368*4882a593Smuzhiyun vxor $in0,$in0,$key 369*4882a593Smuzhiyun vperm $outtail,$in0,$in0,$outperm # rotate 370*4882a593Smuzhiyun vsel $stage,$outhead,$outtail,$outmask 371*4882a593Smuzhiyun vmr $outhead,$outtail 372*4882a593Smuzhiyun stvx $stage,0,$out 373*4882a593Smuzhiyun addi $inp,$out,15 # 15 is not typo 374*4882a593Smuzhiyun addi $out,$out,16 375*4882a593Smuzhiyun bdz Ldone 376*4882a593Smuzhiyun 377*4882a593Smuzhiyun vspltw $key,$in0,3 # just splat 378*4882a593Smuzhiyun vsldoi $tmp,$zero,$in1,12 # >>32 379*4882a593Smuzhiyun vsbox $key,$key 380*4882a593Smuzhiyun 381*4882a593Smuzhiyun vxor $in1,$in1,$tmp 382*4882a593Smuzhiyun vsldoi $tmp,$zero,$tmp,12 # >>32 383*4882a593Smuzhiyun vxor $in1,$in1,$tmp 384*4882a593Smuzhiyun vsldoi $tmp,$zero,$tmp,12 # >>32 385*4882a593Smuzhiyun vxor $in1,$in1,$tmp 386*4882a593Smuzhiyun 387*4882a593Smuzhiyun vxor $in1,$in1,$key 388*4882a593Smuzhiyun b Loop256 389*4882a593Smuzhiyun 
390*4882a593Smuzhiyun.align 4 391*4882a593SmuzhiyunLdone: 392*4882a593Smuzhiyun lvx $in1,0,$inp # redundant in aligned case 393*4882a593Smuzhiyun vsel $in1,$outhead,$in1,$outmask 394*4882a593Smuzhiyun stvx $in1,0,$inp 395*4882a593Smuzhiyun li $ptr,0 396*4882a593Smuzhiyun mtspr 256,$vrsave 397*4882a593Smuzhiyun stw $rounds,0($out) 398*4882a593Smuzhiyun 399*4882a593SmuzhiyunLenc_key_abort: 400*4882a593Smuzhiyun mr r3,$ptr 401*4882a593Smuzhiyun blr 402*4882a593Smuzhiyun .long 0 403*4882a593Smuzhiyun .byte 0,12,0x14,1,0,0,3,0 404*4882a593Smuzhiyun .long 0 405*4882a593Smuzhiyun.size .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key 406*4882a593Smuzhiyun 407*4882a593Smuzhiyun.globl .${prefix}_set_decrypt_key 408*4882a593Smuzhiyun $STU $sp,-$FRAME($sp) 409*4882a593Smuzhiyun mflr r10 410*4882a593Smuzhiyun $PUSH r10,$FRAME+$LRSAVE($sp) 411*4882a593Smuzhiyun bl Lset_encrypt_key 412*4882a593Smuzhiyun mtlr r10 413*4882a593Smuzhiyun 414*4882a593Smuzhiyun cmpwi r3,0 415*4882a593Smuzhiyun bne- Ldec_key_abort 416*4882a593Smuzhiyun 417*4882a593Smuzhiyun slwi $cnt,$rounds,4 418*4882a593Smuzhiyun subi $inp,$out,240 # first round key 419*4882a593Smuzhiyun srwi $rounds,$rounds,1 420*4882a593Smuzhiyun add $out,$inp,$cnt # last round key 421*4882a593Smuzhiyun mtctr $rounds 422*4882a593Smuzhiyun 423*4882a593SmuzhiyunLdeckey: 424*4882a593Smuzhiyun lwz r0, 0($inp) 425*4882a593Smuzhiyun lwz r6, 4($inp) 426*4882a593Smuzhiyun lwz r7, 8($inp) 427*4882a593Smuzhiyun lwz r8, 12($inp) 428*4882a593Smuzhiyun addi $inp,$inp,16 429*4882a593Smuzhiyun lwz r9, 0($out) 430*4882a593Smuzhiyun lwz r10,4($out) 431*4882a593Smuzhiyun lwz r11,8($out) 432*4882a593Smuzhiyun lwz r12,12($out) 433*4882a593Smuzhiyun stw r0, 0($out) 434*4882a593Smuzhiyun stw r6, 4($out) 435*4882a593Smuzhiyun stw r7, 8($out) 436*4882a593Smuzhiyun stw r8, 12($out) 437*4882a593Smuzhiyun subi $out,$out,16 438*4882a593Smuzhiyun stw r9, -16($inp) 439*4882a593Smuzhiyun stw r10,-12($inp) 440*4882a593Smuzhiyun stw r11,-8($inp) 
441*4882a593Smuzhiyun stw r12,-4($inp) 442*4882a593Smuzhiyun bdnz Ldeckey 443*4882a593Smuzhiyun 444*4882a593Smuzhiyun xor r3,r3,r3 # return value 445*4882a593SmuzhiyunLdec_key_abort: 446*4882a593Smuzhiyun addi $sp,$sp,$FRAME 447*4882a593Smuzhiyun blr 448*4882a593Smuzhiyun .long 0 449*4882a593Smuzhiyun .byte 0,12,4,1,0x80,0,3,0 450*4882a593Smuzhiyun .long 0 451*4882a593Smuzhiyun.size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key 452*4882a593Smuzhiyun___ 453*4882a593Smuzhiyun}}} 454*4882a593Smuzhiyun######################################################################### 455*4882a593Smuzhiyun{{{ # Single block en- and decrypt procedures # 456*4882a593Smuzhiyunsub gen_block () { 457*4882a593Smuzhiyunmy $dir = shift; 458*4882a593Smuzhiyunmy $n = $dir eq "de" ? "n" : ""; 459*4882a593Smuzhiyunmy ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7)); 460*4882a593Smuzhiyun 461*4882a593Smuzhiyun$code.=<<___; 462*4882a593Smuzhiyun.globl .${prefix}_${dir}crypt 463*4882a593Smuzhiyun lwz $rounds,240($key) 464*4882a593Smuzhiyun lis r0,0xfc00 465*4882a593Smuzhiyun mfspr $vrsave,256 466*4882a593Smuzhiyun li $idx,15 # 15 is not typo 467*4882a593Smuzhiyun mtspr 256,r0 468*4882a593Smuzhiyun 469*4882a593Smuzhiyun lvx v0,0,$inp 470*4882a593Smuzhiyun neg r11,$out 471*4882a593Smuzhiyun lvx v1,$idx,$inp 472*4882a593Smuzhiyun lvsl v2,0,$inp # inpperm 473*4882a593Smuzhiyun le?vspltisb v4,0x0f 474*4882a593Smuzhiyun ?lvsl v3,0,r11 # outperm 475*4882a593Smuzhiyun le?vxor v2,v2,v4 476*4882a593Smuzhiyun li $idx,16 477*4882a593Smuzhiyun vperm v0,v0,v1,v2 # align [and byte swap in LE] 478*4882a593Smuzhiyun lvx v1,0,$key 479*4882a593Smuzhiyun ?lvsl v5,0,$key # keyperm 480*4882a593Smuzhiyun srwi $rounds,$rounds,1 481*4882a593Smuzhiyun lvx v2,$idx,$key 482*4882a593Smuzhiyun addi $idx,$idx,16 483*4882a593Smuzhiyun subi $rounds,$rounds,1 484*4882a593Smuzhiyun ?vperm v1,v1,v2,v5 # align round key 485*4882a593Smuzhiyun 486*4882a593Smuzhiyun vxor v0,v0,v1 487*4882a593Smuzhiyun lvx v1,$idx,$key 
488*4882a593Smuzhiyun addi $idx,$idx,16 489*4882a593Smuzhiyun mtctr $rounds 490*4882a593Smuzhiyun 491*4882a593SmuzhiyunLoop_${dir}c: 492*4882a593Smuzhiyun ?vperm v2,v2,v1,v5 493*4882a593Smuzhiyun v${n}cipher v0,v0,v2 494*4882a593Smuzhiyun lvx v2,$idx,$key 495*4882a593Smuzhiyun addi $idx,$idx,16 496*4882a593Smuzhiyun ?vperm v1,v1,v2,v5 497*4882a593Smuzhiyun v${n}cipher v0,v0,v1 498*4882a593Smuzhiyun lvx v1,$idx,$key 499*4882a593Smuzhiyun addi $idx,$idx,16 500*4882a593Smuzhiyun bdnz Loop_${dir}c 501*4882a593Smuzhiyun 502*4882a593Smuzhiyun ?vperm v2,v2,v1,v5 503*4882a593Smuzhiyun v${n}cipher v0,v0,v2 504*4882a593Smuzhiyun lvx v2,$idx,$key 505*4882a593Smuzhiyun ?vperm v1,v1,v2,v5 506*4882a593Smuzhiyun v${n}cipherlast v0,v0,v1 507*4882a593Smuzhiyun 508*4882a593Smuzhiyun vspltisb v2,-1 509*4882a593Smuzhiyun vxor v1,v1,v1 510*4882a593Smuzhiyun li $idx,15 # 15 is not typo 511*4882a593Smuzhiyun ?vperm v2,v1,v2,v3 # outmask 512*4882a593Smuzhiyun le?vxor v3,v3,v4 513*4882a593Smuzhiyun lvx v1,0,$out # outhead 514*4882a593Smuzhiyun vperm v0,v0,v0,v3 # rotate [and byte swap in LE] 515*4882a593Smuzhiyun vsel v1,v1,v0,v2 516*4882a593Smuzhiyun lvx v4,$idx,$out 517*4882a593Smuzhiyun stvx v1,0,$out 518*4882a593Smuzhiyun vsel v0,v0,v4,v2 519*4882a593Smuzhiyun stvx v0,$idx,$out 520*4882a593Smuzhiyun 521*4882a593Smuzhiyun mtspr 256,$vrsave 522*4882a593Smuzhiyun blr 523*4882a593Smuzhiyun .long 0 524*4882a593Smuzhiyun .byte 0,12,0x14,0,0,0,3,0 525*4882a593Smuzhiyun .long 0 526*4882a593Smuzhiyun.size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt 527*4882a593Smuzhiyun___ 528*4882a593Smuzhiyun} 529*4882a593Smuzhiyun&gen_block("en"); 530*4882a593Smuzhiyun&gen_block("de"); 531*4882a593Smuzhiyun}}} 532*4882a593Smuzhiyun######################################################################### 533*4882a593Smuzhiyun{{{ # CBC en- and decrypt procedures # 534*4882a593Smuzhiyunmy ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10)); 535*4882a593Smuzhiyunmy 
($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3)); 536*4882a593Smuzhiyunmy ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)= 537*4882a593Smuzhiyun map("v$_",(4..10)); 538*4882a593Smuzhiyun$code.=<<___; 539*4882a593Smuzhiyun.globl .${prefix}_cbc_encrypt 540*4882a593Smuzhiyun ${UCMP}i $len,16 541*4882a593Smuzhiyun bltlr- 542*4882a593Smuzhiyun 543*4882a593Smuzhiyun cmpwi $enc,0 # test direction 544*4882a593Smuzhiyun lis r0,0xffe0 545*4882a593Smuzhiyun mfspr $vrsave,256 546*4882a593Smuzhiyun mtspr 256,r0 547*4882a593Smuzhiyun 548*4882a593Smuzhiyun li $idx,15 549*4882a593Smuzhiyun vxor $rndkey0,$rndkey0,$rndkey0 550*4882a593Smuzhiyun le?vspltisb $tmp,0x0f 551*4882a593Smuzhiyun 552*4882a593Smuzhiyun lvx $ivec,0,$ivp # load [unaligned] iv 553*4882a593Smuzhiyun lvsl $inpperm,0,$ivp 554*4882a593Smuzhiyun lvx $inptail,$idx,$ivp 555*4882a593Smuzhiyun le?vxor $inpperm,$inpperm,$tmp 556*4882a593Smuzhiyun vperm $ivec,$ivec,$inptail,$inpperm 557*4882a593Smuzhiyun 558*4882a593Smuzhiyun neg r11,$inp 559*4882a593Smuzhiyun ?lvsl $keyperm,0,$key # prepare for unaligned key 560*4882a593Smuzhiyun lwz $rounds,240($key) 561*4882a593Smuzhiyun 562*4882a593Smuzhiyun lvsr $inpperm,0,r11 # prepare for unaligned load 563*4882a593Smuzhiyun lvx $inptail,0,$inp 564*4882a593Smuzhiyun addi $inp,$inp,15 # 15 is not typo 565*4882a593Smuzhiyun le?vxor $inpperm,$inpperm,$tmp 566*4882a593Smuzhiyun 567*4882a593Smuzhiyun ?lvsr $outperm,0,$out # prepare for unaligned store 568*4882a593Smuzhiyun vspltisb $outmask,-1 569*4882a593Smuzhiyun lvx $outhead,0,$out 570*4882a593Smuzhiyun ?vperm $outmask,$rndkey0,$outmask,$outperm 571*4882a593Smuzhiyun le?vxor $outperm,$outperm,$tmp 572*4882a593Smuzhiyun 573*4882a593Smuzhiyun srwi $rounds,$rounds,1 574*4882a593Smuzhiyun li $idx,16 575*4882a593Smuzhiyun subi $rounds,$rounds,1 576*4882a593Smuzhiyun beq Lcbc_dec 577*4882a593Smuzhiyun 578*4882a593SmuzhiyunLcbc_enc: 579*4882a593Smuzhiyun vmr $inout,$inptail 580*4882a593Smuzhiyun lvx $inptail,0,$inp 
581*4882a593Smuzhiyun addi $inp,$inp,16 582*4882a593Smuzhiyun mtctr $rounds 583*4882a593Smuzhiyun subi $len,$len,16 # len-=16 584*4882a593Smuzhiyun 585*4882a593Smuzhiyun lvx $rndkey0,0,$key 586*4882a593Smuzhiyun vperm $inout,$inout,$inptail,$inpperm 587*4882a593Smuzhiyun lvx $rndkey1,$idx,$key 588*4882a593Smuzhiyun addi $idx,$idx,16 589*4882a593Smuzhiyun ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 590*4882a593Smuzhiyun vxor $inout,$inout,$rndkey0 591*4882a593Smuzhiyun lvx $rndkey0,$idx,$key 592*4882a593Smuzhiyun addi $idx,$idx,16 593*4882a593Smuzhiyun vxor $inout,$inout,$ivec 594*4882a593Smuzhiyun 595*4882a593SmuzhiyunLoop_cbc_enc: 596*4882a593Smuzhiyun ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 597*4882a593Smuzhiyun vcipher $inout,$inout,$rndkey1 598*4882a593Smuzhiyun lvx $rndkey1,$idx,$key 599*4882a593Smuzhiyun addi $idx,$idx,16 600*4882a593Smuzhiyun ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 601*4882a593Smuzhiyun vcipher $inout,$inout,$rndkey0 602*4882a593Smuzhiyun lvx $rndkey0,$idx,$key 603*4882a593Smuzhiyun addi $idx,$idx,16 604*4882a593Smuzhiyun bdnz Loop_cbc_enc 605*4882a593Smuzhiyun 606*4882a593Smuzhiyun ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 607*4882a593Smuzhiyun vcipher $inout,$inout,$rndkey1 608*4882a593Smuzhiyun lvx $rndkey1,$idx,$key 609*4882a593Smuzhiyun li $idx,16 610*4882a593Smuzhiyun ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 611*4882a593Smuzhiyun vcipherlast $ivec,$inout,$rndkey0 612*4882a593Smuzhiyun ${UCMP}i $len,16 613*4882a593Smuzhiyun 614*4882a593Smuzhiyun vperm $tmp,$ivec,$ivec,$outperm 615*4882a593Smuzhiyun vsel $inout,$outhead,$tmp,$outmask 616*4882a593Smuzhiyun vmr $outhead,$tmp 617*4882a593Smuzhiyun stvx $inout,0,$out 618*4882a593Smuzhiyun addi $out,$out,16 619*4882a593Smuzhiyun bge Lcbc_enc 620*4882a593Smuzhiyun 621*4882a593Smuzhiyun b Lcbc_done 622*4882a593Smuzhiyun 623*4882a593Smuzhiyun.align 4 624*4882a593SmuzhiyunLcbc_dec: 625*4882a593Smuzhiyun ${UCMP}i $len,128 626*4882a593Smuzhiyun bge _aesp8_cbc_decrypt8x 627*4882a593Smuzhiyun 
vmr $tmp,$inptail 628*4882a593Smuzhiyun lvx $inptail,0,$inp 629*4882a593Smuzhiyun addi $inp,$inp,16 630*4882a593Smuzhiyun mtctr $rounds 631*4882a593Smuzhiyun subi $len,$len,16 # len-=16 632*4882a593Smuzhiyun 633*4882a593Smuzhiyun lvx $rndkey0,0,$key 634*4882a593Smuzhiyun vperm $tmp,$tmp,$inptail,$inpperm 635*4882a593Smuzhiyun lvx $rndkey1,$idx,$key 636*4882a593Smuzhiyun addi $idx,$idx,16 637*4882a593Smuzhiyun ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 638*4882a593Smuzhiyun vxor $inout,$tmp,$rndkey0 639*4882a593Smuzhiyun lvx $rndkey0,$idx,$key 640*4882a593Smuzhiyun addi $idx,$idx,16 641*4882a593Smuzhiyun 642*4882a593SmuzhiyunLoop_cbc_dec: 643*4882a593Smuzhiyun ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 644*4882a593Smuzhiyun vncipher $inout,$inout,$rndkey1 645*4882a593Smuzhiyun lvx $rndkey1,$idx,$key 646*4882a593Smuzhiyun addi $idx,$idx,16 647*4882a593Smuzhiyun ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 648*4882a593Smuzhiyun vncipher $inout,$inout,$rndkey0 649*4882a593Smuzhiyun lvx $rndkey0,$idx,$key 650*4882a593Smuzhiyun addi $idx,$idx,16 651*4882a593Smuzhiyun bdnz Loop_cbc_dec 652*4882a593Smuzhiyun 653*4882a593Smuzhiyun ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 654*4882a593Smuzhiyun vncipher $inout,$inout,$rndkey1 655*4882a593Smuzhiyun lvx $rndkey1,$idx,$key 656*4882a593Smuzhiyun li $idx,16 657*4882a593Smuzhiyun ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 658*4882a593Smuzhiyun vncipherlast $inout,$inout,$rndkey0 659*4882a593Smuzhiyun ${UCMP}i $len,16 660*4882a593Smuzhiyun 661*4882a593Smuzhiyun vxor $inout,$inout,$ivec 662*4882a593Smuzhiyun vmr $ivec,$tmp 663*4882a593Smuzhiyun vperm $tmp,$inout,$inout,$outperm 664*4882a593Smuzhiyun vsel $inout,$outhead,$tmp,$outmask 665*4882a593Smuzhiyun vmr $outhead,$tmp 666*4882a593Smuzhiyun stvx $inout,0,$out 667*4882a593Smuzhiyun addi $out,$out,16 668*4882a593Smuzhiyun bge Lcbc_dec 669*4882a593Smuzhiyun 670*4882a593SmuzhiyunLcbc_done: 671*4882a593Smuzhiyun addi $out,$out,-1 672*4882a593Smuzhiyun lvx $inout,0,$out # redundant in 
# aligned case
	vsel		$inout,$outhead,$inout,$outmask
	stvx		$inout,0,$out

	neg		$enc,$ivp		# write [unaligned] iv
	li		$idx,15			# 15 is not typo
	vxor		$rndkey0,$rndkey0,$rndkey0
	vspltisb	$outmask,-1
	le?vspltisb	$tmp,0x0f
	?lvsl		$outperm,0,$enc
	?vperm		$outmask,$rndkey0,$outmask,$outperm
	le?vxor		$outperm,$outperm,$tmp
	lvx		$outhead,0,$ivp
	vperm		$ivec,$ivec,$ivec,$outperm
	vsel		$inout,$outhead,$ivec,$outmask
	lvx		$inptail,$idx,$ivp
	stvx		$inout,0,$ivp
	vsel		$inout,$ivec,$inptail,$outmask
	stvx		$inout,$idx,$ivp

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,6,0
	.long		0
___
#########################################################################
{{	# Optimized CBC decrypt procedure				#
#
# _aesp8_cbc_decrypt8x -- bulk CBC decryption, eight blocks per iteration.
#
# The code refers to the single-block CBC code emitted above as the
# "caller" and keeps using its register assignments ($inp, $out, $len,
# $key, $ivp, $rounds, $idx, $ivec, $inpperm, $vrsave, ...).
# NOTE(review): presumably entered by a branch from that code when enough
# input is available -- confirm against the section above.
#
# Register aliases introduced in this scope:
#   $key_       r11; walks the stack copy of the leading round keys.
#   $x10..$x70  r8, r26-r31; loaded with 0x10..0x70 and used as index
#               registers for the eight-block loads and stores.
#   $x00        r0; never loaded with an offset.  It only appears in the
#               RA slot of indexed loads/stores (the Power ISA reads RA=0
#               as the value zero there) and is otherwise used as plain
#               scratch (vrsave mask, borrow mask).
#   $in0-$in7   v0-v3, v10-v13: the eight ciphertext blocks being processed.
#   $out0-$out7 v14-v21: the eight cipher states.
#   $rndkey0    v23: first round key; v24/v25 rotate through the stacked
#               keys; v26-v31 hold the last six round keys.
#
# Stack frame ($FRAME+21*16+6*$SIZE_T bytes):
#   $FRAME+15 ...           copies of round keys (eight quadwords are
#                           overwritten before returning)
#   $FRAME+8*16+15 ...      v20-v31
#   $FRAME+21*16-4          vrsave
#   $FRAME+21*16+i*$SIZE_T  r26-r31 (i = 0..5)
#
# Flow:
#   1. Prologue saves v20-v31, vrsave and r26-r31; $rounds is reduced by 3
#      and $len is biased by -128.
#   2. Load_cbc_dec_key aligns the key schedule with ?vperm/$keyperm and
#      off-loads the leading round keys to the stack.
#   3. Loop_cbc_dec8x runs the rounds for eight blocks.  "subic"/"subfe."
#      turn the borrow of $len-=128 into a mask in r0, which is and-ed with
#      $len and added to $inp (see the in-line comment on that "add").  The
#      previous ciphertext blocks, pre-xored with the last round key (v31),
#      are passed to vncipherlast so the CBC chaining xor is folded into
#      the final round; $in7 becomes the next $ivec.
#   4. "addic. $len,$len,128" removes the bias; a zero result skips the
#      tail, otherwise Loop_cbc_dec8x_tail pushes $out1-$out7 through the
#      rounds and dispatches on $len (compared with 32, 64 and 96) to
#      Lcbc_dec8x_one .. Lcbc_dec8x_seven.
#   5. Lcbc_dec8x_done stores $ivec through $ivp, overwrites the stacked
#      round keys with $inpperm (after the eight stores r10/r11 have
#      advanced to the v20/v21 save slots), restores registers and pops
#      the frame.
#
# The trailing .size directive names .${prefix}_cbc_encrypt, i.e. this
# code is accounted to that function symbol.
#
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
my $rndkey0="v23";	# v24-v25 rotating buffer for first found keys
			# v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4);	# aliases with "caller", redundant assignment

$code.=<<___;
.align	5
_aesp8_cbc_decrypt8x:
	$STU		$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	li		r10,`$FRAME+8*16+15`
	li		r11,`$FRAME+8*16+31`
	stvx		v20,r10,$sp		# ABI says so
	addi		r10,r10,32
	stvx		v21,r11,$sp
	addi		r11,r11,32
	stvx		v22,r10,$sp
	addi		r10,r10,32
	stvx		v23,r11,$sp
	addi		r11,r11,32
	stvx		v24,r10,$sp
	addi		r10,r10,32
	stvx		v25,r11,$sp
	addi		r11,r11,32
	stvx		v26,r10,$sp
	addi		r10,r10,32
	stvx		v27,r11,$sp
	addi		r11,r11,32
	stvx		v28,r10,$sp
	addi		r10,r10,32
	stvx		v29,r11,$sp
	addi		r11,r11,32
	stvx		v30,r10,$sp
	stvx		v31,r11,$sp
	li		r0,-1
	stw		$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li		$x10,0x10
	$PUSH		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li		$x20,0x20
	$PUSH		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li		$x30,0x30
	$PUSH		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li		$x40,0x40
	$PUSH		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li		$x50,0x50
	$PUSH		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li		$x60,0x60
	$PUSH		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li		$x70,0x70
	mtspr		256,r0

	subi		$rounds,$rounds,3	# -4 in total
	subi		$len,$len,128		# bias

	lvx		$rndkey0,$x00,$key	# load key schedule
	lvx		v30,$x10,$key
	addi		$key,$key,0x20
	lvx		v31,$x00,$key
	?vperm		$rndkey0,$rndkey0,v30,$keyperm
	addi		$key_,$sp,$FRAME+15
	mtctr		$rounds

Load_cbc_dec_key:
	?vperm		v24,v30,v31,$keyperm
	lvx		v30,$x10,$key
	addi		$key,$key,0x20
	stvx		v24,$x00,$key_		# off-load round[1]
	?vperm		v25,v31,v30,$keyperm
	lvx		v31,$x00,$key
	stvx		v25,$x10,$key_		# off-load round[2]
	addi		$key_,$key_,0x20
	bdnz		Load_cbc_dec_key

	lvx		v26,$x10,$key
	?vperm		v24,v30,v31,$keyperm
	lvx		v27,$x20,$key
	stvx		v24,$x00,$key_		# off-load round[3]
	?vperm		v25,v31,v26,$keyperm
	lvx		v28,$x30,$key
	stvx		v25,$x10,$key_		# off-load round[4]
	addi		$key_,$sp,$FRAME+15	# rewind $key_
	?vperm		v26,v26,v27,$keyperm
	lvx		v29,$x40,$key
	?vperm		v27,v27,v28,$keyperm
	lvx		v30,$x50,$key
	?vperm		v28,v28,v29,$keyperm
	lvx		v31,$x60,$key
	?vperm		v29,v29,v30,$keyperm
	lvx		$out0,$x70,$key		# borrow $out0
	?vperm		v30,v30,v31,$keyperm
	lvx		v24,$x00,$key_		# pre-load round[1]
	?vperm		v31,v31,$out0,$keyperm
	lvx		v25,$x10,$key_		# pre-load round[2]

	#lvx		$inptail,0,$inp		# "caller" already did this
	#addi		$inp,$inp,15		# 15 is not typo
	subi		$inp,$inp,15		# undo "caller"

	le?li		$idx,8
	lvx_u		$in0,$x00,$inp		# load first 8 "words"
	le?lvsl		$inpperm,0,$idx
	le?vspltisb	$tmp,0x0f
	lvx_u		$in1,$x10,$inp
	le?vxor		$inpperm,$inpperm,$tmp	# transform for lvx_u/stvx_u
	lvx_u		$in2,$x20,$inp
	le?vperm	$in0,$in0,$in0,$inpperm
	lvx_u		$in3,$x30,$inp
	le?vperm	$in1,$in1,$in1,$inpperm
	lvx_u		$in4,$x40,$inp
	le?vperm	$in2,$in2,$in2,$inpperm
	vxor		$out0,$in0,$rndkey0
	lvx_u		$in5,$x50,$inp
	le?vperm	$in3,$in3,$in3,$inpperm
	vxor		$out1,$in1,$rndkey0
	lvx_u		$in6,$x60,$inp
	le?vperm	$in4,$in4,$in4,$inpperm
	vxor		$out2,$in2,$rndkey0
	lvx_u		$in7,$x70,$inp
	addi		$inp,$inp,0x80
	le?vperm	$in5,$in5,$in5,$inpperm
	vxor		$out3,$in3,$rndkey0
	le?vperm	$in6,$in6,$in6,$inpperm
	vxor		$out4,$in4,$rndkey0
	le?vperm	$in7,$in7,$in7,$inpperm
	vxor		$out5,$in5,$rndkey0
	vxor		$out6,$in6,$rndkey0
	vxor		$out7,$in7,$rndkey0

	mtctr		$rounds
	b		Loop_cbc_dec8x
.align	5
Loop_cbc_dec8x:
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_cbc_dec8x

	subic		$len,$len,128		# $len-=128
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24

	subfe.		r0,r0,r0		# borrow?-1:0
	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25

	and		r0,r0,$len
	vncipher	$out0,$out0,v26
	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26
	vncipher	$out6,$out6,v26
	vncipher	$out7,$out7,v26

	add		$inp,$inp,r0		# $inp is adjusted in such
						# way that at exit from the
						# loop inX-in7 are loaded
						# with last "words"
	vncipher	$out0,$out0,v27
	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27
	vncipher	$out6,$out6,v27
	vncipher	$out7,$out7,v27

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	vncipher	$out6,$out6,v28
	vncipher	$out7,$out7,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]

	vncipher	$out0,$out0,v29
	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	vncipher	$out6,$out6,v29
	vncipher	$out7,$out7,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]

	vncipher	$out0,$out0,v30
	vxor		$ivec,$ivec,v31		# xor with last round key
	vncipher	$out1,$out1,v30
	vxor		$in0,$in0,v31
	vncipher	$out2,$out2,v30
	vxor		$in1,$in1,v31
	vncipher	$out3,$out3,v30
	vxor		$in2,$in2,v31
	vncipher	$out4,$out4,v30
	vxor		$in3,$in3,v31
	vncipher	$out5,$out5,v30
	vxor		$in4,$in4,v31
	vncipher	$out6,$out6,v30
	vxor		$in5,$in5,v31
	vncipher	$out7,$out7,v30
	vxor		$in6,$in6,v31

	vncipherlast	$out0,$out0,$ivec
	vncipherlast	$out1,$out1,$in0
	lvx_u		$in0,$x00,$inp		# load next input block
	vncipherlast	$out2,$out2,$in1
	lvx_u		$in1,$x10,$inp
	vncipherlast	$out3,$out3,$in2
	le?vperm	$in0,$in0,$in0,$inpperm
	lvx_u		$in2,$x20,$inp
	vncipherlast	$out4,$out4,$in3
	le?vperm	$in1,$in1,$in1,$inpperm
	lvx_u		$in3,$x30,$inp
	vncipherlast	$out5,$out5,$in4
	le?vperm	$in2,$in2,$in2,$inpperm
	lvx_u		$in4,$x40,$inp
	vncipherlast	$out6,$out6,$in5
	le?vperm	$in3,$in3,$in3,$inpperm
	lvx_u		$in5,$x50,$inp
	vncipherlast	$out7,$out7,$in6
	le?vperm	$in4,$in4,$in4,$inpperm
	lvx_u		$in6,$x60,$inp
	vmr		$ivec,$in7
	le?vperm	$in5,$in5,$in5,$inpperm
	lvx_u		$in7,$x70,$inp
	addi		$inp,$inp,0x80

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$in6,$in6,$in6,$inpperm
	vxor		$out0,$in0,$rndkey0
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	le?vperm	$in7,$in7,$in7,$inpperm
	vxor		$out1,$in1,$rndkey0
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x20,$out
	vxor		$out2,$in2,$rndkey0
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x30,$out
	vxor		$out3,$in3,$rndkey0
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x40,$out
	vxor		$out4,$in4,$rndkey0
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x50,$out
	vxor		$out5,$in5,$rndkey0
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x60,$out
	vxor		$out6,$in6,$rndkey0
	stvx_u		$out7,$x70,$out
	addi		$out,$out,0x80
	vxor		$out7,$in7,$rndkey0

	mtctr		$rounds
	beq		Loop_cbc_dec8x		# did $len-=128 borrow?

	addic.		$len,$len,128
	beq		Lcbc_dec8x_done
	nop
	nop

Loop_cbc_dec8x_tail:				# up to 7 "words" tail...
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_cbc_dec8x_tail

	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24

	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25

	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26
	vncipher	$out6,$out6,v26
	vncipher	$out7,$out7,v26

	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27
	vncipher	$out6,$out6,v27
	vncipher	$out7,$out7,v27

	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	vncipher	$out6,$out6,v28
	vncipher	$out7,$out7,v28

	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	vncipher	$out6,$out6,v29
	vncipher	$out7,$out7,v29

	vncipher	$out1,$out1,v30
	vxor		$ivec,$ivec,v31		# last round key
	vncipher	$out2,$out2,v30
	vxor		$in1,$in1,v31
	vncipher	$out3,$out3,v30
	vxor		$in2,$in2,v31
	vncipher	$out4,$out4,v30
	vxor		$in3,$in3,v31
	vncipher	$out5,$out5,v30
	vxor		$in4,$in4,v31
	vncipher	$out6,$out6,v30
	vxor		$in5,$in5,v31
	vncipher	$out7,$out7,v30
	vxor		$in6,$in6,v31

	cmplwi		$len,32			# switch($len)
	blt		Lcbc_dec8x_one
	nop
	beq		Lcbc_dec8x_two
	cmplwi		$len,64
	blt		Lcbc_dec8x_three
	nop
	beq		Lcbc_dec8x_four
	cmplwi		$len,96
	blt		Lcbc_dec8x_five
	nop
	beq		Lcbc_dec8x_six

Lcbc_dec8x_seven:
	vncipherlast	$out1,$out1,$ivec
	vncipherlast	$out2,$out2,$in1
	vncipherlast	$out3,$out3,$in2
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out1,$out1,$out1,$inpperm
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x00,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x10,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x20,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x30,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x40,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x50,$out
	stvx_u		$out7,$x60,$out
	addi		$out,$out,0x70
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_six:
	vncipherlast	$out2,$out2,$ivec
	vncipherlast	$out3,$out3,$in2
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out2,$out2,$out2,$inpperm
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x00,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x10,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x20,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x30,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x40,$out
	stvx_u		$out7,$x50,$out
	addi		$out,$out,0x60
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_five:
	vncipherlast	$out3,$out3,$ivec
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out3,$out3,$out3,$inpperm
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x00,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x10,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x20,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x30,$out
	stvx_u		$out7,$x40,$out
	addi		$out,$out,0x50
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_four:
	vncipherlast	$out4,$out4,$ivec
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out4,$out4,$out4,$inpperm
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x00,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x10,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x20,$out
	stvx_u		$out7,$x30,$out
	addi		$out,$out,0x40
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_three:
	vncipherlast	$out5,$out5,$ivec
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out5,$out5,$out5,$inpperm
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x00,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x10,$out
	stvx_u		$out7,$x20,$out
	addi		$out,$out,0x30
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_two:
	vncipherlast	$out6,$out6,$ivec
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out6,$out6,$out6,$inpperm
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x00,$out
	stvx_u		$out7,$x10,$out
	addi		$out,$out,0x20
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_one:
	vncipherlast	$out7,$out7,$ivec
	vmr		$ivec,$in7

	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out7,0,$out
	addi		$out,$out,0x10

Lcbc_dec8x_done:
	le?vperm	$ivec,$ivec,$ivec,$inpperm
	stvx_u		$ivec,0,$ivp		# write [unaligned] iv

	li		r10,`$FRAME+15`
	li		r11,`$FRAME+31`
	stvx		$inpperm,r10,$sp	# wipe copies of round keys
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32

	mtspr		256,$vrsave
	lvx		v20,r10,$sp		# ABI says so
	addi		r10,r10,32
	lvx		v21,r11,$sp
	addi		r11,r11,32
	lvx		v22,r10,$sp
	addi		r10,r10,32
	lvx		v23,r11,$sp
	addi		r11,r11,32
	lvx		v24,r10,$sp
	addi		r10,r10,32
	lvx		v25,r11,$sp
	addi		r11,r11,32
	lvx		v26,r10,$sp
	addi		r10,r10,32
	lvx		v27,r11,$sp
	addi		r11,r11,32
	lvx		v28,r10,$sp
	addi		r10,r10,32
	lvx		v29,r11,$sp
	addi		r11,r11,32
	lvx		v30,r10,$sp
	lvx		v31,r11,$sp
	$POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long		0
	.byte		0,12,0x14,0,0x80,6,6,0
	.long		0
.size	.${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
___
}}	}}}

#########################################################################
{{{	# CTR procedure[s]						#

####################### WARNING: Here be dragons! #######################
#
# This code is written as 'ctr32', based on a 32-bit counter used
# upstream. The kernel does *not* use a 32-bit counter. The kernel uses
# a 128-bit counter.
#
# This leads to subtle changes from the upstream code: the counter
# is incremented with vaddu_q_m rather than vaddu_w_m. This occurs in
# both the bulk (8 blocks at a time) path, and in the individual block
# path.
# Be aware of this when doing updates.
#
# See:
# 1d4aa0b4c181 ("crypto: vmx - Fixing AES-CTR counter bug")
# 009b30ac7444 ("crypto: vmx - CTR: always increment IV as quadword")
# https://github.com/openssl/openssl/pull/8942
#
#########################################################################
#
# ${prefix}_ctr32_encrypt_blocks -- CTR mode entry point and its
# one-block-at-a-time path.
#
# Register assignment (taken from the maps below):
#   r3 $inp, r4 $out, r5 $len, r6 $key, r7 $ivp,
#   r8 $x10, r9 $rounds, r10 $idx;
#   v0 $rndkey0, v1 $rndkey1, v2 $inout, v3 $tmp (also used as $dat),
#   v4 $ivec, v5 $inptail, v6 $inpperm, v7 $outhead, v8 $outperm,
#   v9 $outmask, v10 $keyperm, v11 $one.
#
# Behaviour visible in the code:
#   * $len counts blocks: it is compared with 1 and 8 and decremented by
#     one per processed block.  The routine returns at once when $len < 1.
#   * The counter block is loaded (possibly unaligned) through $ivp.  $one
#     is built with vspltisb/vsldoi against a zeroed $rndkey0, leaving a
#     quadword whose only non-zero byte is a trailing 0x01, and is added
#     to $ivec with vadduqm after every block.
#   * The round count is read from offset 240 of the key structure,
#     halved and decremented to form the loop counter.
#   * When $len >= 8 control is transferred to _aesp8_ctr32_encrypt8x;
#     otherwise Loop_ctr32_enc handles the blocks one by one.
#   * In the single-block path the input block is xored with the last
#     round key and handed to vcipherlast, which folds the keystream xor
#     into the final round.  Unaligned input/output is handled with
#     lvsr/vperm/vsel, with one trailing merge-store after the loop.
#   * This path never stores $ivec back through $ivp.
#
my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
						map("v$_",(4..11));
my $dat=$tmp;

$code.=<<___;
.globl	.${prefix}_ctr32_encrypt_blocks
	${UCMP}i	$len,1
	bltlr-

	lis		r0,0xfff0
	mfspr		$vrsave,256
	mtspr		256,r0

	li		$idx,15
	vxor		$rndkey0,$rndkey0,$rndkey0
	le?vspltisb	$tmp,0x0f

	lvx		$ivec,0,$ivp		# load [unaligned] iv
	lvsl		$inpperm,0,$ivp
	lvx		$inptail,$idx,$ivp
	vspltisb	$one,1
	le?vxor		$inpperm,$inpperm,$tmp
	vperm		$ivec,$ivec,$inptail,$inpperm
	vsldoi		$one,$rndkey0,$one,1

	neg		r11,$inp
	?lvsl		$keyperm,0,$key		# prepare for unaligned key
	lwz		$rounds,240($key)

	lvsr		$inpperm,0,r11		# prepare for unaligned load
	lvx		$inptail,0,$inp
	addi		$inp,$inp,15		# 15 is not typo
	le?vxor		$inpperm,$inpperm,$tmp

	srwi		$rounds,$rounds,1
	li		$idx,16
	subi		$rounds,$rounds,1

	${UCMP}i	$len,8
	bge		_aesp8_ctr32_encrypt8x

	?lvsr		$outperm,0,$out		# prepare for unaligned store
	vspltisb	$outmask,-1
	lvx		$outhead,0,$out
	?vperm		$outmask,$rndkey0,$outmask,$outperm
	le?vxor		$outperm,$outperm,$tmp

	lvx		$rndkey0,0,$key
	mtctr		$rounds
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$ivec,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	b		Loop_ctr32_enc

.align	5
Loop_ctr32_enc:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_ctr32_enc

	vadduqm		$ivec,$ivec,$one	# Kernel change for 128-bit
	vmr		$dat,$inptail
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16
	subic.		$len,$len,1		# blocks--

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	vperm		$dat,$dat,$inptail,$inpperm
	li		$idx,16
	?vperm		$rndkey1,$rndkey0,$rndkey1,$keyperm
	lvx		$rndkey0,0,$key
	vxor		$dat,$dat,$rndkey1	# last round key
	vcipherlast	$inout,$inout,$dat

	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	vperm		$inout,$inout,$inout,$outperm
	vsel		$dat,$outhead,$inout,$outmask
	mtctr		$rounds
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vmr		$outhead,$inout
	vxor		$inout,$ivec,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	stvx		$dat,0,$out
	addi		$out,$out,16
	bne		Loop_ctr32_enc

	addi		$out,$out,-1
	lvx		$inout,0,$out		# redundant in aligned case
	vsel		$inout,$outhead,$inout,$outmask
	stvx		$inout,0,$out

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,6,0
	.long		0
___
#########################################################################
{{	# Optimized CTR procedure					#
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7
)=map("v$_",(0..3,10,12..14)); 1424*4882a593Smuzhiyunmy ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22)); 1425*4882a593Smuzhiyunmy $rndkey0="v23"; # v24-v25 rotating buffer for first found keys 1426*4882a593Smuzhiyun # v26-v31 last 6 round keys 1427*4882a593Smuzhiyunmy ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment 1428*4882a593Smuzhiyunmy ($two,$three,$four)=($outhead,$outperm,$outmask); 1429*4882a593Smuzhiyun 1430*4882a593Smuzhiyun$code.=<<___; 1431*4882a593Smuzhiyun.align 5 1432*4882a593Smuzhiyun_aesp8_ctr32_encrypt8x: 1433*4882a593Smuzhiyun $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) 1434*4882a593Smuzhiyun li r10,`$FRAME+8*16+15` 1435*4882a593Smuzhiyun li r11,`$FRAME+8*16+31` 1436*4882a593Smuzhiyun stvx v20,r10,$sp # ABI says so 1437*4882a593Smuzhiyun addi r10,r10,32 1438*4882a593Smuzhiyun stvx v21,r11,$sp 1439*4882a593Smuzhiyun addi r11,r11,32 1440*4882a593Smuzhiyun stvx v22,r10,$sp 1441*4882a593Smuzhiyun addi r10,r10,32 1442*4882a593Smuzhiyun stvx v23,r11,$sp 1443*4882a593Smuzhiyun addi r11,r11,32 1444*4882a593Smuzhiyun stvx v24,r10,$sp 1445*4882a593Smuzhiyun addi r10,r10,32 1446*4882a593Smuzhiyun stvx v25,r11,$sp 1447*4882a593Smuzhiyun addi r11,r11,32 1448*4882a593Smuzhiyun stvx v26,r10,$sp 1449*4882a593Smuzhiyun addi r10,r10,32 1450*4882a593Smuzhiyun stvx v27,r11,$sp 1451*4882a593Smuzhiyun addi r11,r11,32 1452*4882a593Smuzhiyun stvx v28,r10,$sp 1453*4882a593Smuzhiyun addi r10,r10,32 1454*4882a593Smuzhiyun stvx v29,r11,$sp 1455*4882a593Smuzhiyun addi r11,r11,32 1456*4882a593Smuzhiyun stvx v30,r10,$sp 1457*4882a593Smuzhiyun stvx v31,r11,$sp 1458*4882a593Smuzhiyun li r0,-1 1459*4882a593Smuzhiyun stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave 1460*4882a593Smuzhiyun li $x10,0x10 1461*4882a593Smuzhiyun $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) 1462*4882a593Smuzhiyun li $x20,0x20 1463*4882a593Smuzhiyun $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) 1464*4882a593Smuzhiyun li $x30,0x30 1465*4882a593Smuzhiyun $PUSH 
r28,`$FRAME+21*16+2*$SIZE_T`($sp) 1466*4882a593Smuzhiyun li $x40,0x40 1467*4882a593Smuzhiyun $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) 1468*4882a593Smuzhiyun li $x50,0x50 1469*4882a593Smuzhiyun $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) 1470*4882a593Smuzhiyun li $x60,0x60 1471*4882a593Smuzhiyun $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) 1472*4882a593Smuzhiyun li $x70,0x70 1473*4882a593Smuzhiyun mtspr 256,r0 1474*4882a593Smuzhiyun 1475*4882a593Smuzhiyun subi $rounds,$rounds,3 # -4 in total 1476*4882a593Smuzhiyun 1477*4882a593Smuzhiyun lvx $rndkey0,$x00,$key # load key schedule 1478*4882a593Smuzhiyun lvx v30,$x10,$key 1479*4882a593Smuzhiyun addi $key,$key,0x20 1480*4882a593Smuzhiyun lvx v31,$x00,$key 1481*4882a593Smuzhiyun ?vperm $rndkey0,$rndkey0,v30,$keyperm 1482*4882a593Smuzhiyun addi $key_,$sp,$FRAME+15 1483*4882a593Smuzhiyun mtctr $rounds 1484*4882a593Smuzhiyun 1485*4882a593SmuzhiyunLoad_ctr32_enc_key: 1486*4882a593Smuzhiyun ?vperm v24,v30,v31,$keyperm 1487*4882a593Smuzhiyun lvx v30,$x10,$key 1488*4882a593Smuzhiyun addi $key,$key,0x20 1489*4882a593Smuzhiyun stvx v24,$x00,$key_ # off-load round[1] 1490*4882a593Smuzhiyun ?vperm v25,v31,v30,$keyperm 1491*4882a593Smuzhiyun lvx v31,$x00,$key 1492*4882a593Smuzhiyun stvx v25,$x10,$key_ # off-load round[2] 1493*4882a593Smuzhiyun addi $key_,$key_,0x20 1494*4882a593Smuzhiyun bdnz Load_ctr32_enc_key 1495*4882a593Smuzhiyun 1496*4882a593Smuzhiyun lvx v26,$x10,$key 1497*4882a593Smuzhiyun ?vperm v24,v30,v31,$keyperm 1498*4882a593Smuzhiyun lvx v27,$x20,$key 1499*4882a593Smuzhiyun stvx v24,$x00,$key_ # off-load round[3] 1500*4882a593Smuzhiyun ?vperm v25,v31,v26,$keyperm 1501*4882a593Smuzhiyun lvx v28,$x30,$key 1502*4882a593Smuzhiyun stvx v25,$x10,$key_ # off-load round[4] 1503*4882a593Smuzhiyun addi $key_,$sp,$FRAME+15 # rewind $key_ 1504*4882a593Smuzhiyun ?vperm v26,v26,v27,$keyperm 1505*4882a593Smuzhiyun lvx v29,$x40,$key 1506*4882a593Smuzhiyun ?vperm v27,v27,v28,$keyperm 1507*4882a593Smuzhiyun lvx v30,$x50,$key 1508*4882a593Smuzhiyun 
?vperm v28,v28,v29,$keyperm 1509*4882a593Smuzhiyun lvx v31,$x60,$key 1510*4882a593Smuzhiyun ?vperm v29,v29,v30,$keyperm 1511*4882a593Smuzhiyun lvx $out0,$x70,$key # borrow $out0 1512*4882a593Smuzhiyun ?vperm v30,v30,v31,$keyperm 1513*4882a593Smuzhiyun lvx v24,$x00,$key_ # pre-load round[1] 1514*4882a593Smuzhiyun ?vperm v31,v31,$out0,$keyperm 1515*4882a593Smuzhiyun lvx v25,$x10,$key_ # pre-load round[2] 1516*4882a593Smuzhiyun 1517*4882a593Smuzhiyun vadduqm $two,$one,$one 1518*4882a593Smuzhiyun subi $inp,$inp,15 # undo "caller" 1519*4882a593Smuzhiyun $SHL $len,$len,4 1520*4882a593Smuzhiyun 1521*4882a593Smuzhiyun vadduqm $out1,$ivec,$one # counter values ... 1522*4882a593Smuzhiyun vadduqm $out2,$ivec,$two # (do all ctr adds as 128-bit) 1523*4882a593Smuzhiyun vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0] 1524*4882a593Smuzhiyun le?li $idx,8 1525*4882a593Smuzhiyun vadduqm $out3,$out1,$two 1526*4882a593Smuzhiyun vxor $out1,$out1,$rndkey0 1527*4882a593Smuzhiyun le?lvsl $inpperm,0,$idx 1528*4882a593Smuzhiyun vadduqm $out4,$out2,$two 1529*4882a593Smuzhiyun vxor $out2,$out2,$rndkey0 1530*4882a593Smuzhiyun le?vspltisb $tmp,0x0f 1531*4882a593Smuzhiyun vadduqm $out5,$out3,$two 1532*4882a593Smuzhiyun vxor $out3,$out3,$rndkey0 1533*4882a593Smuzhiyun le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u 1534*4882a593Smuzhiyun vadduqm $out6,$out4,$two 1535*4882a593Smuzhiyun vxor $out4,$out4,$rndkey0 1536*4882a593Smuzhiyun vadduqm $out7,$out5,$two 1537*4882a593Smuzhiyun vxor $out5,$out5,$rndkey0 1538*4882a593Smuzhiyun vadduqm $ivec,$out6,$two # next counter value 1539*4882a593Smuzhiyun vxor $out6,$out6,$rndkey0 1540*4882a593Smuzhiyun vxor $out7,$out7,$rndkey0 1541*4882a593Smuzhiyun 1542*4882a593Smuzhiyun mtctr $rounds 1543*4882a593Smuzhiyun b Loop_ctr32_enc8x 1544*4882a593Smuzhiyun.align 5 1545*4882a593SmuzhiyunLoop_ctr32_enc8x: 1546*4882a593Smuzhiyun vcipher $out0,$out0,v24 1547*4882a593Smuzhiyun vcipher $out1,$out1,v24 1548*4882a593Smuzhiyun vcipher $out2,$out2,v24 
1549*4882a593Smuzhiyun vcipher $out3,$out3,v24 1550*4882a593Smuzhiyun vcipher $out4,$out4,v24 1551*4882a593Smuzhiyun vcipher $out5,$out5,v24 1552*4882a593Smuzhiyun vcipher $out6,$out6,v24 1553*4882a593Smuzhiyun vcipher $out7,$out7,v24 1554*4882a593SmuzhiyunLoop_ctr32_enc8x_middle: 1555*4882a593Smuzhiyun lvx v24,$x20,$key_ # round[3] 1556*4882a593Smuzhiyun addi $key_,$key_,0x20 1557*4882a593Smuzhiyun 1558*4882a593Smuzhiyun vcipher $out0,$out0,v25 1559*4882a593Smuzhiyun vcipher $out1,$out1,v25 1560*4882a593Smuzhiyun vcipher $out2,$out2,v25 1561*4882a593Smuzhiyun vcipher $out3,$out3,v25 1562*4882a593Smuzhiyun vcipher $out4,$out4,v25 1563*4882a593Smuzhiyun vcipher $out5,$out5,v25 1564*4882a593Smuzhiyun vcipher $out6,$out6,v25 1565*4882a593Smuzhiyun vcipher $out7,$out7,v25 1566*4882a593Smuzhiyun lvx v25,$x10,$key_ # round[4] 1567*4882a593Smuzhiyun bdnz Loop_ctr32_enc8x 1568*4882a593Smuzhiyun 1569*4882a593Smuzhiyun subic r11,$len,256 # $len-256, borrow $key_ 1570*4882a593Smuzhiyun vcipher $out0,$out0,v24 1571*4882a593Smuzhiyun vcipher $out1,$out1,v24 1572*4882a593Smuzhiyun vcipher $out2,$out2,v24 1573*4882a593Smuzhiyun vcipher $out3,$out3,v24 1574*4882a593Smuzhiyun vcipher $out4,$out4,v24 1575*4882a593Smuzhiyun vcipher $out5,$out5,v24 1576*4882a593Smuzhiyun vcipher $out6,$out6,v24 1577*4882a593Smuzhiyun vcipher $out7,$out7,v24 1578*4882a593Smuzhiyun 1579*4882a593Smuzhiyun subfe r0,r0,r0 # borrow?-1:0 1580*4882a593Smuzhiyun vcipher $out0,$out0,v25 1581*4882a593Smuzhiyun vcipher $out1,$out1,v25 1582*4882a593Smuzhiyun vcipher $out2,$out2,v25 1583*4882a593Smuzhiyun vcipher $out3,$out3,v25 1584*4882a593Smuzhiyun vcipher $out4,$out4,v25 1585*4882a593Smuzhiyun vcipher $out5,$out5,v25 1586*4882a593Smuzhiyun vcipher $out6,$out6,v25 1587*4882a593Smuzhiyun vcipher $out7,$out7,v25 1588*4882a593Smuzhiyun 1589*4882a593Smuzhiyun and r0,r0,r11 1590*4882a593Smuzhiyun addi $key_,$sp,$FRAME+15 # rewind $key_ 1591*4882a593Smuzhiyun vcipher $out0,$out0,v26 1592*4882a593Smuzhiyun vcipher 
$out1,$out1,v26 1593*4882a593Smuzhiyun vcipher $out2,$out2,v26 1594*4882a593Smuzhiyun vcipher $out3,$out3,v26 1595*4882a593Smuzhiyun vcipher $out4,$out4,v26 1596*4882a593Smuzhiyun vcipher $out5,$out5,v26 1597*4882a593Smuzhiyun vcipher $out6,$out6,v26 1598*4882a593Smuzhiyun vcipher $out7,$out7,v26 1599*4882a593Smuzhiyun lvx v24,$x00,$key_ # re-pre-load round[1] 1600*4882a593Smuzhiyun 1601*4882a593Smuzhiyun subic $len,$len,129 # $len-=129 1602*4882a593Smuzhiyun vcipher $out0,$out0,v27 1603*4882a593Smuzhiyun addi $len,$len,1 # $len-=128 really 1604*4882a593Smuzhiyun vcipher $out1,$out1,v27 1605*4882a593Smuzhiyun vcipher $out2,$out2,v27 1606*4882a593Smuzhiyun vcipher $out3,$out3,v27 1607*4882a593Smuzhiyun vcipher $out4,$out4,v27 1608*4882a593Smuzhiyun vcipher $out5,$out5,v27 1609*4882a593Smuzhiyun vcipher $out6,$out6,v27 1610*4882a593Smuzhiyun vcipher $out7,$out7,v27 1611*4882a593Smuzhiyun lvx v25,$x10,$key_ # re-pre-load round[2] 1612*4882a593Smuzhiyun 1613*4882a593Smuzhiyun vcipher $out0,$out0,v28 1614*4882a593Smuzhiyun lvx_u $in0,$x00,$inp # load input 1615*4882a593Smuzhiyun vcipher $out1,$out1,v28 1616*4882a593Smuzhiyun lvx_u $in1,$x10,$inp 1617*4882a593Smuzhiyun vcipher $out2,$out2,v28 1618*4882a593Smuzhiyun lvx_u $in2,$x20,$inp 1619*4882a593Smuzhiyun vcipher $out3,$out3,v28 1620*4882a593Smuzhiyun lvx_u $in3,$x30,$inp 1621*4882a593Smuzhiyun vcipher $out4,$out4,v28 1622*4882a593Smuzhiyun lvx_u $in4,$x40,$inp 1623*4882a593Smuzhiyun vcipher $out5,$out5,v28 1624*4882a593Smuzhiyun lvx_u $in5,$x50,$inp 1625*4882a593Smuzhiyun vcipher $out6,$out6,v28 1626*4882a593Smuzhiyun lvx_u $in6,$x60,$inp 1627*4882a593Smuzhiyun vcipher $out7,$out7,v28 1628*4882a593Smuzhiyun lvx_u $in7,$x70,$inp 1629*4882a593Smuzhiyun addi $inp,$inp,0x80 1630*4882a593Smuzhiyun 1631*4882a593Smuzhiyun vcipher $out0,$out0,v29 1632*4882a593Smuzhiyun le?vperm $in0,$in0,$in0,$inpperm 1633*4882a593Smuzhiyun vcipher $out1,$out1,v29 1634*4882a593Smuzhiyun le?vperm $in1,$in1,$in1,$inpperm 1635*4882a593Smuzhiyun 
vcipher $out2,$out2,v29 1636*4882a593Smuzhiyun le?vperm $in2,$in2,$in2,$inpperm 1637*4882a593Smuzhiyun vcipher $out3,$out3,v29 1638*4882a593Smuzhiyun le?vperm $in3,$in3,$in3,$inpperm 1639*4882a593Smuzhiyun vcipher $out4,$out4,v29 1640*4882a593Smuzhiyun le?vperm $in4,$in4,$in4,$inpperm 1641*4882a593Smuzhiyun vcipher $out5,$out5,v29 1642*4882a593Smuzhiyun le?vperm $in5,$in5,$in5,$inpperm 1643*4882a593Smuzhiyun vcipher $out6,$out6,v29 1644*4882a593Smuzhiyun le?vperm $in6,$in6,$in6,$inpperm 1645*4882a593Smuzhiyun vcipher $out7,$out7,v29 1646*4882a593Smuzhiyun le?vperm $in7,$in7,$in7,$inpperm 1647*4882a593Smuzhiyun 1648*4882a593Smuzhiyun add $inp,$inp,r0 # $inp is adjusted in such 1649*4882a593Smuzhiyun # way that at exit from the 1650*4882a593Smuzhiyun # loop inX-in7 are loaded 1651*4882a593Smuzhiyun # with last "words" 1652*4882a593Smuzhiyun subfe. r0,r0,r0 # borrow?-1:0 1653*4882a593Smuzhiyun vcipher $out0,$out0,v30 1654*4882a593Smuzhiyun vxor $in0,$in0,v31 # xor with last round key 1655*4882a593Smuzhiyun vcipher $out1,$out1,v30 1656*4882a593Smuzhiyun vxor $in1,$in1,v31 1657*4882a593Smuzhiyun vcipher $out2,$out2,v30 1658*4882a593Smuzhiyun vxor $in2,$in2,v31 1659*4882a593Smuzhiyun vcipher $out3,$out3,v30 1660*4882a593Smuzhiyun vxor $in3,$in3,v31 1661*4882a593Smuzhiyun vcipher $out4,$out4,v30 1662*4882a593Smuzhiyun vxor $in4,$in4,v31 1663*4882a593Smuzhiyun vcipher $out5,$out5,v30 1664*4882a593Smuzhiyun vxor $in5,$in5,v31 1665*4882a593Smuzhiyun vcipher $out6,$out6,v30 1666*4882a593Smuzhiyun vxor $in6,$in6,v31 1667*4882a593Smuzhiyun vcipher $out7,$out7,v30 1668*4882a593Smuzhiyun vxor $in7,$in7,v31 1669*4882a593Smuzhiyun 1670*4882a593Smuzhiyun bne Lctr32_enc8x_break # did $len-129 borrow? 1671*4882a593Smuzhiyun 1672*4882a593Smuzhiyun vcipherlast $in0,$out0,$in0 1673*4882a593Smuzhiyun vcipherlast $in1,$out1,$in1 1674*4882a593Smuzhiyun vadduqm $out1,$ivec,$one # counter values ... 
1675*4882a593Smuzhiyun vcipherlast $in2,$out2,$in2 1676*4882a593Smuzhiyun vadduqm $out2,$ivec,$two 1677*4882a593Smuzhiyun vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0] 1678*4882a593Smuzhiyun vcipherlast $in3,$out3,$in3 1679*4882a593Smuzhiyun vadduqm $out3,$out1,$two 1680*4882a593Smuzhiyun vxor $out1,$out1,$rndkey0 1681*4882a593Smuzhiyun vcipherlast $in4,$out4,$in4 1682*4882a593Smuzhiyun vadduqm $out4,$out2,$two 1683*4882a593Smuzhiyun vxor $out2,$out2,$rndkey0 1684*4882a593Smuzhiyun vcipherlast $in5,$out5,$in5 1685*4882a593Smuzhiyun vadduqm $out5,$out3,$two 1686*4882a593Smuzhiyun vxor $out3,$out3,$rndkey0 1687*4882a593Smuzhiyun vcipherlast $in6,$out6,$in6 1688*4882a593Smuzhiyun vadduqm $out6,$out4,$two 1689*4882a593Smuzhiyun vxor $out4,$out4,$rndkey0 1690*4882a593Smuzhiyun vcipherlast $in7,$out7,$in7 1691*4882a593Smuzhiyun vadduqm $out7,$out5,$two 1692*4882a593Smuzhiyun vxor $out5,$out5,$rndkey0 1693*4882a593Smuzhiyun le?vperm $in0,$in0,$in0,$inpperm 1694*4882a593Smuzhiyun vadduqm $ivec,$out6,$two # next counter value 1695*4882a593Smuzhiyun vxor $out6,$out6,$rndkey0 1696*4882a593Smuzhiyun le?vperm $in1,$in1,$in1,$inpperm 1697*4882a593Smuzhiyun vxor $out7,$out7,$rndkey0 1698*4882a593Smuzhiyun mtctr $rounds 1699*4882a593Smuzhiyun 1700*4882a593Smuzhiyun vcipher $out0,$out0,v24 1701*4882a593Smuzhiyun stvx_u $in0,$x00,$out 1702*4882a593Smuzhiyun le?vperm $in2,$in2,$in2,$inpperm 1703*4882a593Smuzhiyun vcipher $out1,$out1,v24 1704*4882a593Smuzhiyun stvx_u $in1,$x10,$out 1705*4882a593Smuzhiyun le?vperm $in3,$in3,$in3,$inpperm 1706*4882a593Smuzhiyun vcipher $out2,$out2,v24 1707*4882a593Smuzhiyun stvx_u $in2,$x20,$out 1708*4882a593Smuzhiyun le?vperm $in4,$in4,$in4,$inpperm 1709*4882a593Smuzhiyun vcipher $out3,$out3,v24 1710*4882a593Smuzhiyun stvx_u $in3,$x30,$out 1711*4882a593Smuzhiyun le?vperm $in5,$in5,$in5,$inpperm 1712*4882a593Smuzhiyun vcipher $out4,$out4,v24 1713*4882a593Smuzhiyun stvx_u $in4,$x40,$out 1714*4882a593Smuzhiyun le?vperm $in6,$in6,$in6,$inpperm 
1715*4882a593Smuzhiyun vcipher $out5,$out5,v24 1716*4882a593Smuzhiyun stvx_u $in5,$x50,$out 1717*4882a593Smuzhiyun le?vperm $in7,$in7,$in7,$inpperm 1718*4882a593Smuzhiyun vcipher $out6,$out6,v24 1719*4882a593Smuzhiyun stvx_u $in6,$x60,$out 1720*4882a593Smuzhiyun vcipher $out7,$out7,v24 1721*4882a593Smuzhiyun stvx_u $in7,$x70,$out 1722*4882a593Smuzhiyun addi $out,$out,0x80 1723*4882a593Smuzhiyun 1724*4882a593Smuzhiyun b Loop_ctr32_enc8x_middle 1725*4882a593Smuzhiyun 1726*4882a593Smuzhiyun.align 5 1727*4882a593SmuzhiyunLctr32_enc8x_break: 1728*4882a593Smuzhiyun cmpwi $len,-0x60 1729*4882a593Smuzhiyun blt Lctr32_enc8x_one 1730*4882a593Smuzhiyun nop 1731*4882a593Smuzhiyun beq Lctr32_enc8x_two 1732*4882a593Smuzhiyun cmpwi $len,-0x40 1733*4882a593Smuzhiyun blt Lctr32_enc8x_three 1734*4882a593Smuzhiyun nop 1735*4882a593Smuzhiyun beq Lctr32_enc8x_four 1736*4882a593Smuzhiyun cmpwi $len,-0x20 1737*4882a593Smuzhiyun blt Lctr32_enc8x_five 1738*4882a593Smuzhiyun nop 1739*4882a593Smuzhiyun beq Lctr32_enc8x_six 1740*4882a593Smuzhiyun cmpwi $len,0x00 1741*4882a593Smuzhiyun blt Lctr32_enc8x_seven 1742*4882a593Smuzhiyun 1743*4882a593SmuzhiyunLctr32_enc8x_eight: 1744*4882a593Smuzhiyun vcipherlast $out0,$out0,$in0 1745*4882a593Smuzhiyun vcipherlast $out1,$out1,$in1 1746*4882a593Smuzhiyun vcipherlast $out2,$out2,$in2 1747*4882a593Smuzhiyun vcipherlast $out3,$out3,$in3 1748*4882a593Smuzhiyun vcipherlast $out4,$out4,$in4 1749*4882a593Smuzhiyun vcipherlast $out5,$out5,$in5 1750*4882a593Smuzhiyun vcipherlast $out6,$out6,$in6 1751*4882a593Smuzhiyun vcipherlast $out7,$out7,$in7 1752*4882a593Smuzhiyun 1753*4882a593Smuzhiyun le?vperm $out0,$out0,$out0,$inpperm 1754*4882a593Smuzhiyun le?vperm $out1,$out1,$out1,$inpperm 1755*4882a593Smuzhiyun stvx_u $out0,$x00,$out 1756*4882a593Smuzhiyun le?vperm $out2,$out2,$out2,$inpperm 1757*4882a593Smuzhiyun stvx_u $out1,$x10,$out 1758*4882a593Smuzhiyun le?vperm $out3,$out3,$out3,$inpperm 1759*4882a593Smuzhiyun stvx_u $out2,$x20,$out 1760*4882a593Smuzhiyun 
le?vperm $out4,$out4,$out4,$inpperm 1761*4882a593Smuzhiyun stvx_u $out3,$x30,$out 1762*4882a593Smuzhiyun le?vperm $out5,$out5,$out5,$inpperm 1763*4882a593Smuzhiyun stvx_u $out4,$x40,$out 1764*4882a593Smuzhiyun le?vperm $out6,$out6,$out6,$inpperm 1765*4882a593Smuzhiyun stvx_u $out5,$x50,$out 1766*4882a593Smuzhiyun le?vperm $out7,$out7,$out7,$inpperm 1767*4882a593Smuzhiyun stvx_u $out6,$x60,$out 1768*4882a593Smuzhiyun stvx_u $out7,$x70,$out 1769*4882a593Smuzhiyun addi $out,$out,0x80 1770*4882a593Smuzhiyun b Lctr32_enc8x_done 1771*4882a593Smuzhiyun 1772*4882a593Smuzhiyun.align 5 1773*4882a593SmuzhiyunLctr32_enc8x_seven: 1774*4882a593Smuzhiyun vcipherlast $out0,$out0,$in1 1775*4882a593Smuzhiyun vcipherlast $out1,$out1,$in2 1776*4882a593Smuzhiyun vcipherlast $out2,$out2,$in3 1777*4882a593Smuzhiyun vcipherlast $out3,$out3,$in4 1778*4882a593Smuzhiyun vcipherlast $out4,$out4,$in5 1779*4882a593Smuzhiyun vcipherlast $out5,$out5,$in6 1780*4882a593Smuzhiyun vcipherlast $out6,$out6,$in7 1781*4882a593Smuzhiyun 1782*4882a593Smuzhiyun le?vperm $out0,$out0,$out0,$inpperm 1783*4882a593Smuzhiyun le?vperm $out1,$out1,$out1,$inpperm 1784*4882a593Smuzhiyun stvx_u $out0,$x00,$out 1785*4882a593Smuzhiyun le?vperm $out2,$out2,$out2,$inpperm 1786*4882a593Smuzhiyun stvx_u $out1,$x10,$out 1787*4882a593Smuzhiyun le?vperm $out3,$out3,$out3,$inpperm 1788*4882a593Smuzhiyun stvx_u $out2,$x20,$out 1789*4882a593Smuzhiyun le?vperm $out4,$out4,$out4,$inpperm 1790*4882a593Smuzhiyun stvx_u $out3,$x30,$out 1791*4882a593Smuzhiyun le?vperm $out5,$out5,$out5,$inpperm 1792*4882a593Smuzhiyun stvx_u $out4,$x40,$out 1793*4882a593Smuzhiyun le?vperm $out6,$out6,$out6,$inpperm 1794*4882a593Smuzhiyun stvx_u $out5,$x50,$out 1795*4882a593Smuzhiyun stvx_u $out6,$x60,$out 1796*4882a593Smuzhiyun addi $out,$out,0x70 1797*4882a593Smuzhiyun b Lctr32_enc8x_done 1798*4882a593Smuzhiyun 1799*4882a593Smuzhiyun.align 5 1800*4882a593SmuzhiyunLctr32_enc8x_six: 1801*4882a593Smuzhiyun vcipherlast $out0,$out0,$in2 
1802*4882a593Smuzhiyun vcipherlast $out1,$out1,$in3 1803*4882a593Smuzhiyun vcipherlast $out2,$out2,$in4 1804*4882a593Smuzhiyun vcipherlast $out3,$out3,$in5 1805*4882a593Smuzhiyun vcipherlast $out4,$out4,$in6 1806*4882a593Smuzhiyun vcipherlast $out5,$out5,$in7 1807*4882a593Smuzhiyun 1808*4882a593Smuzhiyun le?vperm $out0,$out0,$out0,$inpperm 1809*4882a593Smuzhiyun le?vperm $out1,$out1,$out1,$inpperm 1810*4882a593Smuzhiyun stvx_u $out0,$x00,$out 1811*4882a593Smuzhiyun le?vperm $out2,$out2,$out2,$inpperm 1812*4882a593Smuzhiyun stvx_u $out1,$x10,$out 1813*4882a593Smuzhiyun le?vperm $out3,$out3,$out3,$inpperm 1814*4882a593Smuzhiyun stvx_u $out2,$x20,$out 1815*4882a593Smuzhiyun le?vperm $out4,$out4,$out4,$inpperm 1816*4882a593Smuzhiyun stvx_u $out3,$x30,$out 1817*4882a593Smuzhiyun le?vperm $out5,$out5,$out5,$inpperm 1818*4882a593Smuzhiyun stvx_u $out4,$x40,$out 1819*4882a593Smuzhiyun stvx_u $out5,$x50,$out 1820*4882a593Smuzhiyun addi $out,$out,0x60 1821*4882a593Smuzhiyun b Lctr32_enc8x_done 1822*4882a593Smuzhiyun 1823*4882a593Smuzhiyun.align 5 1824*4882a593SmuzhiyunLctr32_enc8x_five: 1825*4882a593Smuzhiyun vcipherlast $out0,$out0,$in3 1826*4882a593Smuzhiyun vcipherlast $out1,$out1,$in4 1827*4882a593Smuzhiyun vcipherlast $out2,$out2,$in5 1828*4882a593Smuzhiyun vcipherlast $out3,$out3,$in6 1829*4882a593Smuzhiyun vcipherlast $out4,$out4,$in7 1830*4882a593Smuzhiyun 1831*4882a593Smuzhiyun le?vperm $out0,$out0,$out0,$inpperm 1832*4882a593Smuzhiyun le?vperm $out1,$out1,$out1,$inpperm 1833*4882a593Smuzhiyun stvx_u $out0,$x00,$out 1834*4882a593Smuzhiyun le?vperm $out2,$out2,$out2,$inpperm 1835*4882a593Smuzhiyun stvx_u $out1,$x10,$out 1836*4882a593Smuzhiyun le?vperm $out3,$out3,$out3,$inpperm 1837*4882a593Smuzhiyun stvx_u $out2,$x20,$out 1838*4882a593Smuzhiyun le?vperm $out4,$out4,$out4,$inpperm 1839*4882a593Smuzhiyun stvx_u $out3,$x30,$out 1840*4882a593Smuzhiyun stvx_u $out4,$x40,$out 1841*4882a593Smuzhiyun addi $out,$out,0x50 1842*4882a593Smuzhiyun b Lctr32_enc8x_done 
1843*4882a593Smuzhiyun 1844*4882a593Smuzhiyun.align 5 1845*4882a593SmuzhiyunLctr32_enc8x_four: 1846*4882a593Smuzhiyun vcipherlast $out0,$out0,$in4 1847*4882a593Smuzhiyun vcipherlast $out1,$out1,$in5 1848*4882a593Smuzhiyun vcipherlast $out2,$out2,$in6 1849*4882a593Smuzhiyun vcipherlast $out3,$out3,$in7 1850*4882a593Smuzhiyun 1851*4882a593Smuzhiyun le?vperm $out0,$out0,$out0,$inpperm 1852*4882a593Smuzhiyun le?vperm $out1,$out1,$out1,$inpperm 1853*4882a593Smuzhiyun stvx_u $out0,$x00,$out 1854*4882a593Smuzhiyun le?vperm $out2,$out2,$out2,$inpperm 1855*4882a593Smuzhiyun stvx_u $out1,$x10,$out 1856*4882a593Smuzhiyun le?vperm $out3,$out3,$out3,$inpperm 1857*4882a593Smuzhiyun stvx_u $out2,$x20,$out 1858*4882a593Smuzhiyun stvx_u $out3,$x30,$out 1859*4882a593Smuzhiyun addi $out,$out,0x40 1860*4882a593Smuzhiyun b Lctr32_enc8x_done 1861*4882a593Smuzhiyun 1862*4882a593Smuzhiyun.align 5 1863*4882a593SmuzhiyunLctr32_enc8x_three: 1864*4882a593Smuzhiyun vcipherlast $out0,$out0,$in5 1865*4882a593Smuzhiyun vcipherlast $out1,$out1,$in6 1866*4882a593Smuzhiyun vcipherlast $out2,$out2,$in7 1867*4882a593Smuzhiyun 1868*4882a593Smuzhiyun le?vperm $out0,$out0,$out0,$inpperm 1869*4882a593Smuzhiyun le?vperm $out1,$out1,$out1,$inpperm 1870*4882a593Smuzhiyun stvx_u $out0,$x00,$out 1871*4882a593Smuzhiyun le?vperm $out2,$out2,$out2,$inpperm 1872*4882a593Smuzhiyun stvx_u $out1,$x10,$out 1873*4882a593Smuzhiyun stvx_u $out2,$x20,$out 1874*4882a593Smuzhiyun addi $out,$out,0x30 1875*4882a593Smuzhiyun b Lctr32_enc8x_done 1876*4882a593Smuzhiyun 1877*4882a593Smuzhiyun.align 5 1878*4882a593SmuzhiyunLctr32_enc8x_two: 1879*4882a593Smuzhiyun vcipherlast $out0,$out0,$in6 1880*4882a593Smuzhiyun vcipherlast $out1,$out1,$in7 1881*4882a593Smuzhiyun 1882*4882a593Smuzhiyun le?vperm $out0,$out0,$out0,$inpperm 1883*4882a593Smuzhiyun le?vperm $out1,$out1,$out1,$inpperm 1884*4882a593Smuzhiyun stvx_u $out0,$x00,$out 1885*4882a593Smuzhiyun stvx_u $out1,$x10,$out 1886*4882a593Smuzhiyun addi $out,$out,0x20 
1887*4882a593Smuzhiyun b Lctr32_enc8x_done 1888*4882a593Smuzhiyun 1889*4882a593Smuzhiyun.align 5 1890*4882a593SmuzhiyunLctr32_enc8x_one: 1891*4882a593Smuzhiyun vcipherlast $out0,$out0,$in7 1892*4882a593Smuzhiyun 1893*4882a593Smuzhiyun le?vperm $out0,$out0,$out0,$inpperm 1894*4882a593Smuzhiyun stvx_u $out0,0,$out 1895*4882a593Smuzhiyun addi $out,$out,0x10 1896*4882a593Smuzhiyun 1897*4882a593SmuzhiyunLctr32_enc8x_done: 1898*4882a593Smuzhiyun li r10,`$FRAME+15` 1899*4882a593Smuzhiyun li r11,`$FRAME+31` 1900*4882a593Smuzhiyun stvx $inpperm,r10,$sp # wipe copies of round keys 1901*4882a593Smuzhiyun addi r10,r10,32 1902*4882a593Smuzhiyun stvx $inpperm,r11,$sp 1903*4882a593Smuzhiyun addi r11,r11,32 1904*4882a593Smuzhiyun stvx $inpperm,r10,$sp 1905*4882a593Smuzhiyun addi r10,r10,32 1906*4882a593Smuzhiyun stvx $inpperm,r11,$sp 1907*4882a593Smuzhiyun addi r11,r11,32 1908*4882a593Smuzhiyun stvx $inpperm,r10,$sp 1909*4882a593Smuzhiyun addi r10,r10,32 1910*4882a593Smuzhiyun stvx $inpperm,r11,$sp 1911*4882a593Smuzhiyun addi r11,r11,32 1912*4882a593Smuzhiyun stvx $inpperm,r10,$sp 1913*4882a593Smuzhiyun addi r10,r10,32 1914*4882a593Smuzhiyun stvx $inpperm,r11,$sp 1915*4882a593Smuzhiyun addi r11,r11,32 1916*4882a593Smuzhiyun 1917*4882a593Smuzhiyun mtspr 256,$vrsave 1918*4882a593Smuzhiyun lvx v20,r10,$sp # ABI says so 1919*4882a593Smuzhiyun addi r10,r10,32 1920*4882a593Smuzhiyun lvx v21,r11,$sp 1921*4882a593Smuzhiyun addi r11,r11,32 1922*4882a593Smuzhiyun lvx v22,r10,$sp 1923*4882a593Smuzhiyun addi r10,r10,32 1924*4882a593Smuzhiyun lvx v23,r11,$sp 1925*4882a593Smuzhiyun addi r11,r11,32 1926*4882a593Smuzhiyun lvx v24,r10,$sp 1927*4882a593Smuzhiyun addi r10,r10,32 1928*4882a593Smuzhiyun lvx v25,r11,$sp 1929*4882a593Smuzhiyun addi r11,r11,32 1930*4882a593Smuzhiyun lvx v26,r10,$sp 1931*4882a593Smuzhiyun addi r10,r10,32 1932*4882a593Smuzhiyun lvx v27,r11,$sp 1933*4882a593Smuzhiyun addi r11,r11,32 1934*4882a593Smuzhiyun lvx v28,r10,$sp 1935*4882a593Smuzhiyun addi r10,r10,32 
1936*4882a593Smuzhiyun lvx v29,r11,$sp 1937*4882a593Smuzhiyun addi r11,r11,32 1938*4882a593Smuzhiyun lvx v30,r10,$sp 1939*4882a593Smuzhiyun lvx v31,r11,$sp 1940*4882a593Smuzhiyun $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) 1941*4882a593Smuzhiyun $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) 1942*4882a593Smuzhiyun $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) 1943*4882a593Smuzhiyun $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) 1944*4882a593Smuzhiyun $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) 1945*4882a593Smuzhiyun $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) 1946*4882a593Smuzhiyun addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` 1947*4882a593Smuzhiyun blr 1948*4882a593Smuzhiyun .long 0 1949*4882a593Smuzhiyun .byte 0,12,0x14,0,0x80,6,6,0 1950*4882a593Smuzhiyun .long 0 1951*4882a593Smuzhiyun.size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks 1952*4882a593Smuzhiyun___ 1953*4882a593Smuzhiyun}} }}} 1954*4882a593Smuzhiyun 1955*4882a593Smuzhiyun######################################################################### 1956*4882a593Smuzhiyun{{{ # XTS procedures # 1957*4882a593Smuzhiyun# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len, # 1958*4882a593Smuzhiyun# const AES_KEY *key1, const AES_KEY *key2, # 1959*4882a593Smuzhiyun# [const] unsigned char iv[16]); # 1960*4882a593Smuzhiyun# If $key2 is NULL, then a "tweak chaining" mode is engaged, in which # 1961*4882a593Smuzhiyun# input tweak value is assumed to be encrypted already, and last tweak # 1962*4882a593Smuzhiyun# value, one suitable for consecutive call on same chunk of data, is # 1963*4882a593Smuzhiyun# written back to original buffer. In addition, in "tweak chaining" # 1964*4882a593Smuzhiyun# mode only complete input blocks are processed. 
# 1965*4882a593Smuzhiyun 1966*4882a593Smuzhiyunmy ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10)); 1967*4882a593Smuzhiyunmy ($rndkey0,$rndkey1,$inout) = map("v$_",(0..2)); 1968*4882a593Smuzhiyunmy ($output,$inptail,$inpperm,$leperm,$keyperm) = map("v$_",(3..7)); 1969*4882a593Smuzhiyunmy ($tweak,$seven,$eighty7,$tmp,$tweak1) = map("v$_",(8..12)); 1970*4882a593Smuzhiyunmy $taillen = $key2; 1971*4882a593Smuzhiyun 1972*4882a593Smuzhiyun ($inp,$idx) = ($idx,$inp); # reassign 1973*4882a593Smuzhiyun 1974*4882a593Smuzhiyun$code.=<<___; 1975*4882a593Smuzhiyun.globl .${prefix}_xts_encrypt 1976*4882a593Smuzhiyun mr $inp,r3 # reassign 1977*4882a593Smuzhiyun li r3,-1 1978*4882a593Smuzhiyun ${UCMP}i $len,16 1979*4882a593Smuzhiyun bltlr- 1980*4882a593Smuzhiyun 1981*4882a593Smuzhiyun lis r0,0xfff0 1982*4882a593Smuzhiyun mfspr r12,256 # save vrsave 1983*4882a593Smuzhiyun li r11,0 1984*4882a593Smuzhiyun mtspr 256,r0 1985*4882a593Smuzhiyun 1986*4882a593Smuzhiyun vspltisb $seven,0x07 # 0x070707..07 1987*4882a593Smuzhiyun le?lvsl $leperm,r11,r11 1988*4882a593Smuzhiyun le?vspltisb $tmp,0x0f 1989*4882a593Smuzhiyun le?vxor $leperm,$leperm,$seven 1990*4882a593Smuzhiyun 1991*4882a593Smuzhiyun li $idx,15 1992*4882a593Smuzhiyun lvx $tweak,0,$ivp # load [unaligned] iv 1993*4882a593Smuzhiyun lvsl $inpperm,0,$ivp 1994*4882a593Smuzhiyun lvx $inptail,$idx,$ivp 1995*4882a593Smuzhiyun le?vxor $inpperm,$inpperm,$tmp 1996*4882a593Smuzhiyun vperm $tweak,$tweak,$inptail,$inpperm 1997*4882a593Smuzhiyun 1998*4882a593Smuzhiyun neg r11,$inp 1999*4882a593Smuzhiyun lvsr $inpperm,0,r11 # prepare for unaligned load 2000*4882a593Smuzhiyun lvx $inout,0,$inp 2001*4882a593Smuzhiyun addi $inp,$inp,15 # 15 is not typo 2002*4882a593Smuzhiyun le?vxor $inpperm,$inpperm,$tmp 2003*4882a593Smuzhiyun 2004*4882a593Smuzhiyun ${UCMP}i $key2,0 # key2==NULL? 
2005*4882a593Smuzhiyun beq Lxts_enc_no_key2 2006*4882a593Smuzhiyun 2007*4882a593Smuzhiyun ?lvsl $keyperm,0,$key2 # prepare for unaligned key 2008*4882a593Smuzhiyun lwz $rounds,240($key2) 2009*4882a593Smuzhiyun srwi $rounds,$rounds,1 2010*4882a593Smuzhiyun subi $rounds,$rounds,1 2011*4882a593Smuzhiyun li $idx,16 2012*4882a593Smuzhiyun 2013*4882a593Smuzhiyun lvx $rndkey0,0,$key2 2014*4882a593Smuzhiyun lvx $rndkey1,$idx,$key2 2015*4882a593Smuzhiyun addi $idx,$idx,16 2016*4882a593Smuzhiyun ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2017*4882a593Smuzhiyun vxor $tweak,$tweak,$rndkey0 2018*4882a593Smuzhiyun lvx $rndkey0,$idx,$key2 2019*4882a593Smuzhiyun addi $idx,$idx,16 2020*4882a593Smuzhiyun mtctr $rounds 2021*4882a593Smuzhiyun 2022*4882a593SmuzhiyunLtweak_xts_enc: 2023*4882a593Smuzhiyun ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2024*4882a593Smuzhiyun vcipher $tweak,$tweak,$rndkey1 2025*4882a593Smuzhiyun lvx $rndkey1,$idx,$key2 2026*4882a593Smuzhiyun addi $idx,$idx,16 2027*4882a593Smuzhiyun ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2028*4882a593Smuzhiyun vcipher $tweak,$tweak,$rndkey0 2029*4882a593Smuzhiyun lvx $rndkey0,$idx,$key2 2030*4882a593Smuzhiyun addi $idx,$idx,16 2031*4882a593Smuzhiyun bdnz Ltweak_xts_enc 2032*4882a593Smuzhiyun 2033*4882a593Smuzhiyun ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2034*4882a593Smuzhiyun vcipher $tweak,$tweak,$rndkey1 2035*4882a593Smuzhiyun lvx $rndkey1,$idx,$key2 2036*4882a593Smuzhiyun ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2037*4882a593Smuzhiyun vcipherlast $tweak,$tweak,$rndkey0 2038*4882a593Smuzhiyun 2039*4882a593Smuzhiyun li $ivp,0 # don't chain the tweak 2040*4882a593Smuzhiyun b Lxts_enc 2041*4882a593Smuzhiyun 2042*4882a593SmuzhiyunLxts_enc_no_key2: 2043*4882a593Smuzhiyun li $idx,-16 2044*4882a593Smuzhiyun and $len,$len,$idx # in "tweak chaining" 2045*4882a593Smuzhiyun # mode only complete 2046*4882a593Smuzhiyun # blocks are processed 2047*4882a593SmuzhiyunLxts_enc: 2048*4882a593Smuzhiyun lvx $inptail,0,$inp 
2049*4882a593Smuzhiyun addi $inp,$inp,16 2050*4882a593Smuzhiyun 2051*4882a593Smuzhiyun ?lvsl $keyperm,0,$key1 # prepare for unaligned key 2052*4882a593Smuzhiyun lwz $rounds,240($key1) 2053*4882a593Smuzhiyun srwi $rounds,$rounds,1 2054*4882a593Smuzhiyun subi $rounds,$rounds,1 2055*4882a593Smuzhiyun li $idx,16 2056*4882a593Smuzhiyun 2057*4882a593Smuzhiyun vslb $eighty7,$seven,$seven # 0x808080..80 2058*4882a593Smuzhiyun vor $eighty7,$eighty7,$seven # 0x878787..87 2059*4882a593Smuzhiyun vspltisb $tmp,1 # 0x010101..01 2060*4882a593Smuzhiyun vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01 2061*4882a593Smuzhiyun 2062*4882a593Smuzhiyun ${UCMP}i $len,96 2063*4882a593Smuzhiyun bge _aesp8_xts_encrypt6x 2064*4882a593Smuzhiyun 2065*4882a593Smuzhiyun andi. $taillen,$len,15 2066*4882a593Smuzhiyun subic r0,$len,32 2067*4882a593Smuzhiyun subi $taillen,$taillen,16 2068*4882a593Smuzhiyun subfe r0,r0,r0 2069*4882a593Smuzhiyun and r0,r0,$taillen 2070*4882a593Smuzhiyun add $inp,$inp,r0 2071*4882a593Smuzhiyun 2072*4882a593Smuzhiyun lvx $rndkey0,0,$key1 2073*4882a593Smuzhiyun lvx $rndkey1,$idx,$key1 2074*4882a593Smuzhiyun addi $idx,$idx,16 2075*4882a593Smuzhiyun vperm $inout,$inout,$inptail,$inpperm 2076*4882a593Smuzhiyun ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2077*4882a593Smuzhiyun vxor $inout,$inout,$tweak 2078*4882a593Smuzhiyun vxor $inout,$inout,$rndkey0 2079*4882a593Smuzhiyun lvx $rndkey0,$idx,$key1 2080*4882a593Smuzhiyun addi $idx,$idx,16 2081*4882a593Smuzhiyun mtctr $rounds 2082*4882a593Smuzhiyun b Loop_xts_enc 2083*4882a593Smuzhiyun 2084*4882a593Smuzhiyun.align 5 2085*4882a593SmuzhiyunLoop_xts_enc: 2086*4882a593Smuzhiyun ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2087*4882a593Smuzhiyun vcipher $inout,$inout,$rndkey1 2088*4882a593Smuzhiyun lvx $rndkey1,$idx,$key1 2089*4882a593Smuzhiyun addi $idx,$idx,16 2090*4882a593Smuzhiyun ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2091*4882a593Smuzhiyun vcipher $inout,$inout,$rndkey0 2092*4882a593Smuzhiyun lvx $rndkey0,$idx,$key1 
2093*4882a593Smuzhiyun addi $idx,$idx,16 2094*4882a593Smuzhiyun bdnz Loop_xts_enc 2095*4882a593Smuzhiyun 2096*4882a593Smuzhiyun ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2097*4882a593Smuzhiyun vcipher $inout,$inout,$rndkey1 2098*4882a593Smuzhiyun lvx $rndkey1,$idx,$key1 2099*4882a593Smuzhiyun li $idx,16 2100*4882a593Smuzhiyun ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2101*4882a593Smuzhiyun vxor $rndkey0,$rndkey0,$tweak 2102*4882a593Smuzhiyun vcipherlast $output,$inout,$rndkey0 2103*4882a593Smuzhiyun 2104*4882a593Smuzhiyun le?vperm $tmp,$output,$output,$leperm 2105*4882a593Smuzhiyun be?nop 2106*4882a593Smuzhiyun le?stvx_u $tmp,0,$out 2107*4882a593Smuzhiyun be?stvx_u $output,0,$out 2108*4882a593Smuzhiyun addi $out,$out,16 2109*4882a593Smuzhiyun 2110*4882a593Smuzhiyun subic. $len,$len,16 2111*4882a593Smuzhiyun beq Lxts_enc_done 2112*4882a593Smuzhiyun 2113*4882a593Smuzhiyun vmr $inout,$inptail 2114*4882a593Smuzhiyun lvx $inptail,0,$inp 2115*4882a593Smuzhiyun addi $inp,$inp,16 2116*4882a593Smuzhiyun lvx $rndkey0,0,$key1 2117*4882a593Smuzhiyun lvx $rndkey1,$idx,$key1 2118*4882a593Smuzhiyun addi $idx,$idx,16 2119*4882a593Smuzhiyun 2120*4882a593Smuzhiyun subic r0,$len,32 2121*4882a593Smuzhiyun subfe r0,r0,r0 2122*4882a593Smuzhiyun and r0,r0,$taillen 2123*4882a593Smuzhiyun add $inp,$inp,r0 2124*4882a593Smuzhiyun 2125*4882a593Smuzhiyun vsrab $tmp,$tweak,$seven # next tweak value 2126*4882a593Smuzhiyun vaddubm $tweak,$tweak,$tweak 2127*4882a593Smuzhiyun vsldoi $tmp,$tmp,$tmp,15 2128*4882a593Smuzhiyun vand $tmp,$tmp,$eighty7 2129*4882a593Smuzhiyun vxor $tweak,$tweak,$tmp 2130*4882a593Smuzhiyun 2131*4882a593Smuzhiyun vperm $inout,$inout,$inptail,$inpperm 2132*4882a593Smuzhiyun ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2133*4882a593Smuzhiyun vxor $inout,$inout,$tweak 2134*4882a593Smuzhiyun vxor $output,$output,$rndkey0 # just in case $len<16 2135*4882a593Smuzhiyun vxor $inout,$inout,$rndkey0 2136*4882a593Smuzhiyun lvx $rndkey0,$idx,$key1 2137*4882a593Smuzhiyun addi 
$idx,$idx,16 2138*4882a593Smuzhiyun 2139*4882a593Smuzhiyun mtctr $rounds 2140*4882a593Smuzhiyun ${UCMP}i $len,16 2141*4882a593Smuzhiyun bge Loop_xts_enc 2142*4882a593Smuzhiyun 2143*4882a593Smuzhiyun vxor $output,$output,$tweak 2144*4882a593Smuzhiyun lvsr $inpperm,0,$len # $inpperm is no longer needed 2145*4882a593Smuzhiyun vxor $inptail,$inptail,$inptail # $inptail is no longer needed 2146*4882a593Smuzhiyun vspltisb $tmp,-1 2147*4882a593Smuzhiyun vperm $inptail,$inptail,$tmp,$inpperm 2148*4882a593Smuzhiyun vsel $inout,$inout,$output,$inptail 2149*4882a593Smuzhiyun 2150*4882a593Smuzhiyun subi r11,$out,17 2151*4882a593Smuzhiyun subi $out,$out,16 2152*4882a593Smuzhiyun mtctr $len 2153*4882a593Smuzhiyun li $len,16 2154*4882a593SmuzhiyunLoop_xts_enc_steal: 2155*4882a593Smuzhiyun lbzu r0,1(r11) 2156*4882a593Smuzhiyun stb r0,16(r11) 2157*4882a593Smuzhiyun bdnz Loop_xts_enc_steal 2158*4882a593Smuzhiyun 2159*4882a593Smuzhiyun mtctr $rounds 2160*4882a593Smuzhiyun b Loop_xts_enc # one more time... 
Lxts_enc_done:
	${UCMP}i	$ivp,0
	beq		Lxts_enc_ret

	vsrab		$tmp,$tweak,$seven		# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	vand		$tmp,$tmp,$eighty7
	vxor		$tweak,$tweak,$tmp

	le?vperm	$tweak,$tweak,$tweak,$leperm
	stvx_u		$tweak,0,$ivp

Lxts_enc_ret:
	mtspr		256,r12				# restore vrsave
	li		r3,0
	blr
	.long		0
	.byte		0,12,0x04,0,0x80,6,6,0
	.long		0
.size	.${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt

#
# xts_decrypt: AES-XTS decryption entry point.
#
# The input pointer arrives in r3 and is moved to inp; r3 is then reused
# for the return value: -1 when len is below 16 (nothing is processed),
# 0 on completion of the code shown here.
#
# key2 non-NULL: the IV read from ivp is encrypted under key2 to form the
#	initial tweak, and the tweak is not written back on exit.
# key2 NULL ("tweak chaining" mode): the IV is used as the tweak as is,
#	len is rounded up to a multiple of 16, and the follow-on tweak is
#	stored to ivp on exit.
#
# When the adjusted len is 96 or more, control is handed over to
# _aesp8_xts_decrypt6x.  A trailing partial block is handled by
# ciphertext stealing, see Ltail_xts_dec.
#
.globl	.${prefix}_xts_decrypt
	mr		$inp,r3				# reassign
	li		r3,-1
	${UCMP}i	$len,16
	bltlr-

	lis		r0,0xfff8			# r0 = 0xfff80000, new vrsave value
	mfspr		r12,256				# save vrsave
	li		r11,0				# zero, used as lvsl address below
	mtspr		256,r0

	andi.		r0,$len,15
	neg		r0,r0
	andi.		r0,r0,16			# r0 = 16 if len has a partial tail, else 0
	sub		$len,$len,r0			# hold one full block back for stealing

	vspltisb	$seven,0x07			# 0x070707..07
	le?lvsl		$leperm,r11,r11
	le?vspltisb	$tmp,0x0f
	le?vxor		$leperm,$leperm,$seven

	li		$idx,15
	lvx		$tweak,0,$ivp			# load [unaligned] iv
	lvsl		$inpperm,0,$ivp
	lvx		$inptail,$idx,$ivp
	le?vxor		$inpperm,$inpperm,$tmp
	vperm		$tweak,$tweak,$inptail,$inpperm

	neg		r11,$inp
	lvsr		$inpperm,0,r11			# prepare for unaligned load
	lvx		$inout,0,$inp
	addi		$inp,$inp,15			# 15 is not typo
	le?vxor		$inpperm,$inpperm,$tmp

	${UCMP}i	$key2,0				# key2==NULL?
	beq		Lxts_dec_no_key2

	?lvsl		$keyperm,0,$key2		# prepare for unaligned key
	lwz		$rounds,240($key2)		# round count sits at offset 240 of the key
	srwi		$rounds,$rounds,1
	subi		$rounds,$rounds,1		# loop count = rounds/2 - 1, two rounds per pass
	li		$idx,16

	lvx		$rndkey0,0,$key2
	lvx		$rndkey1,$idx,$key2
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$tweak,$tweak,$rndkey0
	lvx		$rndkey0,$idx,$key2
	addi		$idx,$idx,16
	mtctr		$rounds

	# Encrypt (vcipher, not vncipher) the IV under key2: the result is
	# the initial tweak.
Ltweak_xts_dec:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$tweak,$tweak,$rndkey1
	lvx		$rndkey1,$idx,$key2
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher		$tweak,$tweak,$rndkey0
	lvx		$rndkey0,$idx,$key2
	addi		$idx,$idx,16
	bdnz		Ltweak_xts_dec

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$tweak,$tweak,$rndkey1
	lvx		$rndkey1,$idx,$key2
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast	$tweak,$tweak,$rndkey0

	li		$ivp,0				# don't chain the tweak
	b		Lxts_dec

Lxts_dec_no_key2:
	neg		$idx,$len
	andi.		$idx,$idx,15
	add		$len,$len,$idx			# in "tweak chaining"
							# mode only complete
							# blocks are processed
Lxts_dec:
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16

	?lvsl		$keyperm,0,$key1		# prepare for unaligned key
	lwz		$rounds,240($key1)
	srwi		$rounds,$rounds,1
	subi		$rounds,$rounds,1		# loop count = rounds/2 - 1, as above
	li		$idx,16

	vslb		$eighty7,$seven,$seven		# 0x808080..80
	vor		$eighty7,$eighty7,$seven	# 0x878787..87
	vspltisb	$tmp,1				# 0x010101..01
	vsldoi		$eighty7,$eighty7,$tmp,15	# 0x870101..01

	${UCMP}i	$len,96
	bge		_aesp8_xts_decrypt6x

	# Set up the first block: assemble it from the two aligned loads,
	# then xor in the tweak and round key 0.
	lvx		$rndkey0,0,$key1
	lvx		$rndkey1,$idx,$key1
	addi		$idx,$idx,16
	vperm		$inout,$inout,$inptail,$inpperm
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$inout,$tweak
	vxor		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key1
	addi		$idx,$idx,16
	mtctr		$rounds

	${UCMP}i	$len,16
	blt		Ltail_xts_dec
	be?b		Loop_xts_dec

.align	5
Loop_xts_dec:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key1
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipher	$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key1
	addi		$idx,$idx,16
	bdnz		Loop_xts_dec

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key1
	li		$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	# The tweak is folded into the last round key, so the final round
	# also applies the output-side tweak xor.
	vxor		$rndkey0,$rndkey0,$tweak
	vncipherlast	$output,$inout,$rndkey0

	le?vperm	$tmp,$output,$output,$leperm
	be?nop
	le?stvx_u	$tmp,0,$out
	be?stvx_u	$output,0,$out
	addi		$out,$out,16

	subic.		$len,$len,16
	beq		Lxts_dec_done			# all input consumed

	# Prepare the next block while advancing the tweak.
	vmr		$inout,$inptail
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16
	lvx		$rndkey0,0,$key1
	lvx		$rndkey1,$idx,$key1
	addi		$idx,$idx,16

	vsrab		$tmp,$tweak,$seven		# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	vand		$tmp,$tmp,$eighty7
	vxor		$tweak,$tweak,$tmp

	vperm		$inout,$inout,$inptail,$inpperm
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$inout,$tweak
	vxor		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key1
	addi		$idx,$idx,16

	mtctr		$rounds
	${UCMP}i	$len,16
	bge		Loop_xts_dec			# else under 16 bytes left: steal

	# Ciphertext stealing.  Fewer than 16 bytes remain beyond the block
	# currently held in inout.  That block is decrypted with the
	# following tweak (tweak1); tweak itself is kept for the final,
	# reassembled block.
Ltail_xts_dec:
	vsrab		$tmp,$tweak,$seven		# next tweak value
	vaddubm		$tweak1,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	vand		$tmp,$tmp,$eighty7
	vxor		$tweak1,$tweak1,$tmp

	subi		$inp,$inp,16
	add		$inp,$inp,$len			# inp += len - 16, for the tail load below

	# inout already has tweak (and round key 0) xored in: take tweak
	# back out and put tweak1 in its place.
	vxor		$inout,$inout,$tweak		# :-(
	vxor		$inout,$inout,$tweak1		# :-)

Loop_xts_dec_short:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key1
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipher	$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key1
	addi		$idx,$idx,16
	bdnz		Loop_xts_dec_short

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key1
	li		$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$rndkey0,$rndkey0,$tweak1
	vncipherlast	$output,$inout,$rndkey0

	# Store the block at out without advancing out: the same 16 bytes
	# are rewritten once the stolen block has been decrypted.
	le?vperm	$tmp,$output,$output,$leperm
	be?nop
	le?stvx_u	$tmp,0,$out
	be?stvx_u	$output,0,$out

	vmr		$inout,$inptail
	lvx		$inptail,0,$inp
	#addi		$inp,$inp,16
	lvx		$rndkey0,0,$key1
	lvx		$rndkey1,$idx,$key1
	addi		$idx,$idx,16
	vperm		$inout,$inout,$inptail,$inpperm
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm

	# Build a byte-select mask from len and merge the tail input with
	# the block just decrypted.
	lvsr		$inpperm,0,$len			# $inpperm is no longer needed
	vxor		$inptail,$inptail,$inptail	# $inptail is no longer needed
	vspltisb	$tmp,-1
	vperm		$inptail,$inptail,$tmp,$inpperm
	vsel		$inout,$inout,$output,$inptail

	# Apply tweak and round key 0 to the merged block.
	vxor		$rndkey0,$rndkey0,$tweak
	vxor		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key1
	addi		$idx,$idx,16

	# Copy the first len bytes of the block just stored at out to
	# out+16: they become the partial final output block.
	subi		r11,$out,1
	mtctr		$len
	li		$len,16				# exactly one more block to go
Loop_xts_dec_steal:
	lbzu		r0,1(r11)
	stb		r0,16(r11)
	bdnz		Loop_xts_dec_steal

	mtctr		$rounds
	b		Loop_xts_dec			# one more time...

Lxts_dec_done:
	${UCMP}i	$ivp,0				# ivp was zeroed above if key2 was given
	beq		Lxts_dec_ret

	vsrab		$tmp,$tweak,$seven		# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	vand		$tmp,$tmp,$eighty7
	vxor		$tweak,$tweak,$tmp

	# Hand the follow-on tweak back through ivp.
	le?vperm	$tweak,$tweak,$tweak,$leperm
	stvx_u		$tweak,0,$ivp

Lxts_dec_ret:
	mtspr		256,r12				# restore vrsave
	li		r3,0
	blr
	.long		0
	.byte		0,12,0x04,0,0x80,6,6,0
	.long		0
.size	.${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
___
#########################################################################
{{	# Optimized XTS procedures					#
my $key_=$key2;
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
    $x00=0 if ($flavour =~ /osx/);
my ($in0, $in1, $in2, $in3, $in4, $in5 )=map("v$_",(0..5));
my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
my $rndkey0="v23";	# v24-v25 rotating buffer for first found keys
			# v26-v31 last 6 round keys
my ($keyperm)=($out0);	# aliases with "caller", redundant
assignment 2449*4882a593Smuzhiyunmy $taillen=$x70; 2450*4882a593Smuzhiyun 2451*4882a593Smuzhiyun$code.=<<___; 2452*4882a593Smuzhiyun.align 5 2453*4882a593Smuzhiyun_aesp8_xts_encrypt6x: 2454*4882a593Smuzhiyun $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) 2455*4882a593Smuzhiyun mflr r11 2456*4882a593Smuzhiyun li r7,`$FRAME+8*16+15` 2457*4882a593Smuzhiyun li r3,`$FRAME+8*16+31` 2458*4882a593Smuzhiyun $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp) 2459*4882a593Smuzhiyun stvx v20,r7,$sp # ABI says so 2460*4882a593Smuzhiyun addi r7,r7,32 2461*4882a593Smuzhiyun stvx v21,r3,$sp 2462*4882a593Smuzhiyun addi r3,r3,32 2463*4882a593Smuzhiyun stvx v22,r7,$sp 2464*4882a593Smuzhiyun addi r7,r7,32 2465*4882a593Smuzhiyun stvx v23,r3,$sp 2466*4882a593Smuzhiyun addi r3,r3,32 2467*4882a593Smuzhiyun stvx v24,r7,$sp 2468*4882a593Smuzhiyun addi r7,r7,32 2469*4882a593Smuzhiyun stvx v25,r3,$sp 2470*4882a593Smuzhiyun addi r3,r3,32 2471*4882a593Smuzhiyun stvx v26,r7,$sp 2472*4882a593Smuzhiyun addi r7,r7,32 2473*4882a593Smuzhiyun stvx v27,r3,$sp 2474*4882a593Smuzhiyun addi r3,r3,32 2475*4882a593Smuzhiyun stvx v28,r7,$sp 2476*4882a593Smuzhiyun addi r7,r7,32 2477*4882a593Smuzhiyun stvx v29,r3,$sp 2478*4882a593Smuzhiyun addi r3,r3,32 2479*4882a593Smuzhiyun stvx v30,r7,$sp 2480*4882a593Smuzhiyun stvx v31,r3,$sp 2481*4882a593Smuzhiyun li r0,-1 2482*4882a593Smuzhiyun stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave 2483*4882a593Smuzhiyun li $x10,0x10 2484*4882a593Smuzhiyun $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) 2485*4882a593Smuzhiyun li $x20,0x20 2486*4882a593Smuzhiyun $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) 2487*4882a593Smuzhiyun li $x30,0x30 2488*4882a593Smuzhiyun $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) 2489*4882a593Smuzhiyun li $x40,0x40 2490*4882a593Smuzhiyun $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) 2491*4882a593Smuzhiyun li $x50,0x50 2492*4882a593Smuzhiyun $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) 2493*4882a593Smuzhiyun li $x60,0x60 2494*4882a593Smuzhiyun $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) 
2495*4882a593Smuzhiyun li $x70,0x70 2496*4882a593Smuzhiyun mtspr 256,r0 2497*4882a593Smuzhiyun 2498*4882a593Smuzhiyun subi $rounds,$rounds,3 # -4 in total 2499*4882a593Smuzhiyun 2500*4882a593Smuzhiyun lvx $rndkey0,$x00,$key1 # load key schedule 2501*4882a593Smuzhiyun lvx v30,$x10,$key1 2502*4882a593Smuzhiyun addi $key1,$key1,0x20 2503*4882a593Smuzhiyun lvx v31,$x00,$key1 2504*4882a593Smuzhiyun ?vperm $rndkey0,$rndkey0,v30,$keyperm 2505*4882a593Smuzhiyun addi $key_,$sp,$FRAME+15 2506*4882a593Smuzhiyun mtctr $rounds 2507*4882a593Smuzhiyun 2508*4882a593SmuzhiyunLoad_xts_enc_key: 2509*4882a593Smuzhiyun ?vperm v24,v30,v31,$keyperm 2510*4882a593Smuzhiyun lvx v30,$x10,$key1 2511*4882a593Smuzhiyun addi $key1,$key1,0x20 2512*4882a593Smuzhiyun stvx v24,$x00,$key_ # off-load round[1] 2513*4882a593Smuzhiyun ?vperm v25,v31,v30,$keyperm 2514*4882a593Smuzhiyun lvx v31,$x00,$key1 2515*4882a593Smuzhiyun stvx v25,$x10,$key_ # off-load round[2] 2516*4882a593Smuzhiyun addi $key_,$key_,0x20 2517*4882a593Smuzhiyun bdnz Load_xts_enc_key 2518*4882a593Smuzhiyun 2519*4882a593Smuzhiyun lvx v26,$x10,$key1 2520*4882a593Smuzhiyun ?vperm v24,v30,v31,$keyperm 2521*4882a593Smuzhiyun lvx v27,$x20,$key1 2522*4882a593Smuzhiyun stvx v24,$x00,$key_ # off-load round[3] 2523*4882a593Smuzhiyun ?vperm v25,v31,v26,$keyperm 2524*4882a593Smuzhiyun lvx v28,$x30,$key1 2525*4882a593Smuzhiyun stvx v25,$x10,$key_ # off-load round[4] 2526*4882a593Smuzhiyun addi $key_,$sp,$FRAME+15 # rewind $key_ 2527*4882a593Smuzhiyun ?vperm v26,v26,v27,$keyperm 2528*4882a593Smuzhiyun lvx v29,$x40,$key1 2529*4882a593Smuzhiyun ?vperm v27,v27,v28,$keyperm 2530*4882a593Smuzhiyun lvx v30,$x50,$key1 2531*4882a593Smuzhiyun ?vperm v28,v28,v29,$keyperm 2532*4882a593Smuzhiyun lvx v31,$x60,$key1 2533*4882a593Smuzhiyun ?vperm v29,v29,v30,$keyperm 2534*4882a593Smuzhiyun lvx $twk5,$x70,$key1 # borrow $twk5 2535*4882a593Smuzhiyun ?vperm v30,v30,v31,$keyperm 2536*4882a593Smuzhiyun lvx v24,$x00,$key_ # pre-load round[1] 2537*4882a593Smuzhiyun 
?vperm v31,v31,$twk5,$keyperm 2538*4882a593Smuzhiyun lvx v25,$x10,$key_ # pre-load round[2] 2539*4882a593Smuzhiyun 2540*4882a593Smuzhiyun vperm $in0,$inout,$inptail,$inpperm 2541*4882a593Smuzhiyun subi $inp,$inp,31 # undo "caller" 2542*4882a593Smuzhiyun vxor $twk0,$tweak,$rndkey0 2543*4882a593Smuzhiyun vsrab $tmp,$tweak,$seven # next tweak value 2544*4882a593Smuzhiyun vaddubm $tweak,$tweak,$tweak 2545*4882a593Smuzhiyun vsldoi $tmp,$tmp,$tmp,15 2546*4882a593Smuzhiyun vand $tmp,$tmp,$eighty7 2547*4882a593Smuzhiyun vxor $out0,$in0,$twk0 2548*4882a593Smuzhiyun vxor $tweak,$tweak,$tmp 2549*4882a593Smuzhiyun 2550*4882a593Smuzhiyun lvx_u $in1,$x10,$inp 2551*4882a593Smuzhiyun vxor $twk1,$tweak,$rndkey0 2552*4882a593Smuzhiyun vsrab $tmp,$tweak,$seven # next tweak value 2553*4882a593Smuzhiyun vaddubm $tweak,$tweak,$tweak 2554*4882a593Smuzhiyun vsldoi $tmp,$tmp,$tmp,15 2555*4882a593Smuzhiyun le?vperm $in1,$in1,$in1,$leperm 2556*4882a593Smuzhiyun vand $tmp,$tmp,$eighty7 2557*4882a593Smuzhiyun vxor $out1,$in1,$twk1 2558*4882a593Smuzhiyun vxor $tweak,$tweak,$tmp 2559*4882a593Smuzhiyun 2560*4882a593Smuzhiyun lvx_u $in2,$x20,$inp 2561*4882a593Smuzhiyun andi. 
$taillen,$len,15 2562*4882a593Smuzhiyun vxor $twk2,$tweak,$rndkey0 2563*4882a593Smuzhiyun vsrab $tmp,$tweak,$seven # next tweak value 2564*4882a593Smuzhiyun vaddubm $tweak,$tweak,$tweak 2565*4882a593Smuzhiyun vsldoi $tmp,$tmp,$tmp,15 2566*4882a593Smuzhiyun le?vperm $in2,$in2,$in2,$leperm 2567*4882a593Smuzhiyun vand $tmp,$tmp,$eighty7 2568*4882a593Smuzhiyun vxor $out2,$in2,$twk2 2569*4882a593Smuzhiyun vxor $tweak,$tweak,$tmp 2570*4882a593Smuzhiyun 2571*4882a593Smuzhiyun lvx_u $in3,$x30,$inp 2572*4882a593Smuzhiyun sub $len,$len,$taillen 2573*4882a593Smuzhiyun vxor $twk3,$tweak,$rndkey0 2574*4882a593Smuzhiyun vsrab $tmp,$tweak,$seven # next tweak value 2575*4882a593Smuzhiyun vaddubm $tweak,$tweak,$tweak 2576*4882a593Smuzhiyun vsldoi $tmp,$tmp,$tmp,15 2577*4882a593Smuzhiyun le?vperm $in3,$in3,$in3,$leperm 2578*4882a593Smuzhiyun vand $tmp,$tmp,$eighty7 2579*4882a593Smuzhiyun vxor $out3,$in3,$twk3 2580*4882a593Smuzhiyun vxor $tweak,$tweak,$tmp 2581*4882a593Smuzhiyun 2582*4882a593Smuzhiyun lvx_u $in4,$x40,$inp 2583*4882a593Smuzhiyun subi $len,$len,0x60 2584*4882a593Smuzhiyun vxor $twk4,$tweak,$rndkey0 2585*4882a593Smuzhiyun vsrab $tmp,$tweak,$seven # next tweak value 2586*4882a593Smuzhiyun vaddubm $tweak,$tweak,$tweak 2587*4882a593Smuzhiyun vsldoi $tmp,$tmp,$tmp,15 2588*4882a593Smuzhiyun le?vperm $in4,$in4,$in4,$leperm 2589*4882a593Smuzhiyun vand $tmp,$tmp,$eighty7 2590*4882a593Smuzhiyun vxor $out4,$in4,$twk4 2591*4882a593Smuzhiyun vxor $tweak,$tweak,$tmp 2592*4882a593Smuzhiyun 2593*4882a593Smuzhiyun lvx_u $in5,$x50,$inp 2594*4882a593Smuzhiyun addi $inp,$inp,0x60 2595*4882a593Smuzhiyun vxor $twk5,$tweak,$rndkey0 2596*4882a593Smuzhiyun vsrab $tmp,$tweak,$seven # next tweak value 2597*4882a593Smuzhiyun vaddubm $tweak,$tweak,$tweak 2598*4882a593Smuzhiyun vsldoi $tmp,$tmp,$tmp,15 2599*4882a593Smuzhiyun le?vperm $in5,$in5,$in5,$leperm 2600*4882a593Smuzhiyun vand $tmp,$tmp,$eighty7 2601*4882a593Smuzhiyun vxor $out5,$in5,$twk5 2602*4882a593Smuzhiyun vxor $tweak,$tweak,$tmp 
2603*4882a593Smuzhiyun 2604*4882a593Smuzhiyun vxor v31,v31,$rndkey0 2605*4882a593Smuzhiyun mtctr $rounds 2606*4882a593Smuzhiyun b Loop_xts_enc6x 2607*4882a593Smuzhiyun 2608*4882a593Smuzhiyun.align 5 2609*4882a593SmuzhiyunLoop_xts_enc6x: 2610*4882a593Smuzhiyun vcipher $out0,$out0,v24 2611*4882a593Smuzhiyun vcipher $out1,$out1,v24 2612*4882a593Smuzhiyun vcipher $out2,$out2,v24 2613*4882a593Smuzhiyun vcipher $out3,$out3,v24 2614*4882a593Smuzhiyun vcipher $out4,$out4,v24 2615*4882a593Smuzhiyun vcipher $out5,$out5,v24 2616*4882a593Smuzhiyun lvx v24,$x20,$key_ # round[3] 2617*4882a593Smuzhiyun addi $key_,$key_,0x20 2618*4882a593Smuzhiyun 2619*4882a593Smuzhiyun vcipher $out0,$out0,v25 2620*4882a593Smuzhiyun vcipher $out1,$out1,v25 2621*4882a593Smuzhiyun vcipher $out2,$out2,v25 2622*4882a593Smuzhiyun vcipher $out3,$out3,v25 2623*4882a593Smuzhiyun vcipher $out4,$out4,v25 2624*4882a593Smuzhiyun vcipher $out5,$out5,v25 2625*4882a593Smuzhiyun lvx v25,$x10,$key_ # round[4] 2626*4882a593Smuzhiyun bdnz Loop_xts_enc6x 2627*4882a593Smuzhiyun 2628*4882a593Smuzhiyun subic $len,$len,96 # $len-=96 2629*4882a593Smuzhiyun vxor $in0,$twk0,v31 # xor with last round key 2630*4882a593Smuzhiyun vcipher $out0,$out0,v24 2631*4882a593Smuzhiyun vcipher $out1,$out1,v24 2632*4882a593Smuzhiyun vsrab $tmp,$tweak,$seven # next tweak value 2633*4882a593Smuzhiyun vxor $twk0,$tweak,$rndkey0 2634*4882a593Smuzhiyun vaddubm $tweak,$tweak,$tweak 2635*4882a593Smuzhiyun vcipher $out2,$out2,v24 2636*4882a593Smuzhiyun vcipher $out3,$out3,v24 2637*4882a593Smuzhiyun vsldoi $tmp,$tmp,$tmp,15 2638*4882a593Smuzhiyun vcipher $out4,$out4,v24 2639*4882a593Smuzhiyun vcipher $out5,$out5,v24 2640*4882a593Smuzhiyun 2641*4882a593Smuzhiyun subfe. 
r0,r0,r0 # borrow?-1:0 2642*4882a593Smuzhiyun vand $tmp,$tmp,$eighty7 2643*4882a593Smuzhiyun vcipher $out0,$out0,v25 2644*4882a593Smuzhiyun vcipher $out1,$out1,v25 2645*4882a593Smuzhiyun vxor $tweak,$tweak,$tmp 2646*4882a593Smuzhiyun vcipher $out2,$out2,v25 2647*4882a593Smuzhiyun vcipher $out3,$out3,v25 2648*4882a593Smuzhiyun vxor $in1,$twk1,v31 2649*4882a593Smuzhiyun vsrab $tmp,$tweak,$seven # next tweak value 2650*4882a593Smuzhiyun vxor $twk1,$tweak,$rndkey0 2651*4882a593Smuzhiyun vcipher $out4,$out4,v25 2652*4882a593Smuzhiyun vcipher $out5,$out5,v25 2653*4882a593Smuzhiyun 2654*4882a593Smuzhiyun and r0,r0,$len 2655*4882a593Smuzhiyun vaddubm $tweak,$tweak,$tweak 2656*4882a593Smuzhiyun vsldoi $tmp,$tmp,$tmp,15 2657*4882a593Smuzhiyun vcipher $out0,$out0,v26 2658*4882a593Smuzhiyun vcipher $out1,$out1,v26 2659*4882a593Smuzhiyun vand $tmp,$tmp,$eighty7 2660*4882a593Smuzhiyun vcipher $out2,$out2,v26 2661*4882a593Smuzhiyun vcipher $out3,$out3,v26 2662*4882a593Smuzhiyun vxor $tweak,$tweak,$tmp 2663*4882a593Smuzhiyun vcipher $out4,$out4,v26 2664*4882a593Smuzhiyun vcipher $out5,$out5,v26 2665*4882a593Smuzhiyun 2666*4882a593Smuzhiyun add $inp,$inp,r0 # $inp is adjusted in such 2667*4882a593Smuzhiyun # way that at exit from the 2668*4882a593Smuzhiyun # loop inX-in5 are loaded 2669*4882a593Smuzhiyun # with last "words" 2670*4882a593Smuzhiyun vxor $in2,$twk2,v31 2671*4882a593Smuzhiyun vsrab $tmp,$tweak,$seven # next tweak value 2672*4882a593Smuzhiyun vxor $twk2,$tweak,$rndkey0 2673*4882a593Smuzhiyun vaddubm $tweak,$tweak,$tweak 2674*4882a593Smuzhiyun vcipher $out0,$out0,v27 2675*4882a593Smuzhiyun vcipher $out1,$out1,v27 2676*4882a593Smuzhiyun vsldoi $tmp,$tmp,$tmp,15 2677*4882a593Smuzhiyun vcipher $out2,$out2,v27 2678*4882a593Smuzhiyun vcipher $out3,$out3,v27 2679*4882a593Smuzhiyun vand $tmp,$tmp,$eighty7 2680*4882a593Smuzhiyun vcipher $out4,$out4,v27 2681*4882a593Smuzhiyun vcipher $out5,$out5,v27 2682*4882a593Smuzhiyun 2683*4882a593Smuzhiyun addi $key_,$sp,$FRAME+15 # rewind 
$key_ 2684*4882a593Smuzhiyun vxor $tweak,$tweak,$tmp 2685*4882a593Smuzhiyun vcipher $out0,$out0,v28 2686*4882a593Smuzhiyun vcipher $out1,$out1,v28 2687*4882a593Smuzhiyun vxor $in3,$twk3,v31 2688*4882a593Smuzhiyun vsrab $tmp,$tweak,$seven # next tweak value 2689*4882a593Smuzhiyun vxor $twk3,$tweak,$rndkey0 2690*4882a593Smuzhiyun vcipher $out2,$out2,v28 2691*4882a593Smuzhiyun vcipher $out3,$out3,v28 2692*4882a593Smuzhiyun vaddubm $tweak,$tweak,$tweak 2693*4882a593Smuzhiyun vsldoi $tmp,$tmp,$tmp,15 2694*4882a593Smuzhiyun vcipher $out4,$out4,v28 2695*4882a593Smuzhiyun vcipher $out5,$out5,v28 2696*4882a593Smuzhiyun lvx v24,$x00,$key_ # re-pre-load round[1] 2697*4882a593Smuzhiyun vand $tmp,$tmp,$eighty7 2698*4882a593Smuzhiyun 2699*4882a593Smuzhiyun vcipher $out0,$out0,v29 2700*4882a593Smuzhiyun vcipher $out1,$out1,v29 2701*4882a593Smuzhiyun vxor $tweak,$tweak,$tmp 2702*4882a593Smuzhiyun vcipher $out2,$out2,v29 2703*4882a593Smuzhiyun vcipher $out3,$out3,v29 2704*4882a593Smuzhiyun vxor $in4,$twk4,v31 2705*4882a593Smuzhiyun vsrab $tmp,$tweak,$seven # next tweak value 2706*4882a593Smuzhiyun vxor $twk4,$tweak,$rndkey0 2707*4882a593Smuzhiyun vcipher $out4,$out4,v29 2708*4882a593Smuzhiyun vcipher $out5,$out5,v29 2709*4882a593Smuzhiyun lvx v25,$x10,$key_ # re-pre-load round[2] 2710*4882a593Smuzhiyun vaddubm $tweak,$tweak,$tweak 2711*4882a593Smuzhiyun vsldoi $tmp,$tmp,$tmp,15 2712*4882a593Smuzhiyun 2713*4882a593Smuzhiyun vcipher $out0,$out0,v30 2714*4882a593Smuzhiyun vcipher $out1,$out1,v30 2715*4882a593Smuzhiyun vand $tmp,$tmp,$eighty7 2716*4882a593Smuzhiyun vcipher $out2,$out2,v30 2717*4882a593Smuzhiyun vcipher $out3,$out3,v30 2718*4882a593Smuzhiyun vxor $tweak,$tweak,$tmp 2719*4882a593Smuzhiyun vcipher $out4,$out4,v30 2720*4882a593Smuzhiyun vcipher $out5,$out5,v30 2721*4882a593Smuzhiyun vxor $in5,$twk5,v31 2722*4882a593Smuzhiyun vsrab $tmp,$tweak,$seven # next tweak value 2723*4882a593Smuzhiyun vxor $twk5,$tweak,$rndkey0 2724*4882a593Smuzhiyun 2725*4882a593Smuzhiyun 
vcipherlast $out0,$out0,$in0 2726*4882a593Smuzhiyun lvx_u $in0,$x00,$inp # load next input block 2727*4882a593Smuzhiyun vaddubm $tweak,$tweak,$tweak 2728*4882a593Smuzhiyun vsldoi $tmp,$tmp,$tmp,15 2729*4882a593Smuzhiyun vcipherlast $out1,$out1,$in1 2730*4882a593Smuzhiyun lvx_u $in1,$x10,$inp 2731*4882a593Smuzhiyun vcipherlast $out2,$out2,$in2 2732*4882a593Smuzhiyun le?vperm $in0,$in0,$in0,$leperm 2733*4882a593Smuzhiyun lvx_u $in2,$x20,$inp 2734*4882a593Smuzhiyun vand $tmp,$tmp,$eighty7 2735*4882a593Smuzhiyun vcipherlast $out3,$out3,$in3 2736*4882a593Smuzhiyun le?vperm $in1,$in1,$in1,$leperm 2737*4882a593Smuzhiyun lvx_u $in3,$x30,$inp 2738*4882a593Smuzhiyun vcipherlast $out4,$out4,$in4 2739*4882a593Smuzhiyun le?vperm $in2,$in2,$in2,$leperm 2740*4882a593Smuzhiyun lvx_u $in4,$x40,$inp 2741*4882a593Smuzhiyun vxor $tweak,$tweak,$tmp 2742*4882a593Smuzhiyun vcipherlast $tmp,$out5,$in5 # last block might be needed 2743*4882a593Smuzhiyun # in stealing mode 2744*4882a593Smuzhiyun le?vperm $in3,$in3,$in3,$leperm 2745*4882a593Smuzhiyun lvx_u $in5,$x50,$inp 2746*4882a593Smuzhiyun addi $inp,$inp,0x60 2747*4882a593Smuzhiyun le?vperm $in4,$in4,$in4,$leperm 2748*4882a593Smuzhiyun le?vperm $in5,$in5,$in5,$leperm 2749*4882a593Smuzhiyun 2750*4882a593Smuzhiyun le?vperm $out0,$out0,$out0,$leperm 2751*4882a593Smuzhiyun le?vperm $out1,$out1,$out1,$leperm 2752*4882a593Smuzhiyun stvx_u $out0,$x00,$out # store output 2753*4882a593Smuzhiyun vxor $out0,$in0,$twk0 2754*4882a593Smuzhiyun le?vperm $out2,$out2,$out2,$leperm 2755*4882a593Smuzhiyun stvx_u $out1,$x10,$out 2756*4882a593Smuzhiyun vxor $out1,$in1,$twk1 2757*4882a593Smuzhiyun le?vperm $out3,$out3,$out3,$leperm 2758*4882a593Smuzhiyun stvx_u $out2,$x20,$out 2759*4882a593Smuzhiyun vxor $out2,$in2,$twk2 2760*4882a593Smuzhiyun le?vperm $out4,$out4,$out4,$leperm 2761*4882a593Smuzhiyun stvx_u $out3,$x30,$out 2762*4882a593Smuzhiyun vxor $out3,$in3,$twk3 2763*4882a593Smuzhiyun le?vperm $out5,$tmp,$tmp,$leperm 2764*4882a593Smuzhiyun stvx_u 
$out4,$x40,$out 2765*4882a593Smuzhiyun vxor $out4,$in4,$twk4 2766*4882a593Smuzhiyun le?stvx_u $out5,$x50,$out 2767*4882a593Smuzhiyun be?stvx_u $tmp, $x50,$out 2768*4882a593Smuzhiyun vxor $out5,$in5,$twk5 2769*4882a593Smuzhiyun addi $out,$out,0x60 2770*4882a593Smuzhiyun 2771*4882a593Smuzhiyun mtctr $rounds 2772*4882a593Smuzhiyun beq Loop_xts_enc6x # did $len-=96 borrow? 2773*4882a593Smuzhiyun 2774*4882a593Smuzhiyun addic. $len,$len,0x60 2775*4882a593Smuzhiyun beq Lxts_enc6x_zero 2776*4882a593Smuzhiyun cmpwi $len,0x20 2777*4882a593Smuzhiyun blt Lxts_enc6x_one 2778*4882a593Smuzhiyun nop 2779*4882a593Smuzhiyun beq Lxts_enc6x_two 2780*4882a593Smuzhiyun cmpwi $len,0x40 2781*4882a593Smuzhiyun blt Lxts_enc6x_three 2782*4882a593Smuzhiyun nop 2783*4882a593Smuzhiyun beq Lxts_enc6x_four 2784*4882a593Smuzhiyun 2785*4882a593SmuzhiyunLxts_enc6x_five: 2786*4882a593Smuzhiyun vxor $out0,$in1,$twk0 2787*4882a593Smuzhiyun vxor $out1,$in2,$twk1 2788*4882a593Smuzhiyun vxor $out2,$in3,$twk2 2789*4882a593Smuzhiyun vxor $out3,$in4,$twk3 2790*4882a593Smuzhiyun vxor $out4,$in5,$twk4 2791*4882a593Smuzhiyun 2792*4882a593Smuzhiyun bl _aesp8_xts_enc5x 2793*4882a593Smuzhiyun 2794*4882a593Smuzhiyun le?vperm $out0,$out0,$out0,$leperm 2795*4882a593Smuzhiyun vmr $twk0,$twk5 # unused tweak 2796*4882a593Smuzhiyun le?vperm $out1,$out1,$out1,$leperm 2797*4882a593Smuzhiyun stvx_u $out0,$x00,$out # store output 2798*4882a593Smuzhiyun le?vperm $out2,$out2,$out2,$leperm 2799*4882a593Smuzhiyun stvx_u $out1,$x10,$out 2800*4882a593Smuzhiyun le?vperm $out3,$out3,$out3,$leperm 2801*4882a593Smuzhiyun stvx_u $out2,$x20,$out 2802*4882a593Smuzhiyun vxor $tmp,$out4,$twk5 # last block prep for stealing 2803*4882a593Smuzhiyun le?vperm $out4,$out4,$out4,$leperm 2804*4882a593Smuzhiyun stvx_u $out3,$x30,$out 2805*4882a593Smuzhiyun stvx_u $out4,$x40,$out 2806*4882a593Smuzhiyun addi $out,$out,0x50 2807*4882a593Smuzhiyun bne Lxts_enc6x_steal 2808*4882a593Smuzhiyun b Lxts_enc6x_done 2809*4882a593Smuzhiyun 
2810*4882a593Smuzhiyun.align 4 2811*4882a593SmuzhiyunLxts_enc6x_four: 2812*4882a593Smuzhiyun vxor $out0,$in2,$twk0 2813*4882a593Smuzhiyun vxor $out1,$in3,$twk1 2814*4882a593Smuzhiyun vxor $out2,$in4,$twk2 2815*4882a593Smuzhiyun vxor $out3,$in5,$twk3 2816*4882a593Smuzhiyun vxor $out4,$out4,$out4 2817*4882a593Smuzhiyun 2818*4882a593Smuzhiyun bl _aesp8_xts_enc5x 2819*4882a593Smuzhiyun 2820*4882a593Smuzhiyun le?vperm $out0,$out0,$out0,$leperm 2821*4882a593Smuzhiyun vmr $twk0,$twk4 # unused tweak 2822*4882a593Smuzhiyun le?vperm $out1,$out1,$out1,$leperm 2823*4882a593Smuzhiyun stvx_u $out0,$x00,$out # store output 2824*4882a593Smuzhiyun le?vperm $out2,$out2,$out2,$leperm 2825*4882a593Smuzhiyun stvx_u $out1,$x10,$out 2826*4882a593Smuzhiyun vxor $tmp,$out3,$twk4 # last block prep for stealing 2827*4882a593Smuzhiyun le?vperm $out3,$out3,$out3,$leperm 2828*4882a593Smuzhiyun stvx_u $out2,$x20,$out 2829*4882a593Smuzhiyun stvx_u $out3,$x30,$out 2830*4882a593Smuzhiyun addi $out,$out,0x40 2831*4882a593Smuzhiyun bne Lxts_enc6x_steal 2832*4882a593Smuzhiyun b Lxts_enc6x_done 2833*4882a593Smuzhiyun 2834*4882a593Smuzhiyun.align 4 2835*4882a593SmuzhiyunLxts_enc6x_three: 2836*4882a593Smuzhiyun vxor $out0,$in3,$twk0 2837*4882a593Smuzhiyun vxor $out1,$in4,$twk1 2838*4882a593Smuzhiyun vxor $out2,$in5,$twk2 2839*4882a593Smuzhiyun vxor $out3,$out3,$out3 2840*4882a593Smuzhiyun vxor $out4,$out4,$out4 2841*4882a593Smuzhiyun 2842*4882a593Smuzhiyun bl _aesp8_xts_enc5x 2843*4882a593Smuzhiyun 2844*4882a593Smuzhiyun le?vperm $out0,$out0,$out0,$leperm 2845*4882a593Smuzhiyun vmr $twk0,$twk3 # unused tweak 2846*4882a593Smuzhiyun le?vperm $out1,$out1,$out1,$leperm 2847*4882a593Smuzhiyun stvx_u $out0,$x00,$out # store output 2848*4882a593Smuzhiyun vxor $tmp,$out2,$twk3 # last block prep for stealing 2849*4882a593Smuzhiyun le?vperm $out2,$out2,$out2,$leperm 2850*4882a593Smuzhiyun stvx_u $out1,$x10,$out 2851*4882a593Smuzhiyun stvx_u $out2,$x20,$out 2852*4882a593Smuzhiyun addi $out,$out,0x30 
2853*4882a593Smuzhiyun bne Lxts_enc6x_steal 2854*4882a593Smuzhiyun b Lxts_enc6x_done 2855*4882a593Smuzhiyun 2856*4882a593Smuzhiyun.align 4 2857*4882a593SmuzhiyunLxts_enc6x_two: 2858*4882a593Smuzhiyun vxor $out0,$in4,$twk0 2859*4882a593Smuzhiyun vxor $out1,$in5,$twk1 2860*4882a593Smuzhiyun vxor $out2,$out2,$out2 2861*4882a593Smuzhiyun vxor $out3,$out3,$out3 2862*4882a593Smuzhiyun vxor $out4,$out4,$out4 2863*4882a593Smuzhiyun 2864*4882a593Smuzhiyun bl _aesp8_xts_enc5x 2865*4882a593Smuzhiyun 2866*4882a593Smuzhiyun le?vperm $out0,$out0,$out0,$leperm 2867*4882a593Smuzhiyun vmr $twk0,$twk2 # unused tweak 2868*4882a593Smuzhiyun vxor $tmp,$out1,$twk2 # last block prep for stealing 2869*4882a593Smuzhiyun le?vperm $out1,$out1,$out1,$leperm 2870*4882a593Smuzhiyun stvx_u $out0,$x00,$out # store output 2871*4882a593Smuzhiyun stvx_u $out1,$x10,$out 2872*4882a593Smuzhiyun addi $out,$out,0x20 2873*4882a593Smuzhiyun bne Lxts_enc6x_steal 2874*4882a593Smuzhiyun b Lxts_enc6x_done 2875*4882a593Smuzhiyun 2876*4882a593Smuzhiyun.align 4 2877*4882a593SmuzhiyunLxts_enc6x_one: 2878*4882a593Smuzhiyun vxor $out0,$in5,$twk0 2879*4882a593Smuzhiyun nop 2880*4882a593SmuzhiyunLoop_xts_enc1x: 2881*4882a593Smuzhiyun vcipher $out0,$out0,v24 2882*4882a593Smuzhiyun lvx v24,$x20,$key_ # round[3] 2883*4882a593Smuzhiyun addi $key_,$key_,0x20 2884*4882a593Smuzhiyun 2885*4882a593Smuzhiyun vcipher $out0,$out0,v25 2886*4882a593Smuzhiyun lvx v25,$x10,$key_ # round[4] 2887*4882a593Smuzhiyun bdnz Loop_xts_enc1x 2888*4882a593Smuzhiyun 2889*4882a593Smuzhiyun add $inp,$inp,$taillen 2890*4882a593Smuzhiyun cmpwi $taillen,0 2891*4882a593Smuzhiyun vcipher $out0,$out0,v24 2892*4882a593Smuzhiyun 2893*4882a593Smuzhiyun subi $inp,$inp,16 2894*4882a593Smuzhiyun vcipher $out0,$out0,v25 2895*4882a593Smuzhiyun 2896*4882a593Smuzhiyun lvsr $inpperm,0,$taillen 2897*4882a593Smuzhiyun vcipher $out0,$out0,v26 2898*4882a593Smuzhiyun 2899*4882a593Smuzhiyun lvx_u $in0,0,$inp 2900*4882a593Smuzhiyun vcipher $out0,$out0,v27 
2901*4882a593Smuzhiyun 2902*4882a593Smuzhiyun addi $key_,$sp,$FRAME+15 # rewind $key_ 2903*4882a593Smuzhiyun vcipher $out0,$out0,v28 2904*4882a593Smuzhiyun lvx v24,$x00,$key_ # re-pre-load round[1] 2905*4882a593Smuzhiyun 2906*4882a593Smuzhiyun vcipher $out0,$out0,v29 2907*4882a593Smuzhiyun lvx v25,$x10,$key_ # re-pre-load round[2] 2908*4882a593Smuzhiyun vxor $twk0,$twk0,v31 2909*4882a593Smuzhiyun 2910*4882a593Smuzhiyun le?vperm $in0,$in0,$in0,$leperm 2911*4882a593Smuzhiyun vcipher $out0,$out0,v30 2912*4882a593Smuzhiyun 2913*4882a593Smuzhiyun vperm $in0,$in0,$in0,$inpperm 2914*4882a593Smuzhiyun vcipherlast $out0,$out0,$twk0 2915*4882a593Smuzhiyun 2916*4882a593Smuzhiyun vmr $twk0,$twk1 # unused tweak 2917*4882a593Smuzhiyun vxor $tmp,$out0,$twk1 # last block prep for stealing 2918*4882a593Smuzhiyun le?vperm $out0,$out0,$out0,$leperm 2919*4882a593Smuzhiyun stvx_u $out0,$x00,$out # store output 2920*4882a593Smuzhiyun addi $out,$out,0x10 2921*4882a593Smuzhiyun bne Lxts_enc6x_steal 2922*4882a593Smuzhiyun b Lxts_enc6x_done 2923*4882a593Smuzhiyun 2924*4882a593Smuzhiyun.align 4 2925*4882a593SmuzhiyunLxts_enc6x_zero: 2926*4882a593Smuzhiyun cmpwi $taillen,0 2927*4882a593Smuzhiyun beq Lxts_enc6x_done 2928*4882a593Smuzhiyun 2929*4882a593Smuzhiyun add $inp,$inp,$taillen 2930*4882a593Smuzhiyun subi $inp,$inp,16 2931*4882a593Smuzhiyun lvx_u $in0,0,$inp 2932*4882a593Smuzhiyun lvsr $inpperm,0,$taillen # $in5 is no more 2933*4882a593Smuzhiyun le?vperm $in0,$in0,$in0,$leperm 2934*4882a593Smuzhiyun vperm $in0,$in0,$in0,$inpperm 2935*4882a593Smuzhiyun vxor $tmp,$tmp,$twk0 2936*4882a593SmuzhiyunLxts_enc6x_steal: 2937*4882a593Smuzhiyun vxor $in0,$in0,$twk0 2938*4882a593Smuzhiyun vxor $out0,$out0,$out0 2939*4882a593Smuzhiyun vspltisb $out1,-1 2940*4882a593Smuzhiyun vperm $out0,$out0,$out1,$inpperm 2941*4882a593Smuzhiyun vsel $out0,$in0,$tmp,$out0 # $tmp is last block, remember? 
2942*4882a593Smuzhiyun 2943*4882a593Smuzhiyun subi r30,$out,17 2944*4882a593Smuzhiyun subi $out,$out,16 2945*4882a593Smuzhiyun mtctr $taillen 2946*4882a593SmuzhiyunLoop_xts_enc6x_steal: 2947*4882a593Smuzhiyun lbzu r0,1(r30) 2948*4882a593Smuzhiyun stb r0,16(r30) 2949*4882a593Smuzhiyun bdnz Loop_xts_enc6x_steal 2950*4882a593Smuzhiyun 2951*4882a593Smuzhiyun li $taillen,0 2952*4882a593Smuzhiyun mtctr $rounds 2953*4882a593Smuzhiyun b Loop_xts_enc1x # one more time... 2954*4882a593Smuzhiyun 2955*4882a593Smuzhiyun.align 4 2956*4882a593SmuzhiyunLxts_enc6x_done: 2957*4882a593Smuzhiyun ${UCMP}i $ivp,0 2958*4882a593Smuzhiyun beq Lxts_enc6x_ret 2959*4882a593Smuzhiyun 2960*4882a593Smuzhiyun vxor $tweak,$twk0,$rndkey0 2961*4882a593Smuzhiyun le?vperm $tweak,$tweak,$tweak,$leperm 2962*4882a593Smuzhiyun stvx_u $tweak,0,$ivp 2963*4882a593Smuzhiyun 2964*4882a593SmuzhiyunLxts_enc6x_ret: 2965*4882a593Smuzhiyun mtlr r11 2966*4882a593Smuzhiyun li r10,`$FRAME+15` 2967*4882a593Smuzhiyun li r11,`$FRAME+31` 2968*4882a593Smuzhiyun stvx $seven,r10,$sp # wipe copies of round keys 2969*4882a593Smuzhiyun addi r10,r10,32 2970*4882a593Smuzhiyun stvx $seven,r11,$sp 2971*4882a593Smuzhiyun addi r11,r11,32 2972*4882a593Smuzhiyun stvx $seven,r10,$sp 2973*4882a593Smuzhiyun addi r10,r10,32 2974*4882a593Smuzhiyun stvx $seven,r11,$sp 2975*4882a593Smuzhiyun addi r11,r11,32 2976*4882a593Smuzhiyun stvx $seven,r10,$sp 2977*4882a593Smuzhiyun addi r10,r10,32 2978*4882a593Smuzhiyun stvx $seven,r11,$sp 2979*4882a593Smuzhiyun addi r11,r11,32 2980*4882a593Smuzhiyun stvx $seven,r10,$sp 2981*4882a593Smuzhiyun addi r10,r10,32 2982*4882a593Smuzhiyun stvx $seven,r11,$sp 2983*4882a593Smuzhiyun addi r11,r11,32 2984*4882a593Smuzhiyun 2985*4882a593Smuzhiyun mtspr 256,$vrsave 2986*4882a593Smuzhiyun lvx v20,r10,$sp # ABI says so 2987*4882a593Smuzhiyun addi r10,r10,32 2988*4882a593Smuzhiyun lvx v21,r11,$sp 2989*4882a593Smuzhiyun addi r11,r11,32 2990*4882a593Smuzhiyun lvx v22,r10,$sp 2991*4882a593Smuzhiyun addi r10,r10,32 
2992*4882a593Smuzhiyun lvx v23,r11,$sp 2993*4882a593Smuzhiyun addi r11,r11,32 2994*4882a593Smuzhiyun lvx v24,r10,$sp 2995*4882a593Smuzhiyun addi r10,r10,32 2996*4882a593Smuzhiyun lvx v25,r11,$sp 2997*4882a593Smuzhiyun addi r11,r11,32 2998*4882a593Smuzhiyun lvx v26,r10,$sp 2999*4882a593Smuzhiyun addi r10,r10,32 3000*4882a593Smuzhiyun lvx v27,r11,$sp 3001*4882a593Smuzhiyun addi r11,r11,32 3002*4882a593Smuzhiyun lvx v28,r10,$sp 3003*4882a593Smuzhiyun addi r10,r10,32 3004*4882a593Smuzhiyun lvx v29,r11,$sp 3005*4882a593Smuzhiyun addi r11,r11,32 3006*4882a593Smuzhiyun lvx v30,r10,$sp 3007*4882a593Smuzhiyun lvx v31,r11,$sp 3008*4882a593Smuzhiyun $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) 3009*4882a593Smuzhiyun $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) 3010*4882a593Smuzhiyun $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) 3011*4882a593Smuzhiyun $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) 3012*4882a593Smuzhiyun $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) 3013*4882a593Smuzhiyun $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) 3014*4882a593Smuzhiyun addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` 3015*4882a593Smuzhiyun blr 3016*4882a593Smuzhiyun .long 0 3017*4882a593Smuzhiyun .byte 0,12,0x04,1,0x80,6,6,0 3018*4882a593Smuzhiyun .long 0 3019*4882a593Smuzhiyun 3020*4882a593Smuzhiyun.align 5 3021*4882a593Smuzhiyun_aesp8_xts_enc5x: 3022*4882a593Smuzhiyun vcipher $out0,$out0,v24 3023*4882a593Smuzhiyun vcipher $out1,$out1,v24 3024*4882a593Smuzhiyun vcipher $out2,$out2,v24 3025*4882a593Smuzhiyun vcipher $out3,$out3,v24 3026*4882a593Smuzhiyun vcipher $out4,$out4,v24 3027*4882a593Smuzhiyun lvx v24,$x20,$key_ # round[3] 3028*4882a593Smuzhiyun addi $key_,$key_,0x20 3029*4882a593Smuzhiyun # Note on _aesp8_xts_enc5x: runs the AES rounds on five blocks $out0-$out4 side by side. Each pass of the loop (label to bdnz) applies two rounds, v24 then v25, and reloads both from the round-key copies at $key_. CTR has to be set before entry - callers in this file issue mtctr $rounds ahead of the bl. 3030*4882a593Smuzhiyun vcipher $out0,$out0,v25 3031*4882a593Smuzhiyun vcipher $out1,$out1,v25 3032*4882a593Smuzhiyun vcipher $out2,$out2,v25 3033*4882a593Smuzhiyun vcipher $out3,$out3,v25 3034*4882a593Smuzhiyun vcipher $out4,$out4,v25 3035*4882a593Smuzhiyun lvx v25,$x10,$key_ # round[4] 3036*4882a593Smuzhiyun bdnz _aesp8_xts_enc5x # next pass while CTR is non-zero
3037*4882a593Smuzhiyun 3038*4882a593Smuzhiyun add $inp,$inp,$taillen 3039*4882a593Smuzhiyun cmpwi $taillen,0 3040*4882a593Smuzhiyun vcipher $out0,$out0,v24 3041*4882a593Smuzhiyun vcipher $out1,$out1,v24 3042*4882a593Smuzhiyun vcipher $out2,$out2,v24 3043*4882a593Smuzhiyun vcipher $out3,$out3,v24 3044*4882a593Smuzhiyun vcipher $out4,$out4,v24 3045*4882a593Smuzhiyun 3046*4882a593Smuzhiyun subi $inp,$inp,16 3047*4882a593Smuzhiyun vcipher $out0,$out0,v25 3048*4882a593Smuzhiyun vcipher $out1,$out1,v25 3049*4882a593Smuzhiyun vcipher $out2,$out2,v25 3050*4882a593Smuzhiyun vcipher $out3,$out3,v25 3051*4882a593Smuzhiyun vcipher $out4,$out4,v25 3052*4882a593Smuzhiyun vxor $twk0,$twk0,v31 3053*4882a593Smuzhiyun 3054*4882a593Smuzhiyun vcipher $out0,$out0,v26 3055*4882a593Smuzhiyun lvsr $inpperm,r0,$taillen # $in5 is no more; $inpperm = permute control generated from $taillen 3056*4882a593Smuzhiyun vcipher $out1,$out1,v26 3057*4882a593Smuzhiyun vcipher $out2,$out2,v26 3058*4882a593Smuzhiyun vcipher $out3,$out3,v26 3059*4882a593Smuzhiyun vcipher $out4,$out4,v26 3060*4882a593Smuzhiyun vxor $in1,$twk1,v31 # $in1 = $twk1 merged with v31, consumed by vcipherlast at the end of this routine. NOTE(review): v31 is presumably the last round key already xored with $rndkey0, as the decrypt prologue in this file sets it up - confirm for the encrypt side 3061*4882a593Smuzhiyun 3062*4882a593Smuzhiyun vcipher $out0,$out0,v27 3063*4882a593Smuzhiyun lvx_u $in0,0,$inp # unaligned 16-byte load from $inp, which this routine advanced by $taillen and then backed up by 16; presumably the last 16 input bytes - confirm against the caller 3064*4882a593Smuzhiyun vcipher $out1,$out1,v27 3065*4882a593Smuzhiyun vcipher $out2,$out2,v27 3066*4882a593Smuzhiyun vcipher $out3,$out3,v27 3067*4882a593Smuzhiyun vcipher $out4,$out4,v27 3068*4882a593Smuzhiyun vxor $in2,$twk2,v31 # same merge for $twk2 3069*4882a593Smuzhiyun 3070*4882a593Smuzhiyun addi $key_,$sp,$FRAME+15 # rewind $key_ 3071*4882a593Smuzhiyun vcipher $out0,$out0,v28 3072*4882a593Smuzhiyun vcipher $out1,$out1,v28 3073*4882a593Smuzhiyun vcipher $out2,$out2,v28 3074*4882a593Smuzhiyun vcipher $out3,$out3,v28 3075*4882a593Smuzhiyun vcipher $out4,$out4,v28 3076*4882a593Smuzhiyun lvx v24,$x00,$key_ # re-pre-load round[1] 3077*4882a593Smuzhiyun vxor $in3,$twk3,v31 # same merge for $twk3 3078*4882a593Smuzhiyun 3079*4882a593Smuzhiyun vcipher $out0,$out0,v29 3080*4882a593Smuzhiyun le?vperm $in0,$in0,$in0,$leperm 3081*4882a593Smuzhiyun vcipher $out1,$out1,v29
3082*4882a593Smuzhiyun vcipher $out2,$out2,v29 3083*4882a593Smuzhiyun vcipher $out3,$out3,v29 3084*4882a593Smuzhiyun vcipher $out4,$out4,v29 3085*4882a593Smuzhiyun lvx v25,$x10,$key_ # re-pre-load round[2] 3086*4882a593Smuzhiyun vxor $in4,$twk4,v31 # same merge for $twk4 3087*4882a593Smuzhiyun 3088*4882a593Smuzhiyun vcipher $out0,$out0,v30 3089*4882a593Smuzhiyun vperm $in0,$in0,$in0,$inpperm # permute the loaded vector by $inpperm; the Lxts_enc6x_steal path in the caller reads $in0 3090*4882a593Smuzhiyun vcipher $out1,$out1,v30 3091*4882a593Smuzhiyun vcipher $out2,$out2,v30 3092*4882a593Smuzhiyun vcipher $out3,$out3,v30 3093*4882a593Smuzhiyun vcipher $out4,$out4,v30 3094*4882a593Smuzhiyun 3095*4882a593Smuzhiyun vcipherlast $out0,$out0,$twk0 # final round for block 0; $twk0 was xored with v31 earlier in this routine, so it comes back modified (callers overwrite it with vmr afterwards) 3096*4882a593Smuzhiyun vcipherlast $out1,$out1,$in1 # final rounds for blocks 1-4 take the merged values held in $in1-$in4 3097*4882a593Smuzhiyun vcipherlast $out2,$out2,$in2 3098*4882a593Smuzhiyun vcipherlast $out3,$out3,$in3 3099*4882a593Smuzhiyun vcipherlast $out4,$out4,$in4 3100*4882a593Smuzhiyun blr # CR0 still carries the cmpwi $taillen,0 result from this routine; callers branch on it with bne to reach the steal path 3101*4882a593Smuzhiyun .long 0 3102*4882a593Smuzhiyun .byte 0,12,0x14,0,0,0,0,0 3103*4882a593Smuzhiyun 3104*4882a593Smuzhiyun.align 5 3105*4882a593Smuzhiyun_aesp8_xts_decrypt6x: 3106*4882a593Smuzhiyun $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) 3107*4882a593Smuzhiyun mflr r11 3108*4882a593Smuzhiyun li r7,`$FRAME+8*16+15` 3109*4882a593Smuzhiyun li r3,`$FRAME+8*16+31` 3110*4882a593Smuzhiyun $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp) 3111*4882a593Smuzhiyun stvx v20,r7,$sp # ABI says so 3112*4882a593Smuzhiyun addi r7,r7,32 3113*4882a593Smuzhiyun stvx v21,r3,$sp 3114*4882a593Smuzhiyun addi r3,r3,32 3115*4882a593Smuzhiyun stvx v22,r7,$sp 3116*4882a593Smuzhiyun addi r7,r7,32 3117*4882a593Smuzhiyun stvx v23,r3,$sp 3118*4882a593Smuzhiyun addi r3,r3,32 3119*4882a593Smuzhiyun stvx v24,r7,$sp 3120*4882a593Smuzhiyun addi r7,r7,32 3121*4882a593Smuzhiyun stvx v25,r3,$sp 3122*4882a593Smuzhiyun addi r3,r3,32 3123*4882a593Smuzhiyun stvx v26,r7,$sp 3124*4882a593Smuzhiyun addi r7,r7,32 3125*4882a593Smuzhiyun stvx v27,r3,$sp 3126*4882a593Smuzhiyun addi r3,r3,32 3127*4882a593Smuzhiyun stvx v28,r7,$sp 3128*4882a593Smuzhiyun addi
r7,r7,32 3129*4882a593Smuzhiyun stvx v29,r3,$sp 3130*4882a593Smuzhiyun addi r3,r3,32 3131*4882a593Smuzhiyun stvx v30,r7,$sp 3132*4882a593Smuzhiyun stvx v31,r3,$sp 3133*4882a593Smuzhiyun li r0,-1 3134*4882a593Smuzhiyun stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave 3135*4882a593Smuzhiyun li $x10,0x10 3136*4882a593Smuzhiyun $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) 3137*4882a593Smuzhiyun li $x20,0x20 3138*4882a593Smuzhiyun $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) 3139*4882a593Smuzhiyun li $x30,0x30 3140*4882a593Smuzhiyun $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) 3141*4882a593Smuzhiyun li $x40,0x40 3142*4882a593Smuzhiyun $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) 3143*4882a593Smuzhiyun li $x50,0x50 3144*4882a593Smuzhiyun $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) 3145*4882a593Smuzhiyun li $x60,0x60 3146*4882a593Smuzhiyun $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) 3147*4882a593Smuzhiyun li $x70,0x70 3148*4882a593Smuzhiyun mtspr 256,r0 3149*4882a593Smuzhiyun 3150*4882a593Smuzhiyun subi $rounds,$rounds,3 # -4 in total 3151*4882a593Smuzhiyun 3152*4882a593Smuzhiyun lvx $rndkey0,$x00,$key1 # load key schedule 3153*4882a593Smuzhiyun lvx v30,$x10,$key1 3154*4882a593Smuzhiyun addi $key1,$key1,0x20 3155*4882a593Smuzhiyun lvx v31,$x00,$key1 3156*4882a593Smuzhiyun ?vperm $rndkey0,$rndkey0,v30,$keyperm 3157*4882a593Smuzhiyun addi $key_,$sp,$FRAME+15 3158*4882a593Smuzhiyun mtctr $rounds 3159*4882a593Smuzhiyun 3160*4882a593SmuzhiyunLoad_xts_dec_key: 3161*4882a593Smuzhiyun ?vperm v24,v30,v31,$keyperm 3162*4882a593Smuzhiyun lvx v30,$x10,$key1 3163*4882a593Smuzhiyun addi $key1,$key1,0x20 3164*4882a593Smuzhiyun stvx v24,$x00,$key_ # off-load round[1] 3165*4882a593Smuzhiyun ?vperm v25,v31,v30,$keyperm 3166*4882a593Smuzhiyun lvx v31,$x00,$key1 3167*4882a593Smuzhiyun stvx v25,$x10,$key_ # off-load round[2] 3168*4882a593Smuzhiyun addi $key_,$key_,0x20 3169*4882a593Smuzhiyun bdnz Load_xts_dec_key 3170*4882a593Smuzhiyun 3171*4882a593Smuzhiyun lvx v26,$x10,$key1 3172*4882a593Smuzhiyun ?vperm 
v24,v30,v31,$keyperm 3173*4882a593Smuzhiyun lvx v27,$x20,$key1 3174*4882a593Smuzhiyun stvx v24,$x00,$key_ # off-load round[3] 3175*4882a593Smuzhiyun ?vperm v25,v31,v26,$keyperm 3176*4882a593Smuzhiyun lvx v28,$x30,$key1 3177*4882a593Smuzhiyun stvx v25,$x10,$key_ # off-load round[4] 3178*4882a593Smuzhiyun addi $key_,$sp,$FRAME+15 # rewind $key_ 3179*4882a593Smuzhiyun ?vperm v26,v26,v27,$keyperm 3180*4882a593Smuzhiyun lvx v29,$x40,$key1 3181*4882a593Smuzhiyun ?vperm v27,v27,v28,$keyperm 3182*4882a593Smuzhiyun lvx v30,$x50,$key1 3183*4882a593Smuzhiyun ?vperm v28,v28,v29,$keyperm 3184*4882a593Smuzhiyun lvx v31,$x60,$key1 3185*4882a593Smuzhiyun ?vperm v29,v29,v30,$keyperm 3186*4882a593Smuzhiyun lvx $twk5,$x70,$key1 # borrow $twk5 3187*4882a593Smuzhiyun ?vperm v30,v30,v31,$keyperm 3188*4882a593Smuzhiyun lvx v24,$x00,$key_ # pre-load round[1] 3189*4882a593Smuzhiyun ?vperm v31,v31,$twk5,$keyperm 3190*4882a593Smuzhiyun lvx v25,$x10,$key_ # pre-load round[2] 3191*4882a593Smuzhiyun 3192*4882a593Smuzhiyun vperm $in0,$inout,$inptail,$inpperm 3193*4882a593Smuzhiyun subi $inp,$inp,31 # undo "caller" 3194*4882a593Smuzhiyun vxor $twk0,$tweak,$rndkey0 3195*4882a593Smuzhiyun vsrab $tmp,$tweak,$seven # next tweak value 3196*4882a593Smuzhiyun vaddubm $tweak,$tweak,$tweak 3197*4882a593Smuzhiyun vsldoi $tmp,$tmp,$tmp,15 3198*4882a593Smuzhiyun vand $tmp,$tmp,$eighty7 3199*4882a593Smuzhiyun vxor $out0,$in0,$twk0 3200*4882a593Smuzhiyun vxor $tweak,$tweak,$tmp 3201*4882a593Smuzhiyun 3202*4882a593Smuzhiyun lvx_u $in1,$x10,$inp 3203*4882a593Smuzhiyun vxor $twk1,$tweak,$rndkey0 3204*4882a593Smuzhiyun vsrab $tmp,$tweak,$seven # next tweak value 3205*4882a593Smuzhiyun vaddubm $tweak,$tweak,$tweak 3206*4882a593Smuzhiyun vsldoi $tmp,$tmp,$tmp,15 3207*4882a593Smuzhiyun le?vperm $in1,$in1,$in1,$leperm 3208*4882a593Smuzhiyun vand $tmp,$tmp,$eighty7 3209*4882a593Smuzhiyun vxor $out1,$in1,$twk1 3210*4882a593Smuzhiyun vxor $tweak,$tweak,$tmp 3211*4882a593Smuzhiyun 3212*4882a593Smuzhiyun lvx_u 
$in2,$x20,$inp 3213*4882a593Smuzhiyun andi. $taillen,$len,15 3214*4882a593Smuzhiyun vxor $twk2,$tweak,$rndkey0 3215*4882a593Smuzhiyun vsrab $tmp,$tweak,$seven # next tweak value 3216*4882a593Smuzhiyun vaddubm $tweak,$tweak,$tweak 3217*4882a593Smuzhiyun vsldoi $tmp,$tmp,$tmp,15 3218*4882a593Smuzhiyun le?vperm $in2,$in2,$in2,$leperm 3219*4882a593Smuzhiyun vand $tmp,$tmp,$eighty7 3220*4882a593Smuzhiyun vxor $out2,$in2,$twk2 3221*4882a593Smuzhiyun vxor $tweak,$tweak,$tmp 3222*4882a593Smuzhiyun 3223*4882a593Smuzhiyun lvx_u $in3,$x30,$inp 3224*4882a593Smuzhiyun sub $len,$len,$taillen 3225*4882a593Smuzhiyun vxor $twk3,$tweak,$rndkey0 3226*4882a593Smuzhiyun vsrab $tmp,$tweak,$seven # next tweak value 3227*4882a593Smuzhiyun vaddubm $tweak,$tweak,$tweak 3228*4882a593Smuzhiyun vsldoi $tmp,$tmp,$tmp,15 3229*4882a593Smuzhiyun le?vperm $in3,$in3,$in3,$leperm 3230*4882a593Smuzhiyun vand $tmp,$tmp,$eighty7 3231*4882a593Smuzhiyun vxor $out3,$in3,$twk3 3232*4882a593Smuzhiyun vxor $tweak,$tweak,$tmp 3233*4882a593Smuzhiyun 3234*4882a593Smuzhiyun lvx_u $in4,$x40,$inp 3235*4882a593Smuzhiyun subi $len,$len,0x60 3236*4882a593Smuzhiyun vxor $twk4,$tweak,$rndkey0 3237*4882a593Smuzhiyun vsrab $tmp,$tweak,$seven # next tweak value 3238*4882a593Smuzhiyun vaddubm $tweak,$tweak,$tweak 3239*4882a593Smuzhiyun vsldoi $tmp,$tmp,$tmp,15 3240*4882a593Smuzhiyun le?vperm $in4,$in4,$in4,$leperm 3241*4882a593Smuzhiyun vand $tmp,$tmp,$eighty7 3242*4882a593Smuzhiyun vxor $out4,$in4,$twk4 3243*4882a593Smuzhiyun vxor $tweak,$tweak,$tmp 3244*4882a593Smuzhiyun 3245*4882a593Smuzhiyun lvx_u $in5,$x50,$inp 3246*4882a593Smuzhiyun addi $inp,$inp,0x60 3247*4882a593Smuzhiyun vxor $twk5,$tweak,$rndkey0 3248*4882a593Smuzhiyun vsrab $tmp,$tweak,$seven # next tweak value 3249*4882a593Smuzhiyun vaddubm $tweak,$tweak,$tweak 3250*4882a593Smuzhiyun vsldoi $tmp,$tmp,$tmp,15 3251*4882a593Smuzhiyun le?vperm $in5,$in5,$in5,$leperm 3252*4882a593Smuzhiyun vand $tmp,$tmp,$eighty7 3253*4882a593Smuzhiyun vxor $out5,$in5,$twk5 
3254*4882a593Smuzhiyun vxor $tweak,$tweak,$tmp 3255*4882a593Smuzhiyun 3256*4882a593Smuzhiyun vxor v31,v31,$rndkey0 3257*4882a593Smuzhiyun mtctr $rounds 3258*4882a593Smuzhiyun b Loop_xts_dec6x 3259*4882a593Smuzhiyun 3260*4882a593Smuzhiyun.align 5 3261*4882a593SmuzhiyunLoop_xts_dec6x: 3262*4882a593Smuzhiyun vncipher $out0,$out0,v24 3263*4882a593Smuzhiyun vncipher $out1,$out1,v24 3264*4882a593Smuzhiyun vncipher $out2,$out2,v24 3265*4882a593Smuzhiyun vncipher $out3,$out3,v24 3266*4882a593Smuzhiyun vncipher $out4,$out4,v24 3267*4882a593Smuzhiyun vncipher $out5,$out5,v24 3268*4882a593Smuzhiyun lvx v24,$x20,$key_ # round[3] 3269*4882a593Smuzhiyun addi $key_,$key_,0x20 3270*4882a593Smuzhiyun 3271*4882a593Smuzhiyun vncipher $out0,$out0,v25 3272*4882a593Smuzhiyun vncipher $out1,$out1,v25 3273*4882a593Smuzhiyun vncipher $out2,$out2,v25 3274*4882a593Smuzhiyun vncipher $out3,$out3,v25 3275*4882a593Smuzhiyun vncipher $out4,$out4,v25 3276*4882a593Smuzhiyun vncipher $out5,$out5,v25 3277*4882a593Smuzhiyun lvx v25,$x10,$key_ # round[4] 3278*4882a593Smuzhiyun bdnz Loop_xts_dec6x 3279*4882a593Smuzhiyun 3280*4882a593Smuzhiyun subic $len,$len,96 # $len-=96 3281*4882a593Smuzhiyun vxor $in0,$twk0,v31 # xor with last round key 3282*4882a593Smuzhiyun vncipher $out0,$out0,v24 3283*4882a593Smuzhiyun vncipher $out1,$out1,v24 3284*4882a593Smuzhiyun vsrab $tmp,$tweak,$seven # next tweak value 3285*4882a593Smuzhiyun vxor $twk0,$tweak,$rndkey0 3286*4882a593Smuzhiyun vaddubm $tweak,$tweak,$tweak 3287*4882a593Smuzhiyun vncipher $out2,$out2,v24 3288*4882a593Smuzhiyun vncipher $out3,$out3,v24 3289*4882a593Smuzhiyun vsldoi $tmp,$tmp,$tmp,15 3290*4882a593Smuzhiyun vncipher $out4,$out4,v24 3291*4882a593Smuzhiyun vncipher $out5,$out5,v24 3292*4882a593Smuzhiyun 3293*4882a593Smuzhiyun subfe. 
r0,r0,r0 # borrow?-1:0 3294*4882a593Smuzhiyun vand $tmp,$tmp,$eighty7 3295*4882a593Smuzhiyun vncipher $out0,$out0,v25 3296*4882a593Smuzhiyun vncipher $out1,$out1,v25 3297*4882a593Smuzhiyun vxor $tweak,$tweak,$tmp 3298*4882a593Smuzhiyun vncipher $out2,$out2,v25 3299*4882a593Smuzhiyun vncipher $out3,$out3,v25 3300*4882a593Smuzhiyun vxor $in1,$twk1,v31 3301*4882a593Smuzhiyun vsrab $tmp,$tweak,$seven # next tweak value 3302*4882a593Smuzhiyun vxor $twk1,$tweak,$rndkey0 3303*4882a593Smuzhiyun vncipher $out4,$out4,v25 3304*4882a593Smuzhiyun vncipher $out5,$out5,v25 3305*4882a593Smuzhiyun 3306*4882a593Smuzhiyun and r0,r0,$len 3307*4882a593Smuzhiyun vaddubm $tweak,$tweak,$tweak 3308*4882a593Smuzhiyun vsldoi $tmp,$tmp,$tmp,15 3309*4882a593Smuzhiyun vncipher $out0,$out0,v26 3310*4882a593Smuzhiyun vncipher $out1,$out1,v26 3311*4882a593Smuzhiyun vand $tmp,$tmp,$eighty7 3312*4882a593Smuzhiyun vncipher $out2,$out2,v26 3313*4882a593Smuzhiyun vncipher $out3,$out3,v26 3314*4882a593Smuzhiyun vxor $tweak,$tweak,$tmp 3315*4882a593Smuzhiyun vncipher $out4,$out4,v26 3316*4882a593Smuzhiyun vncipher $out5,$out5,v26 3317*4882a593Smuzhiyun 3318*4882a593Smuzhiyun add $inp,$inp,r0 # $inp is adjusted in such 3319*4882a593Smuzhiyun # way that at exit from the 3320*4882a593Smuzhiyun # loop inX-in5 are loaded 3321*4882a593Smuzhiyun # with last "words" 3322*4882a593Smuzhiyun vxor $in2,$twk2,v31 3323*4882a593Smuzhiyun vsrab $tmp,$tweak,$seven # next tweak value 3324*4882a593Smuzhiyun vxor $twk2,$tweak,$rndkey0 3325*4882a593Smuzhiyun vaddubm $tweak,$tweak,$tweak 3326*4882a593Smuzhiyun vncipher $out0,$out0,v27 3327*4882a593Smuzhiyun vncipher $out1,$out1,v27 3328*4882a593Smuzhiyun vsldoi $tmp,$tmp,$tmp,15 3329*4882a593Smuzhiyun vncipher $out2,$out2,v27 3330*4882a593Smuzhiyun vncipher $out3,$out3,v27 3331*4882a593Smuzhiyun vand $tmp,$tmp,$eighty7 3332*4882a593Smuzhiyun vncipher $out4,$out4,v27 3333*4882a593Smuzhiyun vncipher $out5,$out5,v27 3334*4882a593Smuzhiyun 3335*4882a593Smuzhiyun addi 
$key_,$sp,$FRAME+15 # rewind $key_ 3336*4882a593Smuzhiyun vxor $tweak,$tweak,$tmp 3337*4882a593Smuzhiyun vncipher $out0,$out0,v28 3338*4882a593Smuzhiyun vncipher $out1,$out1,v28 3339*4882a593Smuzhiyun vxor $in3,$twk3,v31 3340*4882a593Smuzhiyun vsrab $tmp,$tweak,$seven # next tweak value 3341*4882a593Smuzhiyun vxor $twk3,$tweak,$rndkey0 3342*4882a593Smuzhiyun vncipher $out2,$out2,v28 3343*4882a593Smuzhiyun vncipher $out3,$out3,v28 3344*4882a593Smuzhiyun vaddubm $tweak,$tweak,$tweak 3345*4882a593Smuzhiyun vsldoi $tmp,$tmp,$tmp,15 3346*4882a593Smuzhiyun vncipher $out4,$out4,v28 3347*4882a593Smuzhiyun vncipher $out5,$out5,v28 3348*4882a593Smuzhiyun lvx v24,$x00,$key_ # re-pre-load round[1] 3349*4882a593Smuzhiyun vand $tmp,$tmp,$eighty7 3350*4882a593Smuzhiyun 3351*4882a593Smuzhiyun vncipher $out0,$out0,v29 3352*4882a593Smuzhiyun vncipher $out1,$out1,v29 3353*4882a593Smuzhiyun vxor $tweak,$tweak,$tmp 3354*4882a593Smuzhiyun vncipher $out2,$out2,v29 3355*4882a593Smuzhiyun vncipher $out3,$out3,v29 3356*4882a593Smuzhiyun vxor $in4,$twk4,v31 3357*4882a593Smuzhiyun vsrab $tmp,$tweak,$seven # next tweak value 3358*4882a593Smuzhiyun vxor $twk4,$tweak,$rndkey0 3359*4882a593Smuzhiyun vncipher $out4,$out4,v29 3360*4882a593Smuzhiyun vncipher $out5,$out5,v29 3361*4882a593Smuzhiyun lvx v25,$x10,$key_ # re-pre-load round[2] 3362*4882a593Smuzhiyun vaddubm $tweak,$tweak,$tweak 3363*4882a593Smuzhiyun vsldoi $tmp,$tmp,$tmp,15 3364*4882a593Smuzhiyun 3365*4882a593Smuzhiyun vncipher $out0,$out0,v30 3366*4882a593Smuzhiyun vncipher $out1,$out1,v30 3367*4882a593Smuzhiyun vand $tmp,$tmp,$eighty7 3368*4882a593Smuzhiyun vncipher $out2,$out2,v30 3369*4882a593Smuzhiyun vncipher $out3,$out3,v30 3370*4882a593Smuzhiyun vxor $tweak,$tweak,$tmp 3371*4882a593Smuzhiyun vncipher $out4,$out4,v30 3372*4882a593Smuzhiyun vncipher $out5,$out5,v30 3373*4882a593Smuzhiyun vxor $in5,$twk5,v31 3374*4882a593Smuzhiyun vsrab $tmp,$tweak,$seven # next tweak value 3375*4882a593Smuzhiyun vxor $twk5,$tweak,$rndkey0 
3376*4882a593Smuzhiyun 3377*4882a593Smuzhiyun vncipherlast $out0,$out0,$in0 3378*4882a593Smuzhiyun lvx_u $in0,$x00,$inp # load next input block 3379*4882a593Smuzhiyun vaddubm $tweak,$tweak,$tweak 3380*4882a593Smuzhiyun vsldoi $tmp,$tmp,$tmp,15 3381*4882a593Smuzhiyun vncipherlast $out1,$out1,$in1 3382*4882a593Smuzhiyun lvx_u $in1,$x10,$inp 3383*4882a593Smuzhiyun vncipherlast $out2,$out2,$in2 3384*4882a593Smuzhiyun le?vperm $in0,$in0,$in0,$leperm 3385*4882a593Smuzhiyun lvx_u $in2,$x20,$inp 3386*4882a593Smuzhiyun vand $tmp,$tmp,$eighty7 3387*4882a593Smuzhiyun vncipherlast $out3,$out3,$in3 3388*4882a593Smuzhiyun le?vperm $in1,$in1,$in1,$leperm 3389*4882a593Smuzhiyun lvx_u $in3,$x30,$inp 3390*4882a593Smuzhiyun vncipherlast $out4,$out4,$in4 3391*4882a593Smuzhiyun le?vperm $in2,$in2,$in2,$leperm 3392*4882a593Smuzhiyun lvx_u $in4,$x40,$inp 3393*4882a593Smuzhiyun vxor $tweak,$tweak,$tmp 3394*4882a593Smuzhiyun vncipherlast $out5,$out5,$in5 3395*4882a593Smuzhiyun le?vperm $in3,$in3,$in3,$leperm 3396*4882a593Smuzhiyun lvx_u $in5,$x50,$inp 3397*4882a593Smuzhiyun addi $inp,$inp,0x60 3398*4882a593Smuzhiyun le?vperm $in4,$in4,$in4,$leperm 3399*4882a593Smuzhiyun le?vperm $in5,$in5,$in5,$leperm 3400*4882a593Smuzhiyun 3401*4882a593Smuzhiyun le?vperm $out0,$out0,$out0,$leperm 3402*4882a593Smuzhiyun le?vperm $out1,$out1,$out1,$leperm 3403*4882a593Smuzhiyun stvx_u $out0,$x00,$out # store output 3404*4882a593Smuzhiyun vxor $out0,$in0,$twk0 3405*4882a593Smuzhiyun le?vperm $out2,$out2,$out2,$leperm 3406*4882a593Smuzhiyun stvx_u $out1,$x10,$out 3407*4882a593Smuzhiyun vxor $out1,$in1,$twk1 3408*4882a593Smuzhiyun le?vperm $out3,$out3,$out3,$leperm 3409*4882a593Smuzhiyun stvx_u $out2,$x20,$out 3410*4882a593Smuzhiyun vxor $out2,$in2,$twk2 3411*4882a593Smuzhiyun le?vperm $out4,$out4,$out4,$leperm 3412*4882a593Smuzhiyun stvx_u $out3,$x30,$out 3413*4882a593Smuzhiyun vxor $out3,$in3,$twk3 3414*4882a593Smuzhiyun le?vperm $out5,$out5,$out5,$leperm 3415*4882a593Smuzhiyun stvx_u $out4,$x40,$out 
3416*4882a593Smuzhiyun vxor $out4,$in4,$twk4 3417*4882a593Smuzhiyun stvx_u $out5,$x50,$out 3418*4882a593Smuzhiyun vxor $out5,$in5,$twk5 3419*4882a593Smuzhiyun addi $out,$out,0x60 3420*4882a593Smuzhiyun 3421*4882a593Smuzhiyun mtctr $rounds 3422*4882a593Smuzhiyun beq Loop_xts_dec6x # did $len-=96 borrow? 3423*4882a593Smuzhiyun 3424*4882a593Smuzhiyun addic. $len,$len,0x60 3425*4882a593Smuzhiyun beq Lxts_dec6x_zero 3426*4882a593Smuzhiyun cmpwi $len,0x20 3427*4882a593Smuzhiyun blt Lxts_dec6x_one 3428*4882a593Smuzhiyun nop 3429*4882a593Smuzhiyun beq Lxts_dec6x_two 3430*4882a593Smuzhiyun cmpwi $len,0x40 3431*4882a593Smuzhiyun blt Lxts_dec6x_three 3432*4882a593Smuzhiyun nop 3433*4882a593Smuzhiyun beq Lxts_dec6x_four 3434*4882a593Smuzhiyun 3435*4882a593SmuzhiyunLxts_dec6x_five: 3436*4882a593Smuzhiyun vxor $out0,$in1,$twk0 3437*4882a593Smuzhiyun vxor $out1,$in2,$twk1 3438*4882a593Smuzhiyun vxor $out2,$in3,$twk2 3439*4882a593Smuzhiyun vxor $out3,$in4,$twk3 3440*4882a593Smuzhiyun vxor $out4,$in5,$twk4 3441*4882a593Smuzhiyun 3442*4882a593Smuzhiyun bl _aesp8_xts_dec5x 3443*4882a593Smuzhiyun 3444*4882a593Smuzhiyun le?vperm $out0,$out0,$out0,$leperm 3445*4882a593Smuzhiyun vmr $twk0,$twk5 # unused tweak 3446*4882a593Smuzhiyun vxor $twk1,$tweak,$rndkey0 3447*4882a593Smuzhiyun le?vperm $out1,$out1,$out1,$leperm 3448*4882a593Smuzhiyun stvx_u $out0,$x00,$out # store output 3449*4882a593Smuzhiyun vxor $out0,$in0,$twk1 3450*4882a593Smuzhiyun le?vperm $out2,$out2,$out2,$leperm 3451*4882a593Smuzhiyun stvx_u $out1,$x10,$out 3452*4882a593Smuzhiyun le?vperm $out3,$out3,$out3,$leperm 3453*4882a593Smuzhiyun stvx_u $out2,$x20,$out 3454*4882a593Smuzhiyun le?vperm $out4,$out4,$out4,$leperm 3455*4882a593Smuzhiyun stvx_u $out3,$x30,$out 3456*4882a593Smuzhiyun stvx_u $out4,$x40,$out 3457*4882a593Smuzhiyun addi $out,$out,0x50 3458*4882a593Smuzhiyun bne Lxts_dec6x_steal 3459*4882a593Smuzhiyun b Lxts_dec6x_done 3460*4882a593Smuzhiyun 3461*4882a593Smuzhiyun.align 4 3462*4882a593SmuzhiyunLxts_dec6x_four: 
3463*4882a593Smuzhiyun vxor $out0,$in2,$twk0 3464*4882a593Smuzhiyun vxor $out1,$in3,$twk1 3465*4882a593Smuzhiyun vxor $out2,$in4,$twk2 3466*4882a593Smuzhiyun vxor $out3,$in5,$twk3 3467*4882a593Smuzhiyun vxor $out4,$out4,$out4 3468*4882a593Smuzhiyun 3469*4882a593Smuzhiyun bl _aesp8_xts_dec5x 3470*4882a593Smuzhiyun 3471*4882a593Smuzhiyun le?vperm $out0,$out0,$out0,$leperm 3472*4882a593Smuzhiyun vmr $twk0,$twk4 # unused tweak 3473*4882a593Smuzhiyun vmr $twk1,$twk5 3474*4882a593Smuzhiyun le?vperm $out1,$out1,$out1,$leperm 3475*4882a593Smuzhiyun stvx_u $out0,$x00,$out # store output 3476*4882a593Smuzhiyun vxor $out0,$in0,$twk5 3477*4882a593Smuzhiyun le?vperm $out2,$out2,$out2,$leperm 3478*4882a593Smuzhiyun stvx_u $out1,$x10,$out 3479*4882a593Smuzhiyun le?vperm $out3,$out3,$out3,$leperm 3480*4882a593Smuzhiyun stvx_u $out2,$x20,$out 3481*4882a593Smuzhiyun stvx_u $out3,$x30,$out 3482*4882a593Smuzhiyun addi $out,$out,0x40 3483*4882a593Smuzhiyun bne Lxts_dec6x_steal 3484*4882a593Smuzhiyun b Lxts_dec6x_done 3485*4882a593Smuzhiyun 3486*4882a593Smuzhiyun.align 4 3487*4882a593SmuzhiyunLxts_dec6x_three: 3488*4882a593Smuzhiyun vxor $out0,$in3,$twk0 3489*4882a593Smuzhiyun vxor $out1,$in4,$twk1 3490*4882a593Smuzhiyun vxor $out2,$in5,$twk2 3491*4882a593Smuzhiyun vxor $out3,$out3,$out3 3492*4882a593Smuzhiyun vxor $out4,$out4,$out4 3493*4882a593Smuzhiyun 3494*4882a593Smuzhiyun bl _aesp8_xts_dec5x 3495*4882a593Smuzhiyun 3496*4882a593Smuzhiyun le?vperm $out0,$out0,$out0,$leperm 3497*4882a593Smuzhiyun vmr $twk0,$twk3 # unused tweak 3498*4882a593Smuzhiyun vmr $twk1,$twk4 3499*4882a593Smuzhiyun le?vperm $out1,$out1,$out1,$leperm 3500*4882a593Smuzhiyun stvx_u $out0,$x00,$out # store output 3501*4882a593Smuzhiyun vxor $out0,$in0,$twk4 3502*4882a593Smuzhiyun le?vperm $out2,$out2,$out2,$leperm 3503*4882a593Smuzhiyun stvx_u $out1,$x10,$out 3504*4882a593Smuzhiyun stvx_u $out2,$x20,$out 3505*4882a593Smuzhiyun addi $out,$out,0x30 3506*4882a593Smuzhiyun bne Lxts_dec6x_steal 3507*4882a593Smuzhiyun b 
Lxts_dec6x_done 3508*4882a593Smuzhiyun 3509*4882a593Smuzhiyun.align 4 3510*4882a593SmuzhiyunLxts_dec6x_two: 3511*4882a593Smuzhiyun vxor $out0,$in4,$twk0 3512*4882a593Smuzhiyun vxor $out1,$in5,$twk1 3513*4882a593Smuzhiyun vxor $out2,$out2,$out2 3514*4882a593Smuzhiyun vxor $out3,$out3,$out3 3515*4882a593Smuzhiyun vxor $out4,$out4,$out4 3516*4882a593Smuzhiyun 3517*4882a593Smuzhiyun bl _aesp8_xts_dec5x 3518*4882a593Smuzhiyun 3519*4882a593Smuzhiyun le?vperm $out0,$out0,$out0,$leperm 3520*4882a593Smuzhiyun vmr $twk0,$twk2 # unused tweak 3521*4882a593Smuzhiyun vmr $twk1,$twk3 3522*4882a593Smuzhiyun le?vperm $out1,$out1,$out1,$leperm 3523*4882a593Smuzhiyun stvx_u $out0,$x00,$out # store output 3524*4882a593Smuzhiyun vxor $out0,$in0,$twk3 3525*4882a593Smuzhiyun stvx_u $out1,$x10,$out 3526*4882a593Smuzhiyun addi $out,$out,0x20 3527*4882a593Smuzhiyun bne Lxts_dec6x_steal 3528*4882a593Smuzhiyun b Lxts_dec6x_done 3529*4882a593Smuzhiyun 3530*4882a593Smuzhiyun.align 4 3531*4882a593SmuzhiyunLxts_dec6x_one: 3532*4882a593Smuzhiyun vxor $out0,$in5,$twk0 3533*4882a593Smuzhiyun nop 3534*4882a593SmuzhiyunLoop_xts_dec1x: 3535*4882a593Smuzhiyun vncipher $out0,$out0,v24 3536*4882a593Smuzhiyun lvx v24,$x20,$key_ # round[3] 3537*4882a593Smuzhiyun addi $key_,$key_,0x20 3538*4882a593Smuzhiyun 3539*4882a593Smuzhiyun vncipher $out0,$out0,v25 3540*4882a593Smuzhiyun lvx v25,$x10,$key_ # round[4] 3541*4882a593Smuzhiyun bdnz Loop_xts_dec1x 3542*4882a593Smuzhiyun 3543*4882a593Smuzhiyun subi r0,$taillen,1 3544*4882a593Smuzhiyun vncipher $out0,$out0,v24 3545*4882a593Smuzhiyun 3546*4882a593Smuzhiyun andi. 
r0,r0,16 3547*4882a593Smuzhiyun cmpwi $taillen,0 3548*4882a593Smuzhiyun vncipher $out0,$out0,v25 3549*4882a593Smuzhiyun 3550*4882a593Smuzhiyun sub $inp,$inp,r0 3551*4882a593Smuzhiyun vncipher $out0,$out0,v26 3552*4882a593Smuzhiyun 3553*4882a593Smuzhiyun lvx_u $in0,0,$inp 3554*4882a593Smuzhiyun vncipher $out0,$out0,v27 3555*4882a593Smuzhiyun 3556*4882a593Smuzhiyun addi $key_,$sp,$FRAME+15 # rewind $key_ 3557*4882a593Smuzhiyun vncipher $out0,$out0,v28 3558*4882a593Smuzhiyun lvx v24,$x00,$key_ # re-pre-load round[1] 3559*4882a593Smuzhiyun 3560*4882a593Smuzhiyun vncipher $out0,$out0,v29 3561*4882a593Smuzhiyun lvx v25,$x10,$key_ # re-pre-load round[2] 3562*4882a593Smuzhiyun vxor $twk0,$twk0,v31 3563*4882a593Smuzhiyun 3564*4882a593Smuzhiyun le?vperm $in0,$in0,$in0,$leperm 3565*4882a593Smuzhiyun vncipher $out0,$out0,v30 3566*4882a593Smuzhiyun 3567*4882a593Smuzhiyun mtctr $rounds 3568*4882a593Smuzhiyun vncipherlast $out0,$out0,$twk0 3569*4882a593Smuzhiyun 3570*4882a593Smuzhiyun vmr $twk0,$twk1 # unused tweak 3571*4882a593Smuzhiyun vmr $twk1,$twk2 3572*4882a593Smuzhiyun le?vperm $out0,$out0,$out0,$leperm 3573*4882a593Smuzhiyun stvx_u $out0,$x00,$out # store output 3574*4882a593Smuzhiyun addi $out,$out,0x10 3575*4882a593Smuzhiyun vxor $out0,$in0,$twk2 3576*4882a593Smuzhiyun bne Lxts_dec6x_steal 3577*4882a593Smuzhiyun b Lxts_dec6x_done 3578*4882a593Smuzhiyun 3579*4882a593Smuzhiyun.align 4 3580*4882a593SmuzhiyunLxts_dec6x_zero: 3581*4882a593Smuzhiyun cmpwi $taillen,0 3582*4882a593Smuzhiyun beq Lxts_dec6x_done 3583*4882a593Smuzhiyun 3584*4882a593Smuzhiyun lvx_u $in0,0,$inp 3585*4882a593Smuzhiyun le?vperm $in0,$in0,$in0,$leperm 3586*4882a593Smuzhiyun vxor $out0,$in0,$twk1 3587*4882a593SmuzhiyunLxts_dec6x_steal: 3588*4882a593Smuzhiyun vncipher $out0,$out0,v24 3589*4882a593Smuzhiyun lvx v24,$x20,$key_ # round[3] 3590*4882a593Smuzhiyun addi $key_,$key_,0x20 3591*4882a593Smuzhiyun 3592*4882a593Smuzhiyun vncipher $out0,$out0,v25 3593*4882a593Smuzhiyun lvx v25,$x10,$key_ # round[4] 
3594*4882a593Smuzhiyun bdnz Lxts_dec6x_steal 3595*4882a593Smuzhiyun 3596*4882a593Smuzhiyun add $inp,$inp,$taillen 3597*4882a593Smuzhiyun vncipher $out0,$out0,v24 3598*4882a593Smuzhiyun 3599*4882a593Smuzhiyun cmpwi $taillen,0 3600*4882a593Smuzhiyun vncipher $out0,$out0,v25 3601*4882a593Smuzhiyun 3602*4882a593Smuzhiyun lvx_u $in0,0,$inp 3603*4882a593Smuzhiyun vncipher $out0,$out0,v26 3604*4882a593Smuzhiyun 3605*4882a593Smuzhiyun lvsr $inpperm,0,$taillen # $in5 is no more 3606*4882a593Smuzhiyun vncipher $out0,$out0,v27 3607*4882a593Smuzhiyun 3608*4882a593Smuzhiyun addi $key_,$sp,$FRAME+15 # rewind $key_ 3609*4882a593Smuzhiyun vncipher $out0,$out0,v28 3610*4882a593Smuzhiyun lvx v24,$x00,$key_ # re-pre-load round[1] 3611*4882a593Smuzhiyun 3612*4882a593Smuzhiyun vncipher $out0,$out0,v29 3613*4882a593Smuzhiyun lvx v25,$x10,$key_ # re-pre-load round[2] 3614*4882a593Smuzhiyun vxor $twk1,$twk1,v31 3615*4882a593Smuzhiyun 3616*4882a593Smuzhiyun le?vperm $in0,$in0,$in0,$leperm 3617*4882a593Smuzhiyun vncipher $out0,$out0,v30 3618*4882a593Smuzhiyun 3619*4882a593Smuzhiyun vperm $in0,$in0,$in0,$inpperm 3620*4882a593Smuzhiyun vncipherlast $tmp,$out0,$twk1 3621*4882a593Smuzhiyun 3622*4882a593Smuzhiyun le?vperm $out0,$tmp,$tmp,$leperm 3623*4882a593Smuzhiyun le?stvx_u $out0,0,$out 3624*4882a593Smuzhiyun be?stvx_u $tmp,0,$out 3625*4882a593Smuzhiyun 3626*4882a593Smuzhiyun vxor $out0,$out0,$out0 3627*4882a593Smuzhiyun vspltisb $out1,-1 3628*4882a593Smuzhiyun vperm $out0,$out0,$out1,$inpperm 3629*4882a593Smuzhiyun vsel $out0,$in0,$tmp,$out0 3630*4882a593Smuzhiyun vxor $out0,$out0,$twk0 3631*4882a593Smuzhiyun 3632*4882a593Smuzhiyun subi r30,$out,1 3633*4882a593Smuzhiyun mtctr $taillen 3634*4882a593SmuzhiyunLoop_xts_dec6x_steal: 3635*4882a593Smuzhiyun lbzu r0,1(r30) 3636*4882a593Smuzhiyun stb r0,16(r30) 3637*4882a593Smuzhiyun bdnz Loop_xts_dec6x_steal 3638*4882a593Smuzhiyun 3639*4882a593Smuzhiyun li $taillen,0 3640*4882a593Smuzhiyun mtctr $rounds 3641*4882a593Smuzhiyun b Loop_xts_dec1x # one 
more time... 3642*4882a593Smuzhiyun 3643*4882a593Smuzhiyun.align 4 3644*4882a593SmuzhiyunLxts_dec6x_done: 3645*4882a593Smuzhiyun ${UCMP}i $ivp,0 3646*4882a593Smuzhiyun beq Lxts_dec6x_ret 3647*4882a593Smuzhiyun 3648*4882a593Smuzhiyun vxor $tweak,$twk0,$rndkey0 3649*4882a593Smuzhiyun le?vperm $tweak,$tweak,$tweak,$leperm 3650*4882a593Smuzhiyun stvx_u $tweak,0,$ivp 3651*4882a593Smuzhiyun 3652*4882a593SmuzhiyunLxts_dec6x_ret: 3653*4882a593Smuzhiyun mtlr r11 3654*4882a593Smuzhiyun li r10,`$FRAME+15` 3655*4882a593Smuzhiyun li r11,`$FRAME+31` 3656*4882a593Smuzhiyun stvx $seven,r10,$sp # wipe copies of round keys 3657*4882a593Smuzhiyun addi r10,r10,32 3658*4882a593Smuzhiyun stvx $seven,r11,$sp 3659*4882a593Smuzhiyun addi r11,r11,32 3660*4882a593Smuzhiyun stvx $seven,r10,$sp 3661*4882a593Smuzhiyun addi r10,r10,32 3662*4882a593Smuzhiyun stvx $seven,r11,$sp 3663*4882a593Smuzhiyun addi r11,r11,32 3664*4882a593Smuzhiyun stvx $seven,r10,$sp 3665*4882a593Smuzhiyun addi r10,r10,32 3666*4882a593Smuzhiyun stvx $seven,r11,$sp 3667*4882a593Smuzhiyun addi r11,r11,32 3668*4882a593Smuzhiyun stvx $seven,r10,$sp 3669*4882a593Smuzhiyun addi r10,r10,32 3670*4882a593Smuzhiyun stvx $seven,r11,$sp 3671*4882a593Smuzhiyun addi r11,r11,32 3672*4882a593Smuzhiyun 3673*4882a593Smuzhiyun mtspr 256,$vrsave 3674*4882a593Smuzhiyun lvx v20,r10,$sp # ABI says so 3675*4882a593Smuzhiyun addi r10,r10,32 3676*4882a593Smuzhiyun lvx v21,r11,$sp 3677*4882a593Smuzhiyun addi r11,r11,32 3678*4882a593Smuzhiyun lvx v22,r10,$sp 3679*4882a593Smuzhiyun addi r10,r10,32 3680*4882a593Smuzhiyun lvx v23,r11,$sp 3681*4882a593Smuzhiyun addi r11,r11,32 3682*4882a593Smuzhiyun lvx v24,r10,$sp 3683*4882a593Smuzhiyun addi r10,r10,32 3684*4882a593Smuzhiyun lvx v25,r11,$sp 3685*4882a593Smuzhiyun addi r11,r11,32 3686*4882a593Smuzhiyun lvx v26,r10,$sp 3687*4882a593Smuzhiyun addi r10,r10,32 3688*4882a593Smuzhiyun lvx v27,r11,$sp 3689*4882a593Smuzhiyun addi r11,r11,32 3690*4882a593Smuzhiyun lvx v28,r10,$sp 3691*4882a593Smuzhiyun addi 
r10,r10,32 3692*4882a593Smuzhiyun lvx v29,r11,$sp 3693*4882a593Smuzhiyun addi r11,r11,32 3694*4882a593Smuzhiyun lvx v30,r10,$sp 3695*4882a593Smuzhiyun lvx v31,r11,$sp 3696*4882a593Smuzhiyun $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) 3697*4882a593Smuzhiyun $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) 3698*4882a593Smuzhiyun $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) 3699*4882a593Smuzhiyun $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) 3700*4882a593Smuzhiyun $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) 3701*4882a593Smuzhiyun $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) 3702*4882a593Smuzhiyun addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` 3703*4882a593Smuzhiyun blr 3704*4882a593Smuzhiyun .long 0 3705*4882a593Smuzhiyun .byte 0,12,0x04,1,0x80,6,6,0 3706*4882a593Smuzhiyun .long 0 3707*4882a593Smuzhiyun 3708*4882a593Smuzhiyun.align 5 3709*4882a593Smuzhiyun_aesp8_xts_dec5x: 3710*4882a593Smuzhiyun vncipher $out0,$out0,v24 3711*4882a593Smuzhiyun vncipher $out1,$out1,v24 3712*4882a593Smuzhiyun vncipher $out2,$out2,v24 3713*4882a593Smuzhiyun vncipher $out3,$out3,v24 3714*4882a593Smuzhiyun vncipher $out4,$out4,v24 3715*4882a593Smuzhiyun lvx v24,$x20,$key_ # round[3] 3716*4882a593Smuzhiyun addi $key_,$key_,0x20 3717*4882a593Smuzhiyun 3718*4882a593Smuzhiyun vncipher $out0,$out0,v25 3719*4882a593Smuzhiyun vncipher $out1,$out1,v25 3720*4882a593Smuzhiyun vncipher $out2,$out2,v25 3721*4882a593Smuzhiyun vncipher $out3,$out3,v25 3722*4882a593Smuzhiyun vncipher $out4,$out4,v25 3723*4882a593Smuzhiyun lvx v25,$x10,$key_ # round[4] 3724*4882a593Smuzhiyun bdnz _aesp8_xts_dec5x 3725*4882a593Smuzhiyun 3726*4882a593Smuzhiyun subi r0,$taillen,1 3727*4882a593Smuzhiyun vncipher $out0,$out0,v24 3728*4882a593Smuzhiyun vncipher $out1,$out1,v24 3729*4882a593Smuzhiyun vncipher $out2,$out2,v24 3730*4882a593Smuzhiyun vncipher $out3,$out3,v24 3731*4882a593Smuzhiyun vncipher $out4,$out4,v24 3732*4882a593Smuzhiyun 3733*4882a593Smuzhiyun andi. 
r0,r0,16 3734*4882a593Smuzhiyun cmpwi $taillen,0 3735*4882a593Smuzhiyun vncipher $out0,$out0,v25 3736*4882a593Smuzhiyun vncipher $out1,$out1,v25 3737*4882a593Smuzhiyun vncipher $out2,$out2,v25 3738*4882a593Smuzhiyun vncipher $out3,$out3,v25 3739*4882a593Smuzhiyun vncipher $out4,$out4,v25 3740*4882a593Smuzhiyun vxor $twk0,$twk0,v31 3741*4882a593Smuzhiyun 3742*4882a593Smuzhiyun sub $inp,$inp,r0 3743*4882a593Smuzhiyun vncipher $out0,$out0,v26 3744*4882a593Smuzhiyun vncipher $out1,$out1,v26 3745*4882a593Smuzhiyun vncipher $out2,$out2,v26 3746*4882a593Smuzhiyun vncipher $out3,$out3,v26 3747*4882a593Smuzhiyun vncipher $out4,$out4,v26 3748*4882a593Smuzhiyun vxor $in1,$twk1,v31 3749*4882a593Smuzhiyun 3750*4882a593Smuzhiyun vncipher $out0,$out0,v27 3751*4882a593Smuzhiyun lvx_u $in0,0,$inp 3752*4882a593Smuzhiyun vncipher $out1,$out1,v27 3753*4882a593Smuzhiyun vncipher $out2,$out2,v27 3754*4882a593Smuzhiyun vncipher $out3,$out3,v27 3755*4882a593Smuzhiyun vncipher $out4,$out4,v27 3756*4882a593Smuzhiyun vxor $in2,$twk2,v31 3757*4882a593Smuzhiyun 3758*4882a593Smuzhiyun addi $key_,$sp,$FRAME+15 # rewind $key_ 3759*4882a593Smuzhiyun vncipher $out0,$out0,v28 3760*4882a593Smuzhiyun vncipher $out1,$out1,v28 3761*4882a593Smuzhiyun vncipher $out2,$out2,v28 3762*4882a593Smuzhiyun vncipher $out3,$out3,v28 3763*4882a593Smuzhiyun vncipher $out4,$out4,v28 3764*4882a593Smuzhiyun lvx v24,$x00,$key_ # re-pre-load round[1] 3765*4882a593Smuzhiyun vxor $in3,$twk3,v31 3766*4882a593Smuzhiyun 3767*4882a593Smuzhiyun vncipher $out0,$out0,v29 3768*4882a593Smuzhiyun le?vperm $in0,$in0,$in0,$leperm 3769*4882a593Smuzhiyun vncipher $out1,$out1,v29 3770*4882a593Smuzhiyun vncipher $out2,$out2,v29 3771*4882a593Smuzhiyun vncipher $out3,$out3,v29 3772*4882a593Smuzhiyun vncipher $out4,$out4,v29 3773*4882a593Smuzhiyun lvx v25,$x10,$key_ # re-pre-load round[2] 3774*4882a593Smuzhiyun vxor $in4,$twk4,v31 3775*4882a593Smuzhiyun 3776*4882a593Smuzhiyun vncipher $out0,$out0,v30 3777*4882a593Smuzhiyun vncipher 
$out1,$out1,v30
	vncipher	$out2,$out2,v30
	vncipher	$out3,$out3,v30
	vncipher	$out4,$out4,v30

	vncipherlast	$out0,$out0,$twk0
	vncipherlast	$out1,$out1,$in1
	vncipherlast	$out2,$out2,$in2
	vncipherlast	$out3,$out3,$in3
	vncipherlast	$out4,$out4,$in4
	mtctr		$rounds
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,0,0
___
}}	}}}

# Post-process the assembly text accumulated in $code, one line at a time,
# and print the result to STDOUT:
#
#   1. every `...` span is evaluated as a Perl expression and replaced by
#      its value;
#   2. until the "Lconsts:" label has been seen, ".long"/".byte" lines that
#      end in a "?xxx" tag are re-emitted as plain ".byte" lists, adjusted
#      when the flavour is little-endian;
#   3. "le?" / "be?" prefixes and "?insn" markers are resolved according to
#      the endianness implied by $flavour (a name ending in "le" selects
#      little-endian).
my $consts=1;				# true while inside the constants table
foreach(split("\n",$code)) {
	s/\`([^\`]*)\`/eval($1)/geo;

	# constants table endian-specific conversion
	if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
	    my $conv=$3;		# trailing tag, e.g. "?inv" or "?rev"
	    my @bytes=();

	    # convert to endian-agnostic format
	    if ($1 eq "long") {
	      foreach (split(/,\s*/,$2)) {
		# a leading "0" means octal/hex notation, parsed by oct()
		my $word = /^0/?oct:int;
		# split each 32-bit value into bytes, most significant first
		push @bytes,($word>>24)&0xff,($word>>16)&0xff,($word>>8)&0xff,$word&0xff;
	      }
	    } else {
		@bytes = map(/^0/?oct:int,split(/,\s*/,$2));
	    }

	    # little-endian conversion
	    if ($flavour =~ /le$/o) {
		SWITCH: for($conv)  {
		    # "?inv": XOR every byte with 0xf
		    /\?inv/ && do   { @bytes=map($_^0xf,@bytes); last; };
		    # "?rev": reverse the byte order of the whole line
		    /\?rev/ && do   { @bytes=reverse(@bytes);    last; };
		}
	    }

	    #emit
	    print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
	    next;
	}
	$consts=0 if (m/Lconsts:/o);	# end of table

	# instructions prefixed with '?' are endian-specific and need
	# to be adjusted accordingly...
	if ($flavour =~ /le$/o) {	# little-endian
	    # Only the first substitution that matches is applied: "le?" lines
	    # are enabled, "be?" lines are commented out, and "?"-marked
	    # instructions get their operands swapped/mirrored.
	    s/le\?//o		or
	    s/be\?/#be#/o	or
	    s/\?lvsr/lvsl/o	or
	    s/\?lvsl/lvsr/o	or
	    s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
	    s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
	    s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
	} else {			# big-endian
	    # "le?" lines are commented out, "be?" lines are enabled, and the
	    # "?" marker is simply dropped from the instruction.
	    s/le\?/#le#/o	or
	    s/be\?//o		or
	    s/\?([a-z]+)/$1/o;
	}

	print $_,"\n";
}

# Output is buffered, so a write failure (full disk, closed pipe) may only be
# reported when the handle is closed.  Die instead of silently leaving a
# truncated assembly file behind with a zero exit status.
close STDOUT or die "error closing STDOUT: $!";