/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * sha1-ce-core.S - SHA-1 secure hash using ARMv8 Crypto Extensions
 *
 * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.arch		armv8-a+crypto

	/* round constants, one per 20-round group, splatted across all lanes */
	k0		.req	v0
	k1		.req	v1
	k2		.req	v2
	k3		.req	v3

	/* temporaries holding schedule quad + round constant sums */
	t0		.req	v4
	t1		.req	v5

	/* hash state: words a-d in v6, word e in s7 */
	dga		.req	q6
	dgav		.req	v6
	dgb		.req	s7
	dgbv		.req	v7

	/* working copies of the state while a block is processed */
	dg0q		.req	q12
	dg0s		.req	s12
	dg0v		.req	v12
	dg1s		.req	s13
	dg1v		.req	v13
	dg2s		.req	s14

	/*
	 * Perform 4 rounds using the sha1\op instruction, where \op is
	 * c, p or m for the choose, parity and majority round functions.
	 * The t0/t1 and dg1s/dg2s register pairs alternate between even
	 * ('ev') and odd ('od') invocations, so each group consumes the
	 * values set up by the previous one while preparing those for
	 * the next; \dg1 supplies the initial 'e' value for the very
	 * first group.
	 */
	.macro		add_only, op, ev, rc, s0, dg1
	.ifc		\ev, ev
	add		t1.4s, v\s0\().4s, \rc\().4s
	sha1h		dg2s, dg0s
	.ifnb		\dg1
	sha1\op		dg0q, \dg1, t0.4s
	.else
	sha1\op		dg0q, dg1s, t0.4s
	.endif
	.else
	.ifnb		\s0
	add		t0.4s, v\s0\().4s, \rc\().4s
	.endif
	sha1h		dg1s, dg0s
	sha1\op		dg0q, dg2s, t1.4s
	.endif
	.endm

	/*
	 * As add_only, but also extend the message schedule by one quad
	 * using sha1su0/sha1su1.
	 */
	.macro		add_update, op, ev, rc, s0, s1, s2, s3, dg1
	sha1su0		v\s0\().4s, v\s1\().4s, v\s2\().4s
	add_only	\op, \ev, \rc, \s1, \dg1
	sha1su1		v\s0\().4s, v\s3\().4s
	.endm

	/* replicate the 32-bit constant \val across all 4 lanes of \k,
	 * using \tmp as a scratch GPR */
	.macro		loadrc, k, val, tmp
	movz		\tmp, :abs_g0_nc:\val
	movk		\tmp, :abs_g1:\val
	dup		\k, \tmp
	.endm

	/*
	 * int sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
	 *			 int blocks)
	 */
SYM_FUNC_START(sha1_ce_transform)
	/* load round constants */
	loadrc		k0.4s, 0x5a827999, w6
	loadrc		k1.4s, 0x6ed9eba1, w6
	loadrc		k2.4s, 0x8f1bbcdc, w6
	loadrc		k3.4s, 0xca62c1d6, w6

	/* load state */
	ld1		{dgav.4s}, [x0]
	ldr		dgb, [x0, #16]

	/* load sha1_ce_state::finalize */
	ldr_l		w4, sha1_ce_offsetof_finalize, x4
	ldr		w4, [x0, x4]

	/* load input */
0:	ld1		{v8.4s-v11.4s}, [x1], #64
	sub		w2, w2, #1

CPU_LE(	rev32		v8.16b, v8.16b		)
CPU_LE(	rev32		v9.16b, v9.16b		)
CPU_LE(	rev32		v10.16b, v10.16b	)
CPU_LE(	rev32		v11.16b, v11.16b	)

1:	add		t0.4s, v8.4s, k0.4s
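	/*
	 * The 80 SHA-1 rounds are emitted below as 20 macro invocations
	 * of 4 rounds each: sha1c (choose) for rounds 0-19, sha1p
	 * (parity) for rounds 20-39 and 60-79, and sha1m (majority) for
	 * rounds 40-59, using the constants loaded into k0-k3 above.
	 */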
	mov		dg0v.16b, dgav.16b

	add_update	c, ev, k0,  8,  9, 10, 11, dgb
	add_update	c, od, k0,  9, 10, 11,  8
	add_update	c, ev, k0, 10, 11,  8,  9
	add_update	c, od, k0, 11,  8,  9, 10
	add_update	c, ev, k1,  8,  9, 10, 11

	add_update	p, od, k1,  9, 10, 11,  8
	add_update	p, ev, k1, 10, 11,  8,  9
	add_update	p, od, k1, 11,  8,  9, 10
	add_update	p, ev, k1,  8,  9, 10, 11
	add_update	p, od, k2,  9, 10, 11,  8

	add_update	m, ev, k2, 10, 11,  8,  9
	add_update	m, od, k2, 11,  8,  9, 10
	add_update	m, ev, k2,  8,  9, 10, 11
	add_update	m, od, k2,  9, 10, 11,  8
	add_update	m, ev, k3, 10, 11,  8,  9

	add_update	p, od, k3, 11,  8,  9, 10
	add_only	p, ev, k3,  9
	add_only	p, od, k3, 10
	add_only	p, ev, k3, 11
	add_only	p, od

	/* update state */
	add		dgbv.2s, dgbv.2s, dg1v.2s
	add		dgav.4s, dgav.4s, dg0v.4s

	cbz		w2, 2f
	cond_yield	3f, x5, x6
	b		0b

	/*
	 * Final block: add padding and total bit count.
	 * Skip if the input size was not a round multiple of the block
	 * size; the padding is handled by the C code in that case.
	 */
2:	cbz		x4, 3f
	ldr_l		w4, sha1_ce_offsetof_count, x4
	ldr		x4, [x0, x4]
	movi		v9.2d, #0
	mov		x8, #0x80000000
	movi		v10.2d, #0
	ror		x7, x4, #29		// ror(lsl(x4, 3), 32)
	fmov		d8, x8
	mov		x4, #0
	mov		v11.d[0], xzr
	mov		v11.d[1], x7
	b		1b

	/* store new state */
3:	st1		{dgav.4s}, [x0]
	str		dgb, [x0, #16]
	mov		w0, w2			// return number of unprocessed blocks
	ret
SYM_FUNC_END(sha1_ce_transform)