xref: /OK3568_Linux_fs/kernel/arch/arm64/crypto/sha1-ce-core.S (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun/* SPDX-License-Identifier: GPL-2.0-only */
2*4882a593Smuzhiyun/*
3*4882a593Smuzhiyun * sha1-ce-core.S - SHA-1 secure hash using ARMv8 Crypto Extensions
4*4882a593Smuzhiyun *
5*4882a593Smuzhiyun * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
6*4882a593Smuzhiyun */
7*4882a593Smuzhiyun
8*4882a593Smuzhiyun#include <linux/linkage.h>
9*4882a593Smuzhiyun#include <asm/assembler.h>
10*4882a593Smuzhiyun
11*4882a593Smuzhiyun	.text
	/* +crypto makes the assembler accept the sha1* instructions used below */
12*4882a593Smuzhiyun	.arch		armv8-a+crypto
13*4882a593Smuzhiyun
	/*
	 * k0-k3: vectors that sha1_ce_transform fills with the four round
	 * constants (one constant replicated into every 32-bit lane).
	 */
14*4882a593Smuzhiyun	k0		.req	v0
15*4882a593Smuzhiyun	k1		.req	v1
16*4882a593Smuzhiyun	k2		.req	v2
17*4882a593Smuzhiyun	k3		.req	v3
18*4882a593Smuzhiyun
	/*
	 * t0/t1: scratch vectors receiving "input words + round constant".
	 * add_only writes one of them while its sha1 instruction reads the other.
	 */
19*4882a593Smuzhiyun	t0		.req	v4
20*4882a593Smuzhiyun	t1		.req	v5
21*4882a593Smuzhiyun
	/*
	 * Hash state as kept across blocks: v6 holds the 16 bytes at [x0],
	 * s7/v7 the 32-bit word at [x0, #16].
	 * The q-register alias dga is not referenced in the code visible here.
	 */
22*4882a593Smuzhiyun	dga		.req	q6
23*4882a593Smuzhiyun	dgav		.req	v6
24*4882a593Smuzhiyun	dgb		.req	s7
25*4882a593Smuzhiyun	dgbv		.req	v7
26*4882a593Smuzhiyun
	/*
	 * Per-block working registers: dg0* starts as a copy of dgav and is
	 * updated by every sha1c/sha1p/sha1m; dg1* and dg2s alternately receive
	 * the result of sha1h.
	 */
27*4882a593Smuzhiyun	dg0q		.req	q12
28*4882a593Smuzhiyun	dg0s		.req	s12
29*4882a593Smuzhiyun	dg0v		.req	v12
30*4882a593Smuzhiyun	dg1s		.req	s13
31*4882a593Smuzhiyun	dg1v		.req	v13
32*4882a593Smuzhiyun	dg2s		.req	s14
33*4882a593Smuzhiyun
34*4882a593Smuzhiyun	.macro		add_only, op, ev, rc, s0, dg1
	/*
	 * Emit one sha1h + sha1<op> pair that advances the working digest dg0,
	 * plus the vector add that prepares the input of the next invocation.
	 *
	 *   op  - suffix appended to "sha1" to select the instruction
	 *         (the callers in this file pass c, p or m)
	 *   ev  - the literal "ev" selects the first variant, anything else
	 *         (callers pass "od") selects the second
	 *   rc  - vector register holding the round constant to add
	 *   s0  - number N of the vN register whose words are added to rc;
	 *         required for "ev", may be left out for "od" (no add is emitted)
	 *   dg1 - optional, "ev" only: register to use as the second sha1<op>
	 *         operand instead of dg1s
	 *
	 * "ev": t1 = vN + rc; dg2s = sha1h(dg0s); sha1<op> reads dg1s (or dg1)
	 *       and t0.
	 * "od": t0 = vN + rc; dg1s = sha1h(dg0s); sha1<op> reads dg2s and t1.
	 *
	 * Each variant consumes the t and dg temporaries produced by the other
	 * one, which is why callers alternate ev and od.
	 */
35*4882a593Smuzhiyun	.ifc		\ev, ev
36*4882a593Smuzhiyun	add		t1.4s, v\s0\().4s, \rc\().4s
37*4882a593Smuzhiyun	sha1h		dg2s, dg0s
38*4882a593Smuzhiyun	.ifnb		\dg1
39*4882a593Smuzhiyun	sha1\op		dg0q, \dg1, t0.4s
40*4882a593Smuzhiyun	.else
41*4882a593Smuzhiyun	sha1\op		dg0q, dg1s, t0.4s
42*4882a593Smuzhiyun	.endif
43*4882a593Smuzhiyun	.else
44*4882a593Smuzhiyun	.ifnb		\s0
45*4882a593Smuzhiyun	add		t0.4s, v\s0\().4s, \rc\().4s
46*4882a593Smuzhiyun	.endif
47*4882a593Smuzhiyun	sha1h		dg1s, dg0s
48*4882a593Smuzhiyun	sha1\op		dg0q, dg2s, t1.4s
49*4882a593Smuzhiyun	.endif
50*4882a593Smuzhiyun	.endm
51*4882a593Smuzhiyun
52*4882a593Smuzhiyun	.macro		add_update, op, ev, rc, s0, s1, s2, s3, dg1
	/*
	 * add_only wrapped in a sha1su0/sha1su1 pair that rewrites v<s0>.
	 *
	 *   op, ev, rc, dg1 - handed to add_only unchanged
	 *   s0..s3          - vector register numbers:
	 *         v<s0> is the destination of both sha1su0 and sha1su1,
	 *         v<s1> and v<s2> are the sources of sha1su0,
	 *         v<s3> is the source of sha1su1,
	 *         v<s1> is also the register add_only adds to rc.
	 */
53*4882a593Smuzhiyun	sha1su0		v\s0\().4s, v\s1\().4s, v\s2\().4s
54*4882a593Smuzhiyun	add_only	\op, \ev, \rc, \s1, \dg1
55*4882a593Smuzhiyun	sha1su1		v\s0\().4s, v\s3\().4s
56*4882a593Smuzhiyun	.endm
57*4882a593Smuzhiyun
58*4882a593Smuzhiyun	.macro		loadrc, k, val, tmp
	/*
	 * Materialise the 32-bit constant val in general-purpose register tmp
	 * (bits 0-15 with movz, bits 16-31 with movk) and replicate it into the
	 * lanes of vector operand k with dup.
	 *
	 *   k   - destination vector operand, including its arrangement
	 *         (callers pass e.g. k0.4s)
	 *   val - constant to load
	 *   tmp - scratch W register, overwritten
	 */
59*4882a593Smuzhiyun	movz		\tmp, :abs_g0_nc:\val
60*4882a593Smuzhiyun	movk		\tmp, :abs_g1:\val
61*4882a593Smuzhiyun	dup		\k, \tmp
62*4882a593Smuzhiyun	.endm
63*4882a593Smuzhiyun
64*4882a593Smuzhiyun	/*
65*4882a593Smuzhiyun	 * int sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
66*4882a593Smuzhiyun	 *			 int blocks)
	 *
	 * Folds 64-byte blocks read from src into the hash state at sst. Once
	 * all blocks are consumed and sha1_ce_state::finalize is non-zero, one
	 * extra block holding the padding and the message bit count is built in
	 * registers and folded in as well.
	 *
	 * Registers as used below:
	 *   x0     sst; state is loaded from and stored back to [x0] (16 bytes)
	 *          and [x0, #16] (4 bytes)
	 *   x1     src, post-incremented by 64 for every block read
	 *   w2     blocks left; decremented per block and returned in w0
	 *   x4     finalize flag, later the count field, finally zero
	 *   x5-x8  scratch (w6 builds the round constants, x5/x6 go to
	 *          cond_yield, x7/x8 build the padding block)
	 *   v0-v14 scratch, see the register aliases at the top of the file
	 *
	 * Returns the value left in w2: zero when every block was consumed,
	 * non-zero only when cond_yield branched to the exit early.
	 *
	 * blocks must be at least 1: a block is loaded and processed before w2
	 * is tested for zero.
	 *
	 * NOTE(review): v8-v14 are overwritten without being saved here;
	 * presumably the C caller owns the NEON register state around this
	 * call - confirm against the glue code.
	 */
67*4882a593Smuzhiyun	 */
68*4882a593SmuzhiyunSYM_FUNC_START(sha1_ce_transform)
69*4882a593Smuzhiyun	/* load round constants */
	/* w6 is only a scratch register for building each constant */
70*4882a593Smuzhiyun	loadrc		k0.4s, 0x5a827999, w6
71*4882a593Smuzhiyun	loadrc		k1.4s, 0x6ed9eba1, w6
72*4882a593Smuzhiyun	loadrc		k2.4s, 0x8f1bbcdc, w6
73*4882a593Smuzhiyun	loadrc		k3.4s, 0xca62c1d6, w6
74*4882a593Smuzhiyun
75*4882a593Smuzhiyun	/* load state */
76*4882a593Smuzhiyun	ld1		{dgav.4s}, [x0]
77*4882a593Smuzhiyun	ldr		dgb, [x0, #16]
78*4882a593Smuzhiyun
79*4882a593Smuzhiyun	/* load sha1_ce_state::finalize */
	/*
	 * ldr_l is defined outside this file; after it w4 holds the value that
	 * the following ldr uses as the offset of the field within *sst.
	 */
80*4882a593Smuzhiyun	ldr_l		w4, sha1_ce_offsetof_finalize, x4
81*4882a593Smuzhiyun	ldr		w4, [x0, x4]
82*4882a593Smuzhiyun
83*4882a593Smuzhiyun	/* load input */
	/* 0: head of the per-block loop */
84*4882a593Smuzhiyun0:	ld1		{v8.4s-v11.4s}, [x1], #64
85*4882a593Smuzhiyun	sub		w2, w2, #1
86*4882a593Smuzhiyun
	/*
	 * rev32 reverses the bytes inside every 32-bit word of the block.
	 * CPU_LE() is defined outside this file; presumably it emits its
	 * argument on little-endian builds only - TODO confirm.
	 */
87*4882a593SmuzhiyunCPU_LE(	rev32		v8.16b, v8.16b		)
88*4882a593SmuzhiyunCPU_LE(	rev32		v9.16b, v9.16b		)
89*4882a593SmuzhiyunCPU_LE(	rev32		v10.16b, v10.16b	)
90*4882a593SmuzhiyunCPU_LE(	rev32		v11.16b, v11.16b	)
91*4882a593Smuzhiyun
	/*
	 * 1: also entered from the padding path further down, which builds
	 * v8-v11 directly and so bypasses the load and byte swap above.
	 * Pre-add k0 to the first four words for the first sha1c and take a
	 * working copy of the state.
	 */
92*4882a593Smuzhiyun1:	add		t0.4s, v8.4s, k0.4s
93*4882a593Smuzhiyun	mov		dg0v.16b, dgav.16b
94*4882a593Smuzhiyun
	/*
	 * 20 invocations, each issuing one sha1c/sha1p/sha1m on a vector of
	 * four words (5 x c, 5 x p, 5 x m, 5 x p). Every invocation also adds
	 * the round constant needed by the following one, which is why the k
	 * argument switches one line before the op does. The first invocation
	 * passes dgb explicitly because no sha1h has written dg1s yet, and the
	 * last one omits the register number because nothing follows it.
	 */
95*4882a593Smuzhiyun	add_update	c, ev, k0,  8,  9, 10, 11, dgb
96*4882a593Smuzhiyun	add_update	c, od, k0,  9, 10, 11,  8
97*4882a593Smuzhiyun	add_update	c, ev, k0, 10, 11,  8,  9
98*4882a593Smuzhiyun	add_update	c, od, k0, 11,  8,  9, 10
99*4882a593Smuzhiyun	add_update	c, ev, k1,  8,  9, 10, 11
100*4882a593Smuzhiyun
101*4882a593Smuzhiyun	add_update	p, od, k1,  9, 10, 11,  8
102*4882a593Smuzhiyun	add_update	p, ev, k1, 10, 11,  8,  9
103*4882a593Smuzhiyun	add_update	p, od, k1, 11,  8,  9, 10
104*4882a593Smuzhiyun	add_update	p, ev, k1,  8,  9, 10, 11
105*4882a593Smuzhiyun	add_update	p, od, k2,  9, 10, 11,  8
106*4882a593Smuzhiyun
107*4882a593Smuzhiyun	add_update	m, ev, k2, 10, 11,  8,  9
108*4882a593Smuzhiyun	add_update	m, od, k2, 11,  8,  9, 10
109*4882a593Smuzhiyun	add_update	m, ev, k2,  8,  9, 10, 11
110*4882a593Smuzhiyun	add_update	m, od, k2,  9, 10, 11,  8
111*4882a593Smuzhiyun	add_update	m, ev, k3, 10, 11,  8,  9
112*4882a593Smuzhiyun
113*4882a593Smuzhiyun	add_update	p, od, k3, 11,  8,  9, 10
114*4882a593Smuzhiyun	add_only	p, ev, k3,  9
115*4882a593Smuzhiyun	add_only	p, od, k3, 10
116*4882a593Smuzhiyun	add_only	p, ev, k3, 11
117*4882a593Smuzhiyun	add_only	p, od
118*4882a593Smuzhiyun
119*4882a593Smuzhiyun	/* update state */
	/*
	 * The first add works on two lanes, but only lane 0 (s7) is ever
	 * written back to memory by "str dgb".
	 */
120*4882a593Smuzhiyun	add		dgbv.2s, dgbv.2s, dg1v.2s
121*4882a593Smuzhiyun	add		dgav.4s, dgav.4s, dg0v.4s
122*4882a593Smuzhiyun
	/*
	 * No blocks left: go and check whether a padding block is wanted.
	 * Otherwise cond_yield (defined outside this file) may branch to 3f,
	 * leaving with w2 != 0 - presumably when the CPU should be given up,
	 * TODO confirm against its definition. If it falls through, loop.
	 */
123*4882a593Smuzhiyun	cbz		w2, 2f
124*4882a593Smuzhiyun	cond_yield	3f, x5, x6
125*4882a593Smuzhiyun	b		0b
126*4882a593Smuzhiyun
127*4882a593Smuzhiyun	/*
128*4882a593Smuzhiyun	 * Final block: add padding and total bit count.
129*4882a593Smuzhiyun	 * Skip if the input size was not a round multiple of the block size,
130*4882a593Smuzhiyun	 * the padding is handled by the C code in that case.
131*4882a593Smuzhiyun	 */
	/* x4 still holds the finalize flag on the first pass through here */
132*4882a593Smuzhiyun2:	cbz		x4, 3f
	/* fetch the 64-bit count field, using the same offset lookup as above */
133*4882a593Smuzhiyun	ldr_l		w4, sha1_ce_offsetof_count, x4
134*4882a593Smuzhiyun	ldr		x4, [x0, x4]
	/*
	 * Build the padding block in v8-v11, directly in word form:
	 *   v8  = { 0x80000000, 0, 0, 0 }  (fmov to d8 also clears the top half)
	 *   v9  = v10 = 0
	 *   v11 = { 0, 0, hi32(count << 3), lo32(count << 3) }
	 * x4 is then zeroed so that the next pass through label 2 exits via 3f
	 * once this extra block has gone through the rounds at label 1.
	 */
135*4882a593Smuzhiyun	movi		v9.2d, #0
136*4882a593Smuzhiyun	mov		x8, #0x80000000
137*4882a593Smuzhiyun	movi		v10.2d, #0
138*4882a593Smuzhiyun	ror		x7, x4, #29		// ror(lsl(x4, 3), 32)
139*4882a593Smuzhiyun	fmov		d8, x8
140*4882a593Smuzhiyun	mov		x4, #0
141*4882a593Smuzhiyun	mov		v11.d[0], xzr
142*4882a593Smuzhiyun	mov		v11.d[1], x7
143*4882a593Smuzhiyun	b		1b
144*4882a593Smuzhiyun
145*4882a593Smuzhiyun	/* store new state */
146*4882a593Smuzhiyun3:	st1		{dgav.4s}, [x0]
147*4882a593Smuzhiyun	str		dgb, [x0, #16]
	/* return the number of blocks that were not processed */
148*4882a593Smuzhiyun	mov		w0, w2
149*4882a593Smuzhiyun	ret
150*4882a593SmuzhiyunSYM_FUNC_END(sha1_ce_transform)
151