xref: /OK3568_Linux_fs/kernel/drivers/net/wireless/rockchip_wlan/ssv6xxx/crypto/aes-armv4.S (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1#define __ARM_ARCH__ __LINUX_ARM_ARCH__
2@ ====================================================================
3@ Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
4@ project. The module is, however, dual licensed under OpenSSL and
5@ CRYPTOGAMS licenses depending on where you obtain it. For further
6@ details see http://www.openssl.org/~appro/cryptogams/.
7@ ====================================================================
8
9@ AES for ARMv4
10
11@ January 2007.
12@
13@ Code uses single 1K S-box and is >2 times faster than code generated
14@ by gcc-3.4.1. This is thanks to unique feature of ARMv4 ISA, which
15@ allows to merge logical or arithmetic operation with shift or rotate
16@ in one instruction and emit combined result every cycle. The module
17@ is endian-neutral. The performance is ~42 cycles/byte for 128-bit
18@ key [on single-issue Xscale PXA250 core].
19
20@ May 2007.
21@
22@ AES_set_[en|de]crypt_key is added.
23
24@ July 2010.
25@
26@ Rescheduling for dual-issue pipeline resulted in 12% improvement on
27@ Cortex A8 core and ~25 cycles per byte processed with 128-bit key.
28
29@ February 2011.
30@
31@ Profiler-assisted and platform-specific optimization resulted in 16%
32@ improvement on Cortex A8 core and ~21.5 cycles per byte.
33
34@ A little glue here to select the correct code below for the ARM CPU
35@ that is being targeted.
36
37.text
38.code	32
39
@ AES_Te: lookup data for the encryption and key-setup routines in this
@ file.  No section switch precedes it, so it is emitted in .text, and
@ the routines locate it pc-relative from their own entry labels.
@ Layout, as byte offsets from the AES_Te label:
@      0 .. 1023   256 32-bit words, indexed by a byte value
@   1024 .. 1279   Te4: 256 single bytes
@   1280 .. 1343   rcon: 10 round constants followed by 6 zero words
@ _armv4_AES_encrypt reads only the word table and applies ror 8/16/24
@ to the loaded words where its comments name Te1..Te3.  The key setup
@ addresses Te4 as AES_Te+1024 and rcon as Te4+256.
40.type	AES_Te,%object
41.align	5
42AES_Te:
43.word	0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d
44.word	0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554
45.word	0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d
46.word	0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a
47.word	0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87
48.word	0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b
49.word	0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea
50.word	0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b
51.word	0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a
52.word	0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f
53.word	0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108
54.word	0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f
55.word	0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e
56.word	0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5
57.word	0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d
58.word	0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f
59.word	0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e
60.word	0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb
61.word	0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce
62.word	0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497
63.word	0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c
64.word	0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed
65.word	0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b
66.word	0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a
67.word	0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16
68.word	0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594
69.word	0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81
70.word	0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3
71.word	0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a
72.word	0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504
73.word	0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163
74.word	0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d
75.word	0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f
76.word	0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739
77.word	0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47
78.word	0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395
79.word	0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f
80.word	0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883
81.word	0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c
82.word	0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76
83.word	0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e
84.word	0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4
85.word	0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6
86.word	0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b
87.word	0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7
88.word	0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0
89.word	0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25
90.word	0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818
91.word	0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72
92.word	0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651
93.word	0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21
94.word	0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85
95.word	0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa
96.word	0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12
97.word	0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0
98.word	0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9
99.word	0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133
100.word	0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7
101.word	0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920
102.word	0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a
103.word	0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17
104.word	0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8
105.word	0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11
106.word	0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a
107@ Te4[256]
108.byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
109.byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
110.byte	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
111.byte	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
112.byte	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
113.byte	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
114.byte	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
115.byte	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
116.byte	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
117.byte	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
118.byte	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
119.byte	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
120.byte	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
121.byte	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
122.byte	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
123.byte	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
124.byte	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
125.byte	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
126.byte	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
127.byte	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
128.byte	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
129.byte	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
130.byte	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
131.byte	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
132.byte	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
133.byte	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
134.byte	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
135.byte	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
136.byte	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
137.byte	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
138.byte	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
139.byte	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
140@ rcon[]
141.word	0x01000000, 0x02000000, 0x04000000, 0x08000000
142.word	0x10000000, 0x20000000, 0x40000000, 0x80000000
143.word	0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
144.size	AES_Te,.-AES_Te
145
146@ void AES_encrypt(const unsigned char *in, unsigned char *out,
147@ 		 const AES_KEY *key) {
@
@ Encrypts one 16-byte block.  Register use in this wrapper:
@   entry   r0 = in, r1 = out, r2 = key
@   r12     in while the block is loaded, out while it is stored
@   r11     key, handed to _armv4_AES_encrypt
@   r10     address of AES_Te, handed to _armv4_AES_encrypt
@   r0-r3   the block as four words, in[0] in the top byte of r0
@ r1 is pushed together with r4-r12 and lr so that the output pointer
@ can be popped on its own after the round code returns.
148.global AES_encrypt
149.type   AES_encrypt,%function
150.align	5
151AES_encrypt:
	@ In ARM state pc reads as this instruction + 8, so r3 = AES_encrypt.
152	sub	r3,pc,#8		@ AES_encrypt
153	stmdb   sp!,{r1,r4-r12,lr}
154	mov	r12,r0		@ inp
155	mov	r11,r2
	@ Label difference is an assemble-time constant: r10 = AES_Te.
156	sub	r10,r3,#AES_encrypt-AES_Te	@ Te
157#if __ARM_ARCH__<7
	@ Before v7: build each word from four byte loads.
158	ldrb	r0,[r12,#3]	@ load input data in endian-neutral
159	ldrb	r4,[r12,#2]	@ manner...
160	ldrb	r5,[r12,#1]
161	ldrb	r6,[r12,#0]
162	orr	r0,r0,r4,lsl#8
163	ldrb	r1,[r12,#7]
164	orr	r0,r0,r5,lsl#16
165	ldrb	r4,[r12,#6]
166	orr	r0,r0,r6,lsl#24
167	ldrb	r5,[r12,#5]
168	ldrb	r6,[r12,#4]
169	orr	r1,r1,r4,lsl#8
170	ldrb	r2,[r12,#11]
171	orr	r1,r1,r5,lsl#16
172	ldrb	r4,[r12,#10]
173	orr	r1,r1,r6,lsl#24
174	ldrb	r5,[r12,#9]
175	ldrb	r6,[r12,#8]
176	orr	r2,r2,r4,lsl#8
177	ldrb	r3,[r12,#15]
178	orr	r2,r2,r5,lsl#16
179	ldrb	r4,[r12,#14]
180	orr	r2,r2,r6,lsl#24
181	ldrb	r5,[r12,#13]
182	ldrb	r6,[r12,#12]
183	orr	r3,r3,r4,lsl#8
184	orr	r3,r3,r5,lsl#16
185	orr	r3,r3,r6,lsl#24
186#else
	@ v7 and later: word loads, byte-reversed on little-endian builds so
	@ the register layout matches the byte-load path above.
187	ldr	r0,[r12,#0]
188	ldr	r1,[r12,#4]
189	ldr	r2,[r12,#8]
190	ldr	r3,[r12,#12]
191#ifdef __ARMEL__
192	rev	r0,r0
193	rev	r1,r1
194	rev	r2,r2
195	rev	r3,r3
196#endif
197#endif
198	bl	_armv4_AES_encrypt
199
	@ Top of stack is the saved r1, i.e. the out pointer.
200	ldr	r12,[sp],#4		@ pop out
201#if __ARM_ARCH__>=7
202#ifdef __ARMEL__
203	rev	r0,r0
204	rev	r1,r1
205	rev	r2,r2
206	rev	r3,r3
207#endif
208	str	r0,[r12,#0]
209	str	r1,[r12,#4]
210	str	r2,[r12,#8]
211	str	r3,[r12,#12]
212#else
213	mov	r4,r0,lsr#24		@ write output in endian-neutral
214	mov	r5,r0,lsr#16		@ manner...
215	mov	r6,r0,lsr#8
216	strb	r4,[r12,#0]
217	strb	r5,[r12,#1]
218	mov	r4,r1,lsr#24
219	strb	r6,[r12,#2]
220	mov	r5,r1,lsr#16
221	strb	r0,[r12,#3]
222	mov	r6,r1,lsr#8
223	strb	r4,[r12,#4]
224	strb	r5,[r12,#5]
225	mov	r4,r2,lsr#24
226	strb	r6,[r12,#6]
227	mov	r5,r2,lsr#16
228	strb	r1,[r12,#7]
229	mov	r6,r2,lsr#8
230	strb	r4,[r12,#8]
231	strb	r5,[r12,#9]
232	mov	r4,r3,lsr#24
233	strb	r6,[r12,#10]
234	mov	r5,r3,lsr#16
235	strb	r2,[r12,#11]
236	mov	r6,r3,lsr#8
237	strb	r4,[r12,#12]
238	strb	r5,[r12,#13]
239	strb	r6,[r12,#14]
240	strb	r3,[r12,#15]
241#endif
242#if __ARM_ARCH__>=5
	@ Restore callee-saved registers and return by loading pc.
243	ldmia	sp!,{r4-r12,pc}
244#else
	@ v4 return: plain mov to pc when bit 0 of lr is clear; otherwise fall
	@ into the literal word, which is the ARM encoding of bx lr.
245	ldmia   sp!,{r4-r12,lr}
246	tst	lr,#1
247	moveq	pc,lr			@ be binary compatible with V4, yet
248	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
249#endif
250.size	AES_encrypt,.-AES_encrypt
251
@ _armv4_AES_encrypt: round code called from AES_encrypt.
@ In:    r0-r3 = block as four words, r10 = AES_Te, r11 = key schedule
@ Out:   r0-r3 = encrypted block in the same word layout
@ Uses:  r4-r9 and r12 as scratch; lr is pushed first and then reused as
@        the 0xff byte mask.  r10 is restored before returning; r11 is
@        left pointing at the last round key.
@ The round count is read from byte offset 240 of the key structure.
@ The table loop runs (rounds - 1) times; the last round is done
@ separately with byte loads.
252.type   _armv4_AES_encrypt,%function
253.align	2
254_armv4_AES_encrypt:
255	str	lr,[sp,#-4]!		@ push lr
	@ XOR the first round key into the state.  r11 has already moved 16
	@ bytes on when the round count is fetched, hence 240-16.
256	ldmia	r11!,{r4-r7}
257	eor	r0,r0,r4
258	ldr	r12,[r11,#240-16]
259	eor	r1,r1,r5
260	eor	r2,r2,r6
261	eor	r3,r3,r7
262	sub	r12,r12,#1
263	mov	lr,#255
264
	@ Split s0 into table indices; r0 keeps its top byte.
265	and	r7,lr,r0
266	and	r8,lr,r0,lsr#8
267	and	r9,lr,r0,lsr#16
268	mov	r0,r0,lsr#24
	@ Loop invariant on entry: r7/r8/r9/r0 hold the four byte indices of
	@ s0, r1-r3 hold s1-s3, r12 = table rounds still to run.
269.Lenc_loop:
270	ldr	r4,[r10,r7,lsl#2]	@ Te3[s0>>0]
271	and	r7,lr,r1,lsr#16	@ i0
272	ldr	r5,[r10,r8,lsl#2]	@ Te2[s0>>8]
273	and	r8,lr,r1
274	ldr	r6,[r10,r9,lsl#2]	@ Te1[s0>>16]
275	and	r9,lr,r1,lsr#8
276	ldr	r0,[r10,r0,lsl#2]	@ Te0[s0>>24]
277	mov	r1,r1,lsr#24
278
279	ldr	r7,[r10,r7,lsl#2]	@ Te1[s1>>16]
280	ldr	r8,[r10,r8,lsl#2]	@ Te3[s1>>0]
281	ldr	r9,[r10,r9,lsl#2]	@ Te2[s1>>8]
282	eor	r0,r0,r7,ror#8
283	ldr	r1,[r10,r1,lsl#2]	@ Te0[s1>>24]
284	and	r7,lr,r2,lsr#8	@ i0
285	eor	r5,r5,r8,ror#8
286	and	r8,lr,r2,lsr#16	@ i1
287	eor	r6,r6,r9,ror#8
288	and	r9,lr,r2
289	ldr	r7,[r10,r7,lsl#2]	@ Te2[s2>>8]
290	eor	r1,r1,r4,ror#24
291	ldr	r8,[r10,r8,lsl#2]	@ Te1[s2>>16]
292	mov	r2,r2,lsr#24
293
294	ldr	r9,[r10,r9,lsl#2]	@ Te3[s2>>0]
295	eor	r0,r0,r7,ror#16
296	ldr	r2,[r10,r2,lsl#2]	@ Te0[s2>>24]
297	and	r7,lr,r3		@ i0
298	eor	r1,r1,r8,ror#8
299	and	r8,lr,r3,lsr#8	@ i1
300	eor	r6,r6,r9,ror#16
301	and	r9,lr,r3,lsr#16	@ i2
302	ldr	r7,[r10,r7,lsl#2]	@ Te3[s3>>0]
303	eor	r2,r2,r5,ror#16
304	ldr	r8,[r10,r8,lsl#2]	@ Te2[s3>>8]
305	mov	r3,r3,lsr#24
306
	@ Round key words are fetched while the last table results are
	@ folded in; r11 steps to the next round key here.
307	ldr	r9,[r10,r9,lsl#2]	@ Te1[s3>>16]
308	eor	r0,r0,r7,ror#24
309	ldr	r7,[r11],#16
310	eor	r1,r1,r8,ror#16
311	ldr	r3,[r10,r3,lsl#2]	@ Te0[s3>>24]
312	eor	r2,r2,r9,ror#8
313	ldr	r4,[r11,#-12]
314	eor	r3,r3,r6,ror#8
315
	@ Add the round key and pre-split the new s0 for the next pass.
316	ldr	r5,[r11,#-8]
317	eor	r0,r0,r7
318	ldr	r6,[r11,#-4]
319	and	r7,lr,r0
320	eor	r1,r1,r4
321	and	r8,lr,r0,lsr#8
322	eor	r2,r2,r5
323	and	r9,lr,r0,lsr#16
324	eor	r3,r3,r6
325	mov	r0,r0,lsr#24
326
327	subs	r12,r12,#1
328	bne	.Lenc_loop
329
	@ Last round.  r10+2 with a word-scaled index and a byte load picks
	@ the byte at offset 2 of each table word.  Each word in AES_Te has
	@ two equal middle bytes, so the same value is read in either byte
	@ order.  The separate Te4 byte table is not used here.
330	add	r10,r10,#2
331
332	ldrb	r4,[r10,r7,lsl#2]	@ Te4[s0>>0]
333	and	r7,lr,r1,lsr#16	@ i0
334	ldrb	r5,[r10,r8,lsl#2]	@ Te4[s0>>8]
335	and	r8,lr,r1
336	ldrb	r6,[r10,r9,lsl#2]	@ Te4[s0>>16]
337	and	r9,lr,r1,lsr#8
338	ldrb	r0,[r10,r0,lsl#2]	@ Te4[s0>>24]
339	mov	r1,r1,lsr#24
340
341	ldrb	r7,[r10,r7,lsl#2]	@ Te4[s1>>16]
342	ldrb	r8,[r10,r8,lsl#2]	@ Te4[s1>>0]
343	ldrb	r9,[r10,r9,lsl#2]	@ Te4[s1>>8]
344	eor	r0,r7,r0,lsl#8
345	ldrb	r1,[r10,r1,lsl#2]	@ Te4[s1>>24]
346	and	r7,lr,r2,lsr#8	@ i0
347	eor	r5,r8,r5,lsl#8
348	and	r8,lr,r2,lsr#16	@ i1
349	eor	r6,r9,r6,lsl#8
350	and	r9,lr,r2
351	ldrb	r7,[r10,r7,lsl#2]	@ Te4[s2>>8]
352	eor	r1,r4,r1,lsl#24
353	ldrb	r8,[r10,r8,lsl#2]	@ Te4[s2>>16]
354	mov	r2,r2,lsr#24
355
356	ldrb	r9,[r10,r9,lsl#2]	@ Te4[s2>>0]
357	eor	r0,r7,r0,lsl#8
358	ldrb	r2,[r10,r2,lsl#2]	@ Te4[s2>>24]
359	and	r7,lr,r3		@ i0
360	eor	r1,r1,r8,lsl#16
361	and	r8,lr,r3,lsr#8	@ i1
362	eor	r6,r9,r6,lsl#8
363	and	r9,lr,r3,lsr#16	@ i2
364	ldrb	r7,[r10,r7,lsl#2]	@ Te4[s3>>0]
365	eor	r2,r5,r2,lsl#24
366	ldrb	r8,[r10,r8,lsl#2]	@ Te4[s3>>8]
367	mov	r3,r3,lsr#24
368
	@ Final round key is read without writeback, so r11 stays on it.
369	ldrb	r9,[r10,r9,lsl#2]	@ Te4[s3>>16]
370	eor	r0,r7,r0,lsl#8
371	ldr	r7,[r11,#0]
372	ldrb	r3,[r10,r3,lsl#2]	@ Te4[s3>>24]
373	eor	r1,r1,r8,lsl#8
374	ldr	r4,[r11,#4]
375	eor	r2,r2,r9,lsl#16
376	ldr	r5,[r11,#8]
377	eor	r3,r6,r3,lsl#24
378	ldr	r6,[r11,#12]
379
380	eor	r0,r0,r7
381	eor	r1,r1,r4
382	eor	r2,r2,r5
383	eor	r3,r3,r6
384
	@ Undo the +2 bias so r10 is AES_Te again for the caller.
385	sub	r10,r10,#2
386	ldr	pc,[sp],#4		@ pop and return
387.size	_armv4_AES_encrypt,.-_armv4_AES_encrypt
388
@ private_AES_set_encrypt_key / _armv4_AES_set_encrypt_key
@ In:    r0 = user key bytes, r1 = key length in bits,
@        r2 = key schedule to fill
@ Out:   r0 = -1 when r0 or r2 is zero, or when r1 is not 128, 192 or
@        256; otherwise the schedule is written and r0 = 0.
@ The round count (10, 12 or 14) is stored at byte offset 240 of the
@ schedule.  On the success path r2 is set back to the start of the
@ schedule before returning.
@ All argument checks run before anything is pushed, so the early
@ branches to .Labrt return through the unmodified lr.
@ NOTE(review): the C prototype is not visible in this file; argument
@ roles are taken from the register comments in the body.
389.global private_AES_set_encrypt_key
390.type   private_AES_set_encrypt_key,%function
391.align	5
392private_AES_set_encrypt_key:
393_armv4_AES_set_encrypt_key:
	@ In ARM state pc reads as this instruction + 8, so r3 = entry label.
394	sub	r3,pc,#8		@ AES_set_encrypt_key
395	teq	r0,#0
396	moveq	r0,#-1
397	beq	.Labrt
398	teq	r2,#0
399	moveq	r0,#-1
400	beq	.Labrt
401
402	teq	r1,#128
403	beq	.Lok
404	teq	r1,#192
405	beq	.Lok
406	teq	r1,#256
407	movne	r0,#-1
408	bne	.Labrt
409
410.Lok:	stmdb   sp!,{r4-r12,lr}
	@ r10 = AES_Te + 1024, the byte table that follows the 256 words.
411	sub	r10,r3,#_armv4_AES_set_encrypt_key-AES_Te-1024	@ Te4
412
413	mov	r12,r0		@ inp
414	mov	lr,r1			@ bits
415	mov	r11,r2			@ key
416
	@ First 16 key bytes become rk[0..3], first byte in the top bits of
	@ each word, and are stored as the start of the schedule.
417#if __ARM_ARCH__<7
418	ldrb	r0,[r12,#3]	@ load input data in endian-neutral
419	ldrb	r4,[r12,#2]	@ manner...
420	ldrb	r5,[r12,#1]
421	ldrb	r6,[r12,#0]
422	orr	r0,r0,r4,lsl#8
423	ldrb	r1,[r12,#7]
424	orr	r0,r0,r5,lsl#16
425	ldrb	r4,[r12,#6]
426	orr	r0,r0,r6,lsl#24
427	ldrb	r5,[r12,#5]
428	ldrb	r6,[r12,#4]
429	orr	r1,r1,r4,lsl#8
430	ldrb	r2,[r12,#11]
431	orr	r1,r1,r5,lsl#16
432	ldrb	r4,[r12,#10]
433	orr	r1,r1,r6,lsl#24
434	ldrb	r5,[r12,#9]
435	ldrb	r6,[r12,#8]
436	orr	r2,r2,r4,lsl#8
437	ldrb	r3,[r12,#15]
438	orr	r2,r2,r5,lsl#16
439	ldrb	r4,[r12,#14]
440	orr	r2,r2,r6,lsl#24
441	ldrb	r5,[r12,#13]
442	ldrb	r6,[r12,#12]
443	orr	r3,r3,r4,lsl#8
444	str	r0,[r11],#16
445	orr	r3,r3,r5,lsl#16
446	str	r1,[r11,#-12]
447	orr	r3,r3,r6,lsl#24
448	str	r2,[r11,#-8]
449	str	r3,[r11,#-4]
450#else
451	ldr	r0,[r12,#0]
452	ldr	r1,[r12,#4]
453	ldr	r2,[r12,#8]
454	ldr	r3,[r12,#12]
455#ifdef __ARMEL__
456	rev	r0,r0
457	rev	r1,r1
458	rev	r2,r2
459	rev	r3,r3
460#endif
461	str	r0,[r11],#16
462	str	r1,[r11,#-12]
463	str	r2,[r11,#-8]
464	str	r3,[r11,#-4]
465#endif
466
	@ 128-bit keys: r12 = 10 is both the round count stored at offset
	@ 240 (r11 is 16 bytes in, hence 240-16) and the loop counter.
467	teq	lr,#128
468	bne	.Lnot128
469	mov	r12,#10
470	str	r12,[r11,#240-16]
471	add	r6,r10,#256			@ rcon
472	mov	lr,#255
473
	@ Each pass substitutes the bytes of the previous last word through
	@ Te4, reassembles them rotated by one byte, adds rcon and chains
	@ four new words.
474.L128_loop:
475	and	r5,lr,r3,lsr#24
476	and	r7,lr,r3,lsr#16
477	ldrb	r5,[r10,r5]
478	and	r8,lr,r3,lsr#8
479	ldrb	r7,[r10,r7]
480	and	r9,lr,r3
481	ldrb	r8,[r10,r8]
482	orr	r5,r5,r7,lsl#24
483	ldrb	r9,[r10,r9]
484	orr	r5,r5,r8,lsl#16
485	ldr	r4,[r6],#4			@ rcon[i++]
486	orr	r5,r5,r9,lsl#8
487	eor	r5,r5,r4
488	eor	r0,r0,r5			@ rk[4]=rk[0]^...
489	eor	r1,r1,r0			@ rk[5]=rk[1]^rk[4]
490	str	r0,[r11],#16
491	eor	r2,r2,r1			@ rk[6]=rk[2]^rk[5]
492	str	r1,[r11,#-12]
493	eor	r3,r3,r2			@ rk[7]=rk[3]^rk[6]
494	str	r2,[r11,#-8]
495	subs	r12,r12,#1
496	str	r3,[r11,#-4]
497	bne	.L128_loop
	@ r11 has advanced 16 + 10*16 = 176 bytes; r2 = start of schedule.
498	sub	r2,r11,#176
499	b	.Ldone
500
	@ 192- and 256-bit keys: key bytes 16..23 become rk[4..5].
501.Lnot128:
502#if __ARM_ARCH__<7
503	ldrb	r8,[r12,#19]
504	ldrb	r4,[r12,#18]
505	ldrb	r5,[r12,#17]
506	ldrb	r6,[r12,#16]
507	orr	r8,r8,r4,lsl#8
508	ldrb	r9,[r12,#23]
509	orr	r8,r8,r5,lsl#16
510	ldrb	r4,[r12,#22]
511	orr	r8,r8,r6,lsl#24
512	ldrb	r5,[r12,#21]
513	ldrb	r6,[r12,#20]
514	orr	r9,r9,r4,lsl#8
515	orr	r9,r9,r5,lsl#16
516	str	r8,[r11],#8
517	orr	r9,r9,r6,lsl#24
518	str	r9,[r11,#-4]
519#else
520	ldr	r8,[r12,#16]
521	ldr	r9,[r12,#20]
522#ifdef __ARMEL__
523	rev	r8,r8
524	rev	r9,r9
525#endif
526	str	r8,[r11],#8
527	str	r9,[r11,#-4]
528#endif
529
	@ 192-bit keys: 12 is stored as the round count (r11 is 24 bytes
	@ in), then r12 is reloaded with 8 as the loop counter.
530	teq	lr,#192
531	bne	.Lnot192
532	mov	r12,#12
533	str	r12,[r11,#240-24]
534	add	r6,r10,#256			@ rcon
535	mov	lr,#255
536	mov	r12,#8
537
	@ r9 carries the last word of the previous group into each pass.
538.L192_loop:
539	and	r5,lr,r9,lsr#24
540	and	r7,lr,r9,lsr#16
541	ldrb	r5,[r10,r5]
542	and	r8,lr,r9,lsr#8
543	ldrb	r7,[r10,r7]
544	and	r9,lr,r9
545	ldrb	r8,[r10,r8]
546	orr	r5,r5,r7,lsl#24
547	ldrb	r9,[r10,r9]
548	orr	r5,r5,r8,lsl#16
549	ldr	r4,[r6],#4			@ rcon[i++]
550	orr	r5,r5,r9,lsl#8
551	eor	r9,r5,r4
552	eor	r0,r0,r9			@ rk[6]=rk[0]^...
553	eor	r1,r1,r0			@ rk[7]=rk[1]^rk[6]
554	str	r0,[r11],#24
555	eor	r2,r2,r1			@ rk[8]=rk[2]^rk[7]
556	str	r1,[r11,#-20]
557	eor	r3,r3,r2			@ rk[9]=rk[3]^rk[8]
558	str	r2,[r11,#-16]
559	subs	r12,r12,#1
560	str	r3,[r11,#-12]
	@ The eighth pass exits after four words; r11 is then 24 + 8*24 =
	@ 216 bytes in, so r2 = start of schedule.
561	subeq	r2,r11,#216
562	beq	.Ldone
563
	@ Remaining two words of the six-word group.
564	ldr	r7,[r11,#-32]
565	ldr	r8,[r11,#-28]
566	eor	r7,r7,r3			@ rk[10]=rk[4]^rk[9]
567	eor	r9,r8,r7			@ rk[11]=rk[5]^rk[10]
568	str	r7,[r11,#-8]
569	str	r9,[r11,#-4]
570	b	.L192_loop
571
	@ 256-bit keys: key bytes 24..31 become rk[6..7].
572.Lnot192:
573#if __ARM_ARCH__<7
574	ldrb	r8,[r12,#27]
575	ldrb	r4,[r12,#26]
576	ldrb	r5,[r12,#25]
577	ldrb	r6,[r12,#24]
578	orr	r8,r8,r4,lsl#8
579	ldrb	r9,[r12,#31]
580	orr	r8,r8,r5,lsl#16
581	ldrb	r4,[r12,#30]
582	orr	r8,r8,r6,lsl#24
583	ldrb	r5,[r12,#29]
584	ldrb	r6,[r12,#28]
585	orr	r9,r9,r4,lsl#8
586	orr	r9,r9,r5,lsl#16
587	str	r8,[r11],#8
588	orr	r9,r9,r6,lsl#24
589	str	r9,[r11,#-4]
590#else
591	ldr	r8,[r12,#24]
592	ldr	r9,[r12,#28]
593#ifdef __ARMEL__
594	rev	r8,r8
595	rev	r9,r9
596#endif
597	str	r8,[r11],#8
598	str	r9,[r11,#-4]
599#endif
600
	@ 14 is stored as the round count (r11 is 32 bytes in), then r12 is
	@ reloaded with 7 as the loop counter.
601	mov	r12,#14
602	str	r12,[r11,#240-32]
603	add	r6,r10,#256			@ rcon
604	mov	lr,#255
605	mov	r12,#7
606
607.L256_loop:
608	and	r5,lr,r9,lsr#24
609	and	r7,lr,r9,lsr#16
610	ldrb	r5,[r10,r5]
611	and	r8,lr,r9,lsr#8
612	ldrb	r7,[r10,r7]
613	and	r9,lr,r9
614	ldrb	r8,[r10,r8]
615	orr	r5,r5,r7,lsl#24
616	ldrb	r9,[r10,r9]
617	orr	r5,r5,r8,lsl#16
618	ldr	r4,[r6],#4			@ rcon[i++]
619	orr	r5,r5,r9,lsl#8
620	eor	r9,r5,r4
621	eor	r0,r0,r9			@ rk[8]=rk[0]^...
622	eor	r1,r1,r0			@ rk[9]=rk[1]^rk[8]
623	str	r0,[r11],#32
624	eor	r2,r2,r1			@ rk[10]=rk[2]^rk[9]
625	str	r1,[r11,#-28]
626	eor	r3,r3,r2			@ rk[11]=rk[3]^rk[10]
627	str	r2,[r11,#-24]
628	subs	r12,r12,#1
629	str	r3,[r11,#-20]
	@ The seventh pass exits after four words; r11 is then 32 + 7*32 =
	@ 256 bytes in, so r2 = start of schedule.
630	subeq	r2,r11,#256
631	beq	.Ldone
632
	@ Second half of the eight-word group: the bytes of r3 go through
	@ Te4 and are put back in their original positions, with no byte
	@ rotation and no rcon.
633	and	r5,lr,r3
634	and	r7,lr,r3,lsr#8
635	ldrb	r5,[r10,r5]
636	and	r8,lr,r3,lsr#16
637	ldrb	r7,[r10,r7]
638	and	r9,lr,r3,lsr#24
639	ldrb	r8,[r10,r8]
640	orr	r5,r5,r7,lsl#8
641	ldrb	r9,[r10,r9]
642	orr	r5,r5,r8,lsl#16
643	ldr	r4,[r11,#-48]
644	orr	r5,r5,r9,lsl#24
645
646	ldr	r7,[r11,#-44]
647	ldr	r8,[r11,#-40]
648	eor	r4,r4,r5			@ rk[12]=rk[4]^...
649	ldr	r9,[r11,#-36]
650	eor	r7,r7,r4			@ rk[13]=rk[5]^rk[12]
651	str	r4,[r11,#-16]
652	eor	r8,r8,r7			@ rk[14]=rk[6]^rk[13]
653	str	r7,[r11,#-12]
654	eor	r9,r9,r8			@ rk[15]=rk[7]^rk[14]
655	str	r8,[r11,#-8]
656	str	r9,[r11,#-4]
657	b	.L256_loop
658
	@ Success path pops the saved registers, then shares the return
	@ sequence with the error path.  .Labrt is also branched to from
	@ private_AES_set_decrypt_key.
659.Ldone:	mov	r0,#0
660	ldmia   sp!,{r4-r12,lr}
	@ Plain mov to pc when bit 0 of lr is clear; otherwise fall into the
	@ literal word, which is the ARM encoding of bx lr.
661.Labrt:	tst	lr,#1
662	moveq	pc,lr			@ be binary compatible with V4, yet
663	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
664.size	private_AES_set_encrypt_key,.-private_AES_set_encrypt_key
665
@ private_AES_set_decrypt_key
@ In:    r2 = key schedule that already holds an expanded encryption
@        key with its round count at byte offset 240.  With the call
@        below compiled out, r0 and r1 are not read.
@ Out:   r0 = 0; the schedule is converted in place.
@ Steps: (1) .Linv swaps the 16-byte round keys end for end;
@        (2) .Lmix rewrites every word of round keys 1 .. rounds-1.
@ NOTE(review): .Linv stops only when its two pointers become equal,
@ which needs an even round count (10, 12 and 14 are the values the
@ encrypt-key routine stores).  Any other value at offset 240 would
@ walk past the schedule - confirm callers always expand the key first.
666.global private_AES_set_decrypt_key
667.type   private_AES_set_decrypt_key,%function
668.align	5
669private_AES_set_decrypt_key:
670	str	lr,[sp,#-4]!            @ push lr
671#if 0
672	@ kernel does both of these in setkey so optimise this bit out by
673	@ expecting the key to already have the enc_key work done (see aes_glue.c)
674	bl	_armv4_AES_set_encrypt_key
675#else
676	mov	r0,#0
677#endif
	@ With r0 forced to 0 above, the two conditional instructions that
	@ follow never execute in this configuration.
678	teq	r0,#0
679	ldrne	lr,[sp],#4              @ pop lr
680	bne	.Labrt
681
682	stmdb   sp!,{r4-r12}
683
684	ldr	r12,[r2,#240]	@ AES_set_encrypt_key preserves r2,
685	mov	r11,r2			@ which is AES_KEY *key
	@ r7 = first round key, r8 = last round key (key + 16*rounds).
686	mov	r7,r2
687	add	r8,r2,r12,lsl#4
688
	@ Swap the round keys at r7 and r8, then move both pointers inward.
	@ r8 is post-decremented by the first store, so the next three
	@ stores add 16 back to their offsets.
689.Linv:	ldr	r0,[r7]
690	ldr	r1,[r7,#4]
691	ldr	r2,[r7,#8]
692	ldr	r3,[r7,#12]
693	ldr	r4,[r8]
694	ldr	r5,[r8,#4]
695	ldr	r6,[r8,#8]
696	ldr	r9,[r8,#12]
697	str	r0,[r8],#-16
698	str	r1,[r8,#16+4]
699	str	r2,[r8,#16+8]
700	str	r3,[r8,#16+12]
701	str	r4,[r7],#16
702	str	r5,[r7,#-12]
703	str	r6,[r7,#-8]
704	str	r9,[r7,#-4]
705	teq	r7,r8
706	bne	.Linv
	@ r11 = key + 16 (second round key); r0 = its first word.
	@ Byte-lane masks: r7 = 0x80808080, r8 = 0x1b1b1b1b, r9 = 0x7f7f7f7f.
707	ldr	r0,[r11,#16]!		@ prefetch tp1
708	mov	r7,#0x80
709	mov	r8,#0x1b
710	orr	r7,r7,#0x8000
711	orr	r8,r8,#0x1b00
712	orr	r7,r7,r7,lsl#16
713	orr	r8,r8,r8,lsl#16
714	sub	r12,r12,#1
715	mvn	r9,r7
716	mov	r12,r12,lsl#2	@ (rounds-1)*4
717
	@ One word per pass.  Each three-instruction group doubles all four
	@ bytes of a word at once: the top bit of every byte is turned into
	@ a 0x1b mask for that byte (0x80 - 0x01 = 0x7f, ANDed with 0x1b) and
	@ XORed into the other seven bits shifted left by one.
718.Lmix:	and	r4,r0,r7
719	and	r1,r0,r9
720	sub	r4,r4,r4,lsr#7
721	and	r4,r4,r8
722	eor	r1,r4,r1,lsl#1	@ tp2
723
724	and	r4,r1,r7
725	and	r2,r1,r9
726	sub	r4,r4,r4,lsr#7
727	and	r4,r4,r8
728	eor	r2,r4,r2,lsl#1	@ tp4
729
730	and	r4,r2,r7
731	and	r3,r2,r9
732	sub	r4,r4,r4,lsr#7
733	and	r4,r4,r8
734	eor	r3,r4,r3,lsl#1	@ tp8
735
	@ tpe = tp2^tp4^tp8, tp9 = tp1^tp8, tpb = tp9^tp2, tpd = tp9^tp4.
	@ tpb and tpd are never formed on their own: their two parts are
	@ rotated and XORed in separately.
736	eor	r4,r1,r2
737	eor	r5,r0,r3		@ tp9
738	eor	r4,r4,r3		@ tpe
739	eor	r4,r4,r1,ror#24
740	eor	r4,r4,r5,ror#24	@ ^= ROTATE(tpb=tp9^tp2,8)
741	eor	r4,r4,r2,ror#16
742	eor	r4,r4,r5,ror#16	@ ^= ROTATE(tpd=tp9^tp4,16)
743	eor	r4,r4,r5,ror#8	@ ^= ROTATE(tp9,24)
744
	@ Fetch the next input word before the result overwrites this one.
745	ldr	r0,[r11,#4]		@ prefetch tp1
746	str	r4,[r11],#4
747	subs	r12,r12,#1
748	bne	.Lmix
749
750	mov	r0,#0
751#if __ARM_ARCH__>=5
	@ Pops r4-r12 and the lr pushed at entry (into pc) in one go.
752	ldmia	sp!,{r4-r12,pc}
753#else
754	ldmia   sp!,{r4-r12,lr}
755	tst	lr,#1
756	moveq	pc,lr			@ be binary compatible with V4, yet
757	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
758#endif
759.size	private_AES_set_decrypt_key,.-private_AES_set_decrypt_key
760
@ AES_Td: lookup data for the decryption routines in this file.
@ Layout, as byte offsets from the AES_Td label:
@      0 .. 1023   256 32-bit words, indexed by a byte value
@   1024 .. 1279   Td4: 256 single bytes
@ _armv4_AES_decrypt reads the word table in its main loop, applying
@ ror 8/16/24 where its comments name Td1..Td3, and switches to Td4
@ (AES_Td+1024) for the last round.
761.type	AES_Td,%object
762.align	5
763AES_Td:
764.word	0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96
765.word	0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393
766.word	0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25
767.word	0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f
768.word	0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1
769.word	0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6
770.word	0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da
771.word	0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844
772.word	0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd
773.word	0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4
774.word	0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45
775.word	0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94
776.word	0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7
777.word	0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a
778.word	0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5
779.word	0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c
780.word	0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1
781.word	0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a
782.word	0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75
783.word	0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051
784.word	0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46
785.word	0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff
786.word	0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77
787.word	0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb
788.word	0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000
789.word	0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e
790.word	0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927
791.word	0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a
792.word	0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e
793.word	0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16
794.word	0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d
795.word	0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8
796.word	0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd
797.word	0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34
798.word	0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163
799.word	0xd731dcca, 0x42638510, 0x13972240, 0x84c61120
800.word	0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d
801.word	0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0
802.word	0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422
803.word	0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef
804.word	0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36
805.word	0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4
806.word	0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662
807.word	0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5
808.word	0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3
809.word	0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b
810.word	0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8
811.word	0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6
812.word	0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6
813.word	0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0
814.word	0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815
815.word	0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f
816.word	0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df
817.word	0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f
818.word	0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e
819.word	0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713
820.word	0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89
821.word	0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c
822.word	0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf
823.word	0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86
824.word	0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f
825.word	0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541
826.word	0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190
827.word	0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742
828@ Td4[256]
829.byte	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
830.byte	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
831.byte	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
832.byte	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
833.byte	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
834.byte	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
835.byte	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
836.byte	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
837.byte	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
838.byte	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
839.byte	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
840.byte	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
841.byte	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
842.byte	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
843.byte	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
844.byte	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
845.byte	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
846.byte	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
847.byte	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
848.byte	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
849.byte	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
850.byte	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
851.byte	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
852.byte	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
853.byte	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
854.byte	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
855.byte	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
856.byte	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
857.byte	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
858.byte	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
859.byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
860.byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
861.size	AES_Td,.-AES_Td
862
863@ void AES_decrypt(const unsigned char *in, unsigned char *out,
864@ 		 const AES_KEY *key) {
@
@ Decrypts one 16-byte block.  Register use in this wrapper:
@   entry   r0 = in, r1 = out, r2 = key
@   r12     in while the block is loaded, out while it is stored
@   r11     key, handed to _armv4_AES_decrypt
@   r10     address of AES_Td, handed to _armv4_AES_decrypt
@   r0-r3   the block as four words, in[0] in the top byte of r0
@ r1 is pushed together with r4-r12 and lr so that the output pointer
@ can be popped on its own after the round code returns.
865.global AES_decrypt
866.type   AES_decrypt,%function
867.align	5
868AES_decrypt:
	@ In ARM state pc reads as this instruction + 8, so r3 = AES_decrypt.
869	sub	r3,pc,#8		@ AES_decrypt
870	stmdb   sp!,{r1,r4-r12,lr}
871	mov	r12,r0		@ inp
872	mov	r11,r2
	@ Label difference is an assemble-time constant: r10 = AES_Td.
873	sub	r10,r3,#AES_decrypt-AES_Td		@ Td
874#if __ARM_ARCH__<7
	@ Before v7: build each word from four byte loads.
875	ldrb	r0,[r12,#3]	@ load input data in endian-neutral
876	ldrb	r4,[r12,#2]	@ manner...
877	ldrb	r5,[r12,#1]
878	ldrb	r6,[r12,#0]
879	orr	r0,r0,r4,lsl#8
880	ldrb	r1,[r12,#7]
881	orr	r0,r0,r5,lsl#16
882	ldrb	r4,[r12,#6]
883	orr	r0,r0,r6,lsl#24
884	ldrb	r5,[r12,#5]
885	ldrb	r6,[r12,#4]
886	orr	r1,r1,r4,lsl#8
887	ldrb	r2,[r12,#11]
888	orr	r1,r1,r5,lsl#16
889	ldrb	r4,[r12,#10]
890	orr	r1,r1,r6,lsl#24
891	ldrb	r5,[r12,#9]
892	ldrb	r6,[r12,#8]
893	orr	r2,r2,r4,lsl#8
894	ldrb	r3,[r12,#15]
895	orr	r2,r2,r5,lsl#16
896	ldrb	r4,[r12,#14]
897	orr	r2,r2,r6,lsl#24
898	ldrb	r5,[r12,#13]
899	ldrb	r6,[r12,#12]
900	orr	r3,r3,r4,lsl#8
901	orr	r3,r3,r5,lsl#16
902	orr	r3,r3,r6,lsl#24
903#else
	@ v7 and later: word loads, byte-reversed on little-endian builds so
	@ the register layout matches the byte-load path above.
904	ldr	r0,[r12,#0]
905	ldr	r1,[r12,#4]
906	ldr	r2,[r12,#8]
907	ldr	r3,[r12,#12]
908#ifdef __ARMEL__
909	rev	r0,r0
910	rev	r1,r1
911	rev	r2,r2
912	rev	r3,r3
913#endif
914#endif
915	bl	_armv4_AES_decrypt
916
	@ Top of stack is the saved r1, i.e. the out pointer.
917	ldr	r12,[sp],#4		@ pop out
918#if __ARM_ARCH__>=7
919#ifdef __ARMEL__
920	rev	r0,r0
921	rev	r1,r1
922	rev	r2,r2
923	rev	r3,r3
924#endif
925	str	r0,[r12,#0]
926	str	r1,[r12,#4]
927	str	r2,[r12,#8]
928	str	r3,[r12,#12]
929#else
930	mov	r4,r0,lsr#24		@ write output in endian-neutral
931	mov	r5,r0,lsr#16		@ manner...
932	mov	r6,r0,lsr#8
933	strb	r4,[r12,#0]
934	strb	r5,[r12,#1]
935	mov	r4,r1,lsr#24
936	strb	r6,[r12,#2]
937	mov	r5,r1,lsr#16
938	strb	r0,[r12,#3]
939	mov	r6,r1,lsr#8
940	strb	r4,[r12,#4]
941	strb	r5,[r12,#5]
942	mov	r4,r2,lsr#24
943	strb	r6,[r12,#6]
944	mov	r5,r2,lsr#16
945	strb	r1,[r12,#7]
946	mov	r6,r2,lsr#8
947	strb	r4,[r12,#8]
948	strb	r5,[r12,#9]
949	mov	r4,r3,lsr#24
950	strb	r6,[r12,#10]
951	mov	r5,r3,lsr#16
952	strb	r2,[r12,#11]
953	mov	r6,r3,lsr#8
954	strb	r4,[r12,#12]
955	strb	r5,[r12,#13]
956	strb	r6,[r12,#14]
957	strb	r3,[r12,#15]
958#endif
959#if __ARM_ARCH__>=5
	@ Restore callee-saved registers and return by loading pc.
960	ldmia	sp!,{r4-r12,pc}
961#else
	@ v4 return: plain mov to pc when bit 0 of lr is clear; otherwise fall
	@ into the literal word, which is the ARM encoding of bx lr.
962	ldmia   sp!,{r4-r12,lr}
963	tst	lr,#1
964	moveq	pc,lr			@ be binary compatible with V4, yet
965	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
966#endif
967.size	AES_decrypt,.-AES_decrypt
968
@ _armv4_AES_decrypt: round code called from AES_decrypt.
@ In:    r0-r3 = block as four words, r10 = AES_Td, r11 = key schedule
@ Out:   r0-r3 = decrypted block in the same word layout
@ Uses:  r4-r9 and r12 as scratch; lr is pushed first and then reused as
@        the 0xff byte mask.  r10 is restored before returning; r11 is
@        left pointing at the last round key.
@ The round count is read from byte offset 240 of the key structure.
@ The table loop runs (rounds - 1) times; the last round uses the Td4
@ byte table at AES_Td+1024.
969.type   _armv4_AES_decrypt,%function
970.align	2
971_armv4_AES_decrypt:
972	str	lr,[sp,#-4]!		@ push lr
	@ XOR the first round key into the state.  r11 has already moved 16
	@ bytes on when the round count is fetched, hence 240-16.
973	ldmia	r11!,{r4-r7}
974	eor	r0,r0,r4
975	ldr	r12,[r11,#240-16]
976	eor	r1,r1,r5
977	eor	r2,r2,r6
978	eor	r3,r3,r7
979	sub	r12,r12,#1
980	mov	lr,#255
981
	@ Split s0 into table indices; r0 keeps its top byte.
982	and	r7,lr,r0,lsr#16
983	and	r8,lr,r0,lsr#8
984	and	r9,lr,r0
985	mov	r0,r0,lsr#24
	@ Loop invariant on entry: r7/r8/r9/r0 hold the four byte indices of
	@ s0, r1-r3 hold s1-s3, r12 = table rounds still to run.
986.Ldec_loop:
987	ldr	r4,[r10,r7,lsl#2]	@ Td1[s0>>16]
988	and	r7,lr,r1		@ i0
989	ldr	r5,[r10,r8,lsl#2]	@ Td2[s0>>8]
990	and	r8,lr,r1,lsr#16
991	ldr	r6,[r10,r9,lsl#2]	@ Td3[s0>>0]
992	and	r9,lr,r1,lsr#8
993	ldr	r0,[r10,r0,lsl#2]	@ Td0[s0>>24]
994	mov	r1,r1,lsr#24
995
996	ldr	r7,[r10,r7,lsl#2]	@ Td3[s1>>0]
997	ldr	r8,[r10,r8,lsl#2]	@ Td1[s1>>16]
998	ldr	r9,[r10,r9,lsl#2]	@ Td2[s1>>8]
999	eor	r0,r0,r7,ror#24
1000	ldr	r1,[r10,r1,lsl#2]	@ Td0[s1>>24]
1001	and	r7,lr,r2,lsr#8	@ i0
1002	eor	r5,r8,r5,ror#8
1003	and	r8,lr,r2		@ i1
1004	eor	r6,r9,r6,ror#8
1005	and	r9,lr,r2,lsr#16
1006	ldr	r7,[r10,r7,lsl#2]	@ Td2[s2>>8]
1007	eor	r1,r1,r4,ror#8
1008	ldr	r8,[r10,r8,lsl#2]	@ Td3[s2>>0]
1009	mov	r2,r2,lsr#24
1010
1011	ldr	r9,[r10,r9,lsl#2]	@ Td1[s2>>16]
1012	eor	r0,r0,r7,ror#16
1013	ldr	r2,[r10,r2,lsl#2]	@ Td0[s2>>24]
1014	and	r7,lr,r3,lsr#16	@ i0
1015	eor	r1,r1,r8,ror#24
1016	and	r8,lr,r3,lsr#8	@ i1
1017	eor	r6,r9,r6,ror#8
1018	and	r9,lr,r3		@ i2
1019	ldr	r7,[r10,r7,lsl#2]	@ Td1[s3>>16]
1020	eor	r2,r2,r5,ror#8
1021	ldr	r8,[r10,r8,lsl#2]	@ Td2[s3>>8]
1022	mov	r3,r3,lsr#24
1023
	@ Round key words are fetched while the last table results are
	@ folded in; r11 steps to the next round key here.
1024	ldr	r9,[r10,r9,lsl#2]	@ Td3[s3>>0]
1025	eor	r0,r0,r7,ror#8
1026	ldr	r7,[r11],#16
1027	eor	r1,r1,r8,ror#16
1028	ldr	r3,[r10,r3,lsl#2]	@ Td0[s3>>24]
1029	eor	r2,r2,r9,ror#24
1030
	@ Add the round key and pre-split the new s0 for the next pass.
1031	ldr	r4,[r11,#-12]
1032	eor	r0,r0,r7
1033	ldr	r5,[r11,#-8]
1034	eor	r3,r3,r6,ror#8
1035	ldr	r6,[r11,#-4]
1036	and	r7,lr,r0,lsr#16
1037	eor	r1,r1,r4
1038	and	r8,lr,r0,lsr#8
1039	eor	r2,r2,r5
1040	and	r9,lr,r0
1041	eor	r3,r3,r6
1042	mov	r0,r0,lsr#24
1043
1044	subs	r12,r12,#1
1045	bne	.Ldec_loop
1046
	@ Last round: r10 moves to Td4, which is indexed with unscaled bytes.
1047	add	r10,r10,#1024
1048
	@ Eight loads at 32-byte steps span the 256-byte table.  The values
	@ land in r4-r6 and are overwritten below before any use.
1049	ldr	r5,[r10,#0]		@ prefetch Td4
1050	ldr	r6,[r10,#32]
1051	ldr	r4,[r10,#64]
1052	ldr	r5,[r10,#96]
1053	ldr	r6,[r10,#128]
1054	ldr	r4,[r10,#160]
1055	ldr	r5,[r10,#192]
1056	ldr	r6,[r10,#224]
1057
1058	ldrb	r0,[r10,r0]		@ Td4[s0>>24]
1059	ldrb	r4,[r10,r7]		@ Td4[s0>>16]
1060	and	r7,lr,r1		@ i0
1061	ldrb	r5,[r10,r8]		@ Td4[s0>>8]
1062	and	r8,lr,r1,lsr#16
1063	ldrb	r6,[r10,r9]		@ Td4[s0>>0]
1064	and	r9,lr,r1,lsr#8
1065
1066	ldrb	r7,[r10,r7]		@ Td4[s1>>0]
1067	ldrb	r1,[r10,r1,lsr#24]	@ Td4[s1>>24]
1068	ldrb	r8,[r10,r8]		@ Td4[s1>>16]
1069	eor	r0,r7,r0,lsl#24
1070	ldrb	r9,[r10,r9]		@ Td4[s1>>8]
1071	eor	r1,r4,r1,lsl#8
1072	and	r7,lr,r2,lsr#8	@ i0
1073	eor	r5,r5,r8,lsl#8
1074	and	r8,lr,r2		@ i1
1075	ldrb	r7,[r10,r7]		@ Td4[s2>>8]
1076	eor	r6,r6,r9,lsl#8
1077	ldrb	r8,[r10,r8]		@ Td4[s2>>0]
1078	and	r9,lr,r2,lsr#16
1079
1080	ldrb	r2,[r10,r2,lsr#24]	@ Td4[s2>>24]
1081	eor	r0,r0,r7,lsl#8
1082	ldrb	r9,[r10,r9]		@ Td4[s2>>16]
1083	eor	r1,r8,r1,lsl#16
1084	and	r7,lr,r3,lsr#16	@ i0
1085	eor	r2,r5,r2,lsl#16
1086	and	r8,lr,r3,lsr#8	@ i1
1087	ldrb	r7,[r10,r7]		@ Td4[s3>>16]
1088	eor	r6,r6,r9,lsl#16
1089	ldrb	r8,[r10,r8]		@ Td4[s3>>8]
1090	and	r9,lr,r3		@ i2
1091
	@ Final round key is read without writeback, so r11 stays on it.
1092	ldrb	r9,[r10,r9]		@ Td4[s3>>0]
1093	ldrb	r3,[r10,r3,lsr#24]	@ Td4[s3>>24]
1094	eor	r0,r0,r7,lsl#16
1095	ldr	r7,[r11,#0]
1096	eor	r1,r1,r8,lsl#8
1097	ldr	r4,[r11,#4]
1098	eor	r2,r9,r2,lsl#8
1099	ldr	r5,[r11,#8]
1100	eor	r3,r6,r3,lsl#24
1101	ldr	r6,[r11,#12]
1102
1103	eor	r0,r0,r7
1104	eor	r1,r1,r4
1105	eor	r2,r2,r5
1106	eor	r3,r3,r6
1107
	@ Undo the +1024 bias so r10 is AES_Td again for the caller.
1108	sub	r10,r10,#1024
1109	ldr	pc,[sp],#4		@ pop and return
1110.size	_armv4_AES_decrypt,.-_armv4_AES_decrypt
1111.asciz	"AES for ARMv4, CRYPTOGAMS by <appro@openssl.org>"
1112.align	2
1113