xref: /OK3568_Linux_fs/kernel/arch/x86/crypto/des3_ede-asm_64.S (revision 4882a59341e53eb6f0b4789bf948001014eff981)
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * des3_ede-asm_64.S  -  x86-64 assembly implementation of 3DES cipher
 *
 * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
 */
7*4882a593Smuzhiyun
/* Presumably supplies SYM_FUNC_START/SYM_FUNC_END and RET used below. */
#include <linux/linkage.h>

.file "des3_ede-asm_64.S"
.text
12*4882a593Smuzhiyun
/*
 * Base addresses of the eight lookup tables used by the round macros.
 * The tables are stored back to back starting at .L_s1, each one holding
 * 64 entries of 8 bytes, so table k begins (k - 1) * 64 * 8 bytes after s1.
 */
#define s1 .L_s1
#define s2 ((s1) + (64*8))
#define s3 ((s2) + (64*8))
#define s4 ((s3) + (64*8))
#define s5 ((s4) + (64*8))
#define s6 ((s5) + (64*8))
#define s7 ((s6) + (64*8))
#define s8 ((s7) + (64*8))
21*4882a593Smuzhiyun
/*
 * register macros
 *
 * Naming: a plain name is the 64-bit register, the "d" suffix its low
 * 32 bits, "bl" its low byte and "bh" its second byte.  The numeric suffix
 * (0, 1, 2) selects the block; the 1-way code only uses index 0 for data.
 */

/* Pointer to the round keys (first argument of both functions). */
#define CTX %rdi

/* Left halves of blocks 0..2. */
#define RL0 %r8
#define RL1 %r9
#define RL2 %r10

#define RL0d %r8d
#define RL1d %r9d
#define RL2d %r10d

/* Right halves of blocks 0..2. */
#define RR0 %r11
#define RR1 %r12
#define RR2 %r13

#define RR0d %r11d
#define RR1d %r12d
#define RR2d %r13d

/*
 * Per-block work registers: each holds the current round key, which is then
 * XORed with one half of the block and consumed byte by byte.  They have to
 * be %rax/%rbx/%rcx because the round macros read the second byte through
 * %ah/%bh/%ch.
 */
#define RW0 %rax
#define RW1 %rbx
#define RW2 %rcx

#define RW0d %eax
#define RW1d %ebx
#define RW2d %ecx

#define RW0bl %al
#define RW1bl %bl
#define RW2bl %cl

#define RW0bh %ah
#define RW1bh %bh
#define RW2bh %ch

/*
 * Temporaries.  RT1 (%rsi) and RT3 (%rdx) are also the dst and src argument
 * registers, so the functions below save dst on the stack and finish
 * reading src before either temporary is written.
 */
#define RT0 %r15
#define RT1 %rsi
#define RT2 %r14
#define RT3 %rdx

#define RT0d %r15d
#define RT1d %esi
#define RT2d %r14d
#define RT3d %edx
66*4882a593Smuzhiyun
/***********************************************************************
 * 1-way 3DES
 ***********************************************************************/
/*
 * do_permutation(a, b, offset, mask)
 *
 * Swap the bits of b selected by mask with the bits of a selected by
 * (mask << offset):
 *
 *	t  = ((a >> offset) ^ b) & mask;
 *	b ^= t;
 *	a ^= t << offset;
 *
 * a, b:         32-bit register operands, both updated in place.
 * offset, mask: immediate constants.
 * Clobbers RT0 and the flags.
 */
#define do_permutation(a, b, offset, mask) \
	movl a, RT0d; \
	shrl $(offset), RT0d; \
	xorl b, RT0d; \
	andl $(mask), RT0d; \
	xorl RT0d, b; \
	shll $(offset), RT0d; \
	xorl RT0d, a;
78*4882a593Smuzhiyun
/*
 * expand_to_64bits(val, mask)
 *
 * Widen the 32-bit value in val##d to 64 bits: the upper half of val
 * receives a copy of the lower half rotated right by 4 bits, then the
 * whole register is ANDed with mask.
 *
 * val:  64-bit register macro name (val##d must name its low 32 bits).
 *       The upper half is ORed in, so it must be zero on entry.
 * mask: 64-bit register operand.
 * Clobbers RT0 and the flags.
 */
#define expand_to_64bits(val, mask) \
	movl val##d, RT0d; \
	rorl $4, RT0d; \
	shlq $32, RT0; \
	orq RT0, val; \
	andq mask, val;
85*4882a593Smuzhiyun
/*
 * compress_to_64bits(val)
 *
 * Fold the 64-bit layout produced by expand_to_64bits back together: the
 * upper 32 bits of val are rotated left by 4 and ORed into val##d.
 * Despite the macro name the result is the 32-bit value in val##d; the
 * 32-bit write leaves the upper half of val zero.
 *
 * Clobbers RT0 and the flags.
 */
#define compress_to_64bits(val) \
	movq val, RT0; \
	shrq $32, RT0; \
	roll $4, RT0d; \
	orl RT0d, val##d;
91*4882a593Smuzhiyun
/*
 * initial_permutation(left, right)
 *
 * Permutation applied to a freshly loaded block before the rounds:
 *   1. four bit-group swaps between the two 32-bit halves,
 *   2. rotate right by 1, swap the bits selected by 0xaaaaaaaa between the
 *      halves, rotate left by 1,
 *   3. expand each half to the 64-bit form (six bits kept per byte) that
 *      the round macros consume.
 *
 * left, right: 64-bit register macro names holding the halves in their low
 *              32 bits with the upper 32 bits zero.
 * Clobbers RT0, RT3 (left holding 0x3f3f3f3f3f3f3f3f), RW0 and the flags.
 */
#define initial_permutation(left, right) \
	do_permutation(left##d, right##d,  4, 0x0f0f0f0f); \
	do_permutation(left##d, right##d, 16, 0x0000ffff); \
	do_permutation(right##d, left##d,  2, 0x33333333); \
	do_permutation(right##d, left##d,  8, 0x00ff00ff); \
	movabs $0x3f3f3f3f3f3f3f3f, RT3; \
	movl left##d, RW0d; \
	roll $1, right##d; \
	xorl right##d, RW0d; \
	andl $0xaaaaaaaa, RW0d; \
	xorl RW0d, left##d; \
	xorl RW0d, right##d; \
	roll $1, left##d; \
	expand_to_64bits(right, RT3); \
	expand_to_64bits(left, RT3);
107*4882a593Smuzhiyun
/*
 * final_permutation(left, right)
 *
 * Counterpart of initial_permutation, applied after the last round: the
 * same steps in reverse order with the rotations in the opposite
 * direction.  Both halves are first compressed from the 64-bit round form
 * back to 32 bits.
 *
 * left, right: 64-bit register macro names; the results end up in
 *              left##d and right##d.
 * Clobbers RT0, RW0 and the flags.
 */
#define final_permutation(left, right) \
	compress_to_64bits(right); \
	compress_to_64bits(left); \
	movl right##d, RW0d; \
	rorl $1, left##d; \
	xorl left##d, RW0d; \
	andl $0xaaaaaaaa, RW0d; \
	xorl RW0d, right##d; \
	xorl RW0d, left##d; \
	rorl $1, right##d; \
	do_permutation(right##d, left##d,  8, 0x00ff00ff); \
	do_permutation(right##d, left##d,  2, 0x33333333); \
	do_permutation(left##d, right##d, 16, 0x0000ffff); \
	do_permutation(left##d, right##d,  4, 0x0f0f0f0f);
122*4882a593Smuzhiyun
/*
 * round1(n, from, to, load_next_key)
 *
 * One round on a single block.
 *
 * On entry RW0 must hold round key n.  RW0 is XORed with `from`, and each
 * of its eight bytes then indexes one lookup table (byte 0 is the least
 * significant):
 *
 *	byte:   0   1   2   3   4   5   6   7
 *	table:  s8  s6  s4  s2  s7  s5  s3  s1
 *
 * The eight 64-bit table entries are XORed into `to`.  Half of them are
 * first collected in RT0 and merged at the end.
 *
 * n:             round number, only used by load_next_key.
 * from, to:      64-bit registers holding the two halves in expanded form.
 * load_next_key: macro invoked as load_next_key(n, RW0) once the last byte
 *                of RW0 has been extracted; pass dummy2 to skip the load.
 *
 * Clobbers RW0, RT0, RT1, RT2, RT3, RL1 (used as a fifth index temporary)
 * and the flags.
 *
 * The tables are addressed with an absolute displacement plus scaled index
 * (no base register), so this code is not position independent.
 */
#define round1(n, from, to, load_next_key) \
	xorq from, RW0; \
	\
	movzbl RW0bl, RT0d; \
	movzbl RW0bh, RT1d; \
	shrq $16, RW0; \
	movzbl RW0bl, RT2d; \
	movzbl RW0bh, RT3d; \
	shrq $16, RW0; \
	movq s8(, RT0, 8), RT0; \
	xorq s6(, RT1, 8), to; \
	movzbl RW0bl, RL1d; \
	movzbl RW0bh, RT1d; \
	shrl $16, RW0d; \
	xorq s4(, RT2, 8), RT0; \
	xorq s2(, RT3, 8), to; \
	movzbl RW0bl, RT2d; \
	movzbl RW0bh, RT3d; \
	xorq s7(, RL1, 8), RT0; \
	xorq s5(, RT1, 8), to; \
	xorq s3(, RT2, 8), RT0; \
	load_next_key(n, RW0); \
	xorq RT0, to; \
	xorq s1(, RT3, 8), to;

/*
 * load_next_key(n, RWx)
 *
 * Load the 8-byte round key with index n + 1 from the key array at CTX
 * into RWx.
 */
#define load_next_key(n, RWx) \
	movq (((n) + 1) * 8)(CTX), RWx;
150*4882a593Smuzhiyun
/*
 * Two-argument macro that expands to nothing.  It is passed to the round
 * macros in the last round, where no further key has to be loaded or copied.
 */
#define dummy2(a, b) /*_*/
152*4882a593Smuzhiyun
/*
 * read_block(io, left, right)
 *
 * Load two consecutive 32-bit words from the address in `io` into left##d
 * and right##d and byte-swap each of them.  The 32-bit loads leave the
 * upper halves of left/right zero.
 */
#define read_block(io, left, right) \
	movl    (io), left##d; \
	movl   4(io), right##d; \
	bswapl left##d; \
	bswapl right##d;
158*4882a593Smuzhiyun
/*
 * write_block(io, left, right)
 *
 * Byte-swap left##d and right##d in place and store them as two
 * consecutive 32-bit words at the address in `io`.
 */
#define write_block(io, left, right) \
	bswapl left##d; \
	bswapl right##d; \
	movl   left##d,   (io); \
	movl   right##d, 4(io);
164*4882a593Smuzhiyun
/*
 * des3_ede_x86_64_crypt_blk
 *
 * Process one 8-byte block: read it from src, apply the initial
 * permutation, run 48 rounds using the 48 eight-byte round keys at CTX,
 * apply the final permutation and write 8 bytes to dst.
 *
 * The rounds form three passes of 16.  Within a pass the two halves
 * alternate as source and target; between passes the roles are not
 * swapped (round 16 uses the same from/to pair as round 15, round 32 the
 * same pair as round 31).
 *
 * The routine itself is direction agnostic.
 * NOTE(review): presumably the caller selects encryption or decryption by
 * the key schedule it passes in; confirm against the C glue code.
 *
 * Saves and restores %rbx and %r12-%r15.  Makes no calls and uses no stack
 * beyond the six pushes.
 */
SYM_FUNC_START(des3_ede_x86_64_crypt_blk)
	/* input:
	 *	%rdi: round keys, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 */
	pushq %rbx;
	pushq %r12;
	pushq %r13;
	pushq %r14;
	pushq %r15;

	/* %rsi doubles as RT1 and is overwritten by the rounds */
	pushq %rsi; /* dst */

	/* src (%rdx) must be consumed first: %rdx doubles as RT3 */
	read_block(%rdx, RL0, RR0);
	initial_permutation(RL0, RR0);

	/* round key 0; every round loads the key for the next one */
	movq (CTX), RW0;

	round1(0, RR0, RL0, load_next_key);
	round1(1, RL0, RR0, load_next_key);
	round1(2, RR0, RL0, load_next_key);
	round1(3, RL0, RR0, load_next_key);
	round1(4, RR0, RL0, load_next_key);
	round1(5, RL0, RR0, load_next_key);
	round1(6, RR0, RL0, load_next_key);
	round1(7, RL0, RR0, load_next_key);
	round1(8, RR0, RL0, load_next_key);
	round1(9, RL0, RR0, load_next_key);
	round1(10, RR0, RL0, load_next_key);
	round1(11, RL0, RR0, load_next_key);
	round1(12, RR0, RL0, load_next_key);
	round1(13, RL0, RR0, load_next_key);
	round1(14, RR0, RL0, load_next_key);
	round1(15, RL0, RR0, load_next_key);

	round1(16+0, RL0, RR0, load_next_key);
	round1(16+1, RR0, RL0, load_next_key);
	round1(16+2, RL0, RR0, load_next_key);
	round1(16+3, RR0, RL0, load_next_key);
	round1(16+4, RL0, RR0, load_next_key);
	round1(16+5, RR0, RL0, load_next_key);
	round1(16+6, RL0, RR0, load_next_key);
	round1(16+7, RR0, RL0, load_next_key);
	round1(16+8, RL0, RR0, load_next_key);
	round1(16+9, RR0, RL0, load_next_key);
	round1(16+10, RL0, RR0, load_next_key);
	round1(16+11, RR0, RL0, load_next_key);
	round1(16+12, RL0, RR0, load_next_key);
	round1(16+13, RR0, RL0, load_next_key);
	round1(16+14, RL0, RR0, load_next_key);
	round1(16+15, RR0, RL0, load_next_key);

	round1(32+0, RR0, RL0, load_next_key);
	round1(32+1, RL0, RR0, load_next_key);
	round1(32+2, RR0, RL0, load_next_key);
	round1(32+3, RL0, RR0, load_next_key);
	round1(32+4, RR0, RL0, load_next_key);
	round1(32+5, RL0, RR0, load_next_key);
	round1(32+6, RR0, RL0, load_next_key);
	round1(32+7, RL0, RR0, load_next_key);
	round1(32+8, RR0, RL0, load_next_key);
	round1(32+9, RL0, RR0, load_next_key);
	round1(32+10, RR0, RL0, load_next_key);
	round1(32+11, RL0, RR0, load_next_key);
	round1(32+12, RR0, RL0, load_next_key);
	round1(32+13, RL0, RR0, load_next_key);
	round1(32+14, RR0, RL0, load_next_key);
	/* last round: there is no key 48 to load */
	round1(32+15, RL0, RR0, dummy2);

	final_permutation(RR0, RL0);

	popq %rsi /* dst */
	write_block(%rsi, RR0, RL0);

	popq %r15;
	popq %r14;
	popq %r13;
	popq %r12;
	popq %rbx;

	RET;
SYM_FUNC_END(des3_ede_x86_64_crypt_blk)
248*4882a593Smuzhiyun
/***********************************************************************
 * 3-way 3DES
 ***********************************************************************/
/*
 * expand_to_64bits(val, mask)
 *
 * Same definition as in the 1-way section, repeated here; the preprocessor
 * accepts a redefinition whose replacement text is identical.
 *
 * Widens the 32-bit value in val##d to 64 bits: the upper half of val
 * receives the lower half rotated right by 4 bits, then val is ANDed with
 * the 64-bit register operand mask.  The upper half of val must be zero on
 * entry.  Clobbers RT0 and the flags.
 */
#define expand_to_64bits(val, mask) \
	movl val##d, RT0d; \
	rorl $4, RT0d; \
	shlq $32, RT0; \
	orq RT0, val; \
	andq mask, val;
258*4882a593Smuzhiyun
/*
 * compress_to_64bits(val)
 *
 * Same definition as in the 1-way section, repeated here; the preprocessor
 * accepts a redefinition whose replacement text is identical.
 *
 * ORs the upper 32 bits of val, rotated left by 4, into val##d.  The
 * result is the 32-bit value in val##d (the upper half of val ends up
 * zero).  Clobbers RT0 and the flags.
 */
#define compress_to_64bits(val) \
	movq val, RT0; \
	shrq $32, RT0; \
	roll $4, RT0d; \
	orl RT0d, val##d;
264*4882a593Smuzhiyun
/*
 * initial_permutation3(left, right)
 *
 * Three-block version of initial_permutation.  `left` and `right` are
 * register name stems (RL / RR); the block index 0, 1 or 2 and the size
 * suffix are pasted on here.  The indentation level marks the block a line
 * belongs to.
 *
 * Clobbers RT0, RT3 (left holding 0x3f3f3f3f3f3f3f3f), RW0, RW1, RW2 and
 * the flags.
 */
#define initial_permutation3(left, right) \
	do_permutation(left##0d, right##0d,  4, 0x0f0f0f0f); \
	do_permutation(left##0d, right##0d, 16, 0x0000ffff); \
	  do_permutation(left##1d, right##1d,  4, 0x0f0f0f0f); \
	  do_permutation(left##1d, right##1d, 16, 0x0000ffff); \
	    do_permutation(left##2d, right##2d,  4, 0x0f0f0f0f); \
	    do_permutation(left##2d, right##2d, 16, 0x0000ffff); \
	    \
	do_permutation(right##0d, left##0d,  2, 0x33333333); \
	do_permutation(right##0d, left##0d,  8, 0x00ff00ff); \
	  do_permutation(right##1d, left##1d,  2, 0x33333333); \
	  do_permutation(right##1d, left##1d,  8, 0x00ff00ff); \
	    do_permutation(right##2d, left##2d,  2, 0x33333333); \
	    do_permutation(right##2d, left##2d,  8, 0x00ff00ff); \
	    \
	/* mask used by every expand_to_64bits below */ \
	movabs $0x3f3f3f3f3f3f3f3f, RT3; \
	    \
	/* block 0 */ \
	movl left##0d, RW0d; \
	roll $1, right##0d; \
	xorl right##0d, RW0d; \
	andl $0xaaaaaaaa, RW0d; \
	xorl RW0d, left##0d; \
	xorl RW0d, right##0d; \
	roll $1, left##0d; \
	expand_to_64bits(right##0, RT3); \
	expand_to_64bits(left##0, RT3); \
	  /* block 1 */ \
	  movl left##1d, RW1d; \
	  roll $1, right##1d; \
	  xorl right##1d, RW1d; \
	  andl $0xaaaaaaaa, RW1d; \
	  xorl RW1d, left##1d; \
	  xorl RW1d, right##1d; \
	  roll $1, left##1d; \
	  expand_to_64bits(right##1, RT3); \
	  expand_to_64bits(left##1, RT3); \
	    /* block 2 */ \
	    movl left##2d, RW2d; \
	    roll $1, right##2d; \
	    xorl right##2d, RW2d; \
	    andl $0xaaaaaaaa, RW2d; \
	    xorl RW2d, left##2d; \
	    xorl RW2d, right##2d; \
	    roll $1, left##2d; \
	    expand_to_64bits(right##2, RT3); \
	    expand_to_64bits(left##2, RT3);
309*4882a593Smuzhiyun
/*
 * final_permutation3(left, right)
 *
 * Three-block version of final_permutation.  `left` and `right` are
 * register name stems (RL / RR); the block index 0, 1 or 2 and the size
 * suffix are pasted on here.  The results end up in the 32-bit registers
 * left##Nd / right##Nd.
 *
 * Clobbers RT0, RW0, RW1, RW2 and the flags.
 */
#define final_permutation3(left, right) \
	/* block 0 */ \
	compress_to_64bits(right##0); \
	compress_to_64bits(left##0); \
	movl right##0d, RW0d; \
	rorl $1, left##0d; \
	xorl left##0d, RW0d; \
	andl $0xaaaaaaaa, RW0d; \
	xorl RW0d, right##0d; \
	xorl RW0d, left##0d; \
	rorl $1, right##0d; \
	  /* block 1 */ \
	  compress_to_64bits(right##1); \
	  compress_to_64bits(left##1); \
	  movl right##1d, RW1d; \
	  rorl $1, left##1d; \
	  xorl left##1d, RW1d; \
	  andl $0xaaaaaaaa, RW1d; \
	  xorl RW1d, right##1d; \
	  xorl RW1d, left##1d; \
	  rorl $1, right##1d; \
	    /* block 2 */ \
	    compress_to_64bits(right##2); \
	    compress_to_64bits(left##2); \
	    movl right##2d, RW2d; \
	    rorl $1, left##2d; \
	    xorl left##2d, RW2d; \
	    andl $0xaaaaaaaa, RW2d; \
	    xorl RW2d, right##2d; \
	    xorl RW2d, left##2d; \
	    rorl $1, right##2d; \
	    \
	do_permutation(right##0d, left##0d,  8, 0x00ff00ff); \
	do_permutation(right##0d, left##0d,  2, 0x33333333); \
	  do_permutation(right##1d, left##1d,  8, 0x00ff00ff); \
	  do_permutation(right##1d, left##1d,  2, 0x33333333); \
	    do_permutation(right##2d, left##2d,  8, 0x00ff00ff); \
	    do_permutation(right##2d, left##2d,  2, 0x33333333); \
	    \
	do_permutation(left##0d, right##0d, 16, 0x0000ffff); \
	do_permutation(left##0d, right##0d,  4, 0x0f0f0f0f); \
	  do_permutation(left##1d, right##1d, 16, 0x0000ffff); \
	  do_permutation(left##1d, right##1d,  4, 0x0f0f0f0f); \
	    do_permutation(left##2d, right##2d, 16, 0x0000ffff); \
	    do_permutation(left##2d, right##2d,  4, 0x0f0f0f0f);
352*4882a593Smuzhiyun
/*
 * round3(n, from, to, load_next_key, do_movq)
 *
 * One round on three blocks, processed one after the other.
 *
 * On entry RW0, RW1 and RW2 must each hold round key n.  For block i, RWi
 * is XORed with from##i and each of its eight bytes indexes one lookup
 * table (byte 0 is the least significant):
 *
 *	byte:   0   1   2   3   4   5   6   7
 *	table:  s8  s6  s4  s2  s7  s5  s3  s1
 *
 * Every table entry is XORed directly into to##i.
 *
 * n:             round number, only used by load_next_key.
 * from, to:      register name stems (RL / RR); the block index is pasted
 *                on here.
 * load_next_key: invoked as load_next_key(n, RW0) once block 0 has
 *                extracted its last byte from RW0.
 * do_movq:       invoked as do_movq(RW0, RW1) and do_movq(RW0, RW2) once
 *                blocks 1 and 2 have extracted their last byte, to copy
 *                the new key into their work registers.
 * Pass dummy2 for both macro arguments in the last round.
 *
 * Clobbers RW0, RW1, RW2, RT1, RT3 and the flags.
 *
 * The tables are addressed with an absolute displacement plus scaled index
 * (no base register), so this code is not position independent.
 */
#define round3(n, from, to, load_next_key, do_movq) \
	/* block 0 */ \
	xorq from##0, RW0; \
	movzbl RW0bl, RT3d; \
	movzbl RW0bh, RT1d; \
	shrq $16, RW0; \
	xorq s8(, RT3, 8), to##0; \
	xorq s6(, RT1, 8), to##0; \
	movzbl RW0bl, RT3d; \
	movzbl RW0bh, RT1d; \
	shrq $16, RW0; \
	xorq s4(, RT3, 8), to##0; \
	xorq s2(, RT1, 8), to##0; \
	movzbl RW0bl, RT3d; \
	movzbl RW0bh, RT1d; \
	shrl $16, RW0d; \
	xorq s7(, RT3, 8), to##0; \
	xorq s5(, RT1, 8), to##0; \
	movzbl RW0bl, RT3d; \
	movzbl RW0bh, RT1d; \
	load_next_key(n, RW0); \
	xorq s3(, RT3, 8), to##0; \
	xorq s1(, RT1, 8), to##0; \
		/* block 1 */ \
		xorq from##1, RW1; \
		movzbl RW1bl, RT3d; \
		movzbl RW1bh, RT1d; \
		shrq $16, RW1; \
		xorq s8(, RT3, 8), to##1; \
		xorq s6(, RT1, 8), to##1; \
		movzbl RW1bl, RT3d; \
		movzbl RW1bh, RT1d; \
		shrq $16, RW1; \
		xorq s4(, RT3, 8), to##1; \
		xorq s2(, RT1, 8), to##1; \
		movzbl RW1bl, RT3d; \
		movzbl RW1bh, RT1d; \
		shrl $16, RW1d; \
		xorq s7(, RT3, 8), to##1; \
		xorq s5(, RT1, 8), to##1; \
		movzbl RW1bl, RT3d; \
		movzbl RW1bh, RT1d; \
		do_movq(RW0, RW1); \
		xorq s3(, RT3, 8), to##1; \
		xorq s1(, RT1, 8), to##1; \
			/* block 2 */ \
			xorq from##2, RW2; \
			movzbl RW2bl, RT3d; \
			movzbl RW2bh, RT1d; \
			shrq $16, RW2; \
			xorq s8(, RT3, 8), to##2; \
			xorq s6(, RT1, 8), to##2; \
			movzbl RW2bl, RT3d; \
			movzbl RW2bh, RT1d; \
			shrq $16, RW2; \
			xorq s4(, RT3, 8), to##2; \
			xorq s2(, RT1, 8), to##2; \
			movzbl RW2bl, RT3d; \
			movzbl RW2bh, RT1d; \
			shrl $16, RW2d; \
			xorq s7(, RT3, 8), to##2; \
			xorq s5(, RT1, 8), to##2; \
			movzbl RW2bl, RT3d; \
			movzbl RW2bh, RT1d; \
			do_movq(RW0, RW2); \
			xorq s3(, RT3, 8), to##2; \
			xorq s1(, RT1, 8), to##2;
417*4882a593Smuzhiyun
/*
 * __movq(src, dst)
 *
 * Plain 64-bit move wrapped in a macro so it can be passed to round3 as its
 * do_movq argument (and replaced by dummy2 where no copy is wanted).
 */
#define __movq(src, dst) \
	movq src, dst;
420*4882a593Smuzhiyun
/*
 * des3_ede_x86_64_crypt_blk_3way
 *
 * Process three consecutive 8-byte blocks with one key schedule: read
 * 24 bytes from src, apply the initial permutation to each block, run
 * 48 rounds using the 48 eight-byte round keys at CTX, apply the final
 * permutation and write 24 bytes to dst.
 *
 * Round structure and from/to sequence are the same as in the 1-way
 * function: three passes of 16 rounds, with no swap of the halves between
 * passes.
 *
 * Saves and restores %rbx and %r12-%r15.  Makes no calls and uses no stack
 * beyond the six pushes.
 */
SYM_FUNC_START(des3_ede_x86_64_crypt_blk_3way)
	/* input:
	 *	%rdi: ctx, round keys
	 *	%rsi: dst (3 blocks)
	 *	%rdx: src (3 blocks)
	 */

	pushq %rbx;
	pushq %r12;
	pushq %r13;
	pushq %r14;
	pushq %r15;

	/* %rsi doubles as RT1 and is overwritten by the rounds */
	pushq %rsi /* dst */

	/* load input (must finish before RT3, i.e. %rdx, is written) */
	movl 0 * 4(%rdx), RL0d;
	movl 1 * 4(%rdx), RR0d;
	movl 2 * 4(%rdx), RL1d;
	movl 3 * 4(%rdx), RR1d;
	movl 4 * 4(%rdx), RL2d;
	movl 5 * 4(%rdx), RR2d;

	bswapl RL0d;
	bswapl RR0d;
	bswapl RL1d;
	bswapl RR1d;
	bswapl RL2d;
	bswapl RR2d;

	initial_permutation3(RL, RR);

	/* round key 0 for all three blocks */
	movq 0(CTX), RW0;
	movq RW0, RW1;
	movq RW0, RW2;

	round3(0, RR, RL, load_next_key, __movq);
	round3(1, RL, RR, load_next_key, __movq);
	round3(2, RR, RL, load_next_key, __movq);
	round3(3, RL, RR, load_next_key, __movq);
	round3(4, RR, RL, load_next_key, __movq);
	round3(5, RL, RR, load_next_key, __movq);
	round3(6, RR, RL, load_next_key, __movq);
	round3(7, RL, RR, load_next_key, __movq);
	round3(8, RR, RL, load_next_key, __movq);
	round3(9, RL, RR, load_next_key, __movq);
	round3(10, RR, RL, load_next_key, __movq);
	round3(11, RL, RR, load_next_key, __movq);
	round3(12, RR, RL, load_next_key, __movq);
	round3(13, RL, RR, load_next_key, __movq);
	round3(14, RR, RL, load_next_key, __movq);
	round3(15, RL, RR, load_next_key, __movq);

	round3(16+0, RL, RR, load_next_key, __movq);
	round3(16+1, RR, RL, load_next_key, __movq);
	round3(16+2, RL, RR, load_next_key, __movq);
	round3(16+3, RR, RL, load_next_key, __movq);
	round3(16+4, RL, RR, load_next_key, __movq);
	round3(16+5, RR, RL, load_next_key, __movq);
	round3(16+6, RL, RR, load_next_key, __movq);
	round3(16+7, RR, RL, load_next_key, __movq);
	round3(16+8, RL, RR, load_next_key, __movq);
	round3(16+9, RR, RL, load_next_key, __movq);
	round3(16+10, RL, RR, load_next_key, __movq);
	round3(16+11, RR, RL, load_next_key, __movq);
	round3(16+12, RL, RR, load_next_key, __movq);
	round3(16+13, RR, RL, load_next_key, __movq);
	round3(16+14, RL, RR, load_next_key, __movq);
	round3(16+15, RR, RL, load_next_key, __movq);

	round3(32+0, RR, RL, load_next_key, __movq);
	round3(32+1, RL, RR, load_next_key, __movq);
	round3(32+2, RR, RL, load_next_key, __movq);
	round3(32+3, RL, RR, load_next_key, __movq);
	round3(32+4, RR, RL, load_next_key, __movq);
	round3(32+5, RL, RR, load_next_key, __movq);
	round3(32+6, RR, RL, load_next_key, __movq);
	round3(32+7, RL, RR, load_next_key, __movq);
	round3(32+8, RR, RL, load_next_key, __movq);
	round3(32+9, RL, RR, load_next_key, __movq);
	round3(32+10, RR, RL, load_next_key, __movq);
	round3(32+11, RL, RR, load_next_key, __movq);
	round3(32+12, RR, RL, load_next_key, __movq);
	round3(32+13, RL, RR, load_next_key, __movq);
	round3(32+14, RR, RL, load_next_key, __movq);
	/* last round: no further key to load or copy */
	round3(32+15, RL, RR, dummy2, dummy2);

	final_permutation3(RR, RL);

	bswapl RR0d;
	bswapl RL0d;
	bswapl RR1d;
	bswapl RL1d;
	bswapl RR2d;
	bswapl RL2d;

	popq %rsi /* dst */
	movl RR0d, 0 * 4(%rsi);
	movl RL0d, 1 * 4(%rsi);
	movl RR1d, 2 * 4(%rsi);
	movl RL1d, 3 * 4(%rsi);
	movl RR2d, 4 * 4(%rsi);
	movl RL2d, 5 * 4(%rsi);

	popq %r15;
	popq %r14;
	popq %r13;
	popq %r12;
	popq %rbx;

	RET;
SYM_FUNC_END(des3_ede_x86_64_crypt_blk_3way)
533*4882a593Smuzhiyun
534*4882a593Smuzhiyun.section	.rodata, "a", @progbits
535*4882a593Smuzhiyun.align 16
536*4882a593Smuzhiyun.L_s1:
537*4882a593Smuzhiyun	.quad 0x0010100001010400, 0x0000000000000000
538*4882a593Smuzhiyun	.quad 0x0000100000010000, 0x0010100001010404
539*4882a593Smuzhiyun	.quad 0x0010100001010004, 0x0000100000010404
540*4882a593Smuzhiyun	.quad 0x0000000000000004, 0x0000100000010000
541*4882a593Smuzhiyun	.quad 0x0000000000000400, 0x0010100001010400
542*4882a593Smuzhiyun	.quad 0x0010100001010404, 0x0000000000000400
543*4882a593Smuzhiyun	.quad 0x0010000001000404, 0x0010100001010004
544*4882a593Smuzhiyun	.quad 0x0010000001000000, 0x0000000000000004
545*4882a593Smuzhiyun	.quad 0x0000000000000404, 0x0010000001000400
546*4882a593Smuzhiyun	.quad 0x0010000001000400, 0x0000100000010400
547*4882a593Smuzhiyun	.quad 0x0000100000010400, 0x0010100001010000
548*4882a593Smuzhiyun	.quad 0x0010100001010000, 0x0010000001000404
549*4882a593Smuzhiyun	.quad 0x0000100000010004, 0x0010000001000004
550*4882a593Smuzhiyun	.quad 0x0010000001000004, 0x0000100000010004
551*4882a593Smuzhiyun	.quad 0x0000000000000000, 0x0000000000000404
552*4882a593Smuzhiyun	.quad 0x0000100000010404, 0x0010000001000000
553*4882a593Smuzhiyun	.quad 0x0000100000010000, 0x0010100001010404
554*4882a593Smuzhiyun	.quad 0x0000000000000004, 0x0010100001010000
555*4882a593Smuzhiyun	.quad 0x0010100001010400, 0x0010000001000000
556*4882a593Smuzhiyun	.quad 0x0010000001000000, 0x0000000000000400
557*4882a593Smuzhiyun	.quad 0x0010100001010004, 0x0000100000010000
558*4882a593Smuzhiyun	.quad 0x0000100000010400, 0x0010000001000004
559*4882a593Smuzhiyun	.quad 0x0000000000000400, 0x0000000000000004
560*4882a593Smuzhiyun	.quad 0x0010000001000404, 0x0000100000010404
561*4882a593Smuzhiyun	.quad 0x0010100001010404, 0x0000100000010004
562*4882a593Smuzhiyun	.quad 0x0010100001010000, 0x0010000001000404
563*4882a593Smuzhiyun	.quad 0x0010000001000004, 0x0000000000000404
564*4882a593Smuzhiyun	.quad 0x0000100000010404, 0x0010100001010400
565*4882a593Smuzhiyun	.quad 0x0000000000000404, 0x0010000001000400
566*4882a593Smuzhiyun	.quad 0x0010000001000400, 0x0000000000000000
567*4882a593Smuzhiyun	.quad 0x0000100000010004, 0x0000100000010400
568*4882a593Smuzhiyun	.quad 0x0000000000000000, 0x0010100001010004
569*4882a593Smuzhiyun.L_s2:
570*4882a593Smuzhiyun	.quad 0x0801080200100020, 0x0800080000000000
571*4882a593Smuzhiyun	.quad 0x0000080000000000, 0x0001080200100020
572*4882a593Smuzhiyun	.quad 0x0001000000100000, 0x0000000200000020
573*4882a593Smuzhiyun	.quad 0x0801000200100020, 0x0800080200000020
574*4882a593Smuzhiyun	.quad 0x0800000200000020, 0x0801080200100020
575*4882a593Smuzhiyun	.quad 0x0801080000100000, 0x0800000000000000
576*4882a593Smuzhiyun	.quad 0x0800080000000000, 0x0001000000100000
577*4882a593Smuzhiyun	.quad 0x0000000200000020, 0x0801000200100020
578*4882a593Smuzhiyun	.quad 0x0001080000100000, 0x0001000200100020
579*4882a593Smuzhiyun	.quad 0x0800080200000020, 0x0000000000000000
580*4882a593Smuzhiyun	.quad 0x0800000000000000, 0x0000080000000000
581*4882a593Smuzhiyun	.quad 0x0001080200100020, 0x0801000000100000
582*4882a593Smuzhiyun	.quad 0x0001000200100020, 0x0800000200000020
583*4882a593Smuzhiyun	.quad 0x0000000000000000, 0x0001080000100000
584*4882a593Smuzhiyun	.quad 0x0000080200000020, 0x0801080000100000
585*4882a593Smuzhiyun	.quad 0x0801000000100000, 0x0000080200000020
586*4882a593Smuzhiyun	.quad 0x0000000000000000, 0x0001080200100020
587*4882a593Smuzhiyun	.quad 0x0801000200100020, 0x0001000000100000
588*4882a593Smuzhiyun	.quad 0x0800080200000020, 0x0801000000100000
589*4882a593Smuzhiyun	.quad 0x0801080000100000, 0x0000080000000000
590*4882a593Smuzhiyun	.quad 0x0801000000100000, 0x0800080000000000
591*4882a593Smuzhiyun	.quad 0x0000000200000020, 0x0801080200100020
592*4882a593Smuzhiyun	.quad 0x0001080200100020, 0x0000000200000020
593*4882a593Smuzhiyun	.quad 0x0000080000000000, 0x0800000000000000
594*4882a593Smuzhiyun	.quad 0x0000080200000020, 0x0801080000100000
595*4882a593Smuzhiyun	.quad 0x0001000000100000, 0x0800000200000020
596*4882a593Smuzhiyun	.quad 0x0001000200100020, 0x0800080200000020
597*4882a593Smuzhiyun	.quad 0x0800000200000020, 0x0001000200100020
598*4882a593Smuzhiyun	.quad 0x0001080000100000, 0x0000000000000000
599*4882a593Smuzhiyun	.quad 0x0800080000000000, 0x0000080200000020
600*4882a593Smuzhiyun	.quad 0x0800000000000000, 0x0801000200100020
601*4882a593Smuzhiyun	.quad 0x0801080200100020, 0x0001080000100000
602*4882a593Smuzhiyun.L_s3:
603*4882a593Smuzhiyun	.quad 0x0000002000000208, 0x0000202008020200
604*4882a593Smuzhiyun	.quad 0x0000000000000000, 0x0000200008020008
605*4882a593Smuzhiyun	.quad 0x0000002008000200, 0x0000000000000000
606*4882a593Smuzhiyun	.quad 0x0000202000020208, 0x0000002008000200
607*4882a593Smuzhiyun	.quad 0x0000200000020008, 0x0000000008000008
608*4882a593Smuzhiyun	.quad 0x0000000008000008, 0x0000200000020000
609*4882a593Smuzhiyun	.quad 0x0000202008020208, 0x0000200000020008
610*4882a593Smuzhiyun	.quad 0x0000200008020000, 0x0000002000000208
611*4882a593Smuzhiyun	.quad 0x0000000008000000, 0x0000000000000008
612*4882a593Smuzhiyun	.quad 0x0000202008020200, 0x0000002000000200
613*4882a593Smuzhiyun	.quad 0x0000202000020200, 0x0000200008020000
614*4882a593Smuzhiyun	.quad 0x0000200008020008, 0x0000202000020208
615*4882a593Smuzhiyun	.quad 0x0000002008000208, 0x0000202000020200
616*4882a593Smuzhiyun	.quad 0x0000200000020000, 0x0000002008000208
617*4882a593Smuzhiyun	.quad 0x0000000000000008, 0x0000202008020208
618*4882a593Smuzhiyun	.quad 0x0000002000000200, 0x0000000008000000
619*4882a593Smuzhiyun	.quad 0x0000202008020200, 0x0000000008000000
620*4882a593Smuzhiyun	.quad 0x0000200000020008, 0x0000002000000208
621*4882a593Smuzhiyun	.quad 0x0000200000020000, 0x0000202008020200
622*4882a593Smuzhiyun	.quad 0x0000002008000200, 0x0000000000000000
623*4882a593Smuzhiyun	.quad 0x0000002000000200, 0x0000200000020008
624*4882a593Smuzhiyun	.quad 0x0000202008020208, 0x0000002008000200
625*4882a593Smuzhiyun	.quad 0x0000000008000008, 0x0000002000000200
626*4882a593Smuzhiyun	.quad 0x0000000000000000, 0x0000200008020008
627*4882a593Smuzhiyun	.quad 0x0000002008000208, 0x0000200000020000
628*4882a593Smuzhiyun	.quad 0x0000000008000000, 0x0000202008020208
629*4882a593Smuzhiyun	.quad 0x0000000000000008, 0x0000202000020208
630*4882a593Smuzhiyun	.quad 0x0000202000020200, 0x0000000008000008
631*4882a593Smuzhiyun	.quad 0x0000200008020000, 0x0000002008000208
632*4882a593Smuzhiyun	.quad 0x0000002000000208, 0x0000200008020000
633*4882a593Smuzhiyun	.quad 0x0000202000020208, 0x0000000000000008
634*4882a593Smuzhiyun	.quad 0x0000200008020008, 0x0000202000020200
635*4882a593Smuzhiyun.L_s4:
636*4882a593Smuzhiyun	.quad 0x1008020000002001, 0x1000020800002001
637*4882a593Smuzhiyun	.quad 0x1000020800002001, 0x0000000800000000
638*4882a593Smuzhiyun	.quad 0x0008020800002000, 0x1008000800000001
639*4882a593Smuzhiyun	.quad 0x1008000000000001, 0x1000020000002001
640*4882a593Smuzhiyun	.quad 0x0000000000000000, 0x0008020000002000
641*4882a593Smuzhiyun	.quad 0x0008020000002000, 0x1008020800002001
642*4882a593Smuzhiyun	.quad 0x1000000800000001, 0x0000000000000000
643*4882a593Smuzhiyun	.quad 0x0008000800000000, 0x1008000000000001
644*4882a593Smuzhiyun	.quad 0x1000000000000001, 0x0000020000002000
645*4882a593Smuzhiyun	.quad 0x0008000000000000, 0x1008020000002001
646*4882a593Smuzhiyun	.quad 0x0000000800000000, 0x0008000000000000
647*4882a593Smuzhiyun	.quad 0x1000020000002001, 0x0000020800002000
648*4882a593Smuzhiyun	.quad 0x1008000800000001, 0x1000000000000001
649*4882a593Smuzhiyun	.quad 0x0000020800002000, 0x0008000800000000
650*4882a593Smuzhiyun	.quad 0x0000020000002000, 0x0008020800002000
651*4882a593Smuzhiyun	.quad 0x1008020800002001, 0x1000000800000001
652*4882a593Smuzhiyun	.quad 0x0008000800000000, 0x1008000000000001
653*4882a593Smuzhiyun	.quad 0x0008020000002000, 0x1008020800002001
654*4882a593Smuzhiyun	.quad 0x1000000800000001, 0x0000000000000000
655*4882a593Smuzhiyun	.quad 0x0000000000000000, 0x0008020000002000
656*4882a593Smuzhiyun	.quad 0x0000020800002000, 0x0008000800000000
657*4882a593Smuzhiyun	.quad 0x1008000800000001, 0x1000000000000001
658*4882a593Smuzhiyun	.quad 0x1008020000002001, 0x1000020800002001
659*4882a593Smuzhiyun	.quad 0x1000020800002001, 0x0000000800000000
660*4882a593Smuzhiyun	.quad 0x1008020800002001, 0x1000000800000001
661*4882a593Smuzhiyun	.quad 0x1000000000000001, 0x0000020000002000
662*4882a593Smuzhiyun	.quad 0x1008000000000001, 0x1000020000002001
663*4882a593Smuzhiyun	.quad 0x0008020800002000, 0x1008000800000001
664*4882a593Smuzhiyun	.quad 0x1000020000002001, 0x0000020800002000
665*4882a593Smuzhiyun	.quad 0x0008000000000000, 0x1008020000002001
666*4882a593Smuzhiyun	.quad 0x0000000800000000, 0x0008000000000000
667*4882a593Smuzhiyun	.quad 0x0000020000002000, 0x0008020800002000
668*4882a593Smuzhiyun.L_s5:
669*4882a593Smuzhiyun	.quad 0x0000001000000100, 0x0020001002080100
670*4882a593Smuzhiyun	.quad 0x0020000002080000, 0x0420001002000100
671*4882a593Smuzhiyun	.quad 0x0000000000080000, 0x0000001000000100
672*4882a593Smuzhiyun	.quad 0x0400000000000000, 0x0020000002080000
673*4882a593Smuzhiyun	.quad 0x0400001000080100, 0x0000000000080000
674*4882a593Smuzhiyun	.quad 0x0020001002000100, 0x0400001000080100
675*4882a593Smuzhiyun	.quad 0x0420001002000100, 0x0420000002080000
676*4882a593Smuzhiyun	.quad 0x0000001000080100, 0x0400000000000000
677*4882a593Smuzhiyun	.quad 0x0020000002000000, 0x0400000000080000
678*4882a593Smuzhiyun	.quad 0x0400000000080000, 0x0000000000000000
679*4882a593Smuzhiyun	.quad 0x0400001000000100, 0x0420001002080100
680*4882a593Smuzhiyun	.quad 0x0420001002080100, 0x0020001002000100
681*4882a593Smuzhiyun	.quad 0x0420000002080000, 0x0400001000000100
682*4882a593Smuzhiyun	.quad 0x0000000000000000, 0x0420000002000000
683*4882a593Smuzhiyun	.quad 0x0020001002080100, 0x0020000002000000
684*4882a593Smuzhiyun	.quad 0x0420000002000000, 0x0000001000080100
685*4882a593Smuzhiyun	.quad 0x0000000000080000, 0x0420001002000100
686*4882a593Smuzhiyun	.quad 0x0000001000000100, 0x0020000002000000
687*4882a593Smuzhiyun	.quad 0x0400000000000000, 0x0020000002080000
688*4882a593Smuzhiyun	.quad 0x0420001002000100, 0x0400001000080100
689*4882a593Smuzhiyun	.quad 0x0020001002000100, 0x0400000000000000
690*4882a593Smuzhiyun	.quad 0x0420000002080000, 0x0020001002080100
691*4882a593Smuzhiyun	.quad 0x0400001000080100, 0x0000001000000100
692*4882a593Smuzhiyun	.quad 0x0020000002000000, 0x0420000002080000
693*4882a593Smuzhiyun	.quad 0x0420001002080100, 0x0000001000080100
694*4882a593Smuzhiyun	.quad 0x0420000002000000, 0x0420001002080100
695*4882a593Smuzhiyun	.quad 0x0020000002080000, 0x0000000000000000
696*4882a593Smuzhiyun	.quad 0x0400000000080000, 0x0420000002000000
697*4882a593Smuzhiyun	.quad 0x0000001000080100, 0x0020001002000100
698*4882a593Smuzhiyun	.quad 0x0400001000000100, 0x0000000000080000
699*4882a593Smuzhiyun	.quad 0x0000000000000000, 0x0400000000080000
700*4882a593Smuzhiyun	.quad 0x0020001002080100, 0x0400001000000100
701*4882a593Smuzhiyun.L_s6:
702*4882a593Smuzhiyun	.quad 0x0200000120000010, 0x0204000020000000
703*4882a593Smuzhiyun	.quad 0x0000040000000000, 0x0204040120000010
704*4882a593Smuzhiyun	.quad 0x0204000020000000, 0x0000000100000010
705*4882a593Smuzhiyun	.quad 0x0204040120000010, 0x0004000000000000
706*4882a593Smuzhiyun	.quad 0x0200040020000000, 0x0004040100000010
707*4882a593Smuzhiyun	.quad 0x0004000000000000, 0x0200000120000010
708*4882a593Smuzhiyun	.quad 0x0004000100000010, 0x0200040020000000
709*4882a593Smuzhiyun	.quad 0x0200000020000000, 0x0000040100000010
710*4882a593Smuzhiyun	.quad 0x0000000000000000, 0x0004000100000010
711*4882a593Smuzhiyun	.quad 0x0200040120000010, 0x0000040000000000
712*4882a593Smuzhiyun	.quad 0x0004040000000000, 0x0200040120000010
713*4882a593Smuzhiyun	.quad 0x0000000100000010, 0x0204000120000010
714*4882a593Smuzhiyun	.quad 0x0204000120000010, 0x0000000000000000
715*4882a593Smuzhiyun	.quad 0x0004040100000010, 0x0204040020000000
716*4882a593Smuzhiyun	.quad 0x0000040100000010, 0x0004040000000000
717*4882a593Smuzhiyun	.quad 0x0204040020000000, 0x0200000020000000
718*4882a593Smuzhiyun	.quad 0x0200040020000000, 0x0000000100000010
719*4882a593Smuzhiyun	.quad 0x0204000120000010, 0x0004040000000000
720*4882a593Smuzhiyun	.quad 0x0204040120000010, 0x0004000000000000
721*4882a593Smuzhiyun	.quad 0x0000040100000010, 0x0200000120000010
722*4882a593Smuzhiyun	.quad 0x0004000000000000, 0x0200040020000000
723*4882a593Smuzhiyun	.quad 0x0200000020000000, 0x0000040100000010
724*4882a593Smuzhiyun	.quad 0x0200000120000010, 0x0204040120000010
725*4882a593Smuzhiyun	.quad 0x0004040000000000, 0x0204000020000000
726*4882a593Smuzhiyun	.quad 0x0004040100000010, 0x0204040020000000
727*4882a593Smuzhiyun	.quad 0x0000000000000000, 0x0204000120000010
728*4882a593Smuzhiyun	.quad 0x0000000100000010, 0x0000040000000000
729*4882a593Smuzhiyun	.quad 0x0204000020000000, 0x0004040100000010
730*4882a593Smuzhiyun	.quad 0x0000040000000000, 0x0004000100000010
731*4882a593Smuzhiyun	.quad 0x0200040120000010, 0x0000000000000000
732*4882a593Smuzhiyun	.quad 0x0204040020000000, 0x0200000020000000
733*4882a593Smuzhiyun	.quad 0x0004000100000010, 0x0200040120000010
734*4882a593Smuzhiyun.L_s7:
735*4882a593Smuzhiyun	.quad 0x0002000000200000, 0x2002000004200002
736*4882a593Smuzhiyun	.quad 0x2000000004000802, 0x0000000000000000
737*4882a593Smuzhiyun	.quad 0x0000000000000800, 0x2000000004000802
738*4882a593Smuzhiyun	.quad 0x2002000000200802, 0x0002000004200800
739*4882a593Smuzhiyun	.quad 0x2002000004200802, 0x0002000000200000
740*4882a593Smuzhiyun	.quad 0x0000000000000000, 0x2000000004000002
741*4882a593Smuzhiyun	.quad 0x2000000000000002, 0x0000000004000000
742*4882a593Smuzhiyun	.quad 0x2002000004200002, 0x2000000000000802
743*4882a593Smuzhiyun	.quad 0x0000000004000800, 0x2002000000200802
744*4882a593Smuzhiyun	.quad 0x2002000000200002, 0x0000000004000800
745*4882a593Smuzhiyun	.quad 0x2000000004000002, 0x0002000004200000
746*4882a593Smuzhiyun	.quad 0x0002000004200800, 0x2002000000200002
747*4882a593Smuzhiyun	.quad 0x0002000004200000, 0x0000000000000800
748*4882a593Smuzhiyun	.quad 0x2000000000000802, 0x2002000004200802
749*4882a593Smuzhiyun	.quad 0x0002000000200800, 0x2000000000000002
750*4882a593Smuzhiyun	.quad 0x0000000004000000, 0x0002000000200800
751*4882a593Smuzhiyun	.quad 0x0000000004000000, 0x0002000000200800
752*4882a593Smuzhiyun	.quad 0x0002000000200000, 0x2000000004000802
753*4882a593Smuzhiyun	.quad 0x2000000004000802, 0x2002000004200002
754*4882a593Smuzhiyun	.quad 0x2002000004200002, 0x2000000000000002
755*4882a593Smuzhiyun	.quad 0x2002000000200002, 0x0000000004000000
756*4882a593Smuzhiyun	.quad 0x0000000004000800, 0x0002000000200000
757*4882a593Smuzhiyun	.quad 0x0002000004200800, 0x2000000000000802
758*4882a593Smuzhiyun	.quad 0x2002000000200802, 0x0002000004200800
759*4882a593Smuzhiyun	.quad 0x2000000000000802, 0x2000000004000002
760*4882a593Smuzhiyun	.quad 0x2002000004200802, 0x0002000004200000
761*4882a593Smuzhiyun	.quad 0x0002000000200800, 0x0000000000000000
762*4882a593Smuzhiyun	.quad 0x2000000000000002, 0x2002000004200802
763*4882a593Smuzhiyun	.quad 0x0000000000000000, 0x2002000000200802
764*4882a593Smuzhiyun	.quad 0x0002000004200000, 0x0000000000000800
765*4882a593Smuzhiyun	.quad 0x2000000004000002, 0x0000000004000800
766*4882a593Smuzhiyun	.quad 0x0000000000000800, 0x2002000000200002
767*4882a593Smuzhiyun.L_s8:
768*4882a593Smuzhiyun	.quad 0x0100010410001000, 0x0000010000001000
769*4882a593Smuzhiyun	.quad 0x0000000000040000, 0x0100010410041000
770*4882a593Smuzhiyun	.quad 0x0100000010000000, 0x0100010410001000
771*4882a593Smuzhiyun	.quad 0x0000000400000000, 0x0100000010000000
772*4882a593Smuzhiyun	.quad 0x0000000400040000, 0x0100000010040000
773*4882a593Smuzhiyun	.quad 0x0100010410041000, 0x0000010000041000
774*4882a593Smuzhiyun	.quad 0x0100010010041000, 0x0000010400041000
775*4882a593Smuzhiyun	.quad 0x0000010000001000, 0x0000000400000000
776*4882a593Smuzhiyun	.quad 0x0100000010040000, 0x0100000410000000
777*4882a593Smuzhiyun	.quad 0x0100010010001000, 0x0000010400001000
778*4882a593Smuzhiyun	.quad 0x0000010000041000, 0x0000000400040000
779*4882a593Smuzhiyun	.quad 0x0100000410040000, 0x0100010010041000
780*4882a593Smuzhiyun	.quad 0x0000010400001000, 0x0000000000000000
781*4882a593Smuzhiyun	.quad 0x0000000000000000, 0x0100000410040000
782*4882a593Smuzhiyun	.quad 0x0100000410000000, 0x0100010010001000
783*4882a593Smuzhiyun	.quad 0x0000010400041000, 0x0000000000040000
784*4882a593Smuzhiyun	.quad 0x0000010400041000, 0x0000000000040000
785*4882a593Smuzhiyun	.quad 0x0100010010041000, 0x0000010000001000
786*4882a593Smuzhiyun	.quad 0x0000000400000000, 0x0100000410040000
787*4882a593Smuzhiyun	.quad 0x0000010000001000, 0x0000010400041000
788*4882a593Smuzhiyun	.quad 0x0100010010001000, 0x0000000400000000
789*4882a593Smuzhiyun	.quad 0x0100000410000000, 0x0100000010040000
790*4882a593Smuzhiyun	.quad 0x0100000410040000, 0x0100000010000000
791*4882a593Smuzhiyun	.quad 0x0000000000040000, 0x0100010410001000
792*4882a593Smuzhiyun	.quad 0x0000000000000000, 0x0100010410041000
793*4882a593Smuzhiyun	.quad 0x0000000400040000, 0x0100000410000000
794*4882a593Smuzhiyun	.quad 0x0100000010040000, 0x0100010010001000
795*4882a593Smuzhiyun	.quad 0x0100010410001000, 0x0000000000000000
796*4882a593Smuzhiyun	.quad 0x0100010410041000, 0x0000010000041000
797*4882a593Smuzhiyun	.quad 0x0000010000041000, 0x0000010400001000
798*4882a593Smuzhiyun	.quad 0x0000010400001000, 0x0000000400040000
799*4882a593Smuzhiyun	.quad 0x0100000010000000, 0x0100010010041000
800