xref: /OK3568_Linux_fs/kernel/arch/alpha/lib/csum_ipv6_magic.S (revision 4882a59341e53eb6f0b4789bf948001014eff981)
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * arch/alpha/lib/csum_ipv6_magic.S
 * Contributed by Richard Henderson <rth@tamu.edu>
 *
 * unsigned short csum_ipv6_magic(struct in6_addr *saddr,
 *                                struct in6_addr *daddr,
 *                                __u32 len,
 *                                unsigned short proto,
 *                                unsigned int csum);
 *
 * Misalignment handling (which costs 16 instructions / 8 cycles)
 * added by Ivan Kokshaysky <ink@jurassic.park.msu.ru>
 */
15*4882a593Smuzhiyun
#include <asm/export.h>

/*
 * csum_ipv6_magic: 16-bit ones-complement checksum of the two 16-byte
 * addresses, the byte-swapped len and proto, and the incoming csum.
 *
 * In:    $16 = saddr  (pointer, any alignment)
 *        $17 = daddr  (pointer, any alignment)
 *        $18 = len    (bytes 0..3 are used)
 *        $19 = proto  (bytes 0..1 are used)
 *        $20 = csum   (bytes 0..3 are used)
 * Out:   $0  = folded sum, complemented, masked to 16 bits
 * Uses:  $1-$7 and $21-$24 as scratch; $18-$20 are overwritten.
 *        Zero-size frame, no stores to memory.
 *
 * Method:
 *   1. Each address is read as two 64-bit words.  Every word is built
 *      from a pair of ldq_u loads (offsets 0/7 and 8/15) merged with
 *      extql/extqh/or, so the pointers need not be 8-byte aligned.
 *   2. len is byte-swapped as a 32-bit value; proto is byte-swapped as
 *      a 16-bit value and placed in bits 16..31.
 *   3. The six values are added to the zero-extended csum in $20.  The
 *      carry out of every 64-bit addition is captured with cmpult.
 *   4. The 64-bit sum is split into its four 16-bit halfwords, and the
 *      halfwords plus the carry count are added together.  A carry out
 *      of bit 63 is worth 2^64, which is congruent to 1 modulo 0xffff,
 *      so each carry counts as 1.  The carries are deliberately NOT
 *      added to the 64-bit sum itself: that addition can wrap (for
 *      example when both saddr words are all-ones) and silently drop
 *      a carry.
 *   5. The small total is folded twice to 16 bits and complemented.
 *
 * The "e0" / ".. e1" tags are the issue-slot annotations of the
 * original schedule.  The instructions of step 4 were rewritten and
 * carry no tags: their pairing has not been re-verified.
 */
	.globl csum_ipv6_magic
	.align 4
	.ent csum_ipv6_magic
	.frame $30,0,$26,0
csum_ipv6_magic:
	.prologue 0

	ldq_u	$0,0($16)	# e0    : quadword holding saddr byte 0
	zapnot	$20,15,$20	# .. e1 : keep bytes 0..3: zero-extend csum
	extqh	$18,1,$4	# e0    : len byte 0 moved up to byte 7
	ldq_u	$21,7($16)	# .. e1 : quadword holding saddr byte 7

	extbl	$18,1,$5	# e0    : $5 = len byte 1
	ldq_u	$1,8($16)	# .. e1 : quadword holding saddr byte 8
	extbl	$18,2,$6	# e0    : $6 = len byte 2
	ldq_u	$22,15($16)	# .. e1 : quadword holding saddr byte 15

	extbl	$18,3,$18	# e0    : $18 = len byte 3 (lands in bits 0..7)
	ldq_u	$2,0($17)	# .. e1 : quadword holding daddr byte 0
	sra	$4,32,$4	# e0    : len byte 0 now in bits 24..31, sign-extended
				#         NOTE(review): if that byte has its top bit set
				#         the upper halfwords become 0xffff; presumably
				#         harmless, since 0xffff is congruent to 0 in the
				#         ones-complement sum.
	ldq_u	$23,7($17)	# .. e1 : quadword holding daddr byte 7

	extql	$0,$16,$0	# e0    : low part of 1st src word
	ldq_u	$3,8($17)	# .. e1 : quadword holding daddr byte 8
	extqh	$21,$16,$21	# e0    : high part of 1st src word
	ldq_u	$24,15($17)	# .. e1 : quadword holding daddr byte 15

	sll	$5,16,$5	# e0    : len byte 1 -> bits 16..23
	or	$0,$21,$0	# .. e1 : 1st src word complete
	extql	$1,$16,$1	# e0    : low part of 2nd src word
	addq	$20,$0,$20	# .. e1 : sum = csum + src word 1

	extqh	$22,$16,$22	# e0    : high part of 2nd src word
	cmpult	$20,$0,$0	# .. e1 : $0 = carry out of that add
	sll	$6,8,$6		# e0    : len byte 2 -> bits 8..15
	or	$1,$22,$1	# .. e1 : 2nd src word complete

	extql	$2,$17,$2	# e0    : low part of 1st dst word
	or	$4,$18,$18	# .. e1 : len bytes 0 and 3 in place
	extqh	$23,$17,$23	# e0    : high part of 1st dst word
	or	$5,$6,$5	# .. e1 : len bytes 1 and 2 in place

	extql	$3,$17,$3	# e0    : low part of 2nd dst word
	or	$2,$23,$2	# .. e1 : 1st dst word complete
	extqh	$24,$17,$24	# e0    : high part of 2nd dst word
	or	$18,$5,$18	# .. e1 : byte-swapped len complete

	extwh	$19,7,$7	# e0    : proto byte 0 -> bits 8..15
	or	$3,$24,$3	# .. e1 : 2nd dst word complete
	extbl	$19,1,$19	# e0    : proto byte 1 -> bits 0..7
	addq	$20,$1,$20	# .. e1 : sum += src word 2

	or	$19,$7,$19	# e0    : 16-bit byte-swapped proto
	cmpult	$20,$1,$1	# .. e1 : $1 = carry
	sll	$19,48,$19	# e0    : move it to the top halfword ...
	nop			# .. e0 : padding

	sra	$19,32,$19	# e0    : ... then down to bits 16..31: proto complete
	addq	$20,$2,$20	# .. e1 : sum += dst word 1
	cmpult	$20,$2,$2	# e0    : $2 = carry
	addq	$20,$3,$20	# .. e1 : sum += dst word 2

	cmpult	$20,$3,$3	# e0    : $3 = carry
	addq	$20,$18,$20	# .. e1 : sum += len
	cmpult	$20,$18,$18	# e0    : $18 = carry
	addq	$20,$19,$20	# .. e1 : sum += proto

	cmpult	$20,$19,$19	# e0    : $19 = carry
	addq	$0,$1,$0	# .. e1 : start counting the six carries
	addq	$2,$3,$2	# e0    :
	addq	$18,$19,$18	# .. e1 :

	# Split the 64-bit sum into halfwords before the carries go in,
	# so that no addition from here on can wrap.
	extwl	$20,4,$1	# sum bits 32..47
	addq	$0,$2,$0	#
	extwl	$20,2,$2	# sum bits 16..31
	addq	$0,$18,$0	# $0 = number of carries, 0..6

	extwl	$20,6,$3	# sum bits 48..63
	zapnot	$20,3,$20	# sum bits 0..15
	addq	$1,$2,$1	#
	addq	$3,$20,$3	#

	addq	$0,$1,$0	#
	unop			#
	addq	$0,$3,$0	# total is at most 4*0xffff + 6 = 0x40002
	unop			#

	extwl	$0,2,$1		# e0    : fold 19-bit value
	zapnot	$0,3,$0		# .. e1 :
	addq	$0,$1,$0	# e0    : at most 0xffff + 4
	unop			#       :

	extwl	$0,2,$1		# e0    : fold 17-bit value
	zapnot	$0,3,$0		# .. e1 :
	addq	$0,$1,$0	# e0    : now fits in 16 bits
	not	$0,$0		# .. e1 : and complement.

	zapnot	$0,3,$0		# e0    : return only the low 16 bits
	ret			# .. e1 :

	.end csum_ipv6_magic
	EXPORT_SYMBOL(csum_ipv6_magic)
119