xref: /OK3568_Linux_fs/kernel/arch/alpha/lib/checksum.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun  * arch/alpha/lib/checksum.c
4*4882a593Smuzhiyun  *
5*4882a593Smuzhiyun  * This file contains network checksum routines that are better done
6*4882a593Smuzhiyun  * in an architecture-specific manner due to speed..
7*4882a593Smuzhiyun  * Comments in other versions indicate that the algorithms are from RFC1071
8*4882a593Smuzhiyun  *
9*4882a593Smuzhiyun  * accelerated versions (and 21264 assembly versions ) contributed by
10*4882a593Smuzhiyun  *	Rick Gorton	<rick.gorton@alpha-processor.com>
11*4882a593Smuzhiyun  */
12*4882a593Smuzhiyun 
13*4882a593Smuzhiyun #include <linux/module.h>
14*4882a593Smuzhiyun #include <linux/string.h>
15*4882a593Smuzhiyun 
16*4882a593Smuzhiyun #include <asm/byteorder.h>
17*4882a593Smuzhiyun 
/*
 * Fold a 64-bit accumulator down to 16 bits by adding its pieces
 * together: two 32-bit halves first, then the 16-bit pieces of that
 * sum, then the two 16-bit pieces of the result.
 *
 * The pieces are picked out through a union instead of shifts and
 * masks.  This relies on unsigned long being 64 bits wide and on the
 * little-endian layout, where index 0 is the least significant piece.
 */
static inline unsigned short from64to16(unsigned long x)
{
	union fold {
		unsigned long	whole;
		unsigned int	half[2];
		unsigned short	quarter[4];
	};
	union fold src, mid, last;

	src.whole = x;

	/* 64 -> 33 bits: the sum of two 32-bit values. */
	mid.whole = (unsigned long) src.half[1] + (unsigned long) src.half[0];

	/*
	 * 33 -> 17 bits.  The sum above never reaches bit 48, so
	 * mid.quarter[3] is always zero and is left out of the add.
	 */
	last.whole = (unsigned long) mid.quarter[2]
			+ (unsigned long) mid.quarter[1]
			+ (unsigned long) mid.quarter[0];

	/* 17 -> 16 bits.  last.quarter[2] is likewise always zero here. */
	return last.quarter[1] + last.quarter[0];
}
40*4882a593Smuzhiyun 
41*4882a593Smuzhiyun /*
42*4882a593Smuzhiyun  * computes the checksum of the TCP/UDP pseudo-header
43*4882a593Smuzhiyun  * returns a 16-bit checksum, already complemented.
44*4882a593Smuzhiyun  */
csum_tcpudp_magic(__be32 saddr,__be32 daddr,__u32 len,__u8 proto,__wsum sum)45*4882a593Smuzhiyun __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr,
46*4882a593Smuzhiyun 			  __u32 len, __u8 proto, __wsum sum)
47*4882a593Smuzhiyun {
48*4882a593Smuzhiyun 	return (__force __sum16)~from64to16(
49*4882a593Smuzhiyun 		(__force u64)saddr + (__force u64)daddr +
50*4882a593Smuzhiyun 		(__force u64)sum + ((len + proto) << 8));
51*4882a593Smuzhiyun }
52*4882a593Smuzhiyun EXPORT_SYMBOL(csum_tcpudp_magic);
53*4882a593Smuzhiyun 
/*
 * Same pseudo-header sum as csum_tcpudp_magic(), but the result is
 * only folded down to 32 bits and is not complemented.
 *
 * Returns the 32-bit partial sum.
 */
__wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
			  __u32 len, __u8 proto, __wsum sum)
{
	unsigned long result;

	/*
	 * Widen before adding and shifting.  Done in 32 bits, the
	 * "<< 8" throws away the top byte once len reaches 2^24 (and
	 * len + proto itself can wrap).  For smaller lengths the value
	 * is the same as with the 32-bit expression.
	 */
	result = ((u64)len + proto) << 8;
	result += (__force u64)saddr;
	result += (__force u64)daddr;
	result += (__force u64)sum;

	/* Fold down to 32-bits so we don't lose in the typedef-less
	   network stack.  */
	/* 64 to 33 */
	result = (result & 0xffffffff) + (result >> 32);
	/* 33 to 32 */
	result = (result & 0xffffffff) + (result >> 32);
	return (__force __wsum)result;
}
EXPORT_SYMBOL(csum_tcpudp_nofold);
71*4882a593Smuzhiyun 
/*
 * Sum an arbitrary memory area using 64-bit loads where possible and
 * return the total folded to 16 bits.
 *
 * The buffer is first stepped up to 2-, 4- and then 8-byte alignment,
 * the bulk is added eight bytes at a time with an explicit carry, and
 * the 4-, 2- and 1-byte tails are added afterwards.  A buffer that
 * starts on an odd address gets the two bytes of the folded result
 * swapped before it is returned.
 *
 * Returns 0 when len is zero or negative.
 */
static inline unsigned long do_csum(const unsigned char * buff, int len)
{
	unsigned long acc = 0;
	int starts_odd, units;

	if (len <= 0)
		return acc;

	/* Take a lone leading byte so that buff becomes 2-byte aligned. */
	starts_odd = (unsigned long) buff & 1;
	if (starts_odd) {
		acc = *buff << 8;
		buff++;
		len--;
	}

	units = len >> 1;		/* 16-bit words left */
	if (units) {
		if ((unsigned long) buff & 2) {
			/* one 16-bit word to reach 4-byte alignment */
			acc += *(const unsigned short *) buff;
			buff += 2;
			len -= 2;
			units--;
		}
		units >>= 1;		/* 32-bit words left */
		if (units) {
			if ((unsigned long) buff & 4) {
				/* one 32-bit word to reach 8-byte alignment */
				acc += *(const unsigned int *) buff;
				buff += 4;
				len -= 4;
				units--;
			}
			units >>= 1;	/* 64-bit words left */
			if (units) {
				unsigned long cy = 0;

				do {
					unsigned long q = *(const unsigned long *) buff;

					buff += 8;
					units--;
					/* add last round's carry, then this word */
					acc += cy;
					acc += q;
					/* the add wrapped iff the sum is below q */
					cy = (acc < q);
				} while (units);
				acc += cy;
				/* 64 -> 33 bits, leaving room for the tails */
				acc = (acc >> 32) + (acc & 0xffffffff);
			}
			/*
			 * len was not reduced inside the loop, but its low
			 * three bits still describe the tail.
			 */
			if (len & 4) {
				acc += *(const unsigned int *) buff;
				buff += 4;
			}
		}
		if (len & 2) {
			acc += *(const unsigned short *) buff;
			buff += 2;
		}
	}
	if (len & 1)
		acc += *buff;

	acc = from64to16(acc);
	if (starts_odd)
		acc = ((acc & 0xff) << 8) | ((acc >> 8) & 0xff);
	return acc;
}
140*4882a593Smuzhiyun 
141*4882a593Smuzhiyun /*
142*4882a593Smuzhiyun  *	This is a version of ip_compute_csum() optimized for IP headers,
143*4882a593Smuzhiyun  *	which always checksum on 4 octet boundaries.
144*4882a593Smuzhiyun  */
ip_fast_csum(const void * iph,unsigned int ihl)145*4882a593Smuzhiyun __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
146*4882a593Smuzhiyun {
147*4882a593Smuzhiyun 	return (__force __sum16)~do_csum(iph,ihl*4);
148*4882a593Smuzhiyun }
149*4882a593Smuzhiyun EXPORT_SYMBOL(ip_fast_csum);
150*4882a593Smuzhiyun 
151*4882a593Smuzhiyun /*
152*4882a593Smuzhiyun  * computes the checksum of a memory block at buff, length len,
153*4882a593Smuzhiyun  * and adds in "sum" (32-bit)
154*4882a593Smuzhiyun  *
155*4882a593Smuzhiyun  * returns a 32-bit number suitable for feeding into itself
156*4882a593Smuzhiyun  * or csum_tcpudp_magic
157*4882a593Smuzhiyun  *
158*4882a593Smuzhiyun  * this function must be called with even lengths, except
159*4882a593Smuzhiyun  * for the last fragment, which may be odd
160*4882a593Smuzhiyun  *
161*4882a593Smuzhiyun  * it's best to have buff aligned on a 32-bit boundary
162*4882a593Smuzhiyun  */
csum_partial(const void * buff,int len,__wsum sum)163*4882a593Smuzhiyun __wsum csum_partial(const void *buff, int len, __wsum sum)
164*4882a593Smuzhiyun {
165*4882a593Smuzhiyun 	unsigned long result = do_csum(buff, len);
166*4882a593Smuzhiyun 
167*4882a593Smuzhiyun 	/* add in old sum, and carry.. */
168*4882a593Smuzhiyun 	result += (__force u32)sum;
169*4882a593Smuzhiyun 	/* 32+c bits -> 32 bits */
170*4882a593Smuzhiyun 	result = (result & 0xffffffff) + (result >> 32);
171*4882a593Smuzhiyun 	return (__force __wsum)result;
172*4882a593Smuzhiyun }
173*4882a593Smuzhiyun 
174*4882a593Smuzhiyun EXPORT_SYMBOL(csum_partial);
175*4882a593Smuzhiyun 
176*4882a593Smuzhiyun /*
177*4882a593Smuzhiyun  * this routine is used for miscellaneous IP-like checksums, mainly
178*4882a593Smuzhiyun  * in icmp.c
179*4882a593Smuzhiyun  */
ip_compute_csum(const void * buff,int len)180*4882a593Smuzhiyun __sum16 ip_compute_csum(const void *buff, int len)
181*4882a593Smuzhiyun {
182*4882a593Smuzhiyun 	return (__force __sum16)~from64to16(do_csum(buff,len));
183*4882a593Smuzhiyun }
184*4882a593Smuzhiyun EXPORT_SYMBOL(ip_compute_csum);
185