// SPDX-License-Identifier: GPL-2.0
/*
 * csum_partial_copy - do IP checksumming and copy
 *
 * (C) Copyright 1996 Linus Torvalds
 * accelerated versions (and 21264 assembly versions) contributed by
 *	Rick Gorton	<rick.gorton@alpha-processor.com>
 *
 * Don't look at this too closely - you'll go mad. The things
 * we do for performance..
 */
12*4882a593Smuzhiyun 
13*4882a593Smuzhiyun #include <linux/types.h>
14*4882a593Smuzhiyun #include <linux/string.h>
15*4882a593Smuzhiyun #include <linux/uaccess.h>
16*4882a593Smuzhiyun 
17*4882a593Smuzhiyun 
/*
 * One-instruction wrappers around the Alpha unaligned load/store and
 * byte-manipulation instructions used by the copy loops below.
 *
 * ldq_u(x, y):  x = result of "ldq_u" on the memory at address y.
 * stq_u(x, y):  "stq_u" of value x to the memory at address y.
 * ext/msk/ins:  op(x, y, z) emits "op x,y,z", i.e. x and y are the two
 *               register inputs and z receives the result.
 *
 * All of them are __volatile__, so the compiler will neither drop nor
 * merge them.  See the Alpha architecture manual for the exact byte
 * selection performed by each instruction.
 */
#define ldq_u(x,y) \
__asm__ __volatile__("ldq_u %0,%1":"=r" (x):"m" (*(const unsigned long *)(y)))

#define stq_u(x,y) \
__asm__ __volatile__("stq_u %1,%0":"=m" (*(unsigned long *)(y)):"r" (x))

#define extql(x,y,z) \
__asm__ __volatile__("extql %1,%2,%0":"=r" (z):"r" (x),"r" (y))

#define extqh(x,y,z) \
__asm__ __volatile__("extqh %1,%2,%0":"=r" (z):"r" (x),"r" (y))

#define mskql(x,y,z) \
__asm__ __volatile__("mskql %1,%2,%0":"=r" (z):"r" (x),"r" (y))

#define mskqh(x,y,z) \
__asm__ __volatile__("mskqh %1,%2,%0":"=r" (z):"r" (x),"r" (y))

#define insql(x,y,z) \
__asm__ __volatile__("insql %1,%2,%0":"=r" (z):"r" (x),"r" (y))

#define insqh(x,y,z) \
__asm__ __volatile__("insqh %1,%2,%0":"=r" (z):"r" (x),"r" (y))
41*4882a593Smuzhiyun 
/*
 * __get_word(insn, x, ptr) - load one word from "ptr" into "x" using the
 * load instruction "insn" (ldq or ldq_u in this file).
 *
 * The statement expression evaluates to __guu_err.  That variable is tied
 * to the constant 0 on entry through the "1"(0) matching constraint, so
 * the result is 0 unless something changes it; every caller in this file
 * treats a non-zero result as a failed access and bails out.
 *
 * EXC() and __m() are not defined in this file (presumably they come from
 * the uaccess headers).  NOTE(review): the EXC(1b,2b,%0,%1) entry is
 * expected to be what makes __guu_err non-zero when the load at label 1
 * faults - confirm against its definition.
 */
#define __get_word(insn,x,ptr)				\
({							\
	long __guu_err;					\
	__asm__ __volatile__(				\
	"1:	"#insn" %0,%2\n"			\
	"2:\n"						\
	EXC(1b,2b,%0,%1)				\
		: "=r"(x), "=r"(__guu_err)		\
		: "m"(__m(ptr)), "1"(0));		\
	__guu_err;					\
})
53*4882a593Smuzhiyun 
/*
 * Fold a 64-bit ones'-complement accumulator down to 16 bits.
 *
 * The value is split through a union first into two 32-bit halves and
 * then into 16-bit pieces, adding the pieces together at each step so
 * that carries out of the low bits are wrapped back in.
 *
 * The "always zero" shortcuts below depend on us[2]/us[3] being the most
 * significant 16-bit pieces, i.e. on a little-endian layout with a
 * 64-bit unsigned long.
 */
static inline unsigned short from64to16(unsigned long x)
{
	/* Using extract instructions is a bit more efficient
	   than the original shift/bitmask version.  */

	union {
		unsigned long	ul;
		unsigned int	ui[2];
		unsigned short	us[4];
	} in_v, tmp_v, out_v;

	in_v.ul = x;
	/* Sum of two 32-bit values: fits in 33 bits. */
	tmp_v.ul = (unsigned long) in_v.ui[0] + (unsigned long) in_v.ui[1];

	/* Since the bits of tmp_v.us[3] are going to always be zero,
	   we don't have to bother to add that in.  */
	out_v.ul = (unsigned long) tmp_v.us[0] + (unsigned long) tmp_v.us[1]
			+ (unsigned long) tmp_v.us[2];

	/* Similarly, out_v.us[2] is always zero for the final add.  */
	return out_v.us[0] + out_v.us[1];
}
76*4882a593Smuzhiyun 
77*4882a593Smuzhiyun 
78*4882a593Smuzhiyun 
79*4882a593Smuzhiyun /*
80*4882a593Smuzhiyun  * Ok. This isn't fun, but this is the EASY case.
81*4882a593Smuzhiyun  */
82*4882a593Smuzhiyun static inline unsigned long
csum_partial_cfu_aligned(const unsigned long __user * src,unsigned long * dst,long len)83*4882a593Smuzhiyun csum_partial_cfu_aligned(const unsigned long __user *src, unsigned long *dst,
84*4882a593Smuzhiyun 			 long len)
85*4882a593Smuzhiyun {
86*4882a593Smuzhiyun 	unsigned long checksum = ~0U;
87*4882a593Smuzhiyun 	unsigned long carry = 0;
88*4882a593Smuzhiyun 
89*4882a593Smuzhiyun 	while (len >= 0) {
90*4882a593Smuzhiyun 		unsigned long word;
91*4882a593Smuzhiyun 		if (__get_word(ldq, word, src))
92*4882a593Smuzhiyun 			return 0;
93*4882a593Smuzhiyun 		checksum += carry;
94*4882a593Smuzhiyun 		src++;
95*4882a593Smuzhiyun 		checksum += word;
96*4882a593Smuzhiyun 		len -= 8;
97*4882a593Smuzhiyun 		carry = checksum < word;
98*4882a593Smuzhiyun 		*dst = word;
99*4882a593Smuzhiyun 		dst++;
100*4882a593Smuzhiyun 	}
101*4882a593Smuzhiyun 	len += 8;
102*4882a593Smuzhiyun 	checksum += carry;
103*4882a593Smuzhiyun 	if (len) {
104*4882a593Smuzhiyun 		unsigned long word, tmp;
105*4882a593Smuzhiyun 		if (__get_word(ldq, word, src))
106*4882a593Smuzhiyun 			return 0;
107*4882a593Smuzhiyun 		tmp = *dst;
108*4882a593Smuzhiyun 		mskql(word, len, word);
109*4882a593Smuzhiyun 		checksum += word;
110*4882a593Smuzhiyun 		mskqh(tmp, len, tmp);
111*4882a593Smuzhiyun 		carry = checksum < word;
112*4882a593Smuzhiyun 		*dst = word | tmp;
113*4882a593Smuzhiyun 		checksum += carry;
114*4882a593Smuzhiyun 	}
115*4882a593Smuzhiyun 	return checksum;
116*4882a593Smuzhiyun }
117*4882a593Smuzhiyun 
118*4882a593Smuzhiyun /*
119*4882a593Smuzhiyun  * This is even less fun, but this is still reasonably
120*4882a593Smuzhiyun  * easy.
121*4882a593Smuzhiyun  */
122*4882a593Smuzhiyun static inline unsigned long
csum_partial_cfu_dest_aligned(const unsigned long __user * src,unsigned long * dst,unsigned long soff,long len)123*4882a593Smuzhiyun csum_partial_cfu_dest_aligned(const unsigned long __user *src,
124*4882a593Smuzhiyun 			      unsigned long *dst,
125*4882a593Smuzhiyun 			      unsigned long soff,
126*4882a593Smuzhiyun 			      long len)
127*4882a593Smuzhiyun {
128*4882a593Smuzhiyun 	unsigned long first;
129*4882a593Smuzhiyun 	unsigned long word, carry;
130*4882a593Smuzhiyun 	unsigned long lastsrc = 7+len+(unsigned long)src;
131*4882a593Smuzhiyun 	unsigned long checksum = ~0U;
132*4882a593Smuzhiyun 
133*4882a593Smuzhiyun 	if (__get_word(ldq_u, first,src))
134*4882a593Smuzhiyun 		return 0;
135*4882a593Smuzhiyun 	carry = 0;
136*4882a593Smuzhiyun 	while (len >= 0) {
137*4882a593Smuzhiyun 		unsigned long second;
138*4882a593Smuzhiyun 
139*4882a593Smuzhiyun 		if (__get_word(ldq_u, second, src+1))
140*4882a593Smuzhiyun 			return 0;
141*4882a593Smuzhiyun 		extql(first, soff, word);
142*4882a593Smuzhiyun 		len -= 8;
143*4882a593Smuzhiyun 		src++;
144*4882a593Smuzhiyun 		extqh(second, soff, first);
145*4882a593Smuzhiyun 		checksum += carry;
146*4882a593Smuzhiyun 		word |= first;
147*4882a593Smuzhiyun 		first = second;
148*4882a593Smuzhiyun 		checksum += word;
149*4882a593Smuzhiyun 		*dst = word;
150*4882a593Smuzhiyun 		dst++;
151*4882a593Smuzhiyun 		carry = checksum < word;
152*4882a593Smuzhiyun 	}
153*4882a593Smuzhiyun 	len += 8;
154*4882a593Smuzhiyun 	checksum += carry;
155*4882a593Smuzhiyun 	if (len) {
156*4882a593Smuzhiyun 		unsigned long tmp;
157*4882a593Smuzhiyun 		unsigned long second;
158*4882a593Smuzhiyun 		if (__get_word(ldq_u, second, lastsrc))
159*4882a593Smuzhiyun 			return 0;
160*4882a593Smuzhiyun 		tmp = *dst;
161*4882a593Smuzhiyun 		extql(first, soff, word);
162*4882a593Smuzhiyun 		extqh(second, soff, first);
163*4882a593Smuzhiyun 		word |= first;
164*4882a593Smuzhiyun 		mskql(word, len, word);
165*4882a593Smuzhiyun 		checksum += word;
166*4882a593Smuzhiyun 		mskqh(tmp, len, tmp);
167*4882a593Smuzhiyun 		carry = checksum < word;
168*4882a593Smuzhiyun 		*dst = word | tmp;
169*4882a593Smuzhiyun 		checksum += carry;
170*4882a593Smuzhiyun 	}
171*4882a593Smuzhiyun 	return checksum;
172*4882a593Smuzhiyun }
173*4882a593Smuzhiyun 
174*4882a593Smuzhiyun /*
175*4882a593Smuzhiyun  * This is slightly less fun than the above..
176*4882a593Smuzhiyun  */
177*4882a593Smuzhiyun static inline unsigned long
csum_partial_cfu_src_aligned(const unsigned long __user * src,unsigned long * dst,unsigned long doff,long len,unsigned long partial_dest)178*4882a593Smuzhiyun csum_partial_cfu_src_aligned(const unsigned long __user *src,
179*4882a593Smuzhiyun 			     unsigned long *dst,
180*4882a593Smuzhiyun 			     unsigned long doff,
181*4882a593Smuzhiyun 			     long len,
182*4882a593Smuzhiyun 			     unsigned long partial_dest)
183*4882a593Smuzhiyun {
184*4882a593Smuzhiyun 	unsigned long carry = 0;
185*4882a593Smuzhiyun 	unsigned long word;
186*4882a593Smuzhiyun 	unsigned long second_dest;
187*4882a593Smuzhiyun 	unsigned long checksum = ~0U;
188*4882a593Smuzhiyun 
189*4882a593Smuzhiyun 	mskql(partial_dest, doff, partial_dest);
190*4882a593Smuzhiyun 	while (len >= 0) {
191*4882a593Smuzhiyun 		if (__get_word(ldq, word, src))
192*4882a593Smuzhiyun 			return 0;
193*4882a593Smuzhiyun 		len -= 8;
194*4882a593Smuzhiyun 		insql(word, doff, second_dest);
195*4882a593Smuzhiyun 		checksum += carry;
196*4882a593Smuzhiyun 		stq_u(partial_dest | second_dest, dst);
197*4882a593Smuzhiyun 		src++;
198*4882a593Smuzhiyun 		checksum += word;
199*4882a593Smuzhiyun 		insqh(word, doff, partial_dest);
200*4882a593Smuzhiyun 		carry = checksum < word;
201*4882a593Smuzhiyun 		dst++;
202*4882a593Smuzhiyun 	}
203*4882a593Smuzhiyun 	len += 8;
204*4882a593Smuzhiyun 	if (len) {
205*4882a593Smuzhiyun 		checksum += carry;
206*4882a593Smuzhiyun 		if (__get_word(ldq, word, src))
207*4882a593Smuzhiyun 			return 0;
208*4882a593Smuzhiyun 		mskql(word, len, word);
209*4882a593Smuzhiyun 		len -= 8;
210*4882a593Smuzhiyun 		checksum += word;
211*4882a593Smuzhiyun 		insql(word, doff, second_dest);
212*4882a593Smuzhiyun 		len += doff;
213*4882a593Smuzhiyun 		carry = checksum < word;
214*4882a593Smuzhiyun 		partial_dest |= second_dest;
215*4882a593Smuzhiyun 		if (len >= 0) {
216*4882a593Smuzhiyun 			stq_u(partial_dest, dst);
217*4882a593Smuzhiyun 			if (!len) goto out;
218*4882a593Smuzhiyun 			dst++;
219*4882a593Smuzhiyun 			insqh(word, doff, partial_dest);
220*4882a593Smuzhiyun 		}
221*4882a593Smuzhiyun 		doff = len;
222*4882a593Smuzhiyun 	}
223*4882a593Smuzhiyun 	ldq_u(second_dest, dst);
224*4882a593Smuzhiyun 	mskqh(second_dest, doff, second_dest);
225*4882a593Smuzhiyun 	stq_u(partial_dest | second_dest, dst);
226*4882a593Smuzhiyun out:
227*4882a593Smuzhiyun 	checksum += carry;
228*4882a593Smuzhiyun 	return checksum;
229*4882a593Smuzhiyun }
230*4882a593Smuzhiyun 
231*4882a593Smuzhiyun /*
232*4882a593Smuzhiyun  * This is so totally un-fun that it's frightening. Don't
233*4882a593Smuzhiyun  * look at this too closely, you'll go blind.
234*4882a593Smuzhiyun  */
235*4882a593Smuzhiyun static inline unsigned long
csum_partial_cfu_unaligned(const unsigned long __user * src,unsigned long * dst,unsigned long soff,unsigned long doff,long len,unsigned long partial_dest)236*4882a593Smuzhiyun csum_partial_cfu_unaligned(const unsigned long __user * src,
237*4882a593Smuzhiyun 			   unsigned long * dst,
238*4882a593Smuzhiyun 			   unsigned long soff, unsigned long doff,
239*4882a593Smuzhiyun 			   long len, unsigned long partial_dest)
240*4882a593Smuzhiyun {
241*4882a593Smuzhiyun 	unsigned long carry = 0;
242*4882a593Smuzhiyun 	unsigned long first;
243*4882a593Smuzhiyun 	unsigned long lastsrc;
244*4882a593Smuzhiyun 	unsigned long checksum = ~0U;
245*4882a593Smuzhiyun 
246*4882a593Smuzhiyun 	if (__get_word(ldq_u, first, src))
247*4882a593Smuzhiyun 		return 0;
248*4882a593Smuzhiyun 	lastsrc = 7+len+(unsigned long)src;
249*4882a593Smuzhiyun 	mskql(partial_dest, doff, partial_dest);
250*4882a593Smuzhiyun 	while (len >= 0) {
251*4882a593Smuzhiyun 		unsigned long second, word;
252*4882a593Smuzhiyun 		unsigned long second_dest;
253*4882a593Smuzhiyun 
254*4882a593Smuzhiyun 		if (__get_word(ldq_u, second, src+1))
255*4882a593Smuzhiyun 			return 0;
256*4882a593Smuzhiyun 		extql(first, soff, word);
257*4882a593Smuzhiyun 		checksum += carry;
258*4882a593Smuzhiyun 		len -= 8;
259*4882a593Smuzhiyun 		extqh(second, soff, first);
260*4882a593Smuzhiyun 		src++;
261*4882a593Smuzhiyun 		word |= first;
262*4882a593Smuzhiyun 		first = second;
263*4882a593Smuzhiyun 		insql(word, doff, second_dest);
264*4882a593Smuzhiyun 		checksum += word;
265*4882a593Smuzhiyun 		stq_u(partial_dest | second_dest, dst);
266*4882a593Smuzhiyun 		carry = checksum < word;
267*4882a593Smuzhiyun 		insqh(word, doff, partial_dest);
268*4882a593Smuzhiyun 		dst++;
269*4882a593Smuzhiyun 	}
270*4882a593Smuzhiyun 	len += doff;
271*4882a593Smuzhiyun 	checksum += carry;
272*4882a593Smuzhiyun 	if (len >= 0) {
273*4882a593Smuzhiyun 		unsigned long second, word;
274*4882a593Smuzhiyun 		unsigned long second_dest;
275*4882a593Smuzhiyun 
276*4882a593Smuzhiyun 		if (__get_word(ldq_u, second, lastsrc))
277*4882a593Smuzhiyun 			return 0;
278*4882a593Smuzhiyun 		extql(first, soff, word);
279*4882a593Smuzhiyun 		extqh(second, soff, first);
280*4882a593Smuzhiyun 		word |= first;
281*4882a593Smuzhiyun 		first = second;
282*4882a593Smuzhiyun 		mskql(word, len-doff, word);
283*4882a593Smuzhiyun 		checksum += word;
284*4882a593Smuzhiyun 		insql(word, doff, second_dest);
285*4882a593Smuzhiyun 		carry = checksum < word;
286*4882a593Smuzhiyun 		stq_u(partial_dest | second_dest, dst);
287*4882a593Smuzhiyun 		if (len) {
288*4882a593Smuzhiyun 			ldq_u(second_dest, dst+1);
289*4882a593Smuzhiyun 			insqh(word, doff, partial_dest);
290*4882a593Smuzhiyun 			mskqh(second_dest, len, second_dest);
291*4882a593Smuzhiyun 			stq_u(partial_dest | second_dest, dst+1);
292*4882a593Smuzhiyun 		}
293*4882a593Smuzhiyun 		checksum += carry;
294*4882a593Smuzhiyun 	} else {
295*4882a593Smuzhiyun 		unsigned long second, word;
296*4882a593Smuzhiyun 		unsigned long second_dest;
297*4882a593Smuzhiyun 
298*4882a593Smuzhiyun 		if (__get_word(ldq_u, second, lastsrc))
299*4882a593Smuzhiyun 			return 0;
300*4882a593Smuzhiyun 		extql(first, soff, word);
301*4882a593Smuzhiyun 		extqh(second, soff, first);
302*4882a593Smuzhiyun 		word |= first;
303*4882a593Smuzhiyun 		ldq_u(second_dest, dst);
304*4882a593Smuzhiyun 		mskql(word, len-doff, word);
305*4882a593Smuzhiyun 		checksum += word;
306*4882a593Smuzhiyun 		mskqh(second_dest, len, second_dest);
307*4882a593Smuzhiyun 		carry = checksum < word;
308*4882a593Smuzhiyun 		insql(word, doff, word);
309*4882a593Smuzhiyun 		stq_u(partial_dest | word | second_dest, dst);
310*4882a593Smuzhiyun 		checksum += carry;
311*4882a593Smuzhiyun 	}
312*4882a593Smuzhiyun 	return checksum;
313*4882a593Smuzhiyun }
314*4882a593Smuzhiyun 
__csum_and_copy(const void __user * src,void * dst,int len)315*4882a593Smuzhiyun static __wsum __csum_and_copy(const void __user *src, void *dst, int len)
316*4882a593Smuzhiyun {
317*4882a593Smuzhiyun 	unsigned long soff = 7 & (unsigned long) src;
318*4882a593Smuzhiyun 	unsigned long doff = 7 & (unsigned long) dst;
319*4882a593Smuzhiyun 	unsigned long checksum;
320*4882a593Smuzhiyun 
321*4882a593Smuzhiyun 	if (!doff) {
322*4882a593Smuzhiyun 		if (!soff)
323*4882a593Smuzhiyun 			checksum = csum_partial_cfu_aligned(
324*4882a593Smuzhiyun 				(const unsigned long __user *) src,
325*4882a593Smuzhiyun 				(unsigned long *) dst, len-8);
326*4882a593Smuzhiyun 		else
327*4882a593Smuzhiyun 			checksum = csum_partial_cfu_dest_aligned(
328*4882a593Smuzhiyun 				(const unsigned long __user *) src,
329*4882a593Smuzhiyun 				(unsigned long *) dst,
330*4882a593Smuzhiyun 				soff, len-8);
331*4882a593Smuzhiyun 	} else {
332*4882a593Smuzhiyun 		unsigned long partial_dest;
333*4882a593Smuzhiyun 		ldq_u(partial_dest, dst);
334*4882a593Smuzhiyun 		if (!soff)
335*4882a593Smuzhiyun 			checksum = csum_partial_cfu_src_aligned(
336*4882a593Smuzhiyun 				(const unsigned long __user *) src,
337*4882a593Smuzhiyun 				(unsigned long *) dst,
338*4882a593Smuzhiyun 				doff, len-8, partial_dest);
339*4882a593Smuzhiyun 		else
340*4882a593Smuzhiyun 			checksum = csum_partial_cfu_unaligned(
341*4882a593Smuzhiyun 				(const unsigned long __user *) src,
342*4882a593Smuzhiyun 				(unsigned long *) dst,
343*4882a593Smuzhiyun 				soff, doff, len-8, partial_dest);
344*4882a593Smuzhiyun 	}
345*4882a593Smuzhiyun 	return (__force __wsum)from64to16 (checksum);
346*4882a593Smuzhiyun }
347*4882a593Smuzhiyun 
348*4882a593Smuzhiyun __wsum
csum_and_copy_from_user(const void __user * src,void * dst,int len)349*4882a593Smuzhiyun csum_and_copy_from_user(const void __user *src, void *dst, int len)
350*4882a593Smuzhiyun {
351*4882a593Smuzhiyun 	if (!access_ok(src, len))
352*4882a593Smuzhiyun 		return 0;
353*4882a593Smuzhiyun 	return __csum_and_copy(src, dst, len);
354*4882a593Smuzhiyun }
355*4882a593Smuzhiyun EXPORT_SYMBOL(csum_and_copy_from_user);
356*4882a593Smuzhiyun 
357*4882a593Smuzhiyun __wsum
csum_partial_copy_nocheck(const void * src,void * dst,int len)358*4882a593Smuzhiyun csum_partial_copy_nocheck(const void *src, void *dst, int len)
359*4882a593Smuzhiyun {
360*4882a593Smuzhiyun 	return __csum_and_copy((__force const void __user *)src,
361*4882a593Smuzhiyun 						dst, len);
362*4882a593Smuzhiyun }
363*4882a593Smuzhiyun EXPORT_SYMBOL(csum_partial_copy_nocheck);
364