/* SPDX-License-Identifier: GPL-2.0+
 *
 * $Id: checksum.S,v 1.10 2001/07/06 13:11:32 gniibe Exp $
 *
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Tom May, <ftom@netcom.com>
 *              Pentium Pro/II routines:
 *              Alexander Kjeldaas <astor@guardian.no>
 *              Finn Arne Gangstad <finnag@guardian.no>
 *		Lots of code moved from tcp.c and ip.c; see those files
 *		for more names.
 *
 * Changes:     Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *			     handling.
 *		Andi Kleen,  add zeroing on error
 *                   converted to pure assembler
 *
 * SuperH version:  Copyright (C) 1999  Niibe Yutaka
 */

#include <asm/errno.h>
#include <linux/linkage.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
 * asmlinkage __wsum csum_partial(const void *buf, int len, __wsum sum);
 */
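/*
 * r4:	buf (advanced as the data is consumed)
 * r5:	len
 * r6:	sum (the incoming partial sum, accumulated in place)
 * r7:	copy of the original buf, checked at 9: below to see whether
 *	the buffer started on an odd address
 * The result is returned in r0.
 */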

.text
ENTRY(csum_partial)
	  /*
	   * Experiments with Ethernet and SLIP connections show that buff
	   * is aligned on either a 2-byte or 4-byte boundary.  We get at
	   * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
	   * Fortunately, it is easy to convert 2-byte alignment to 4-byte
	   * alignment for the unrolled loop.
	   */
	mov	r4, r0
	tst	#3, r0		! Check alignment.
	bt/s	2f		! Jump if alignment is ok.
	 mov	r4, r7		! Keep a copy to check for alignment
	!
	tst	#1, r0		! Check alignment.
	bt	21f		! Jump if buf is 2-byte aligned.

	! buf is odd
	tst	r5, r5
	add	#-1, r5
	bt	9f
	mov.b	@r4+, r0
	extu.b	r0, r0
	addc	r0, r6		! t=0 from previous tst
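	! Rotate the sum left by 8 bits so the remaining data is summed at
	! the correct byte offset (see the matching adjustment at 9: below).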
	mov	r6, r0
	shll8	r6
	shlr16	r0
	shlr8	r0
	or	r0, r6
	mov	r4, r0
	tst	#2, r0
	bt	2f
21:
	! buf is 2 byte aligned (len could be 0)
	add	#-2, r5		! Alignment uses up two bytes.
	cmp/pz	r5		!
	bt/s	1f		! Jump if we had at least two bytes.
	 clrt
	bra	6f
	 add	#2, r5		! r5 was < 2.  Deal with it.
1:
	mov.w	@r4+, r0
	extu.w	r0, r0
	addc	r0, r6
	bf	2f
	add	#1, r6
2:
	! buf is 4 byte aligned (len could be 0)
	mov	r5, r1
	mov	#-5, r0
	shld	r0, r1
	tst	r1, r1
	bt/s	4f		! if it's =0, go to 4f
	 clrt
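	! Unrolled loop: each pass below sums eight longwords (32 bytes);
	! r1 holds len / 32 and carries are collected into r6 via addc.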
	.align	2
3:
	mov.l	@r4+, r0
	mov.l	@r4+, r2
	mov.l	@r4+, r3
	addc	r0, r6
	mov.l	@r4+, r0
	addc	r2, r6
	mov.l	@r4+, r2
	addc	r3, r6
	mov.l	@r4+, r3
	addc	r0, r6
	mov.l	@r4+, r0
	addc	r2, r6
	mov.l	@r4+, r2
	addc	r3, r6
	addc	r0, r6
	addc	r2, r6
	movt	r0
	dt	r1
	bf/s	3b
	 cmp/eq	#1, r0
	! here, we know r1==0
	addc	r1, r6			! add carry to r6
4:
	mov	r5, r0
	and	#0x1c, r0
	tst	r0, r0
	bt	6f
	! 4 bytes or more remaining
	mov	r0, r1
	shlr2	r1
	mov	#0, r2
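	! Sum the remaining longwords (r1 = (len & 0x1c) / 4).  The loop is
	! software-pipelined: the longword loaded in one pass is added in
	! the next, so the final longword is added after the loop.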
5:
	addc	r2, r6
	mov.l	@r4+, r2
	movt	r0
	dt	r1
	bf/s	5b
	 cmp/eq	#1, r0
	addc	r2, r6
	addc	r1, r6		! r1==0 here, so it means add carry-bit
6:
	! 3 bytes or less remaining
	mov	#3, r0
	and	r0, r5
	tst	r5, r5
	bt	9f		! if it's =0 go to 9f
	mov	#2, r1
	cmp/hs  r1, r5
	bf	7f
	mov.w	@r4+, r0
	extu.w	r0, r0
	cmp/eq	r1, r5
	bt/s	8f
	 clrt
	shll16	r0
	addc	r0, r6
7:
	mov.b	@r4+, r0
	extu.b	r0, r0
#ifndef	__LITTLE_ENDIAN__
	shll8	r0
#endif
8:
	addc	r0, r6
	mov	#0, r0
	addc	r0, r6
9:
	! Check if the buffer was misaligned, if so realign sum
	mov	r7, r0
	tst	#1, r0
	bt	10f
	mov	r6, r0
	shll8	r6
	shlr16	r0
	shlr8	r0
	or	r0, r6
10:
	rts
	 mov	r6, r0

/*
asmlinkage __wsum csum_partial_copy_generic(const void *src, void *dst, int len)
 */

/*
 * Copy from src to dst while checksumming, otherwise like csum_partial
 * with the initial sum being ~0U
 */

#define EXC(...)			\
	9999: __VA_ARGS__ ;		\
	.section __ex_table, "a";	\
	.long 9999b, 6001f	;	\
	.previous
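
! Every access wrapped in EXC() gets an __ex_table entry; if the access
! faults, the exception handler branches to the fixup code at 6001: below,
! which makes the function return 0.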

!
! r4:	const char *SRC
! r5:	char *DST
! r6:	int LEN
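! r7:	running sum, initialised to ~0U; the checksum is returned in r0
!	(0 if one of the EXC()-wrapped accesses faults)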
!
ENTRY(csum_partial_copy_generic)
	mov	#-1,r7
	mov	#3,r0		! Check src and dest are equally aligned
	mov	r4,r1
	and	r0,r1
	and	r5,r0
	cmp/eq	r1,r0
	bf	3f		! Different alignments, use slow version
	tst	#1,r0		! Check dest word aligned
	bf	3f		! If not, do it the slow way

	mov	#2,r0
	tst	r0,r5		! Check dest alignment.
	bt	2f		! Jump if alignment is ok.
	add	#-2,r6		! Alignment uses up two bytes.
	cmp/pz	r6		! Jump if we had at least two bytes.
	bt/s	1f
	 clrt
	add	#2,r6		! r6 was < 2.	Deal with it.
	bra	4f
	 mov	r6,r2

3:	! Handle different src and dest alignments.
	! This is not common, so simple byte by byte copy will do.
	mov	r6,r2
	shlr	r6
	tst	r6,r6
	bt	4f
	clrt
	.align	2
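	! Byte-by-byte copy for mismatched alignment: each pass copies two
	! bytes, assembles them into a 16-bit word in memory order and adds
	! it to the sum.  r6 = len / 2, r2 preserves the original length.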
5:
EXC(	mov.b	@r4+,r1 	)
EXC(	mov.b	@r4+,r0		)
	extu.b	r1,r1
EXC(	mov.b	r1,@r5		)
EXC(	mov.b	r0,@(1,r5)	)
	extu.b	r0,r0
	add	#2,r5

#ifdef	__LITTLE_ENDIAN__
	shll8	r0
#else
	shll8	r1
#endif
	or	r1,r0

	addc	r0,r7
	movt	r0
	dt	r6
	bf/s	5b
	 cmp/eq	#1,r0
	mov	#0,r0
	addc	r0, r7

	mov	r2, r0
	tst	#1, r0
	bt	7f
	bra	5f
	 clrt

	! src and dest equally aligned, but to a two byte boundary.
	! Handle first two bytes as a special case
	.align	2
1:
EXC(	mov.w	@r4+,r0		)
EXC(	mov.w	r0,@r5		)
	add	#2,r5
	extu.w	r0,r0
	addc	r0,r7
	mov	#0,r0
	addc	r0,r7
2:
	mov	r6,r2
	mov	#-5,r0
	shld	r0,r6
	tst	r6,r6
	bt/s	2f
	 clrt
	.align	2
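	! Unrolled copy-and-sum loop: each pass copies and sums eight
	! longwords (32 bytes); r6 = len / 32, r2 preserves the full length.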
1:
EXC(	mov.l	@r4+,r0		)
EXC(	mov.l	@r4+,r1		)
	addc	r0,r7
EXC(	mov.l	r0,@r5		)
EXC(	mov.l	r1,@(4,r5)	)
	addc	r1,r7

EXC(	mov.l	@r4+,r0		)
EXC(	mov.l	@r4+,r1		)
	addc	r0,r7
EXC(	mov.l	r0,@(8,r5)	)
EXC(	mov.l	r1,@(12,r5)	)
	addc	r1,r7

EXC(	mov.l	@r4+,r0 	)
EXC(	mov.l	@r4+,r1		)
	addc	r0,r7
EXC(	mov.l	r0,@(16,r5)	)
EXC(	mov.l	r1,@(20,r5)	)
	addc	r1,r7

EXC(	mov.l	@r4+,r0		)
EXC(	mov.l	@r4+,r1		)
	addc	r0,r7
EXC(	mov.l	r0,@(24,r5)	)
EXC(	mov.l	r1,@(28,r5)	)
	addc	r1,r7
	add	#32,r5
	movt	r0
	dt	r6
	bf/s	1b
	 cmp/eq	#1,r0
	mov	#0,r0
	addc	r0,r7

2:	mov	r2,r6
	mov	#0x1c,r0
	and	r0,r6
	cmp/pl	r6
	bf/s	4f
	 clrt
	shlr2	r6
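	! Copy and sum the remaining longwords (r6 = (len & 0x1c) / 4).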
3:
EXC(	mov.l	@r4+,r0	)
	addc	r0,r7
EXC(	mov.l	r0,@r5	)
	add	#4,r5
	movt	r0
	dt	r6
	bf/s	3b
	 cmp/eq	#1,r0
	mov	#0,r0
	addc	r0,r7
4:	mov	r2,r6
	mov	#3,r0
	and	r0,r6
	cmp/pl	r6
	bf	7f
	mov	#2,r1
	cmp/hs	r1,r6
	bf	5f
EXC(	mov.w	@r4+,r0	)
EXC(	mov.w	r0,@r5	)
	extu.w	r0,r0
	add	#2,r5
	cmp/eq	r1,r6
	bt/s	6f
	 clrt
	shll16	r0
	addc	r0,r7
5:
EXC(	mov.b	@r4+,r0	)
EXC(	mov.b	r0,@r5	)
	extu.b	r0,r0
#ifndef	__LITTLE_ENDIAN__
	shll8	r0
#endif
6:	addc	r0,r7
	mov	#0,r0
	addc	r0,r7
7:
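	! All done: fall through to the rts after the .fixup section below
	! (that section is assembled elsewhere), returning the sum in r0.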

# Exception handler:
.section .fixup, "ax"

6001:
	rts
	 mov	#0,r0
.previous
	rts
	 mov	r7,r0