xref: /OK3568_Linux_fs/kernel/arch/arm/lib/lib1funcs.S (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun/*
2*4882a593Smuzhiyun * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
3*4882a593Smuzhiyun *
4*4882a593Smuzhiyun * Author: Nicolas Pitre <nico@fluxnic.net>
5*4882a593Smuzhiyun *   - contributed to gcc-3.4 on Sep 30, 2003
6*4882a593Smuzhiyun *   - adapted for the Linux kernel on Oct 2, 2003
7*4882a593Smuzhiyun */
8*4882a593Smuzhiyun
9*4882a593Smuzhiyun/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
10*4882a593Smuzhiyun
11*4882a593SmuzhiyunThis file is free software; you can redistribute it and/or modify it
12*4882a593Smuzhiyununder the terms of the GNU General Public License as published by the
13*4882a593SmuzhiyunFree Software Foundation; either version 2, or (at your option) any
14*4882a593Smuzhiyunlater version.
15*4882a593Smuzhiyun
16*4882a593SmuzhiyunIn addition to the permissions in the GNU General Public License, the
17*4882a593SmuzhiyunFree Software Foundation gives you unlimited permission to link the
18*4882a593Smuzhiyuncompiled version of this file into combinations with other programs,
19*4882a593Smuzhiyunand to distribute those combinations without any restriction coming
20*4882a593Smuzhiyunfrom the use of this file.  (The General Public License restrictions
21*4882a593Smuzhiyundo apply in other respects; for example, they cover modification of
22*4882a593Smuzhiyunthe file, and distribution when not linked into a combine
23*4882a593Smuzhiyunexecutable.)
24*4882a593Smuzhiyun
25*4882a593SmuzhiyunThis file is distributed in the hope that it will be useful, but
26*4882a593SmuzhiyunWITHOUT ANY WARRANTY; without even the implied warranty of
27*4882a593SmuzhiyunMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
28*4882a593SmuzhiyunGeneral Public License for more details.
29*4882a593Smuzhiyun
30*4882a593SmuzhiyunYou should have received a copy of the GNU General Public License
31*4882a593Smuzhiyunalong with this program; see the file COPYING.  If not, write to
32*4882a593Smuzhiyunthe Free Software Foundation, 59 Temple Place - Suite 330,
33*4882a593SmuzhiyunBoston, MA 02111-1307, USA.  */
34*4882a593Smuzhiyun
35*4882a593Smuzhiyun
36*4882a593Smuzhiyun#include <linux/linkage.h>
37*4882a593Smuzhiyun#include <asm/assembler.h>
38*4882a593Smuzhiyun#include <asm/unwind.h>
39*4882a593Smuzhiyun
/*
 * ARM_DIV_BODY - core of the unsigned 32-bit division (shift and subtract).
 *
 * Arguments are register names:
 *   dividend - value to divide.  Clobbered; do not rely on it holding the
 *              remainder afterwards, because the unrolled steps below keep
 *              subtracting \divisor >> 1..3 even when the matching
 *              \curbit >> 1..3 has already become zero.
 *   divisor  - value to divide by.  Clobbered (shifted left, then right).
 *   result   - receives the quotient.  Also used as scratch during setup.
 *   curbit   - scratch: the quotient bit that corresponds to the current
 *              alignment of \divisor.
 *
 * Condition flags are clobbered.  Both users in this file only reach the
 * macro after they have handled divisor == 0, dividend <= divisor and
 * power-of-two divisors themselves.
 *
 * The numeric local label 1 is reused several times; every "1b" refers to
 * the closest preceding "1:".
 */
40*4882a593Smuzhiyun.macro ARM_DIV_BODY dividend, divisor, result, curbit
41*4882a593Smuzhiyun
42*4882a593Smuzhiyun#if __LINUX_ARM_ARCH__ >= 5
43*4882a593Smuzhiyun
/*
 * With clz available: the difference of the leading-zero counts is the
 * left shift that lines the top set bit of the divisor up with the top set
 * bit of the dividend.  Divisor and curbit (starting from 1) are both
 * shifted by that amount, then result is cleared for the loop.
 */
44*4882a593Smuzhiyun	clz	\curbit, \divisor
45*4882a593Smuzhiyun	clz	\result, \dividend
46*4882a593Smuzhiyun	sub	\result, \curbit, \result
47*4882a593Smuzhiyun	mov	\curbit, #1
48*4882a593Smuzhiyun	mov	\divisor, \divisor, lsl \result
49*4882a593Smuzhiyun	mov	\curbit, \curbit, lsl \result
50*4882a593Smuzhiyun	mov	\result, #0
51*4882a593Smuzhiyun
52*4882a593Smuzhiyun#else
53*4882a593Smuzhiyun
54*4882a593Smuzhiyun	@ Initially shift the divisor left 3 bits if possible,
55*4882a593Smuzhiyun	@ set curbit accordingly.  This allows for curbit to be located
56*4882a593Smuzhiyun	@ at the left end of each 4 bit nibbles in the division loop
57*4882a593Smuzhiyun	@ to save one loop in most cases.
58*4882a593Smuzhiyun	tst	\divisor, #0xe0000000
59*4882a593Smuzhiyun	moveq	\divisor, \divisor, lsl #3
60*4882a593Smuzhiyun	moveq	\curbit, #8
61*4882a593Smuzhiyun	movne	\curbit, #1
62*4882a593Smuzhiyun
63*4882a593Smuzhiyun	@ Unless the divisor is very big, shift it up in multiples of
64*4882a593Smuzhiyun	@ four bits, since this is the amount of unwinding in the main
65*4882a593Smuzhiyun	@ division loop.  Continue shifting until the divisor is
66*4882a593Smuzhiyun	@ larger than the dividend.
67*4882a593Smuzhiyun1:	cmp	\divisor, #0x10000000
68*4882a593Smuzhiyun	cmplo	\divisor, \dividend
69*4882a593Smuzhiyun	movlo	\divisor, \divisor, lsl #4
70*4882a593Smuzhiyun	movlo	\curbit, \curbit, lsl #4
71*4882a593Smuzhiyun	blo	1b
72*4882a593Smuzhiyun
73*4882a593Smuzhiyun	@ For very big divisors, we must shift it a bit at a time, or
74*4882a593Smuzhiyun	@ we will be in danger of overflowing.
75*4882a593Smuzhiyun1:	cmp	\divisor, #0x80000000
76*4882a593Smuzhiyun	cmplo	\divisor, \dividend
77*4882a593Smuzhiyun	movlo	\divisor, \divisor, lsl #1
78*4882a593Smuzhiyun	movlo	\curbit, \curbit, lsl #1
79*4882a593Smuzhiyun	blo	1b
80*4882a593Smuzhiyun
81*4882a593Smuzhiyun	mov	\result, #0
82*4882a593Smuzhiyun
83*4882a593Smuzhiyun#endif
84*4882a593Smuzhiyun
/*
 * Each pass performs four compare/subtract steps, against the divisor
 * shifted right by 0, 1, 2 and 3, and ORs the equally shifted curbit into
 * the result whenever the subtraction is taken.
 *
 * Loop exit relies on flag chaining: "cmp \dividend, #0" sets Z when
 * nothing is left to divide, which skips the conditional movsne/movne and
 * falls through bne.  Otherwise movsne shifts curbit right by four and
 * sets Z once curbit has been shifted out, which ends the loop as well.
 */
85*4882a593Smuzhiyun	@ Division loop
86*4882a593Smuzhiyun1:	cmp	\dividend, \divisor
87*4882a593Smuzhiyun	subhs	\dividend, \dividend, \divisor
88*4882a593Smuzhiyun	orrhs	\result,   \result,   \curbit
89*4882a593Smuzhiyun	cmp	\dividend, \divisor,  lsr #1
90*4882a593Smuzhiyun	subhs	\dividend, \dividend, \divisor, lsr #1
91*4882a593Smuzhiyun	orrhs	\result,   \result,   \curbit,  lsr #1
92*4882a593Smuzhiyun	cmp	\dividend, \divisor,  lsr #2
93*4882a593Smuzhiyun	subhs	\dividend, \dividend, \divisor, lsr #2
94*4882a593Smuzhiyun	orrhs	\result,   \result,   \curbit,  lsr #2
95*4882a593Smuzhiyun	cmp	\dividend, \divisor,  lsr #3
96*4882a593Smuzhiyun	subhs	\dividend, \dividend, \divisor, lsr #3
97*4882a593Smuzhiyun	orrhs	\result,   \result,   \curbit,  lsr #3
98*4882a593Smuzhiyun	cmp	\dividend, #0			@ Early termination?
99*4882a593Smuzhiyun	movsne	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
100*4882a593Smuzhiyun	movne	\divisor,  \divisor, lsr #4
101*4882a593Smuzhiyun	bne	1b
102*4882a593Smuzhiyun
103*4882a593Smuzhiyun.endm
104*4882a593Smuzhiyun
105*4882a593Smuzhiyun
/*
 * ARM_DIV2_ORDER - compute the shift count for a power-of-two divisor.
 *
 * Arguments are register names:
 *   divisor - the divisor.  Both users in this file only reach the macro
 *             after "tst divisor, divisor - 1" reported no common bits,
 *             i.e. for a power of two.  Left untouched by the clz variant,
 *             clobbered by the fallback variant.
 *   order   - receives the bit index of the divisor's set bit, so that
 *             "x lsr order" equals x / divisor.
 *
 * The fallback variant clobbers the condition flags.
 */
106*4882a593Smuzhiyun.macro ARM_DIV2_ORDER divisor, order
107*4882a593Smuzhiyun
108*4882a593Smuzhiyun#if __LINUX_ARM_ARCH__ >= 5
109*4882a593Smuzhiyun
/* order = 31 - (number of leading zero bits) = index of the top set bit. */
110*4882a593Smuzhiyun	clz	\order, \divisor
111*4882a593Smuzhiyun	rsb	\order, \order, #31
112*4882a593Smuzhiyun
113*4882a593Smuzhiyun#else
114*4882a593Smuzhiyun
/*
 * Binary search without clz: test against 2^16, 2^8 and 2^4 in turn,
 * shifting the divisor down and accumulating the shift in order each time
 * the test succeeds.
 */
115*4882a593Smuzhiyun	cmp	\divisor, #(1 << 16)
116*4882a593Smuzhiyun	movhs	\divisor, \divisor, lsr #16
117*4882a593Smuzhiyun	movhs	\order, #16
118*4882a593Smuzhiyun	movlo	\order, #0
119*4882a593Smuzhiyun
120*4882a593Smuzhiyun	cmp	\divisor, #(1 << 8)
121*4882a593Smuzhiyun	movhs	\divisor, \divisor, lsr #8
122*4882a593Smuzhiyun	addhs	\order, \order, #8
123*4882a593Smuzhiyun
124*4882a593Smuzhiyun	cmp	\divisor, #(1 << 4)
125*4882a593Smuzhiyun	movhs	\divisor, \divisor, lsr #4
126*4882a593Smuzhiyun	addhs	\order, \order, #4
127*4882a593Smuzhiyun
/*
 * The divisor is now below 16.  For a power of two that leaves 1, 2, 4
 * or 8: a value above 4 adds 3, otherwise divisor >> 1 (0, 1 or 2) is
 * exactly the remaining bit index.
 */
128*4882a593Smuzhiyun	cmp	\divisor, #(1 << 2)
129*4882a593Smuzhiyun	addhi	\order, \order, #3
130*4882a593Smuzhiyun	addls	\order, \order, \divisor, lsr #1
131*4882a593Smuzhiyun
132*4882a593Smuzhiyun#endif
133*4882a593Smuzhiyun
134*4882a593Smuzhiyun.endm
135*4882a593Smuzhiyun
136*4882a593Smuzhiyun
/*
 * ARM_MOD_BODY - core of the unsigned 32-bit modulo (shift and subtract).
 *
 * Arguments are register names:
 *   dividend - value to reduce; holds the remainder on exit.
 *   divisor  - value to divide by.  Clobbered (shifted left, then right).
 *   order    - scratch: number of bit positions the divisor was shifted
 *              left, later reused as a countdown of remaining steps.
 *   spare    - scratch, only written by the clz variant.
 *
 * Condition flags are clobbered.  Both users in this file only reach the
 * macro when the dividend is greater than the divisor and the divisor is
 * neither zero nor a power of two.
 *
 * Numeric local labels: every "1b" refers to the closest preceding "1:";
 * 2-5 mark the tail that handles the last one to three steps.
 */
137*4882a593Smuzhiyun.macro ARM_MOD_BODY dividend, divisor, order, spare
138*4882a593Smuzhiyun
139*4882a593Smuzhiyun#if __LINUX_ARM_ARCH__ >= 5
140*4882a593Smuzhiyun
/*
 * order = clz(divisor) - clz(dividend): the left shift that lines the top
 * set bit of the divisor up with the top set bit of the dividend.
 */
141*4882a593Smuzhiyun	clz	\order, \divisor
142*4882a593Smuzhiyun	clz	\spare, \dividend
143*4882a593Smuzhiyun	sub	\order, \order, \spare
144*4882a593Smuzhiyun	mov	\divisor, \divisor, lsl \order
145*4882a593Smuzhiyun
146*4882a593Smuzhiyun#else
147*4882a593Smuzhiyun
148*4882a593Smuzhiyun	mov	\order, #0
149*4882a593Smuzhiyun
150*4882a593Smuzhiyun	@ Unless the divisor is very big, shift it up in multiples of
151*4882a593Smuzhiyun	@ four bits, since this is the amount of unwinding in the main
152*4882a593Smuzhiyun	@ division loop.  Continue shifting until the divisor is
153*4882a593Smuzhiyun	@ larger than the dividend.
154*4882a593Smuzhiyun1:	cmp	\divisor, #0x10000000
155*4882a593Smuzhiyun	cmplo	\divisor, \dividend
156*4882a593Smuzhiyun	movlo	\divisor, \divisor, lsl #4
157*4882a593Smuzhiyun	addlo	\order, \order, #4
158*4882a593Smuzhiyun	blo	1b
159*4882a593Smuzhiyun
160*4882a593Smuzhiyun	@ For very big divisors, we must shift it a bit at a time, or
161*4882a593Smuzhiyun	@ we will be in danger of overflowing.
162*4882a593Smuzhiyun1:	cmp	\divisor, #0x80000000
163*4882a593Smuzhiyun	cmplo	\divisor, \dividend
164*4882a593Smuzhiyun	movlo	\divisor, \divisor, lsl #1
165*4882a593Smuzhiyun	addlo	\order, \order, #1
166*4882a593Smuzhiyun	blo	1b
167*4882a593Smuzhiyun
168*4882a593Smuzhiyun#endif
169*4882a593Smuzhiyun
/*
 * A shift of "order" needs order + 1 compare/subtract steps.  Subtracting
 * 3 up front makes the value non-negative exactly when at least four steps
 * (one full unrolled pass) are needed; a negative value of -3, -2 or -1
 * means one, two or three steps are left for the tail at label 2.
 */
170*4882a593Smuzhiyun	@ Perform all needed subtractions to keep only the remainder.
171*4882a593Smuzhiyun	@ Do comparisons in batch of 4 first.
172*4882a593Smuzhiyun	subs	\order, \order, #3		@ yes, 3 is intended here
173*4882a593Smuzhiyun	blt	2f
174*4882a593Smuzhiyun
175*4882a593Smuzhiyun1:	cmp	\dividend, \divisor
176*4882a593Smuzhiyun	subhs	\dividend, \dividend, \divisor
177*4882a593Smuzhiyun	cmp	\dividend, \divisor,  lsr #1
178*4882a593Smuzhiyun	subhs	\dividend, \dividend, \divisor, lsr #1
179*4882a593Smuzhiyun	cmp	\dividend, \divisor,  lsr #2
180*4882a593Smuzhiyun	subhs	\dividend, \dividend, \divisor, lsr #2
181*4882a593Smuzhiyun	cmp	\dividend, \divisor,  lsr #3
182*4882a593Smuzhiyun	subhs	\dividend, \dividend, \divisor, lsr #3
/*
 * "cmp \dividend, #1" yields "lt" once the dividend has reached zero, so
 * the conditional subsge is skipped and bge falls through (early exit).
 * Otherwise subsge counts four steps off and bge repeats while the count
 * is still non-negative.  The plain mov in between leaves the flags alone.
 */
183*4882a593Smuzhiyun	cmp	\dividend, #1
184*4882a593Smuzhiyun	mov	\divisor, \divisor, lsr #4
185*4882a593Smuzhiyun	subsge	\order, \order, #4
186*4882a593Smuzhiyun	bge	1b
187*4882a593Smuzhiyun
/*
 * Finished if the two low bits of the count are clear (no step left) or
 * if the dividend is already zero.
 */
188*4882a593Smuzhiyun	tst	\order, #3
189*4882a593Smuzhiyun	teqne	\dividend, #0
190*4882a593Smuzhiyun	beq	5f
191*4882a593Smuzhiyun
/*
 * cmn compares the count against -2: below (-3) runs only the step at 4,
 * equal (-2) runs the steps at 3 and 4, above (-1) runs all three.
 */
192*4882a593Smuzhiyun	@ Either 1, 2 or 3 comparison/subtractions are left.
193*4882a593Smuzhiyun2:	cmn	\order, #2
194*4882a593Smuzhiyun	blt	4f
195*4882a593Smuzhiyun	beq	3f
196*4882a593Smuzhiyun	cmp	\dividend, \divisor
197*4882a593Smuzhiyun	subhs	\dividend, \dividend, \divisor
198*4882a593Smuzhiyun	mov	\divisor,  \divisor,  lsr #1
199*4882a593Smuzhiyun3:	cmp	\dividend, \divisor
200*4882a593Smuzhiyun	subhs	\dividend, \dividend, \divisor
201*4882a593Smuzhiyun	mov	\divisor,  \divisor,  lsr #1
202*4882a593Smuzhiyun4:	cmp	\dividend, \divisor
203*4882a593Smuzhiyun	subhs	\dividend, \dividend, \divisor
204*4882a593Smuzhiyun5:
205*4882a593Smuzhiyun.endm
206*4882a593Smuzhiyun
207*4882a593Smuzhiyun
/*
 * The entry point below is aligned to 8 bytes when CONFIG_ARM_PATCH_IDIV
 * is set.  NOTE(review): the consumer of that alignment is not visible in
 * this file - presumably the entry is patched at run time; confirm.
 */
208*4882a593Smuzhiyun#ifdef CONFIG_ARM_PATCH_IDIV
209*4882a593Smuzhiyun	.align	3
210*4882a593Smuzhiyun#endif
211*4882a593Smuzhiyun
/*
 * __udivsi3 / __aeabi_uidiv - unsigned 32-bit division.
 *
 * In:   r0 = dividend, r1 = divisor
 * Out:  r0 = quotient
 * Uses: r1, r2, r3 and the condition flags as scratch; no stack.
 *
 * A zero divisor branches to Ldiv0.
 */
212*4882a593SmuzhiyunENTRY(__udivsi3)
213*4882a593SmuzhiyunENTRY(__aeabi_uidiv)
214*4882a593SmuzhiyunUNWIND(.fnstart)
215*4882a593Smuzhiyun
/*
 * r2 = divisor - 1 serves two purposes: its flags separate divisor == 1
 * (Z set: the quotient is the dividend already in r0) from divisor == 0
 * (borrow, C clear), and it is the mask for the power-of-two test below.
 */
216*4882a593Smuzhiyun	subs	r2, r1, #1
217*4882a593Smuzhiyun	reteq	lr
218*4882a593Smuzhiyun	bcc	Ldiv0
/* dividend <= divisor: the quotient can only be 0 or 1, see label 11. */
219*4882a593Smuzhiyun	cmp	r0, r1
220*4882a593Smuzhiyun	bls	11f
/* divisor & (divisor - 1) == 0: power of two, a plain shift is enough. */
221*4882a593Smuzhiyun	tst	r1, r2
222*4882a593Smuzhiyun	beq	12f
223*4882a593Smuzhiyun
224*4882a593Smuzhiyun	ARM_DIV_BODY r0, r1, r2, r3
225*4882a593Smuzhiyun
226*4882a593Smuzhiyun	mov	r0, r2
227*4882a593Smuzhiyun	ret	lr
228*4882a593Smuzhiyun
/* Flags still come from "cmp r0, r1": equal gives 1, lower gives 0. */
229*4882a593Smuzhiyun11:	moveq	r0, #1
230*4882a593Smuzhiyun	movne	r0, #0
231*4882a593Smuzhiyun	ret	lr
232*4882a593Smuzhiyun
233*4882a593Smuzhiyun12:	ARM_DIV2_ORDER r1, r2
234*4882a593Smuzhiyun
235*4882a593Smuzhiyun	mov	r0, r0, lsr r2
236*4882a593Smuzhiyun	ret	lr
237*4882a593Smuzhiyun
238*4882a593SmuzhiyunUNWIND(.fnend)
239*4882a593SmuzhiyunENDPROC(__udivsi3)
240*4882a593SmuzhiyunENDPROC(__aeabi_uidiv)
241*4882a593Smuzhiyun
/*
 * __umodsi3 - unsigned 32-bit modulo.
 *
 * In:   r0 = dividend, r1 = divisor
 * Out:  r0 = dividend % divisor
 * Uses: r1, r2, r3 and the condition flags as scratch; no stack.
 *
 * A zero divisor branches to Ldiv0.
 *
 * The special cases are resolved by one chain of conditionally executed
 * instructions, so the order of the instructions must not be changed:
 *   - divisor == 1 or dividend == divisor: r0 becomes 0;
 *   - dividend < divisor: r0 is returned unchanged;
 *   - dividend > divisor and divisor a power of two: r0 &= divisor - 1;
 *   - anything else falls through to ARM_MOD_BODY.
 */
242*4882a593SmuzhiyunENTRY(__umodsi3)
243*4882a593SmuzhiyunUNWIND(.fnstart)
244*4882a593Smuzhiyun
245*4882a593Smuzhiyun	subs	r2, r1, #1			@ compare divisor with 1
246*4882a593Smuzhiyun	bcc	Ldiv0
247*4882a593Smuzhiyun	cmpne	r0, r1				@ compare dividend with divisor
248*4882a593Smuzhiyun	moveq   r0, #0
249*4882a593Smuzhiyun	tsthi	r1, r2				@ see if divisor is power of 2
250*4882a593Smuzhiyun	andeq	r0, r0, r2
251*4882a593Smuzhiyun	retls	lr
252*4882a593Smuzhiyun
253*4882a593Smuzhiyun	ARM_MOD_BODY r0, r1, r2, r3
254*4882a593Smuzhiyun
255*4882a593Smuzhiyun	ret	lr
256*4882a593Smuzhiyun
257*4882a593SmuzhiyunUNWIND(.fnend)
258*4882a593SmuzhiyunENDPROC(__umodsi3)
259*4882a593Smuzhiyun
/*
 * The entry point below is aligned to 8 bytes when CONFIG_ARM_PATCH_IDIV
 * is set.  NOTE(review): the consumer of that alignment is not visible in
 * this file - presumably the entry is patched at run time; confirm.
 */
260*4882a593Smuzhiyun#ifdef CONFIG_ARM_PATCH_IDIV
261*4882a593Smuzhiyun	.align 3
262*4882a593Smuzhiyun#endif
263*4882a593Smuzhiyun
/*
 * __divsi3 / __aeabi_idiv - signed 32-bit division.
 *
 * In:   r0 = dividend, r1 = divisor
 * Out:  r0 = quotient
 * Uses: r1, r2, r3, ip and the condition flags as scratch; no stack.
 *
 * A zero divisor branches to Ldiv0.
 *
 * The work is done on absolute values: r1 = |divisor|, r3 = |dividend|.
 * The top bit of ip = dividend ^ divisor is the sign of the quotient and
 * is applied at the end of each path.
 */
264*4882a593SmuzhiyunENTRY(__divsi3)
265*4882a593SmuzhiyunENTRY(__aeabi_idiv)
266*4882a593SmuzhiyunUNWIND(.fnstart)
267*4882a593Smuzhiyun
/*
 * eor does not set flags, so beq and rsbmi below still act on the result
 * of "cmp r1, #0".
 */
268*4882a593Smuzhiyun	cmp	r1, #0
269*4882a593Smuzhiyun	eor	ip, r0, r1			@ save the sign of the result.
270*4882a593Smuzhiyun	beq	Ldiv0
271*4882a593Smuzhiyun	rsbmi	r1, r1, #0			@ loops below use unsigned.
272*4882a593Smuzhiyun	subs	r2, r1, #1			@ division by 1 or -1 ?
273*4882a593Smuzhiyun	beq	10f
274*4882a593Smuzhiyun	movs	r3, r0
275*4882a593Smuzhiyun	rsbmi	r3, r0, #0			@ positive dividend value
276*4882a593Smuzhiyun	cmp	r3, r1
277*4882a593Smuzhiyun	bls	11f
278*4882a593Smuzhiyun	tst	r1, r2				@ divisor is power of 2 ?
279*4882a593Smuzhiyun	beq	12f
280*4882a593Smuzhiyun
281*4882a593Smuzhiyun	ARM_DIV_BODY r3, r1, r0, r2
282*4882a593Smuzhiyun
283*4882a593Smuzhiyun	cmp	ip, #0
284*4882a593Smuzhiyun	rsbmi	r0, r0, #0
285*4882a593Smuzhiyun	ret	lr
286*4882a593Smuzhiyun
/*
 * |divisor| == 1: r0 still holds the signed dividend.  ip ^ r0 has the
 * sign of the original divisor, so r0 is negated only for divisor == -1.
 */
287*4882a593Smuzhiyun10:	teq	ip, r0				@ same sign ?
288*4882a593Smuzhiyun	rsbmi	r0, r0, #0
289*4882a593Smuzhiyun	ret	lr
290*4882a593Smuzhiyun
/*
 * |dividend| <= |divisor|, flags from "cmp r3, r1": lower gives 0; equal
 * gives (ip asr #31) | 1, i.e. +1 or -1 according to the result sign.
 */
291*4882a593Smuzhiyun11:	movlo	r0, #0
292*4882a593Smuzhiyun	moveq	r0, ip, asr #31
293*4882a593Smuzhiyun	orreq	r0, r0, #1
294*4882a593Smuzhiyun	ret	lr
295*4882a593Smuzhiyun
/* Power-of-two divisor: shift |dividend|, then apply the result sign. */
296*4882a593Smuzhiyun12:	ARM_DIV2_ORDER r1, r2
297*4882a593Smuzhiyun
298*4882a593Smuzhiyun	cmp	ip, #0
299*4882a593Smuzhiyun	mov	r0, r3, lsr r2
300*4882a593Smuzhiyun	rsbmi	r0, r0, #0
301*4882a593Smuzhiyun	ret	lr
302*4882a593Smuzhiyun
303*4882a593SmuzhiyunUNWIND(.fnend)
304*4882a593SmuzhiyunENDPROC(__divsi3)
305*4882a593SmuzhiyunENDPROC(__aeabi_idiv)
306*4882a593Smuzhiyun
/*
 * __modsi3 - signed 32-bit modulo.
 *
 * In:   r0 = dividend, r1 = divisor
 * Out:  r0 = remainder, carrying the sign of the dividend
 * Uses: r1, r2, r3, ip and the condition flags as scratch; no stack.
 *
 * A zero divisor branches to Ldiv0.
 *
 * The work is done on absolute values (r0 = |dividend|, r1 = |divisor|);
 * ip keeps the original dividend so its sign can be applied at label 10.
 * The conditional chain from subs to bls resolves the special cases and
 * must stay in this order:
 *   - |divisor| == 1 or |dividend| == |divisor|: r0 becomes 0;
 *   - |dividend| < |divisor|: r0 is kept;
 *   - |dividend| > |divisor| and a power-of-two divisor:
 *     r0 &= |divisor| - 1;
 *   - anything else falls through to ARM_MOD_BODY.
 */
307*4882a593SmuzhiyunENTRY(__modsi3)
308*4882a593SmuzhiyunUNWIND(.fnstart)
309*4882a593Smuzhiyun
310*4882a593Smuzhiyun	cmp	r1, #0
311*4882a593Smuzhiyun	beq	Ldiv0
312*4882a593Smuzhiyun	rsbmi	r1, r1, #0			@ loops below use unsigned.
313*4882a593Smuzhiyun	movs	ip, r0				@ preserve sign of dividend
314*4882a593Smuzhiyun	rsbmi	r0, r0, #0			@ if negative make positive
315*4882a593Smuzhiyun	subs	r2, r1, #1			@ compare divisor with 1
316*4882a593Smuzhiyun	cmpne	r0, r1				@ compare dividend with divisor
317*4882a593Smuzhiyun	moveq	r0, #0
318*4882a593Smuzhiyun	tsthi	r1, r2				@ see if divisor is power of 2
319*4882a593Smuzhiyun	andeq	r0, r0, r2
320*4882a593Smuzhiyun	bls	10f
321*4882a593Smuzhiyun
322*4882a593Smuzhiyun	ARM_MOD_BODY r0, r1, r2, r3
323*4882a593Smuzhiyun
/* Give the remainder the sign of the original dividend. */
324*4882a593Smuzhiyun10:	cmp	ip, #0
325*4882a593Smuzhiyun	rsbmi	r0, r0, #0
326*4882a593Smuzhiyun	ret	lr
327*4882a593Smuzhiyun
328*4882a593SmuzhiyunUNWIND(.fnend)
329*4882a593SmuzhiyunENDPROC(__modsi3)
330*4882a593Smuzhiyun
331*4882a593Smuzhiyun#ifdef CONFIG_AEABI
332*4882a593Smuzhiyun
/*
 * __aeabi_uidivmod - unsigned 32-bit division returning quotient and
 * remainder.
 *
 * In:   r0 = dividend, r1 = divisor
 * Out:  r0 = quotient, r1 = remainder
 * Uses: r2, r3 as scratch, plus whatever __aeabi_uidiv clobbers.
 *
 * The operands are saved across the call to __aeabi_uidiv and reloaded
 * into r1 (dividend) and r2 (divisor); the remainder is then computed as
 * dividend - quotient * divisor.
 * NOTE(review): ip is pushed and popped although it is not used here -
 * presumably only to keep the stack adjustment at 16 bytes; confirm.
 */
333*4882a593SmuzhiyunENTRY(__aeabi_uidivmod)
334*4882a593SmuzhiyunUNWIND(.fnstart)
335*4882a593SmuzhiyunUNWIND(.save {r0, r1, ip, lr}	)
336*4882a593Smuzhiyun
337*4882a593Smuzhiyun	stmfd	sp!, {r0, r1, ip, lr}
338*4882a593Smuzhiyun	bl	__aeabi_uidiv
/* Pop the saved r0/r1 slots into r1/r2: r1 = dividend, r2 = divisor. */
339*4882a593Smuzhiyun	ldmfd	sp!, {r1, r2, ip, lr}
340*4882a593Smuzhiyun	mul	r3, r0, r2
341*4882a593Smuzhiyun	sub	r1, r1, r3
342*4882a593Smuzhiyun	ret	lr
343*4882a593Smuzhiyun
344*4882a593SmuzhiyunUNWIND(.fnend)
345*4882a593SmuzhiyunENDPROC(__aeabi_uidivmod)
346*4882a593Smuzhiyun
/*
 * __aeabi_idivmod - signed 32-bit division returning quotient and
 * remainder.
 *
 * In:   r0 = dividend, r1 = divisor
 * Out:  r0 = quotient, r1 = remainder
 * Uses: r2, r3 as scratch, plus whatever __aeabi_idiv clobbers.
 *
 * The operands are saved across the call to __aeabi_idiv and reloaded
 * into r1 (dividend) and r2 (divisor); the remainder is then computed as
 * dividend - quotient * divisor.
 * NOTE(review): ip is pushed and popped although it is not used here -
 * presumably only to keep the stack adjustment at 16 bytes; confirm.
 */
347*4882a593SmuzhiyunENTRY(__aeabi_idivmod)
348*4882a593SmuzhiyunUNWIND(.fnstart)
349*4882a593SmuzhiyunUNWIND(.save {r0, r1, ip, lr}	)
350*4882a593Smuzhiyun	stmfd	sp!, {r0, r1, ip, lr}
351*4882a593Smuzhiyun	bl	__aeabi_idiv
/* Pop the saved r0/r1 slots into r1/r2: r1 = dividend, r2 = divisor. */
352*4882a593Smuzhiyun	ldmfd	sp!, {r1, r2, ip, lr}
353*4882a593Smuzhiyun	mul	r3, r0, r2
354*4882a593Smuzhiyun	sub	r1, r1, r3
355*4882a593Smuzhiyun	ret	lr
356*4882a593Smuzhiyun
357*4882a593SmuzhiyunUNWIND(.fnend)
358*4882a593SmuzhiyunENDPROC(__aeabi_idivmod)
359*4882a593Smuzhiyun
360*4882a593Smuzhiyun#endif
361*4882a593Smuzhiyun
/*
 * Ldiv0 - common division-by-zero tail, reached by a branch from the
 * division and modulo routines in this file when the divisor is zero.
 *
 * Saves lr on the stack, calls __div0 (defined outside this file), then
 * returns 0 in r0 to the caller of the division routine by loading pc
 * from the saved lr.
 *
 * The stack pointer moves by 8 bytes although only lr (4 bytes) is
 * stored; the unwind annotations describe this as a 4-byte pad plus the
 * saved lr.
 */
362*4882a593SmuzhiyunLdiv0:
363*4882a593SmuzhiyunUNWIND(.fnstart)
364*4882a593SmuzhiyunUNWIND(.pad #4)
365*4882a593SmuzhiyunUNWIND(.save {lr})
366*4882a593Smuzhiyun	str	lr, [sp, #-8]!
367*4882a593Smuzhiyun	bl	__div0
368*4882a593Smuzhiyun	mov	r0, #0			@ About as wrong as it could be.
369*4882a593Smuzhiyun	ldr	pc, [sp], #8
370*4882a593SmuzhiyunUNWIND(.fnend)
371*4882a593SmuzhiyunENDPROC(Ldiv0)
372