/* xref: /rk3399_rockchip-uboot/arch/arm/lib/lib1funcs.S (revision c5a543ea2d4bcedd87754b78babb68929cf8ab33) */
/*
 * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
 *
 * Author: Nicolas Pitre <nico@fluxnic.net>
 *   - contributed to gcc-3.4 on Sep 30, 2003
 *   - adapted for the Linux kernel on Oct 2, 2003
 */

/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.

 * SPDX-License-Identifier:	GPL-2.0+
 */


#include <linux/linkage.h>
#include <asm/assembler.h>

/*
 * U-Boot compatibility bits: define an empty UNWIND() macro, since we do
 * not support stack unwinding, and define CONFIG_AEABI so that all of the
 * functions are available without diverging from the Linux code.
 */
#ifdef __UBOOT__
#define UNWIND(x...)
#define CONFIG_AEABI
#endif

/*
 * ARM_DIV_BODY dividend, divisor, result, curbit
 *
 * Unsigned shift-and-subtract division core.  All four arguments are
 * registers:
 *   dividend - in: value to divide; overwritten while dividing
 *   divisor  - in: value to divide by; overwritten (shifted up, then down)
 *   result   - out: the quotient bits are accumulated here
 *   curbit   - scratch: quotient bit matching the current divisor position
 *
 * The condition flags are clobbered.  Both users in this file expand the
 * macro only after they have dealt with divisor == 0, divisor == 1,
 * dividend <= divisor and power-of-two divisors, and neither of them reads
 * the dividend register afterwards.
 *
 * With __LINUX_ARM_ARCH__ >= 5 the initial alignment of the divisor is
 * computed with clz; otherwise it is found by shifting in a loop.
 */
.macro ARM_DIV_BODY dividend, divisor, result, curbit

#if __LINUX_ARM_ARCH__ >= 5

	@ shift = clz(divisor) - clz(dividend), i.e. the distance that lines
	@ the top set bit of the divisor up with the top set bit of the
	@ dividend.  The result register holds the shift temporarily.
	clz	\curbit, \divisor
	clz	\result, \dividend
	sub	\result, \curbit, \result
	mov	\curbit, #1
	mov	\divisor, \divisor, lsl \result
	mov	\curbit, \curbit, lsl \result
	mov	\result, #0

#else

	@ Initially shift the divisor left 3 bits if possible,
	@ set curbit accordingly.  This allows for curbit to be located
	@ at the left end of each 4 bit nibble in the division loop
	@ to save one loop in most cases.
	tst	\divisor, #0xe0000000
	moveq	\divisor, \divisor, lsl #3
	moveq	\curbit, #8
	movne	\curbit, #1

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	movlo	\curbit, \curbit, lsl #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	movlo	\curbit, \curbit, lsl #1
	blo	1b

	mov	\result, #0

#endif

	@ Division loop.  Each pass tries the divisor at four successive
	@ bit positions and ORs the matching quotient bit into the result
	@ for every subtraction that fits.
1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	orrhs	\result,   \result,   \curbit
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	orrhs	\result,   \result,   \curbit,  lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	orrhs	\result,   \result,   \curbit,  lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	orrhs	\result,   \result,   \curbit,  lsr #3
	@ Stop once nothing is left of the dividend, or once curbit has
	@ been shifted out to zero (the flag-setting shift below sets Z).
	cmp	\dividend, #0			@ Early termination?
	movnes	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
	movne	\divisor,  \divisor, lsr #4
	bne	1b

.endm


/*
 * ARM_DIV2_ORDER divisor, order
 *
 * Compute log2 of a power-of-two divisor so that the caller can divide
 * with a plain right shift.  Both arguments are registers:
 *   divisor - in: the power-of-two divisor
 *   order   - out: bit index of the set bit of the divisor
 *
 * With __LINUX_ARM_ARCH__ >= 5 this is 31 - clz(divisor) and only the
 * order register is written.  The fallback path narrows the position down
 * in steps of 16, 8 and 4 bits, shifting the divisor register down as it
 * goes, so it also clobbers the divisor and the condition flags; its last
 * step is only valid for power-of-two inputs.
 */
.macro ARM_DIV2_ORDER divisor, order

#if __LINUX_ARM_ARCH__ >= 5

	clz	\order, \divisor
	rsb	\order, \order, #31

#else

	@ Is the set bit in the upper half word?
	cmp	\divisor, #(1 << 16)
	movhs	\divisor, \divisor, lsr #16
	movhs	\order, #16
	movlo	\order, #0

	@ Upper byte of what is left?
	cmp	\divisor, #(1 << 8)
	movhs	\divisor, \divisor, lsr #8
	addhs	\order, \order, #8

	@ Upper nibble of what is left?
	cmp	\divisor, #(1 << 4)
	movhs	\divisor, \divisor, lsr #4
	addhs	\order, \order, #4

	@ The divisor is now 1, 2, 4 or 8: 8 adds 3, while for 1, 2 and 4
	@ the value shifted right by one (0, 1, 2) is the amount to add.
	cmp	\divisor, #(1 << 2)
	addhi	\order, \order, #3
	addls	\order, \order, \divisor, lsr #1

#endif

.endm


/*
 * ARM_MOD_BODY dividend, divisor, order, spare
 *
 * Unsigned shift-and-subtract remainder core.  All four arguments are
 * registers:
 *   dividend - in: value to divide; out: the remainder
 *   divisor  - in: value to divide by; overwritten (shifted up, then down)
 *   order    - scratch: number of bit positions the divisor was shifted up
 *   spare    - scratch: only written when __LINUX_ARM_ARCH__ >= 5
 *
 * The condition flags are clobbered.  Both users in this file expand the
 * macro only when the dividend is larger than the divisor and the divisor
 * is neither 0, 1 nor a power of two.
 */
.macro ARM_MOD_BODY dividend, divisor, order, spare

#if __LINUX_ARM_ARCH__ >= 5

	@ order = clz(divisor) - clz(dividend); shifting the divisor up by
	@ that much lines its top set bit up with that of the dividend.
	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	mov	\divisor, \divisor, lsl \order

#else

	mov	\order, #0

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	addlo	\order, \order, #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	addlo	\order, \order, #1
	blo	1b

#endif

	@ Perform all needed subtractions to keep only the remainder.
	@ Do comparisons in batch of 4 first.  There are order + 1 divisor
	@ positions to try, so a batch of four is only possible while
	@ order - 3 is not negative.
	subs	\order, \order, #3		@ yes, 3 is intended here
	blt	2f

1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	@ Go round again only while the dividend is still at least 1
	@ (signed compare) and, after taking four more positions off the
	@ order, the order has not gone negative.
	cmp	\dividend, #1
	mov	\divisor, \divisor, lsr #4
	subges	\order, \order, #4
	bge	1b

	@ Nothing more to do if the low two bits of the order are clear
	@ or if the dividend is already zero.
	tst	\order, #3
	teqne	\dividend, #0
	beq	5f

	@ Either 1, 2 or 3 comparison/subtractions are left:
	@ order is -3 for one step, -2 for two steps, -1 for three steps.
2:	cmn	\order, #2
	blt	4f
	beq	3f
	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
3:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
4:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
5:
.endm


/*
 * __udivsi3 / __aeabi_uidiv: unsigned 32-bit division.
 *
 * In:  r0 = dividend, r1 = divisor
 * Out: r0 = quotient
 *
 * r1, r2, r3 and the condition flags may be modified.  A zero divisor
 * branches to Ldiv0.
 */
ENTRY(__udivsi3)
ENTRY(__aeabi_uidiv)
UNWIND(.fnstart)

	@ r2 = divisor - 1.  If that is zero the divisor is 1 and r0 is
	@ already the quotient; a borrow means the divisor is 0.
	subs	r2, r1, #1
	reteq	lr
	bcc	Ldiv0
	cmp	r0, r1
	bls	11f				@ dividend <= divisor
	tst	r1, r2
	beq	12f				@ divisor is a power of 2

	@ General case: the quotient is built up in r2, r3 is scratch.
	ARM_DIV_BODY r0, r1, r2, r3

	mov	r0, r2
	ret	lr

	@ dividend <= divisor, flags still from the compare of r0 with r1:
	@ the quotient is 1 when they are equal and 0 otherwise.
11:	moveq	r0, #1
	movne	r0, #0
	ret	lr

	@ Power-of-two divisor: r2 = log2(divisor), then just shift.
12:	ARM_DIV2_ORDER r1, r2

	mov	r0, r0, lsr r2
	ret	lr

UNWIND(.fnend)
ENDPROC(__udivsi3)
ENDPROC(__aeabi_uidiv)

/*
 * __umodsi3: unsigned 32-bit remainder.
 *
 * In:  r0 = dividend, r1 = divisor
 * Out: r0 = remainder of dividend / divisor
 *
 * r1, r2, r3 and the condition flags may be modified.  A zero divisor
 * branches to Ldiv0.
 */
ENTRY(__umodsi3)
UNWIND(.fnstart)

	@ The conditional chain below resolves every easy case without
	@ branching:
	@   divisor == 1 or dividend == divisor  ->  r0 = 0
	@   dividend < divisor                   ->  r0 is left unchanged
	@   divisor is a power of 2              ->  r0 = r0 & (divisor - 1)
	subs	r2, r1, #1			@ compare divisor with 1
	bcc	Ldiv0
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	@ Return now unless the dividend is larger than the divisor and the
	@ divisor has more than one bit set.
	retls	lr

	@ General case: the remainder is left in r0, r2 and r3 are scratch.
	ARM_MOD_BODY r0, r1, r2, r3

	ret	lr

UNWIND(.fnend)
ENDPROC(__umodsi3)

/*
 * __divsi3 / __aeabi_idiv: signed 32-bit division.
 *
 * In:  r0 = dividend, r1 = divisor
 * Out: r0 = quotient
 *
 * The division is carried out on the magnitudes of the operands and the
 * result is negated when bit 31 of (dividend ^ divisor) is set.
 * r1, r2, r3, ip and the condition flags may be modified.  A zero divisor
 * branches to Ldiv0.
 */
ENTRY(__divsi3)
ENTRY(__aeabi_idiv)
UNWIND(.fnstart)

	cmp	r1, #0
	eor	ip, r0, r1			@ save the sign of the result.
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	subs	r2, r1, #1			@ division by 1 or -1 ?
	beq	10f
	movs	r3, r0
	rsbmi	r3, r0, #0			@ positive dividend value
	cmp	r3, r1
	bls	11f
	tst	r1, r2				@ divisor is power of 2 ?
	beq	12f

	@ General case on r3 = |dividend| and r1 = |divisor|; the unsigned
	@ quotient is built up in r0, r2 is scratch.
	ARM_DIV_BODY r3, r1, r0, r2

	cmp	ip, #0				@ operand signs differ ?
	rsbmi	r0, r0, #0
	ret	lr

	@ |divisor| == 1.  ip ^ r0 gives back the original divisor, so the
	@ mi condition holds exactly when dividing by -1.
10:	teq	ip, r0				@ same sign ?
	rsbmi	r0, r0, #0
	ret	lr

	@ |dividend| <= |divisor|, flags still from the compare of r3 with
	@ r1: smaller gives 0, equal gives +1 or -1 depending on the sign
	@ bit of ip.
11:	movlo	r0, #0
	moveq	r0, ip, asr #31
	orreq	r0, r0, #1
	ret	lr

	@ Power-of-two divisor: shift |dividend| right by log2(|divisor|)
	@ and restore the sign.  The mov does not touch the flags.
12:	ARM_DIV2_ORDER r1, r2

	cmp	ip, #0
	mov	r0, r3, lsr r2
	rsbmi	r0, r0, #0
	ret	lr

UNWIND(.fnend)
ENDPROC(__divsi3)
ENDPROC(__aeabi_idiv)

/*
 * __modsi3: signed 32-bit remainder.
 *
 * In:  r0 = dividend, r1 = divisor
 * Out: r0 = remainder, negated when the dividend was negative
 *
 * The remainder is computed on the magnitudes of the operands.
 * r1, r2, r3, ip and the condition flags may be modified.  A zero divisor
 * branches to Ldiv0.
 */
ENTRY(__modsi3)
UNWIND(.fnstart)

	cmp	r1, #0
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	movs	ip, r0				@ preserve sign of dividend
	rsbmi	r0, r0, #0			@ if negative make positive
	@ Same branch-free handling of the easy cases as in the unsigned
	@ variant: r0 becomes 0 for |divisor| == 1 or equal magnitudes,
	@ stays as it is when it is smaller than the divisor, and is masked
	@ with divisor - 1 for a power-of-two divisor.
	subs	r2, r1, #1			@ compare divisor with 1
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	bls	10f

	@ General case: the remainder is left in r0, r2 and r3 are scratch.
	ARM_MOD_BODY r0, r1, r2, r3

10:	cmp	ip, #0				@ was the dividend negative ?
	rsbmi	r0, r0, #0
	ret	lr

UNWIND(.fnend)
ENDPROC(__modsi3)

#ifdef CONFIG_AEABI

/*
 * __aeabi_uidivmod: unsigned 32-bit division with remainder.
 *
 * In:  r0 = dividend, r1 = divisor
 * Out: r0 = quotient as returned by __aeabi_uidiv
 *      r1 = dividend - quotient * divisor
 *
 * r2 and r3 are overwritten; ip and lr are saved and restored around the
 * call.
 */
ENTRY(__aeabi_uidivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr})

	stmfd	sp!, {r0, r1, ip, lr}
	bl	__aeabi_uidiv
	ldmfd	sp!, {r1, r2, ip, lr}		@ r1 = dividend, r2 = divisor
	mul	r3, r0, r2			@ r3 = quotient * divisor
	sub	r1, r1, r3			@ r1 = remainder
	ret	lr

UNWIND(.fnend)
ENDPROC(__aeabi_uidivmod)

/*
 * __aeabi_idivmod: signed 32-bit division with remainder.
 *
 * In:  r0 = dividend, r1 = divisor
 * Out: r0 = quotient as returned by __aeabi_idiv
 *      r1 = dividend - quotient * divisor
 *
 * r2 and r3 are overwritten; ip and lr are saved and restored around the
 * call.
 */
ENTRY(__aeabi_idivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr})
	stmfd	sp!, {r0, r1, ip, lr}
	bl	__aeabi_idiv
	ldmfd	sp!, {r1, r2, ip, lr}		@ r1 = dividend, r2 = divisor
	mul	r3, r0, r2			@ r3 = quotient * divisor
	sub	r1, r1, r3			@ r1 = remainder
	ret	lr

UNWIND(.fnend)
ENDPROC(__aeabi_idivmod)

#endif

/*
 * Ldiv0: common division-by-zero path.
 *
 * Reached by a plain branch from the division routines in this file, so lr
 * still holds the return address of the routine that branched here.  Calls
 * __div0 and then returns to that address with r0 = 0.
 */
Ldiv0:
UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})
	str	lr, [sp, #-8]!			@ push lr, moving sp down by 8
	bl	__div0
	mov	r0, #0			@ About as wrong as it could be.
	ldr	pc, [sp], #8			@ pop the saved lr into pc
UNWIND(.fnend)
ENDPROC(Ldiv0)