xref: /rk3399_rockchip-uboot/arch/arm/lib/div64.S (revision dc557e9a1fe00ca9d884bd88feef5bebf23fede4)
/*
 *  linux/arch/arm/lib/div64.S
 *
 *  Optimized computation of 64-bit dividend / 32-bit divisor
 *
 *  Author:	Nicolas Pitre
 *  Created:	Oct 5, 2003
 *  Copyright:	Monta Vista Software, Inc.
 *
 *  SPDX-License-Identifier:	GPL-2.0
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#ifdef __UBOOT__
/* U-Boot does not build unwind tables, so stub out the annotations. */
#define UNWIND(x...)
#endif

/*
 * The 64-bit dividend lives in r0-r1 and the 64-bit quotient is built
 * in r2-r3; which register of each pair holds the high word depends on
 * endianness, hence the two mappings below.
 */
#ifdef __ARMEB__
#define xh r0
#define xl r1
#define yh r2
#define yl r3
#else
#define xl r0
#define xh r1
#define yl r2
#define yh r3
#endif

/*
 * __do_div64: perform a division with 64-bit dividend and 32-bit divisor.
 *
 * Note: Calling convention is totally non standard for optimal code.
 *       This is meant to be used by do_div() from include/asm/div64.h only.
 *
 * Input parameters:
 * 	xh-xl	= dividend (clobbered)
 * 	r4	= divisor (preserved)
 *
 * Output values:
 * 	yh-yl	= result
 * 	xh	= remainder
 *
 * Clobbered regs: xl, ip
 */

.pushsection .text.__do_div64, "ax"
ENTRY(__do_div64)
UNWIND(.fnstart)

	@ Test for easy paths first.
	subs	ip, r4, #1
	bls	9f			@ divisor is 0 or 1
	tst	ip, r4
	beq	8f			@ divisor is power of 2

	@ See if we need to handle upper 32-bit result.
	cmp	xh, r4
	mov	yh, #0
	blo	3f

	@ Align divisor with upper part of dividend.
	@ The aligned divisor is stored in yl preserving the original.
	@ The bit position is stored in ip.

#if __LINUX_ARM_ARCH__ >= 5

	clz	yl, r4
	clz	ip, xh
	sub	yl, yl, ip
	mov	ip, #1
	mov	ip, ip, lsl yl
	mov	yl, r4, lsl yl

#else

	@ No CLZ on ARMv4: shift divisor left one bit at a time until it
	@ would overflow or exceeds the dividend's high word.
	mov	yl, r4
	mov	ip, #1
1:	cmp	yl, #0x80000000
	cmpcc	yl, xh
	movcc	yl, yl, lsl #1
	movcc	ip, ip, lsl #1
	bcc	1b

#endif

	@ The division loop for needed upper bit positions.
	@ Break out early if dividend reaches 0.
2:	cmp	xh, yl
	orrcs	yh, yh, ip
	subscs	xh, xh, yl
	movsne	ip, ip, lsr #1
	mov	yl, yl, lsr #1
	bne	2b

	@ See if we need to handle lower 32-bit result.
3:	cmp	xh, #0
	mov	yl, #0
	cmpeq	xl, r4
	movlo	xh, xl
	retlo	lr

	@ The division loop for lower bit positions.
	@ Here we shift remainder bits leftwards rather than moving the
	@ divisor for comparisons, considering the carry-out bit as well.
	mov	ip, #0x80000000
4:	movs	xl, xl, lsl #1
	adcs	xh, xh, xh
	beq	6f
	cmpcc	xh, r4
5:	orrcs	yl, yl, ip
	subcs	xh, xh, r4
	movs	ip, ip, lsr #1
	bne	4b
	ret	lr

	@ The top part of remainder became zero.  If carry is set
	@ (the 33rd bit) this is a false positive so resume the loop.
	@ Otherwise, if lower part is also null then we are done.
6:	bcs	5b
	cmp	xl, #0
	reteq	lr

	@ We still have remainder bits in the low part.  Bring them up.

#if __LINUX_ARM_ARCH__ >= 5

	clz	xh, xl			@ we know xh is zero here so...
	add	xh, xh, #1
	mov	xl, xl, lsl xh
	mov	ip, ip, lsr xh

#else

7:	movs	xl, xl, lsl #1
	mov	ip, ip, lsr #1
	bcc	7b

#endif

	@ Current remainder is now 1.  It is worthless to compare with
	@ divisor at this point since divisor can not be smaller than 3 here.
	@ If possible, branch for another shift in the division loop.
	@ If no bit position left then we are done.
	movs	ip, ip, lsr #1
	mov	xh, #1
	bne	4b
	ret	lr

8:	@ Division by a power of 2: determine what that divisor order is
	@ then simply shift values around

#if __LINUX_ARM_ARCH__ >= 5

	clz	ip, r4
	rsb	ip, ip, #31

#else

	@ No CLZ on ARMv4: binary-search the bit position in 16/8/4-bit
	@ steps, accumulating the shift count in ip.
	mov	yl, r4
	cmp	r4, #(1 << 16)
	mov	ip, #0
	movhs	yl, yl, lsr #16
	movhs	ip, #16

	cmp	yl, #(1 << 8)
	movhs	yl, yl, lsr #8
	addhs	ip, ip, #8

	cmp	yl, #(1 << 4)
	movhs	yl, yl, lsr #4
	addhs	ip, ip, #4

	cmp	yl, #(1 << 2)
	addhi	ip, ip, #3
	addls	ip, ip, yl, lsr #1

#endif

	@ quotient = dividend >> ip, remainder = low bits masked by the
	@ shift-up/shift-down pair below.
	mov	yh, xh, lsr ip
	mov	yl, xl, lsr ip
	rsb	ip, ip, #32
 ARM(	orr	yl, yl, xh, lsl ip	)
 THUMB(	lsl	xh, xh, ip		)
 THUMB(	orr	yl, yl, xh		)
	mov	xh, xl, lsl ip
	mov	xh, xh, lsr ip
	ret	lr

	@ eq -> division by 1: obvious enough...
	@ (ne means the divisor was 0 and we fall through to Ldiv0_64.)
9:	moveq	yl, xl
	moveq	yh, xh
	moveq	xh, #0
	reteq	lr
UNWIND(.fnend)

UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})
Ldiv0_64:
	@ Division by 0:
	str	lr, [sp, #-8]!
	bl	__div0

	@ as wrong as it could be...
	mov	yl, #0
	mov	yh, #0
	mov	xh, #0
	ldr	pc, [sp], #8

UNWIND(.fnend)
ENDPROC(__do_div64)
.popsection
215