/*
 * linux/arch/arm/lib/div64.S
 *
 * Optimized computation of 64-bit dividend / 32-bit divisor
 *
 * Author:	Nicolas Pitre
 * Created:	Oct 5, 2003
 * Copyright:	Monta Vista Software, Inc.
 *
 * SPDX-License-Identifier:	GPL-2.0
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#ifdef __UBOOT__
#define UNWIND(x...)
#endif

#ifdef __ARMEB__
#define xh r0
#define xl r1
#define yh r2
#define yl r3
#else
#define xl r0
#define xh r1
#define yl r2
#define yh r3
#endif

/*
 * __do_div64: perform a division with 64-bit dividend and 32-bit divisor.
 *
 * Note: Calling convention is totally non-standard for optimal code.
 *       This is meant to be used by do_div() from include/asm/div64.h only.
 *       (Illustrative C sketches of the call site and of the algorithm
 *       are appended at the end of this file.)
 *
 * Input parameters:
 *	xh-xl	= dividend (clobbered)
 *	r4	= divisor (preserved)
 *
 * Output values:
 *	yh-yl	= result
 *	xh	= remainder
 *
 * Clobbered regs: xl, ip
 */

.pushsection .text.__do_div64, "ax"
ENTRY(__do_div64)
UNWIND(.fnstart)

	@ Test for easy paths first.
	subs	ip, r4, #1
	bls	9f			@ divisor is 0 or 1
	tst	ip, r4
	beq	8f			@ divisor is power of 2

	@ See if we need to handle the upper 32 bits of the result.
	cmp	xh, r4
	mov	yh, #0
	blo	3f

	@ Align the divisor with the upper part of the dividend.
	@ The aligned divisor is stored in yl, preserving the original.
	@ The bit position is stored in ip.

#if __LINUX_ARM_ARCH__ >= 5

	clz	yl, r4
	clz	ip, xh
	sub	yl, yl, ip
	mov	ip, #1
	mov	ip, ip, lsl yl
	mov	yl, r4, lsl yl

#else

	mov	yl, r4
	mov	ip, #1
1:	cmp	yl, #0x80000000
	cmpcc	yl, xh
	movcc	yl, yl, lsl #1
	movcc	ip, ip, lsl #1
	bcc	1b

#endif

	@ The division loop for the needed upper bit positions.
	@ Break out early if the dividend reaches 0.
2:	cmp	xh, yl
	orrcs	yh, yh, ip
	subscs	xh, xh, yl
	movsne	ip, ip, lsr #1
	mov	yl, yl, lsr #1
	bne	2b

	@ See if we need to handle the lower 32 bits of the result.
3:	cmp	xh, #0
	mov	yl, #0
	cmpeq	xl, r4
	movlo	xh, xl
	retlo	lr

	@ The division loop for the lower bit positions.
	@ Here we shift remainder bits leftwards rather than moving the
	@ divisor for comparisons, considering the carry-out bit as well.
	mov	ip, #0x80000000
4:	movs	xl, xl, lsl #1
	adcs	xh, xh, xh
	beq	6f
	cmpcc	xh, r4
5:	orrcs	yl, yl, ip
	subcs	xh, xh, r4
	movs	ip, ip, lsr #1
	bne	4b
	ret	lr

	@ The top part of the remainder became zero. If the carry is set
	@ (the 33rd bit) this is a false positive, so resume the loop.
	@ Otherwise, if the lower part is also null, then we are done.
6:	bcs	5b
	cmp	xl, #0
	reteq	lr

	@ We still have remainder bits in the low part. Bring them up.

#if __LINUX_ARM_ARCH__ >= 5

	clz	xh, xl			@ we know xh is zero here so...
	add	xh, xh, #1
	mov	xl, xl, lsl xh
	mov	ip, ip, lsr xh

#else

7:	movs	xl, xl, lsl #1
	mov	ip, ip, lsr #1
	bcc	7b

#endif

	@ The current remainder is now 1. It is pointless to compare it with
	@ the divisor at this point since the divisor cannot be smaller than 3
	@ here. If possible, branch for another shift in the division loop.
	@ If no bit positions are left, then we are done.
	movs	ip, ip, lsr #1
	mov	xh, #1
	bne	4b
	ret	lr

8:	@ Division by a power of 2: determine the order of that divisor,
	@ then simply shift the values around.

#if __LINUX_ARM_ARCH__ >= 5

	clz	ip, r4
	rsb	ip, ip, #31

#else

	mov	yl, r4
	cmp	r4, #(1 << 16)
	mov	ip, #0
	movhs	yl, yl, lsr #16
	movhs	ip, #16

	cmp	yl, #(1 << 8)
	movhs	yl, yl, lsr #8
	addhs	ip, ip, #8

	cmp	yl, #(1 << 4)
	movhs	yl, yl, lsr #4
	addhs	ip, ip, #4

	cmp	yl, #(1 << 2)
	addhi	ip, ip, #3
	addls	ip, ip, yl, lsr #1

#endif

	mov	yh, xh, lsr ip
	mov	yl, xl, lsr ip
	rsb	ip, ip, #32
 ARM(	orr	yl, yl, xh, lsl ip	)
 THUMB(	lsl	xh, xh, ip		)
 THUMB(	orr	yl, yl, xh		)
	mov	xh, xl, lsl ip
	mov	xh, xh, lsr ip
	ret	lr

	@ eq -> division by 1: obvious enough...
9:	moveq	yl, xl
	moveq	yh, xh
	moveq	xh, #0
	reteq	lr
UNWIND(.fnend)

UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})
Ldiv0_64:
	@ Division by 0:
	str	lr, [sp, #-8]!
	bl	__div0

	@ As wrong as it could be...
	mov	yl, #0
	mov	yh, #0
	mov	xh, #0
	ldr	pc, [sp], #8

UNWIND(.fnend)
ENDPROC(__do_div64)
.popsection
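
As the header comment notes, __do_div64 is only reached through the do_div()
macro from include/asm/div64.h, which divides its 64-bit lvalue in place and
evaluates to the 32-bit remainder. A minimal caller-side sketch follows; the
helper name is hypothetical and not part of this file.

#include <linux/types.h>
#include <asm/div64.h>

/*
 * Illustrative only: split_kib() is a hypothetical helper.  do_div()
 * replaces its first argument with the quotient and returns the
 * remainder; on ARM the macro's expansion ends up in __do_div64 above.
 */
static u32 split_kib(u64 bytes, u64 *kib)
{
	u32 rem = do_div(bytes, 1024);	/* bytes now holds the quotient */

	*kib = bytes;
	return rem;
}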
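
The loops above implement classic shift-and-subtract long division: the
divisor is first aligned with the most significant set bit of the dividend,
then one quotient bit is produced per shift position, with separate fast
paths for divisors of 0, 1, or a power of 2. The following is a minimal C
sketch of that algorithm, assuming GCC/Clang __builtin_clz builtins; it is a
reference model for understanding the flow, not the code do_div() actually
runs on ARM.

#include <stdint.h>

/*
 * Illustrative C model of the shift-and-subtract division above.
 * Assumes GCC/Clang __builtin_clz/__builtin_clzll builtins.
 */
static uint64_t div64_32_model(uint64_t dividend, uint32_t divisor,
			       uint32_t *remainder)
{
	uint64_t quotient = 0;
	uint64_t d;
	int shift;

	/* Easy paths: divisor of 0 or 1 (label 9 above). */
	if (divisor <= 1) {
		*remainder = 0;
		return divisor ? dividend : 0;	/* 0 stands in for __div0() */
	}

	/* Divisor is a power of 2: just shift and mask (label 8 above). */
	if (!(divisor & (divisor - 1))) {
		*remainder = dividend & (divisor - 1);
		return dividend >> (31 - __builtin_clz(divisor));
	}

	/* Align the divisor with the top set bit of the dividend. */
	shift = __builtin_clzll((uint64_t)divisor) -
		__builtin_clzll(dividend | 1);
	if (shift < 0)
		shift = 0;
	d = (uint64_t)divisor << shift;

	/* One quotient bit per iteration, highest position first. */
	for (; shift >= 0; shift--, d >>= 1) {
		quotient <<= 1;
		if (dividend >= d) {
			dividend -= d;
			quotient |= 1;
		}
	}

	*remainder = (uint32_t)dividend;
	return quotient;
}

For example, the model maps dividend 1000000007 with divisor 1000 to quotient
1000000 and remainder 7, which is what yh-yl and xh hold on return from
__do_div64.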