/*
 * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
 *
 * Author: Nicolas Pitre <nico@fluxnic.net>
 *   - contributed to gcc-3.4 on Sep 30, 2003
 *   - adapted for the Linux kernel on Oct 2, 2003
 */

/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.

 * SPDX-License-Identifier:	GPL-2.0+
 */


#include <linux/linkage.h>
#include <asm/assembler.h>

/*
 * U-Boot compatibility bit: define an empty UNWIND() macro, since we do
 * not support stack unwinding, and define CONFIG_AEABI to make all of
 * the functions available without diverging from the Linux code.
 */
#ifdef __UBOOT__
#define UNWIND(x...)
#define CONFIG_AEABI
#endif

/*
 * ARM_DIV_BODY dividend, divisor, result, curbit
 *
 * Unsigned shift-and-subtract division core.  All arguments are registers:
 *   dividend - in:  value to divide.  Clobbered: it is used as the running
 *                   remainder, but the unrolled steps that follow the last
 *                   quotient bit may subtract further, so it is not a
 *                   usable remainder afterwards.
 *   divisor  - in:  value to divide by.  Clobbered.
 *   result   - out: quotient.
 *   curbit   - scratch: quotient bit matching the current divisor shift.
 *
 * Every caller in this file filters out divisor == 0, divisor == 1,
 * power-of-two divisors and dividend <= divisor before expanding this
 * macro.  Condition flags are clobbered.
 */
.macro ARM_DIV_BODY dividend, divisor, result, curbit

#if __LINUX_ARM_ARCH__ >= 5

	@ Line the top set bit of the divisor up with the top set bit of
	@ the dividend and start curbit at the same shift.
	clz	\curbit, \divisor
	clz	\result, \dividend
	sub	\result, \curbit, \result
	mov	\curbit, #1
	mov	\divisor, \divisor, lsl \result
	mov	\curbit, \curbit, lsl \result
	mov	\result, #0

#else

	@ Initially shift the divisor left 3 bits if possible,
	@ set curbit accordingly.  This allows for curbit to be located
	@ at the left end of each 4-bit nibble in the division loop
	@ to save one loop in most cases.
	tst	\divisor, #0xe0000000
	moveq	\divisor, \divisor, lsl #3
	moveq	\curbit, #8
	movne	\curbit, #1

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unrolling in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	movlo	\curbit, \curbit, lsl #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	movlo	\curbit, \curbit, lsl #1
	blo	1b

	mov	\result, #0

#endif

	@ Division loop: four compare/subtract steps per iteration, each
	@ one setting the matching quotient bit when the subtraction is done.
1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	orrhs	\result,   \result,   \curbit
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	orrhs	\result,   \result,   \curbit,  lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	orrhs	\result,   \result,   \curbit,  lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	orrhs	\result,   \result,   \curbit,  lsr #3
	cmp	\dividend, #0			@ Early termination?
	movsne	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
	movne	\divisor,  \divisor, lsr #4
	bne	1b

.endm


/*
 * ARM_DIV2_ORDER divisor, order
 *
 * Compute order = log2(divisor) for a divisor that is a power of two
 * (callers only expand this after a successful power-of-two test).
 *   divisor - in:  power-of-two value.  Clobbered on the pre-ARMv5 path.
 *   order   - out: bit index of the single set bit.
 *
 * The pre-ARMv5 path clobbers the condition flags.
 */
.macro ARM_DIV2_ORDER divisor, order

#if __LINUX_ARM_ARCH__ >= 5

	clz	\order, \divisor
	rsb	\order, \order, #31		@ order = 31 - clz(divisor)

#else

	@ Binary search for the set bit: 16, then 8, then 4 positions.
	cmp	\divisor, #(1 << 16)
	movhs	\divisor, \divisor, lsr #16
	movhs	\order, #16
	movlo	\order, #0

	cmp	\divisor, #(1 << 8)
	movhs	\divisor, \divisor, lsr #8
	addhs	\order, \order, #8

	cmp	\divisor, #(1 << 4)
	movhs	\divisor, \divisor, lsr #4
	addhs	\order, \order, #4

	@ divisor is now 1, 2, 4 or 8: add 3 for 8, else add divisor / 2.
	cmp	\divisor, #(1 << 2)
	addhi	\order, \order, #3
	addls	\order, \order, \divisor, lsr #1

#endif

.endm


/*
 * ARM_MOD_BODY dividend, divisor, order, spare
 *
 * Unsigned shift-and-subtract modulo core.  All arguments are registers:
 *   dividend - in:  value to reduce; out: dividend modulo divisor.
 *   divisor  - in:  modulus.  Clobbered.
 *   order    - scratch: number of bit positions the divisor was shifted
 *              left, i.e. (order + 1) compare/subtract steps are needed.
 *   spare    - scratch (only written on the ARMv5+ path).
 *
 * Callers in this file only expand this with dividend > divisor and a
 * divisor that is neither 0, 1 nor a power of two.  Condition flags are
 * clobbered.
 */
.macro ARM_MOD_BODY dividend, divisor, order, spare

#if __LINUX_ARM_ARCH__ >= 5

	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	mov	\divisor, \divisor, lsl \order

#else

	mov	\order, #0

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unrolling in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	addlo	\order, \order, #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	addlo	\order, \order, #1
	blo	1b

#endif

	@ Perform all needed subtractions to keep only the remainder.
	@ Do comparisons in batch of 4 first.
	subs	\order, \order, #3		@ yes, 3 is intended here
	blt	2f

1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	cmp	\dividend, #1
	mov	\divisor, \divisor, lsr #4
	subsge	\order, \order, #4		@ only count down while dividend >= 1
	bge	1b

	@ Finished if no partial batch remains or the dividend reached zero.
	tst	\order, #3
	teqne	\dividend, #0
	beq	5f

	@ Either 1, 2 or 3 comparison/subtractions are left.
2:	cmn	\order, #2
	blt	4f
	beq	3f
	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
3:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
4:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
5:
.endm


/*
 * __udivsi3 / __aeabi_uidiv: unsigned 32-bit division.
 *
 * In:    r0 = dividend, r1 = divisor
 * Out:   r0 = quotient
 * Uses:  r1, r2, r3 and the condition flags
 *
 * A zero divisor branches to Ldiv0, which calls __div0 and returns 0.
 */
ENTRY(__udivsi3)
ENTRY(__aeabi_uidiv)
UNWIND(.fnstart)

	subs	r2, r1, #1			@ r2 = divisor - 1
	reteq	lr				@ divisor == 1: r0 is the quotient
	bcc	Ldiv0				@ borrow: divisor == 0
	cmp	r0, r1
	bls	11f				@ dividend <= divisor
	tst	r1, r2
	beq	12f				@ divisor is a power of 2

	ARM_DIV_BODY r0, r1, r2, r3

	mov	r0, r2
	ret	lr

11:	moveq	r0, #1				@ dividend == divisor
	movne	r0, #0				@ dividend <  divisor
	ret	lr

12:	ARM_DIV2_ORDER r1, r2

	mov	r0, r0, lsr r2
	ret	lr

UNWIND(.fnend)
ENDPROC(__udivsi3)
ENDPROC(__aeabi_uidiv)

/*
 * __umodsi3: unsigned 32-bit modulo.
 *
 * In:    r0 = dividend, r1 = divisor
 * Out:   r0 = dividend modulo divisor
 * Uses:  r1, r2, r3 and the condition flags
 *
 * A zero divisor branches to Ldiv0, which calls __div0 and returns 0.
 */
ENTRY(__umodsi3)
UNWIND(.fnstart)

	subs	r2, r1, #1			@ compare divisor with 1
	bcc	Ldiv0
	cmpne	r0, r1				@ compare dividend with divisor
	moveq   r0, #0				@ divisor == 1 or equal operands
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2			@ power of 2: mask with divisor - 1
	retls	lr				@ done unless the full loop is needed

	ARM_MOD_BODY r0, r1, r2, r3

	ret	lr

UNWIND(.fnend)
ENDPROC(__umodsi3)

/*
 * __divsi3 / __aeabi_idiv: signed 32-bit division.
 *
 * In:    r0 = dividend, r1 = divisor
 * Out:   r0 = quotient
 * Uses:  r1, r2, r3, ip and the condition flags
 *
 * ip holds dividend ^ divisor, whose top bit is the sign of the result.
 * The magnitudes are divided as unsigned values and the sign is applied
 * at the end.  A zero divisor branches to Ldiv0.
 */
ENTRY(__divsi3)
ENTRY(__aeabi_idiv)
UNWIND(.fnstart)

	cmp	r1, #0
	eor	ip, r0, r1			@ save the sign of the result.
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	subs	r2, r1, #1			@ division by 1 or -1 ?
	beq	10f
	movs	r3, r0
	rsbmi	r3, r0, #0			@ positive dividend value
	cmp	r3, r1
	bls	11f
	tst	r1, r2				@ divisor is power of 2 ?
	beq	12f

	ARM_DIV_BODY r3, r1, r0, r2

	cmp	ip, #0
	rsbmi	r0, r0, #0
	ret	lr

10:	teq	ip, r0				@ same sign ?
	rsbmi	r0, r0, #0
	ret	lr

11:	movlo	r0, #0				@ |dividend| < |divisor|
	moveq	r0, ip, asr #31			@ equal magnitudes: 0 or -1 ...
	orreq	r0, r0, #1			@ ... becomes 1 or -1
	ret	lr

12:	ARM_DIV2_ORDER r1, r2

	cmp	ip, #0
	mov	r0, r3, lsr r2
	rsbmi	r0, r0, #0
	ret	lr

UNWIND(.fnend)
ENDPROC(__divsi3)
ENDPROC(__aeabi_idiv)

/*
 * __modsi3: signed 32-bit modulo.
 *
 * In:    r0 = dividend, r1 = divisor
 * Out:   r0 = remainder, negated when the dividend was negative
 * Uses:  r1, r2, r3, ip and the condition flags
 *
 * ip keeps the original dividend so that its sign can be applied to the
 * unsigned result.  A zero divisor branches to Ldiv0.
 */
ENTRY(__modsi3)
UNWIND(.fnstart)

	cmp	r1, #0
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	movs	ip, r0				@ preserve sign of dividend
	rsbmi	r0, r0, #0			@ if negative make positive
	subs	r2, r1, #1			@ compare divisor with 1
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	bls	10f

	ARM_MOD_BODY r0, r1, r2, r3

10:	cmp	ip, #0
	rsbmi	r0, r0, #0
	ret	lr

UNWIND(.fnend)
ENDPROC(__modsi3)

#ifdef CONFIG_AEABI

/*
 * __aeabi_uidivmod: unsigned 32-bit division with remainder.
 *
 * In:    r0 = dividend, r1 = divisor
 * Out:   r0 = quotient, r1 = remainder
 * Uses:  r2, r3 and the condition flags; ip and lr are saved and restored
 *
 * The remainder is derived as dividend - quotient * divisor from the
 * operands saved on the stack before calling __aeabi_uidiv.
 */
ENTRY(__aeabi_uidivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr}	)

	stmfd	sp!, {r0, r1, ip, lr}
	bl	__aeabi_uidiv
	ldmfd	sp!, {r1, r2, ip, lr}		@ r1 = dividend, r2 = divisor
	mul	r3, r0, r2
	sub	r1, r1, r3
	ret	lr

UNWIND(.fnend)
ENDPROC(__aeabi_uidivmod)

/*
 * __aeabi_idivmod: signed 32-bit division with remainder.
 *
 * In:    r0 = dividend, r1 = divisor
 * Out:   r0 = quotient, r1 = remainder
 * Uses:  r2, r3 and the condition flags; ip and lr are saved and restored
 *
 * The remainder is derived as dividend - quotient * divisor from the
 * operands saved on the stack before calling __aeabi_idiv.
 */
ENTRY(__aeabi_idivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr}	)
	stmfd	sp!, {r0, r1, ip, lr}
	bl	__aeabi_idiv
	ldmfd	sp!, {r1, r2, ip, lr}		@ r1 = dividend, r2 = divisor
	mul	r3, r0, r2
	sub	r1, r1, r3
	ret	lr

UNWIND(.fnend)
ENDPROC(__aeabi_idivmod)

#endif

/*
 * Ldiv0: common division-by-zero path.
 *
 * Reached by a plain branch from the routines above, so lr still holds
 * the return address of their caller.  Calls __div0 and then returns 0
 * in r0 to that caller.  lr is stored in an 8-byte stack slot.
 * NOTE(review): the extra 4 bytes are presumably there to keep sp 8-byte
 * aligned across the call - confirm against the ABI in use.
 */
Ldiv0:
UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})
	str	lr, [sp, #-8]!
	bl	__div0
	mov	r0, #0			@ About as wrong as it could be.
	ldr	pc, [sp], #8
UNWIND(.fnend)
ENDPROC(Ldiv0)