/*
 * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
 *
 * Author: Nicolas Pitre <nico@fluxnic.net>
 *   - contributed to gcc-3.4 on Sep 30, 2003
 *   - adapted for the Linux kernel on Oct 2, 2003
 */

/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.

 * SPDX-License-Identifier:	GPL-2.0+
 */


#include <linux/linkage.h>
#include <asm/assembler.h>

/*
 * U-Boot compatibility bit: define an empty UNWIND() macro, since we
 * do not support stack unwinding, and define CONFIG_AEABI to make all
 * of the functions available without diverging from Linux code.
 */
#ifdef __UBOOT__
#define UNWIND(x...)
#define CONFIG_AEABI
#endif

/*
 * ARM_DIV_BODY dividend, divisor, result, curbit
 *
 * Unsigned shift-and-subtract division core.
 *
 *   dividend  in:  value to divide            out: clobbered
 *   divisor   in:  value to divide by         out: clobbered
 *   result    out: quotient
 *   curbit    scratch (quotient bit matching the current divisor shift)
 *
 * The divisor is first shifted left until it lines up with the dividend,
 * with curbit tracking the same shift.  The main loop then performs four
 * compare/subtract steps per iteration and stops as soon as the dividend
 * reaches zero or curbit has been shifted out.
 *
 * Every expansion in this file is preceded by checks that branch away
 * when the divisor is 0 or 1, when dividend <= divisor, and when the
 * divisor is a power of two, so the macro itself does not handle those
 * cases.  Condition flags are clobbered.
 */
.macro ARM_DIV_BODY dividend, divisor, result, curbit

#if __LINUX_ARM_ARCH__ >= 5

	@ shift = clz(divisor) - clz(dividend): this aligns the top set
	@ bit of the divisor with the top set bit of the dividend.
	clz	\curbit, \divisor
	clz	\result, \dividend
	sub	\result, \curbit, \result
	mov	\curbit, #1
	mov	\divisor, \divisor, lsl \result
	mov	\curbit, \curbit, lsl \result
	mov	\result, #0

#else

	@ Initially shift the divisor left 3 bits if possible,
	@ set curbit accordingly.  This allows for curbit to be located
	@ at the left end of each 4 bit nibbles in the division loop
	@ to save one loop in most cases.
	tst	\divisor, #0xe0000000
	moveq	\divisor, \divisor, lsl #3
	moveq	\curbit, #8
	movne	\curbit, #1

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	movlo	\curbit, \curbit, lsl #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	movlo	\curbit, \curbit, lsl #1
	blo	1b

	mov	\result, #0

#endif

	@ Division loop: four compare/subtract steps per pass, at
	@ divisor, divisor/2, divisor/4 and divisor/8.  Each successful
	@ subtraction sets the matching quotient bit.
1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	orrhs	\result,   \result,   \curbit
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	orrhs	\result,   \result,   \curbit,  lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	orrhs	\result,   \result,   \curbit,  lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	orrhs	\result,   \result,   \curbit,  lsr #3
	cmp	\dividend, #0			@ Early termination?
	movnes	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
	movne	\divisor,  \divisor, lsr #4
	bne	1b

.endm


/*
 * ARM_DIV2_ORDER divisor, order
 *
 * Compute in order the bit position of the set bit of divisor, i.e. the
 * right-shift count equivalent to dividing by it.  Every expansion in
 * this file is reached only after the caller has verified that
 * divisor & (divisor - 1) == 0, so exactly one bit is expected to be set.
 *
 * On pre-ARMv5 builds the divisor register is shifted right as part of
 * the search and is therefore clobbered, as are the condition flags.
 */
.macro ARM_DIV2_ORDER divisor, order

#if __LINUX_ARM_ARCH__ >= 5

	@ order = 31 - clz(divisor)
	clz	\order, \divisor
	rsb	\order, \order, #31

#else

	@ Binary search for the set bit: narrow by 16, 8, then 4 bits.
	cmp	\divisor, #(1 << 16)
	movhs	\divisor, \divisor, lsr #16
	movhs	\order, #16
	movlo	\order, #0

	cmp	\divisor, #(1 << 8)
	movhs	\divisor, \divisor, lsr #8
	addhs	\order, \order, #8

	cmp	\divisor, #(1 << 4)
	movhs	\divisor, \divisor, lsr #4
	addhs	\order, \order, #4

	@ divisor is now 1, 2, 4 or 8: add 3 for 8, otherwise add
	@ divisor >> 1 (0, 1 or 2).
	cmp	\divisor, #(1 << 2)
	addhi	\order, \order, #3
	addls	\order, \order, \divisor, lsr #1

#endif

.endm


/*
 * ARM_MOD_BODY dividend, divisor, order, spare
 *
 * Unsigned remainder core.
 *
 *   dividend  in:  value to divide            out: remainder
 *   divisor   in:  value to divide by         out: clobbered
 *   order     scratch (number of bit positions the divisor was shifted)
 *   spare     scratch (only written on ARMv5 and later builds)
 *
 * The divisor is shifted left until it lines up with the dividend, then
 * shifted back down one position at a time while being subtracted from
 * the dividend whenever it fits.
 *
 * Both expansions in this file are preceded by checks that return early
 * when the divisor is 0 or 1, when dividend <= divisor, and when the
 * divisor is a power of two.  Condition flags are clobbered.
 */
.macro ARM_MOD_BODY dividend, divisor, order, spare

#if __LINUX_ARM_ARCH__ >= 5

	@ order = clz(divisor) - clz(dividend)
	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	mov	\divisor, \divisor, lsl \order

#else

	mov	\order, #0

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	addlo	\order, \order, #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	addlo	\order, \order, #1
	blo	1b

#endif

	@ Perform all needed subtractions to keep only the remainder.
	@ Do comparisons in batch of 4 first.
	subs	\order, \order, #3		@ yes, 3 is intended here
	blt	2f

1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	cmp	\dividend, #1
	mov	\divisor, \divisor, lsr #4
	subges	\order, \order, #4
	bge	1b

	@ Finished when no partial batch remains (low two bits of order
	@ clear) or the dividend has already reached zero.
	tst	\order, #3
	teqne	\dividend, #0
	beq	5f

	@ Either 1, 2 or 3 comparison/subtractions are left.
2:	cmn	\order, #2
	blt	4f
	beq	3f
	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
3:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
4:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
5:
.endm


/*
 * __udivsi3 / __aeabi_uidiv: unsigned 32-bit division.
 *
 *   In:       r0 = dividend, r1 = divisor
 *   Out:      r0 = quotient
 *   Clobbers: r1, r2, r3, condition flags
 *
 * A zero divisor branches to Ldiv0.
 */
ENTRY(__udivsi3)
ENTRY(__aeabi_uidiv)
UNWIND(.fnstart)

	subs	r2, r1, #1			@ r2 = divisor - 1
	reteq	lr				@ divisor == 1: r0 is the answer
	bcc	Ldiv0				@ borrow: divisor was 0
	cmp	r0, r1
	bls	11f				@ dividend <= divisor
	tst	r1, r2
	beq	12f				@ divisor is a power of 2

	ARM_DIV_BODY r0, r1, r2, r3

	mov	r0, r2
	ret	lr

	@ dividend <= divisor: quotient is 1 when equal, 0 otherwise.
11:	moveq	r0, #1
	movne	r0, #0
	ret	lr

	@ Power-of-two divisor: a plain right shift.
12:	ARM_DIV2_ORDER r1, r2

	mov	r0, r0, lsr r2
	ret	lr

UNWIND(.fnend)
ENDPROC(__udivsi3)
ENDPROC(__aeabi_uidiv)

/*
 * __umodsi3: unsigned 32-bit remainder.
 *
 *   In:       r0 = dividend, r1 = divisor
 *   Out:      r0 = remainder
 *   Clobbers: r1, r2, r3, condition flags
 *
 * A zero divisor branches to Ldiv0.  The conditional chain below handles
 * divisor == 1, dividend == divisor (both give 0), dividend < divisor
 * (r0 returned unchanged) and power-of-two divisors (r0 masked with
 * divisor - 1) without branching.
 */
ENTRY(__umodsi3)
UNWIND(.fnstart)

	subs	r2, r1, #1			@ compare divisor with 1
	bcc	Ldiv0
	cmpne	r0, r1				@ compare dividend with divisor
	moveq   r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	retls	lr

	ARM_MOD_BODY r0, r1, r2, r3

	ret	lr

UNWIND(.fnend)
ENDPROC(__umodsi3)

/*
 * __divsi3 / __aeabi_idiv: signed 32-bit division.
 *
 *   In:       r0 = dividend, r1 = divisor
 *   Out:      r0 = quotient
 *   Clobbers: r1, r2, r3, ip, condition flags
 *
 * The division is carried out on absolute values.  ip keeps
 * dividend ^ divisor, whose top bit is the sign of the quotient.
 * A zero divisor branches to Ldiv0.
 */
ENTRY(__divsi3)
ENTRY(__aeabi_idiv)
UNWIND(.fnstart)

	cmp	r1, #0
	eor	ip, r0, r1			@ save the sign of the result.
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	subs	r2, r1, #1			@ division by 1 or -1 ?
	beq	10f
	movs	r3, r0
	rsbmi	r3, r0, #0			@ positive dividend value
	cmp	r3, r1
	bls	11f
	tst	r1, r2				@ divisor is power of 2 ?
	beq	12f

	ARM_DIV_BODY r3, r1, r0, r2

	cmp	ip, #0
	rsbmi	r0, r0, #0
	ret	lr

	@ Division by 1 or -1.  r0 still holds the original dividend, so
	@ ip ^ r0 carries the sign of the original divisor: negate r0
	@ when the divisor was negative.
10:	teq	ip, r0				@ same sign ?
	rsbmi	r0, r0, #0
	ret	lr

	@ |dividend| <= |divisor|: 0 when strictly smaller, otherwise
	@ +1 or -1 according to the sign kept in ip.
11:	movlo	r0, #0
	moveq	r0, ip, asr #31
	orreq	r0, r0, #1
	ret	lr

	@ Power-of-two divisor: shift the absolute dividend, then apply
	@ the sign kept in ip.
12:	ARM_DIV2_ORDER r1, r2

	cmp	ip, #0
	mov	r0, r3, lsr r2
	rsbmi	r0, r0, #0
	ret	lr

UNWIND(.fnend)
ENDPROC(__divsi3)
ENDPROC(__aeabi_idiv)

/*
 * __modsi3: signed 32-bit remainder.
 *
 *   In:       r0 = dividend, r1 = divisor
 *   Out:      r0 = remainder, negated when the dividend was negative
 *   Clobbers: r1, r2, r3, ip, condition flags
 *
 * The remainder is computed on absolute values using the same
 * conditional chain as __umodsi3.  ip keeps the original dividend so its
 * sign can be applied at the end.  A zero divisor branches to Ldiv0.
 */
ENTRY(__modsi3)
UNWIND(.fnstart)

	cmp	r1, #0
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	movs	ip, r0				@ preserve sign of dividend
	rsbmi	r0, r0, #0			@ if negative make positive
	subs	r2, r1, #1			@ compare divisor with 1
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	bls	10f

	ARM_MOD_BODY r0, r1, r2, r3

10:	cmp	ip, #0
	rsbmi	r0, r0, #0
	ret	lr

UNWIND(.fnend)
ENDPROC(__modsi3)

#ifdef CONFIG_AEABI

/*
 * __aeabi_uidivmod: unsigned 32-bit division returning both results.
 *
 *   In:       r0 = dividend, r1 = divisor
 *   Out:      r0 = quotient, r1 = remainder
 *   Clobbers: r2, r3, condition flags
 *
 * Calls __aeabi_uidiv, then derives the remainder as
 * dividend - quotient * divisor from the operands saved on the stack.
 * ip and lr are saved and restored around the call.
 */
ENTRY(__aeabi_uidivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr}	)

	stmfd	sp!, {r0, r1, ip, lr}
	bl	__aeabi_uidiv
	ldmfd	sp!, {r1, r2, ip, lr}		@ r1 = dividend, r2 = divisor
	mul	r3, r0, r2
	sub	r1, r1, r3
	ret	lr

UNWIND(.fnend)
ENDPROC(__aeabi_uidivmod)

/*
 * __aeabi_idivmod: signed 32-bit division returning both results.
 *
 *   In:       r0 = dividend, r1 = divisor
 *   Out:      r0 = quotient, r1 = remainder
 *   Clobbers: r2, r3, condition flags
 *
 * Calls __aeabi_idiv, then derives the remainder as
 * dividend - quotient * divisor from the operands saved on the stack.
 * ip and lr are saved and restored around the call.
 */
ENTRY(__aeabi_idivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr}	)
	stmfd	sp!, {r0, r1, ip, lr}
	bl	__aeabi_idiv
	ldmfd	sp!, {r1, r2, ip, lr}		@ r1 = dividend, r2 = divisor
	mul	r3, r0, r2
	sub	r1, r1, r3
	ret	lr

UNWIND(.fnend)
ENDPROC(__aeabi_idivmod)

#endif

/*
 * Ldiv0: common divide-by-zero path, reached by branch from the routines
 * above with lr still holding their caller's return address.
 *
 * Saves lr (moving sp down by 8 bytes), calls the external __div0 hook,
 * then returns 0 in r0 to the original caller.
 */
Ldiv0:
UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})
	str	lr, [sp, #-8]!
	bl	__div0
	mov	r0, #0			@ About as wrong as it could be.
	ldr	pc, [sp], #8
UNWIND(.fnend)
ENDPROC(Ldiv0)