/*
 * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
 *
 * Author: Nicolas Pitre <nico@fluxnic.net>
 *   - contributed to gcc-3.4 on Sep 30, 2003
 *   - adapted for the Linux kernel on Oct 2, 2003
 */

/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combine
executable.)

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */


#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/unwind.h>

/*
 * ARM_DIV_BODY dividend, divisor, result, curbit
 *
 * Unsigned shift-and-subtract division core.
 *
 *   dividend  in:  value to divide         out: clobbered (see below)
 *   divisor   in:  value to divide by      out: clobbered
 *   result    out: quotient
 *   curbit    scratch: the quotient bit currently being tried
 *
 * The condition flags are clobbered.
 *
 * Both expansion sites in this file reach the macro only after branching
 * away for divisor == 0, divisor == 1, dividend <= divisor and power-of-two
 * divisors.  With dividend > divisor the clz difference used as the shift
 * count on the ARMv5+ path is never negative.
 *
 * The dividend register is NOT a usable remainder on exit: each pass of the
 * loop runs all four unrolled steps, and a step whose quotient bit
 * (curbit lsr #k) has already shifted out to zero still subtracts
 * divisor lsr #k when it fits, without recording anything in result.
 */
.macro ARM_DIV_BODY dividend, divisor, result, curbit

#if __LINUX_ARM_ARCH__ >= 5

	@ Line the top set bit of the divisor up with the top set bit of
	@ the dividend and start curbit at the matching quotient bit.
	clz	\curbit, \divisor
	clz	\result, \dividend
	sub	\result, \curbit, \result	@ shift count
	mov	\curbit, #1
	mov	\divisor, \divisor, lsl \result
	mov	\curbit, \curbit, lsl \result
	mov	\result, #0

#else

	@ Initially shift the divisor left 3 bits if possible,
	@ set curbit accordingly.  This allows for curbit to be located
	@ at the left end of each 4 bit nibbles in the division loop
	@ to save one loop in most cases.
	tst	\divisor, #0xe0000000
	moveq	\divisor, \divisor, lsl #3
	moveq	\curbit, #8
	movne	\curbit, #1

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	movlo	\curbit, \curbit, lsl #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	movlo	\curbit, \curbit, lsl #1
	blo	1b

	mov	\result, #0

#endif

	@ Division loop: four compare/subtract steps per pass, each one
	@ testing the divisor one bit position lower than the previous.
1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	orrhs	\result, \result, \curbit
	cmp	\dividend, \divisor, lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	orrhs	\result, \result, \curbit, lsr #1
	cmp	\dividend, \divisor, lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	orrhs	\result, \result, \curbit, lsr #2
	cmp	\dividend, \divisor, lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	orrhs	\result, \result, \curbit, lsr #3
	cmp	\dividend, #0			@ Early termination?
	movsne	\curbit, \curbit, lsr #4	@ No, any more bits to do?
	movne	\divisor, \divisor, lsr #4
	bne	1b

.endm


/*
 * ARM_DIV2_ORDER divisor, order
 *
 * Sets order to the bit index of a power-of-two divisor, i.e. the right
 * shift that divides by it.
 *
 *   divisor   in:  power of two            out: preserved on ARMv5+,
 *                                               clobbered on older cores
 *   order     out: log2(divisor)
 *
 * The condition flags are clobbered on the pre-ARMv5 path.
 *
 * Every use in this file is guarded by a (divisor & (divisor - 1)) == 0
 * test.  The pre-ARMv5 tail depends on that: it is not a general
 * highest-set-bit computation (a residual value of 5, for example, would
 * add 3 rather than 2).
 */
.macro ARM_DIV2_ORDER divisor, order

#if __LINUX_ARM_ARCH__ >= 5

	clz	\order, \divisor
	rsb	\order, \order, #31		@ order = 31 - clz(divisor)

#else

	@ Binary search: strip 16, 8 and then 4 low bit positions while
	@ accumulating the amount stripped in order.
	cmp	\divisor, #(1 << 16)
	movhs	\divisor, \divisor, lsr #16
	movhs	\order, #16
	movlo	\order, #0

	cmp	\divisor, #(1 << 8)
	movhs	\divisor, \divisor, lsr #8
	addhs	\order, \order, #8

	cmp	\divisor, #(1 << 4)
	movhs	\divisor, \divisor, lsr #4
	addhs	\order, \order, #4

	@ For a power of two, divisor is now 1, 2, 4 or 8.
	cmp	\divisor, #(1 << 2)
	addhi	\order, \order, #3		@ 8 adds 3
	addls	\order, \order, \divisor, lsr #1	@ 1, 2, 4 add 0, 1, 2

#endif

.endm


/*
 * ARM_MOD_BODY dividend, divisor, order, spare
 *
 * Unsigned shift-and-subtract remainder core.
 *
 *   dividend  in:  value to divide         out: remainder
 *   divisor   in:  value to divide by      out: clobbered
 *   order     scratch: number of bit positions the divisor was shifted
 *             left, then a countdown of the compare/subtract steps left
 *   spare     scratch, written only on the ARMv5+ path
 *
 * The condition flags are clobbered.
 *
 * Both expansion sites in this file reach the macro only when
 * dividend > divisor, divisor is non-zero and divisor is not a power of
 * two (which also excludes 1).
 */
.macro ARM_MOD_BODY dividend, divisor, order, spare

#if __LINUX_ARM_ARCH__ >= 5

	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare		@ shift count
	mov	\divisor, \divisor, lsl \order

#else

	mov	\order, #0

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	addlo	\order, \order, #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	addlo	\order, \order, #1
	blo	1b

#endif

	@ Perform all needed subtractions to keep only the remainder.
	@ Do comparisons in batch of 4 first.
	@ order + 1 steps are needed in total, so a full batch of four is
	@ possible only while order - 3 is not negative.
	subs	\order, \order, #3		@ yes, 3 is intended here
	blt	2f

1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	cmp	\dividend, \divisor, lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	cmp	\dividend, \divisor, lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	cmp	\dividend, \divisor, lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	cmp	\dividend, #1			@ dividend == 0 gives lt: leave the loop
	mov	\divisor, \divisor, lsr #4
	subsge	\order, \order, #4		@ otherwise count off this batch
	bge	1b

	@ Finished if no partial batch is left (low two bits of order
	@ are clear) or if the remainder has already reached zero.
	tst	\order, #3
	teqne	\dividend, #0
	beq	5f

	@ Either 1, 2 or 3 comparison/subtractions are left.
	@ order is -3, -2 or -1 here for 1, 2 or 3 steps respectively.
2:	cmn	\order, #2
	blt	4f
	beq	3f
	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor, \divisor, lsr #1
3:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor, \divisor, lsr #1
4:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
5:
.endm


/*
 * NOTE(review): 8-byte alignment of the entry point is requested only when
 * CONFIG_ARM_PATCH_IDIV is set; presumably code outside this file relies
 * on it when patching the routine -- confirm there.
 */
#ifdef CONFIG_ARM_PATCH_IDIV
	.align	3
#endif

/*
 * __udivsi3 / __aeabi_uidiv: unsigned 32-bit division.
 *
 *   In:   r0 = dividend, r1 = divisor
 *   Out:  r0 = quotient
 *   Uses: r1, r2, r3 and the condition flags
 *
 * A zero divisor branches to Ldiv0.
 */
ENTRY(__udivsi3)
ENTRY(__aeabi_uidiv)
UNWIND(.fnstart)

	subs	r2, r1, #1			@ r2 = divisor - 1
	reteq	lr				@ divisor == 1: r0 is the answer
	bcc	Ldiv0				@ borrow: divisor was 0
	cmp	r0, r1
	bls	11f				@ dividend <= divisor
	tst	r1, r2
	beq	12f				@ divisor is a power of 2

	ARM_DIV_BODY r0, r1, r2, r3

	mov	r0, r2
	ret	lr

	@ dividend <= divisor: quotient is 1 when equal, 0 otherwise
	@ (flags still come from the cmp above).
11:	moveq	r0, #1
	movne	r0, #0
	ret	lr

	@ Power-of-two divisor: the quotient is a plain right shift.
12:	ARM_DIV2_ORDER r1, r2

	mov	r0, r0, lsr r2
	ret	lr

UNWIND(.fnend)
ENDPROC(__udivsi3)
ENDPROC(__aeabi_uidiv)

/*
 * __umodsi3: unsigned 32-bit remainder.
 *
 *   In:   r0 = dividend, r1 = divisor
 *   Out:  r0 = remainder
 *   Uses: r1, r2, r3 and the condition flags
 *
 * A zero divisor branches to Ldiv0.  The conditional sequence below
 * settles every easy case and returns on "ls":
 *   - divisor == 1 or dividend == divisor:  r0 = 0
 *   - dividend < divisor:                   r0 unchanged
 *   - divisor is a power of two:            r0 &= divisor - 1
 */
ENTRY(__umodsi3)
UNWIND(.fnstart)

	subs	r2, r1, #1			@ compare divisor with 1
	bcc	Ldiv0
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	retls	lr

	ARM_MOD_BODY r0, r1, r2, r3

	ret	lr

UNWIND(.fnend)
ENDPROC(__umodsi3)

/*
 * NOTE(review): 8-byte alignment of the entry point is requested only when
 * CONFIG_ARM_PATCH_IDIV is set; presumably code outside this file relies
 * on it when patching the routine -- confirm there.
 */
#ifdef CONFIG_ARM_PATCH_IDIV
	.align	3
#endif

/*
 * __divsi3 / __aeabi_idiv: signed 32-bit division.
 *
 *   In:   r0 = dividend, r1 = divisor
 *   Out:  r0 = quotient
 *   Uses: r1, r2, r3, ip and the condition flags
 *
 * The sign of the result is kept in bit 31 of ip (dividend EOR divisor);
 * the magnitudes are divided as unsigned values and the quotient is
 * negated at the end when ip is negative.  A zero divisor branches to
 * Ldiv0.
 */
ENTRY(__divsi3)
ENTRY(__aeabi_idiv)
UNWIND(.fnstart)

	cmp	r1, #0
	eor	ip, r0, r1			@ save the sign of the result.
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	subs	r2, r1, #1			@ division by 1 or -1 ?
	beq	10f
	movs	r3, r0
	rsbmi	r3, r0, #0			@ positive dividend value
	cmp	r3, r1
	bls	11f
	tst	r1, r2				@ divisor is power of 2 ?
	beq	12f

	ARM_DIV_BODY r3, r1, r0, r2

	cmp	ip, #0
	rsbmi	r0, r0, #0
	ret	lr

	@ Division by 1 or -1.  ip EOR r0 is the original divisor, so the
	@ N flag is set exactly when the divisor was -1.
10:	teq	ip, r0				@ same sign ?
	rsbmi	r0, r0, #0
	ret	lr

	@ Magnitude of dividend <= magnitude of divisor (flags from the
	@ cmp above): 0 when smaller, otherwise +1 or -1 by result sign.
11:	movlo	r0, #0
	moveq	r0, ip, asr #31			@ 0 or -1 from the result sign
	orreq	r0, r0, #1			@ ... turned into +1 or -1
	ret	lr

	@ Power-of-two divisor: shift the magnitude, then apply the sign.
12:	ARM_DIV2_ORDER r1, r2

	cmp	ip, #0
	mov	r0, r3, lsr r2
	rsbmi	r0, r0, #0
	ret	lr

UNWIND(.fnend)
ENDPROC(__divsi3)
ENDPROC(__aeabi_idiv)

/*
 * __modsi3: signed 32-bit remainder.
 *
 *   In:   r0 = dividend, r1 = divisor
 *   Out:  r0 = remainder, carrying the sign of the dividend
 *   Uses: r1, r2, r3, ip and the condition flags
 *
 * The original dividend is kept in ip; the magnitudes are reduced as
 * unsigned values and the remainder is negated at the end when ip is
 * negative.  A zero divisor branches to Ldiv0.
 */
ENTRY(__modsi3)
UNWIND(.fnstart)

	cmp	r1, #0
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	movs	ip, r0				@ preserve sign of dividend
	rsbmi	r0, r0, #0			@ if negative make positive
	subs	r2, r1, #1			@ compare divisor with 1
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	bls	10f				@ easy case already settled

	ARM_MOD_BODY r0, r1, r2, r3

10:	cmp	ip, #0
	rsbmi	r0, r0, #0
	ret	lr

UNWIND(.fnend)
ENDPROC(__modsi3)

#ifdef CONFIG_AEABI

/*
 * __aeabi_uidivmod: unsigned 32-bit division with remainder.
 *
 *   In:   r0 = dividend, r1 = divisor
 *   Out:  r0 = quotient, r1 = remainder
 *   Uses: r2, r3 and whatever __aeabi_uidiv uses
 *
 * The remainder is rebuilt as dividend - quotient * divisor from the
 * operands saved on the stack around the call.  ip is saved and restored
 * together with them.
 */
ENTRY(__aeabi_uidivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr}	)

	stmfd	sp!, {r0, r1, ip, lr}
	bl	__aeabi_uidiv
	ldmfd	sp!, {r1, r2, ip, lr}		@ r1 = dividend, r2 = divisor
	mul	r3, r0, r2			@ r3 = quotient * divisor
	sub	r1, r1, r3			@ r1 = remainder
	ret	lr

UNWIND(.fnend)
ENDPROC(__aeabi_uidivmod)

/*
 * __aeabi_idivmod: signed 32-bit division with remainder.
 *
 *   In:   r0 = dividend, r1 = divisor
 *   Out:  r0 = quotient, r1 = remainder
 *   Uses: r2, r3 and whatever __aeabi_idiv uses
 *
 * The remainder is rebuilt as dividend - quotient * divisor from the
 * operands saved on the stack around the call.  ip is saved and restored
 * together with them.
 */
ENTRY(__aeabi_idivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr}	)
	stmfd	sp!, {r0, r1, ip, lr}
	bl	__aeabi_idiv
	ldmfd	sp!, {r1, r2, ip, lr}		@ r1 = dividend, r2 = divisor
	mul	r3, r0, r2			@ r3 = quotient * divisor
	sub	r1, r1, r3			@ r1 = remainder
	ret	lr

UNWIND(.fnend)
ENDPROC(__aeabi_idivmod)

#endif

/*
 * Ldiv0: common division-by-zero tail, reached by a plain branch from the
 * routines above, so lr still holds the return address of their caller.
 *
 * Calls __div0 (defined outside this file) and then returns 0 in r0 to
 * that caller.  The 8-byte stack adjustment holds lr plus 4 bytes of
 * padding, as described by the .pad / .save unwind annotations.
 */
Ldiv0:
UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})
	str	lr, [sp, #-8]!
	bl	__div0
	mov	r0, #0			@ About as wrong as it could be.
	ldr	pc, [sp], #8		@ pop the saved lr straight into pc
UNWIND(.fnend)
ENDPROC(Ldiv0)