/*
 * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
 *
 * Author: Nicolas Pitre <nico@fluxnic.net>
 *   - contributed to gcc-3.4 on Sep 30, 2003
 *   - adapted for the Linux kernel on Oct 2, 2003
 */

/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.

 * SPDX-License-Identifier:	GPL-2.0+
 */


#include <linux/linkage.h>
#include <asm/assembler.h>

/*
 * U-Boot compatibility bit: define an empty UNWIND() macro, since we
 * do not support stack unwinding, and define CONFIG_AEABI to make all
 * of the functions available without diverging from Linux code.
 */
#ifdef __UBOOT__
#define UNWIND(x...)
#define CONFIG_AEABI
#endif

/*
 * ARM_DIV_BODY dividend, divisor, result, curbit
 *
 * Unsigned shift-and-subtract division core.
 *
 *   dividend  in:  numerator
 *             out: whatever is left after the subtractions performed
 *   divisor   in:  denominator; clobbered (shifted left, then back right)
 *   result    out: quotient
 *   curbit    scratch: quotient bit matching the current divisor shift
 *
 * Condition flags are clobbered.  Every expansion in this file is reached
 * only after the caller has branched away for a zero divisor, for
 * dividend <= divisor and for power-of-two divisors.
 */
.macro ARM_DIV_BODY dividend, divisor, result, curbit

#if __LINUX_ARM_ARCH__ >= 5

	@ Line the divisor up with the dividend in one step: shift it left
	@ by the difference of the leading-zero counts and start curbit at
	@ the same bit position.
	clz	\curbit, \divisor
	clz	\result, \dividend
	sub	\result, \curbit, \result
	mov	\curbit, #1
	mov	\divisor, \divisor, lsl \result
	mov	\curbit, \curbit, lsl \result
	mov	\result, #0

#else

	@ Initially shift the divisor left 3 bits if possible,
	@ set curbit accordingly.  This allows for curbit to be located
	@ at the left end of each 4-bit nibble in the division loop
	@ to save one loop in most cases.
	tst	\divisor, #0xe0000000
	moveq	\divisor, \divisor, lsl #3
	moveq	\curbit, #8
	movne	\curbit, #1

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	movlo	\curbit, \curbit, lsl #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	movlo	\curbit, \curbit, lsl #1
	blo	1b

	mov	\result, #0

#endif

	@ Division loop: four compare/subtract steps per iteration, at
	@ divisor, divisor >> 1, divisor >> 2 and divisor >> 3.
1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	orrhs	\result,   \result,   \curbit
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	orrhs	\result,   \result,   \curbit,  lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	orrhs	\result,   \result,   \curbit,  lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	orrhs	\result,   \result,   \curbit,  lsr #3
	cmp	\dividend, #0			@ Early termination?
	movnes	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
	movne	\divisor,  \divisor, lsr #4
	bne	1b

.endm


/*
 * ARM_DIV2_ORDER divisor, order
 *
 * Both expansions in this file are reached only when the divisor is a
 * power of two; for such a divisor this leaves log2(divisor) in order,
 * i.e. the right-shift count equivalent to the division.
 *
 *   divisor   in: power of two; clobbered on the pre-ARMv5 path only
 *   order     out: shift count
 *
 * The pre-ARMv5 path clobbers the condition flags.
 */
.macro ARM_DIV2_ORDER divisor, order

#if __LINUX_ARM_ARCH__ >= 5

	clz	\order, \divisor
	rsb	\order, \order, #31		@ order = 31 - clz(divisor)

#else

	@ Binary search for the set bit: narrow by 16, 8, then 4 bits.
	cmp	\divisor, #(1 << 16)
	movhs	\divisor, \divisor, lsr #16
	movhs	\order, #16
	movlo	\order, #0

	cmp	\divisor, #(1 << 8)
	movhs	\divisor, \divisor, lsr #8
	addhs	\order, \order, #8

	cmp	\divisor, #(1 << 4)
	movhs	\divisor, \divisor, lsr #4
	addhs	\order, \order, #4

	@ divisor is now below 16: above 4 adds 3, otherwise add
	@ divisor >> 1.
	cmp	\divisor, #(1 << 2)
	addhi	\order, \order, #3
	addls	\order, \order, \divisor, lsr #1

#endif

.endm


/*
 * ARM_MOD_BODY dividend, divisor, order, spare
 *
 * Unsigned shift-and-subtract remainder core.
 *
 *   dividend  in: numerator; out: remainder
 *   divisor   in: denominator; clobbered
 *   order     scratch: number of bit positions the divisor was shifted
 *             left, i.e. how many extra compare/subtract steps are needed
 *   spare     scratch: written on the ARMv5+ path only
 *
 * Condition flags are clobbered.  Every expansion in this file is reached
 * only after the caller has handled a zero divisor, dividend <= divisor
 * and power-of-two divisors.
 */
.macro ARM_MOD_BODY dividend, divisor, order, spare

#if __LINUX_ARM_ARCH__ >= 5

	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	mov	\divisor, \divisor, lsl \order

#else

	mov	\order, #0

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	addlo	\order, \order, #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	addlo	\order, \order, #1
	blo	1b

#endif

	@ Perform all needed subtractions to keep only the remainder.
	@ Do comparisons in batch of 4 first.
	subs	\order, \order, #3		@ yes, 3 is intended here
	blt	2f

1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	cmp	\dividend, #1
	mov	\divisor, \divisor, lsr #4
	subges	\order, \order, #4
	bge	1b

	@ Finished if no steps are left over (low two bits of order are
	@ clear) or if the dividend has already dropped to zero.
	tst	\order, #3
	teqne	\dividend, #0
	beq	5f

	@ Either 1, 2 or 3 comparison/subtractions are left.
2:	cmn	\order, #2
	blt	4f
	beq	3f
	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
3:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
4:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
5:
.endm


/*
 * __udivsi3 / __aeabi_uidiv: unsigned 32-bit division.
 *
 *   In:   r0 = dividend, r1 = divisor
 *   Out:  r0 = quotient
 *   Uses: r1, r2, r3 and the condition flags as scratch
 *
 * A zero divisor branches to Ldiv0.
 */
ENTRY(__udivsi3)
ENTRY(__aeabi_uidiv)
UNWIND(.fnstart)

	subs	r2, r1, #1			@ r2 = divisor - 1
	reteq	lr				@ divisor == 1: r0 is the result
	bcc	Ldiv0				@ borrow: divisor == 0
	cmp	r0, r1
	bls	11f				@ dividend <= divisor
	tst	r1, r2
	beq	12f				@ divisor is a power of 2

	ARM_DIV_BODY r0, r1, r2, r3

	mov	r0, r2
	ret	lr

	@ dividend <= divisor: quotient is 1 when equal, 0 otherwise.
11:	moveq	r0, #1
	movne	r0, #0
	ret	lr

	@ Power-of-two divisor: a right shift does the job.
12:	ARM_DIV2_ORDER r1, r2

	mov	r0, r0, lsr r2
	ret	lr

UNWIND(.fnend)
ENDPROC(__udivsi3)
ENDPROC(__aeabi_uidiv)

/*
 * __umodsi3: unsigned 32-bit remainder.
 *
 *   In:   r0 = dividend, r1 = divisor
 *   Out:  r0 = remainder
 *   Uses: r1, r2, r3 and the condition flags as scratch
 *
 * A zero divisor branches to Ldiv0.  The conditional sequence below
 * handles divisor == 1, dividend <= divisor and power-of-two divisors
 * without taking any branch.
 */
ENTRY(__umodsi3)
UNWIND(.fnstart)

	subs	r2, r1, #1			@ compare divisor with 1
	bcc	Ldiv0
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	retls	lr

	ARM_MOD_BODY r0, r1, r2, r3

	ret	lr

UNWIND(.fnend)
ENDPROC(__umodsi3)

/*
 * __divsi3 / __aeabi_idiv: signed 32-bit division.
 *
 *   In:   r0 = dividend, r1 = divisor
 *   Out:  r0 = quotient
 *   Uses: r1, r2, r3, ip and the condition flags as scratch
 *
 * Both operands are made non-negative, the unsigned core is used, and the
 * quotient is negated when the operand signs differed (ip = r0 ^ r1).
 * A zero divisor branches to Ldiv0.
 */
ENTRY(__divsi3)
ENTRY(__aeabi_idiv)
UNWIND(.fnstart)

	cmp	r1, #0
	eor	ip, r0, r1			@ save the sign of the result.
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	subs	r2, r1, #1			@ division by 1 or -1 ?
	beq	10f
	movs	r3, r0
	rsbmi	r3, r0, #0			@ positive dividend value
	cmp	r3, r1
	bls	11f
	tst	r1, r2				@ divisor is power of 2 ?
	beq	12f

	ARM_DIV_BODY r3, r1, r0, r2

	cmp	ip, #0
	rsbmi	r0, r0, #0
	ret	lr

	@ Divisor was 1 or -1.  ip ^ r0 gives back the original divisor,
	@ so MI here means the divisor was negative: negate the dividend.
10:	teq	ip, r0				@ same sign ?
	rsbmi	r0, r0, #0
	ret	lr

	@ Magnitude of dividend <= magnitude of divisor: the quotient is 0
	@ when lower, otherwise +1 or -1 according to the sign kept in ip.
11:	movlo	r0, #0
	moveq	r0, ip, asr #31
	orreq	r0, r0, #1
	ret	lr

	@ Power-of-two divisor: shift the magnitude, then apply the sign.
12:	ARM_DIV2_ORDER r1, r2

	cmp	ip, #0
	mov	r0, r3, lsr r2
	rsbmi	r0, r0, #0
	ret	lr

UNWIND(.fnend)
ENDPROC(__divsi3)
ENDPROC(__aeabi_idiv)

/*
 * __modsi3: signed 32-bit remainder.
 *
 *   In:   r0 = dividend, r1 = divisor
 *   Out:  r0 = remainder, negated at the end when the dividend was
 *         negative
 *   Uses: r1, r2, r3, ip and the condition flags as scratch
 *
 * A zero divisor branches to Ldiv0.
 */
ENTRY(__modsi3)
UNWIND(.fnstart)

	cmp	r1, #0
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	movs	ip, r0				@ preserve sign of dividend
	rsbmi	r0, r0, #0			@ if negative make positive
	subs	r2, r1, #1			@ compare divisor with 1
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	bls	10f

	ARM_MOD_BODY r0, r1, r2, r3

10:	cmp	ip, #0
	rsbmi	r0, r0, #0
	ret	lr

UNWIND(.fnend)
ENDPROC(__modsi3)

#ifdef CONFIG_AEABI

/*
 * __aeabi_uidivmod: unsigned 32-bit division with remainder.
 *
 *   In:   r0 = dividend, r1 = divisor
 *   Out:  r0 = quotient, r1 = remainder
 *   Uses: r2, r3 as scratch
 *
 * The operands are saved on the stack around the call to __aeabi_uidiv
 * and reloaded as r1 = dividend, r2 = divisor, so that the remainder can
 * be formed as dividend - quotient * divisor.
 */
ENTRY(__aeabi_uidivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr}	)

	stmfd	sp!, {r0, r1, ip, lr}
	bl	__aeabi_uidiv
	ldmfd	sp!, {r1, r2, ip, lr}
	mul	r3, r0, r2
	sub	r1, r1, r3
	ret	lr

UNWIND(.fnend)
ENDPROC(__aeabi_uidivmod)

/*
 * __aeabi_idivmod: signed 32-bit division with remainder.
 *
 *   In:   r0 = dividend, r1 = divisor
 *   Out:  r0 = quotient, r1 = remainder
 *   Uses: r2, r3 as scratch
 *
 * Same scheme as __aeabi_uidivmod, built on __aeabi_idiv.
 */
ENTRY(__aeabi_idivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr}	)
	stmfd	sp!, {r0, r1, ip, lr}
	bl	__aeabi_idiv
	ldmfd	sp!, {r1, r2, ip, lr}
	mul	r3, r0, r2
	sub	r1, r1, r3
	ret	lr

UNWIND(.fnend)
ENDPROC(__aeabi_idivmod)

#endif

/*
 * Ldiv0: common division-by-zero path.
 *
 * Reached by a plain branch (not a call) from the routines above, so lr
 * still holds the return address of whoever called the division routine.
 * Saves lr, calls __div0, then returns 0 in r0 to that caller.
 * The 8-byte stack adjustment for a 4-byte save matches the .pad #4
 * unwind annotation; presumably it keeps sp 8-byte aligned across the
 * call (TODO confirm against the ABI in use).
 */
Ldiv0:
UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})
	str	lr, [sp, #-8]!
	bl	__div0
	mov	r0, #0			@ About as wrong as it could be.
	ldr	pc, [sp], #8
UNWIND(.fnend)
ENDPROC(Ldiv0)