/*
 * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
 *
 * Author: Nicolas Pitre <nico@fluxnic.net>
 *   - contributed to gcc-3.4 on Sep 30, 2003
 *   - adapted for the Linux kernel on Oct 2, 2003
 */

/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.

 * SPDX-License-Identifier: GPL-2.0+
 */


#include <linux/linkage.h>
#include <asm/assembler.h>

/*
 * U-Boot compatibility bit: define an empty UNWIND() macro, since we
 * do not support stack unwinding, and define CONFIG_AEABI to make all
 * of the functions available without diverging from the Linux code.
 */
#ifdef __UBOOT__
#define UNWIND(x...)
#define CONFIG_AEABI
#endif

/*
 * Layout note: every routine below lives in its own ".text.<symbol>"
 * section.  The .pushsection directive must come BEFORE the ENTRY()/label
 * that names the routine and .popsection AFTER the matching ENDPROC(), so
 * that the symbol, its code and its size are all emitted in the same
 * section.
 */

/*
 * ARM_DIV_BODY - unsigned shift-and-subtract division core.
 *
 *   \dividend  in: value to divide; reduced in place by every multiple of
 *              the divisor that is subtracted
 *   \divisor   in: value to divide by; shifted in place (clobbered)
 *   \result    out: quotient
 *   \curbit    scratch: quotient bit matching the current divisor shift
 *
 * Clobbers the condition flags.  The callers in this file only reach this
 * macro when the divisor is >= 2 and the dividend is strictly greater than
 * the divisor (unsigned).
 */
.macro ARM_DIV_BODY dividend, divisor, result, curbit

#if __LINUX_ARM_ARCH__ >= 5

	@ Shift the divisor (and the matching quotient bit) left by the
	@ difference in leading zero counts of divisor and dividend.
	clz	\curbit, \divisor
	clz	\result, \dividend
	sub	\result, \curbit, \result
	mov	\curbit, #1
	mov	\divisor, \divisor, lsl \result
	mov	\curbit, \curbit, lsl \result
	mov	\result, #0

#else

	@ Initially shift the divisor left 3 bits if possible,
	@ set curbit accordingly.  This allows for curbit to be located
	@ at the left end of each 4 bit nibbles in the division loop
	@ to save one loop in most cases.
	tst	\divisor, #0xe0000000
	moveq	\divisor, \divisor, lsl #3
	moveq	\curbit, #8
	movne	\curbit, #1

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	movlo	\curbit, \curbit, lsl #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	movlo	\curbit, \curbit, lsl #1
	blo	1b

	mov	\result, #0

#endif

	@ Division loop: four compare/subtract steps per iteration.
1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	orrhs	\result,   \result,   \curbit
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	orrhs	\result,   \result,   \curbit,  lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	orrhs	\result,   \result,   \curbit,  lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	orrhs	\result,   \result,   \curbit,  lsr #3
	cmp	\dividend, #0			@ Early termination?
	movsne	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
	movne	\divisor,  \divisor, lsr #4
	bne	1b

.endm


/*
 * ARM_DIV2_ORDER - compute the bit index of the highest set bit of
 * \divisor into \order.
 *
 * The callers in this file invoke it only after testing that the divisor
 * is a power of two, so \order is then the shift count that replaces the
 * division.  The pre-ARMv5 path modifies \divisor; both paths leave the
 * result in \order.  The pre-ARMv5 path clobbers the condition flags.
 */
.macro ARM_DIV2_ORDER divisor, order

#if __LINUX_ARM_ARCH__ >= 5

	clz	\order, \divisor
	rsb	\order, \order, #31

#else

	@ Binary search for the set bit: 16, 8, 4, then the last nibble.
	cmp	\divisor, #(1 << 16)
	movhs	\divisor, \divisor, lsr #16
	movhs	\order, #16
	movlo	\order, #0

	cmp	\divisor, #(1 << 8)
	movhs	\divisor, \divisor, lsr #8
	addhs	\order, \order, #8

	cmp	\divisor, #(1 << 4)
	movhs	\divisor, \divisor, lsr #4
	addhs	\order, \order, #4

	cmp	\divisor, #(1 << 2)
	addhi	\order, \order, #3
	addls	\order, \order, \divisor, lsr #1

#endif

.endm


/*
 * ARM_MOD_BODY - unsigned shift-and-subtract remainder core.
 *
 *   \dividend  in: value to divide; out: remainder
 *   \divisor   in: value to divide by; shifted in place (clobbered)
 *   \order     scratch: number of bit positions the divisor was shifted
 *   \spare     scratch (only written on the ARMv5+ path)
 *
 * Clobbers the condition flags.  The callers in this file only reach this
 * macro when the divisor is >= 2 and the dividend is strictly greater than
 * the divisor (unsigned).
 */
.macro ARM_MOD_BODY dividend, divisor, order, spare

#if __LINUX_ARM_ARCH__ >= 5

	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	mov	\divisor, \divisor, lsl \order

#else

	mov	\order, #0

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	addlo	\order, \order, #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	addlo	\order, \order, #1
	blo	1b

#endif

	@ Perform all needed subtractions to keep only the remainder.
	@ Do comparisons in batch of 4 first.
	subs	\order, \order, #3		@ yes, 3 is intended here
	blt	2f

1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	cmp	\dividend, #1
	mov	\divisor, \divisor, lsr #4
	subsge	\order, \order, #4
	bge	1b

	tst	\order, #3
	teqne	\dividend, #0
	beq	5f

	@ Either 1, 2 or 3 comparison/subtractions are left.
2:	cmn	\order, #2
	blt	4f
	beq	3f
	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
3:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
4:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
5:
.endm


/*
 * __udivsi3 / __aeabi_uidiv - unsigned 32-bit division.
 *
 *   In:  r0 = dividend, r1 = divisor
 *   Out: r0 = quotient
 *   Uses r2, r3 and the condition flags as scratch; r1 is clobbered.
 *   A zero divisor branches to Ldiv0.
 */
.pushsection .text.__udivsi3, "ax"
ENTRY(__udivsi3)
ENTRY(__aeabi_uidiv)
UNWIND(.fnstart)

	subs	r2, r1, #1			@ r2 = divisor - 1
	reteq	lr				@ divisor == 1: r0 already holds the result
	bcc	Ldiv0				@ divisor == 0
	cmp	r0, r1
	bls	11f				@ dividend <= divisor
	tst	r1, r2
	beq	12f				@ divisor is a power of 2

	ARM_DIV_BODY r0, r1, r2, r3

	mov	r0, r2
	ret	lr

	@ dividend <= divisor: quotient is 1 if equal, 0 otherwise.
11:	moveq	r0, #1
	movne	r0, #0
	ret	lr

	@ Power-of-two divisor: divide by shifting.
12:	ARM_DIV2_ORDER r1, r2

	mov	r0, r0, lsr r2
	ret	lr

UNWIND(.fnend)
ENDPROC(__udivsi3)
ENDPROC(__aeabi_uidiv)
.popsection

/*
 * __umodsi3 - unsigned 32-bit remainder.
 *
 *   In:  r0 = dividend, r1 = divisor
 *   Out: r0 = remainder
 *   Uses r2, r3 and the condition flags as scratch; r1 is clobbered.
 *   A zero divisor branches to Ldiv0.
 */
.pushsection .text.__umodsi3, "ax"
ENTRY(__umodsi3)
UNWIND(.fnstart)

	subs	r2, r1, #1			@ compare divisor with 1
	bcc	Ldiv0
	cmpne	r0, r1				@ compare dividend with divisor
	moveq   r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	retls	lr

	ARM_MOD_BODY r0, r1, r2, r3

	ret	lr

UNWIND(.fnend)
ENDPROC(__umodsi3)
.popsection

/*
 * __divsi3 / __aeabi_idiv - signed 32-bit division.
 *
 *   In:  r0 = dividend, r1 = divisor
 *   Out: r0 = quotient
 *   Uses r2, r3, ip and the condition flags as scratch; r1 is clobbered.
 *   ip holds dividend ^ divisor, whose sign bit is the sign of the result.
 *   A zero divisor branches to Ldiv0.
 */
.pushsection .text.__divsi3, "ax"
ENTRY(__divsi3)
ENTRY(__aeabi_idiv)
UNWIND(.fnstart)

	cmp	r1, #0
	eor	ip, r0, r1			@ save the sign of the result.
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	subs	r2, r1, #1			@ division by 1 or -1 ?
	beq	10f
	movs	r3, r0
	rsbmi	r3, r0, #0			@ positive dividend value
	cmp	r3, r1
	bls	11f
	tst	r1, r2				@ divisor is power of 2 ?
	beq	12f

	ARM_DIV_BODY r3, r1, r0, r2

	cmp	ip, #0
	rsbmi	r0, r0, #0
	ret	lr

	@ |divisor| == 1: result is the dividend, negated if the signs differ.
10:	teq	ip, r0				@ same sign ?
	rsbmi	r0, r0, #0
	ret	lr

	@ |dividend| <= |divisor|: result is 0, or +/-1 when they are equal.
11:	movlo	r0, #0
	moveq	r0, ip, asr #31
	orreq	r0, r0, #1
	ret	lr

	@ Power-of-two divisor: shift the magnitude, then apply the sign.
12:	ARM_DIV2_ORDER r1, r2

	cmp	ip, #0
	mov	r0, r3, lsr r2
	rsbmi	r0, r0, #0
	ret	lr

UNWIND(.fnend)
ENDPROC(__divsi3)
ENDPROC(__aeabi_idiv)
.popsection

/*
 * __modsi3 - signed 32-bit remainder.
 *
 *   In:  r0 = dividend, r1 = divisor
 *   Out: r0 = remainder, carrying the sign of the dividend
 *   Uses r2, r3, ip and the condition flags as scratch; r1 is clobbered.
 *   A zero divisor branches to Ldiv0.
 */
.pushsection .text.__modsi3, "ax"
ENTRY(__modsi3)
UNWIND(.fnstart)

	cmp	r1, #0
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	movs	ip, r0				@ preserve sign of dividend
	rsbmi	r0, r0, #0			@ if negative make positive
	subs	r2, r1, #1			@ compare divisor with 1
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	bls	10f

	ARM_MOD_BODY r0, r1, r2, r3

10:	cmp	ip, #0
	rsbmi	r0, r0, #0
	ret	lr

UNWIND(.fnend)
ENDPROC(__modsi3)
.popsection

#ifdef CONFIG_AEABI

/*
 * __aeabi_uidivmod - unsigned division returning quotient and remainder.
 *
 *   In:  r0 = dividend, r1 = divisor
 *   Out: r0 = quotient (from __aeabi_uidiv),
 *        r1 = dividend - quotient * divisor
 *   r2 and r3 are used as scratch; ip and lr are saved and restored
 *   around the call.
 */
.pushsection .text.__aeabi_uidivmod, "ax"
ENTRY(__aeabi_uidivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr}	)

	stmfd	sp!, {r0, r1, ip, lr}
	bl	__aeabi_uidiv
	ldmfd	sp!, {r1, r2, ip, lr}		@ r1 = dividend, r2 = divisor
	mul	r3, r0, r2
	sub	r1, r1, r3
	ret	lr

UNWIND(.fnend)
ENDPROC(__aeabi_uidivmod)
.popsection

/*
 * __aeabi_idivmod - signed division returning quotient and remainder.
 *
 *   In:  r0 = dividend, r1 = divisor
 *   Out: r0 = quotient (from __aeabi_idiv),
 *        r1 = dividend - quotient * divisor
 *   r2 and r3 are used as scratch; ip and lr are saved and restored
 *   around the call.
 */
.pushsection .text.__aeabi_idivmod, "ax"
ENTRY(__aeabi_idivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr}	)

	stmfd	sp!, {r0, r1, ip, lr}
	bl	__aeabi_idiv
	ldmfd	sp!, {r1, r2, ip, lr}		@ r1 = dividend, r2 = divisor
	mul	r3, r0, r2
	sub	r1, r1, r3
	ret	lr

UNWIND(.fnend)
ENDPROC(__aeabi_idivmod)
.popsection

#endif

/*
 * Ldiv0 - common division-by-zero path for the routines above.
 *
 * Saves lr on the stack (moving sp by 8 bytes), calls __div0, then
 * returns 0 in r0 to the original caller.
 */
.pushsection .text.Ldiv0, "ax"
Ldiv0:
UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})

	str	lr, [sp, #-8]!
	bl	__div0
	mov	r0, #0			@ About as wrong as it could be.
	ldr	pc, [sp], #8

UNWIND(.fnend)
ENDPROC(Ldiv0)
.popsection

/* Thumb-1 specialities */
#if defined(CONFIG_SYS_THUMB_BUILD) && !defined(CONFIG_HAS_THUMB2)

/*
 * __gnu_thumb1_case_sqi - switch-table dispatch helper, signed byte entries.
 *
 * Clears bit 0 of lr to get the table address, loads the signed byte at
 * index r0, doubles it and adds it to lr, then returns through the
 * adjusted lr.  r1 is preserved.
 */
.pushsection .text.__gnu_thumb1_case_sqi, "ax"
ENTRY(__gnu_thumb1_case_sqi)
	push	{r1}
	mov	r1, lr
	lsrs	r1, r1, #1
	lsls	r1, r1, #1
	ldrsb	r1, [r1, r0]
	lsls	r1, r1, #1
	add	lr, lr, r1
	pop	{r1}
	bx	lr
ENDPROC(__gnu_thumb1_case_sqi)
.popsection

/*
 * __gnu_thumb1_case_uqi - as __gnu_thumb1_case_sqi, but the table entries
 * are loaded as unsigned bytes.
 */
.pushsection .text.__gnu_thumb1_case_uqi, "ax"
ENTRY(__gnu_thumb1_case_uqi)
	push	{r1}
	mov	r1, lr
	lsrs	r1, r1, #1
	lsls	r1, r1, #1
	ldrb	r1, [r1, r0]
	lsls	r1, r1, #1
	add	lr, lr, r1
	pop	{r1}
	bx	lr
ENDPROC(__gnu_thumb1_case_uqi)
.popsection

/*
 * __gnu_thumb1_case_shi - switch-table dispatch helper, signed halfword
 * entries.
 *
 * Clears bit 0 of lr to get the table address, loads the signed halfword
 * at byte offset r0 * 2, doubles it and adds it to lr, then returns
 * through the adjusted lr.  r0 and r1 are preserved.
 */
.pushsection .text.__gnu_thumb1_case_shi, "ax"
ENTRY(__gnu_thumb1_case_shi)
	push	{r0, r1}
	mov	r1, lr
	lsrs	r1, r1, #1
	lsls	r0, r0, #1
	lsls	r1, r1, #1
	ldrsh	r1, [r1, r0]
	lsls	r1, r1, #1
	add	lr, lr, r1
	pop	{r0, r1}
	bx	lr
ENDPROC(__gnu_thumb1_case_shi)
.popsection

/*
 * __gnu_thumb1_case_uhi - as __gnu_thumb1_case_shi, but the table entries
 * are loaded as unsigned halfwords.
 */
.pushsection .text.__gnu_thumb1_case_uhi, "ax"
ENTRY(__gnu_thumb1_case_uhi)
	push	{r0, r1}
	mov	r1, lr
	lsrs	r1, r1, #1
	lsls	r0, r0, #1
	lsls	r1, r1, #1
	ldrh	r1, [r1, r0]
	lsls	r1, r1, #1
	add	lr, lr, r1
	pop	{r0, r1}
	bx	lr
ENDPROC(__gnu_thumb1_case_uhi)
.popsection
#endif