/*
 * Copyright 2010, Google Inc.
 *
 * Brought in from coreboot uldivmod.S
 *
 * SPDX-License-Identifier: GPL-2.0
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

/*
 * __aeabi_uldivmod - unsigned 64-bit division with remainder
 *
 * A, Q = r0 + (r1 << 32)
 * B, R = r2 + (r3 << 32)
 * A / B = Q ... R
 *
 * In:      r0:r1 = A (dividend), r2:r3 = B (divisor)
 * Out:     r0:r1 = Q (quotient), r2:r3 = R (remainder)
 * Scratch: ip and the condition flags.  r4-r7 (and TMP in Thumb builds)
 *          are pushed on entry and popped on every return path.
 *
 * Dispatch:
 *   B == 0               -> L_div_by_0  (calls __div0, then returns Q = R = 0)
 *   B is a power of two  -> L_pow2      (mask for R, shift for Q)
 *   A_1 == B_1 == 0      -> L_div_32_32 (defers to __aeabi_uidivmod)
 *   otherwise            -> L_div_64_64 (shift-and-subtract long division)
 */

A_0	.req	r0
A_1	.req	r1
B_0	.req	r2
B_1	.req	r3
C_0	.req	r4
C_1	.req	r5
D_0	.req	r6
D_1	.req	r7

Q_0	.req	r0
Q_1	.req	r1
R_0	.req	r2
R_1	.req	r3

THUMB(
TMP	.req	r8
)

.pushsection .text.__aeabi_uldivmod, "ax"
ENTRY(__aeabi_uldivmod)

	@ NOTE(review): without the Thumb-only TMP slot this pushes five
	@ words; confirm the bl targets below tolerate an sp that is not
	@ 8-byte aligned.
	stmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) lr}
	@ Test if B == 0
	orrs	ip, B_0, B_1		@ Z set -> B == 0
	beq	L_div_by_0
	@ Test if B is power of 2: (B & (B - 1)) == 0
	@ C = B - 1 is kept: L_pow2 reuses it as the remainder mask.
	subs	C_0, B_0, #1
	sbc	C_1, B_1, #0
	tst	C_0, B_0
	tsteq	B_1, C_1
	beq	L_pow2
	@ Test if A_1 == B_1 == 0
	orrs	ip, A_1, B_1
	beq	L_div_32_32

L_div_64_64:
/* CLZ only exists in ARM architecture version 5 and above. */
#ifdef HAVE_CLZ
	mov	C_0, #1
	mov	C_1, #0
	@ D_0 = clz A
	teq	A_1, #0
	clz	D_0, A_1
	clzeq	ip, A_0
	addeq	D_0, D_0, ip
	@ D_1 = clz B
	teq	B_1, #0
	clz	D_1, B_1
	clzeq	ip, B_0
	addeq	D_1, D_1, ip
	@ if clz B - clz A > 0
	subs	D_0, D_1, D_0
	bls	L_done_shift
	@ B <<= (clz B - clz A)
	@ D_1 = shift - 32 (MI: shift < 32, PL: shift >= 32), ip = 32 - shift
	subs	D_1, D_0, #32
	rsb	ip, D_0, #32
	movmi	B_1, B_1, lsl D_0
ARM(	orrmi	B_1, B_1, B_0, lsr ip	)
THUMB(	lsrmi	TMP, B_0, ip		)
THUMB(	orrmi	B_1, B_1, TMP		)
	movpl	B_1, B_0, lsl D_1
	mov	B_0, B_0, lsl D_0
	@ C = 1 << (clz B - clz A)
	movmi	C_1, C_1, lsl D_0
ARM(	orrmi	C_1, C_1, C_0, lsr ip	)
THUMB(	lsrmi	TMP, C_0, ip		)
THUMB(	orrmi	C_1, C_1, TMP		)
	movpl	C_1, C_0, lsl D_1
	mov	C_0, C_0, lsl D_0
L_done_shift:
	mov	D_0, #0
	mov	D_1, #0
	@ C: current bit; D: result
#else
	@ C: current bit; D: result
	mov	C_0, #1
	mov	C_1, #0
	mov	D_0, #0
	mov	D_1, #0
L_lsl_4:
	@ Stop as soon as the top nibble of B is in use: another shift by
	@ 4 would push bits out of the 64-bit value.  Branch right away
	@ instead of chaining conditional compares: when B_1 equals the
	@ limit exactly, Z is set and a following cmpeq would replace the
	@ carry with the result of the low-word comparison.
	cmp	B_1, #0x10000000
	bcs	L_lsl_1
	@ Otherwise stop once B >= A
	cmp	B_1, A_1
	cmpeq	B_0, A_0
	bcs	L_lsl_1
	@ B <<= 4
	mov	B_1, B_1, lsl #4
	orr	B_1, B_1, B_0, lsr #28
	mov	B_0, B_0, lsl #4
	@ C <<= 4
	mov	C_1, C_1, lsl #4
	orr	C_1, C_1, C_0, lsr #28
	mov	C_0, C_0, lsl #4
	b	L_lsl_4
L_lsl_1:
	@ Same scheme one bit at a time: stop when bit 63 of B is set ...
	cmp	B_1, #0x80000000
	bcs	L_subtract
	@ ... or once B >= A
	cmp	B_1, A_1
	cmpeq	B_0, A_0
	bcs	L_subtract
	@ B <<= 1
	mov	B_1, B_1, lsl #1
	orr	B_1, B_1, B_0, lsr #31
	mov	B_0, B_0, lsl #1
	@ C <<= 1
	mov	C_1, C_1, lsl #1
	orr	C_1, C_1, C_0, lsr #31
	mov	C_0, C_0, lsl #1
	b	L_lsl_1
#endif
L_subtract:
	@ if A >= B
	cmp	A_1, B_1
	cmpeq	A_0, B_0
	bcc	L_update
	@ A -= B
	subs	A_0, A_0, B_0
	sbc	A_1, A_1, B_1
	@ D |= C
	orr	D_0, D_0, C_0
	orr	D_1, D_1, C_1
L_update:
	@ if A == 0: break
	orrs	ip, A_1, A_0
	beq	L_exit
	@ C >>= 1
	movs	C_1, C_1, lsr #1
	movs	C_0, C_0, rrx
	@ if C == 0: break
	orrs	ip, C_1, C_0
	beq	L_exit
	@ B >>= 1
	movs	B_1, B_1, lsr #1
	mov	B_0, B_0, rrx
	b	L_subtract
L_exit:
	@ Note: A, B & Q, R are aliases
	@ What is left of A is the remainder; D holds the quotient.
	mov	R_0, A_0
	mov	R_1, A_1
	mov	Q_0, D_0
	mov	Q_1, D_1
	ldmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}

L_div_32_32:
	@ Note: A_0, Q_0 and r0 are aliases, as are A_1, Q_1 and r1.
	@ r0 already holds the dividend; pass the divisor in r1.
	mov	r1, B_0
	bl	__aeabi_uidivmod
	@ Take the remainder from r1 before r1 is cleared as Q_1.
	mov	R_0, r1
	mov	R_1, #0
	mov	Q_1, #0
	ldmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}

L_pow2:
#ifdef HAVE_CLZ
	@ Note: A, B and Q, R are aliases
	@ R = A & (B - 1)
	and	C_0, A_0, C_0
	and	C_1, A_1, C_1
	@ Q = A >> log2(B)
	@ Note: B must not be 0 here!
	@ D_0 = 31 - clz(B_0): log2(B) when the set bit is in the low word,
	@ negative (MI) when B_0 == 0.  D_1 = 32 - log2(B) for the carry-in.
	clz	D_0, B_0
	add	D_1, D_0, #1
	rsbs	D_0, D_0, #31
	bpl	L_1
	@ Set bit is in the high word: Q_0 = A_1 >> log2(B_1).  The flags
	@ stay MI, and D_0 + 32 makes the final shift clear A_1.
	clz	D_0, B_1
	rsb	D_0, D_0, #31
	mov	A_0, A_1, lsr D_0
	add	D_0, D_0, #32
L_1:
	movpl	A_0, A_0, lsr D_0
ARM(	orrpl	A_0, A_0, A_1, lsl D_1	)
THUMB(	lslpl	TMP, A_1, D_1		)
THUMB(	orrpl	A_0, A_0, TMP		)
	mov	A_1, A_1, lsr D_0
	@ Mov back C to R
	mov	R_0, C_0
	mov	R_1, C_1
	ldmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}
#else
	@ Note: A, B and Q, R are aliases
	@ R = A & (B - 1)
	and	C_0, A_0, C_0
	and	C_1, A_1, C_1
	@ Q = A >> log2(B)
	@ Note: B must not be 0 here!
	@ Count the trailing zeroes in B; B is a power of 2, so this
	@ count is log2(B).
	mov	D_0, #0
	orrs	B_0, B_0, B_0
	@ If the low word of B is zero (B >= 1 << 32), divide A and B
	@ by 1 << 32.
	moveq	A_0, A_1
	moveq	A_1, #0
	moveq	B_0, B_1
	@ Count the remaining trailing zeroes in B_0 by binary search.
	@ B_1 is only a scratch destination here; R_1 overwrites it below.
	movs	B_1, B_0, lsl #16
	addeq	D_0, #16
	moveq	B_0, B_0, lsr #16
	tst	B_0, #0xff
	addeq	D_0, #8
	moveq	B_0, B_0, lsr #8
	tst	B_0, #0xf
	addeq	D_0, #4
	moveq	B_0, B_0, lsr #4
	tst	B_0, #0x3
	addeq	D_0, #2
	moveq	B_0, B_0, lsr #2
	tst	B_0, #0x1
	addeq	D_0, #1
	@ Shift A to the right by the appropriate amount.
	rsb	D_1, D_0, #32
	mov	Q_0, A_0, lsr D_0
	@ The Thumb form goes through TMP so that A_1 is still intact
	@ for the Q_1 shift below.
 ARM(	orr	Q_0, Q_0, A_1, lsl D_1	)
 THUMB(	lsl	TMP, A_1, D_1		)
 THUMB(	orr	Q_0, Q_0, TMP		)
	mov	Q_1, A_1, lsr D_0
	@ Move C to R
	mov	R_0, C_0
	mov	R_1, C_1
	ldmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}
#endif

L_div_by_0:
	bl	__div0
	@ As wrong as it could be
	mov	Q_0, #0
	mov	Q_1, #0
	mov	R_0, #0
	mov	R_1, #0
	ldmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}
ENDPROC(__aeabi_uldivmod)
.popsection