/*
 * Copyright 2010, Google Inc.
 *
 * Brought in from coreboot uldivmod.S
 *
 * SPDX-License-Identifier: GPL-2.0
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

/* We don't use Thumb instructions for now */
#define ARM(x...)	x
#define THUMB(x...)

/*
 * 64-bit unsigned division with remainder.
 *
 * A, Q = r0 + (r1 << 32)
 * B, R = r2 + (r3 << 32)
 * A / B = Q ... R
 *
 * Register aliases below name the halves of each 64-bit quantity:
 * A = dividend, B = divisor, C/D = scratch, Q = quotient, R = remainder.
 * Note Q aliases A (r0/r1) and R aliases B (r2/r3), per the AEABI
 * convention of returning the quotient/remainder pair in r0-r3.
 */

A_0	.req	r0
A_1	.req	r1
B_0	.req	r2
B_1	.req	r3
C_0	.req	r4
C_1	.req	r5
D_0	.req	r6
D_1	.req	r7

Q_0	.req	r0
Q_1	.req	r1
R_0	.req	r2
R_1	.req	r3

THUMB(
TMP	.req	r8
)

/*
 * u64 __aeabi_uldivmod(u64 A, u64 B)
 * In:   r1:r0 = A (dividend), r3:r2 = B (divisor)
 * Out:  r1:r0 = A / B,        r3:r2 = A % B
 * Fast paths: B == 0 (calls __div0), B a power of 2 (mask + shift),
 * and A, B both 32-bit (delegates to __aeabi_uidivmod).  The general
 * case is a classic shift-and-subtract long division.
 */
ENTRY(__aeabi_uldivmod)
	stmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) lr}
	@ Test if B == 0
	orrs	ip, B_0, B_1		@ Z set -> B == 0
	beq	L_div_by_0
	@ Test if B is power of 2: (B & (B - 1)) == 0
	@ C = B - 1; the two flag-chained tests check (B_0 & C_0) and,
	@ only if that was zero, (B_1 & C_1) as well.
	subs	C_0, B_0, #1
	sbc	C_1, B_1, #0
	tst	C_0, B_0
	tsteq	B_1, C_1
	beq	L_pow2
	@ Test if A_1 == B_1 == 0, i.e. both operands fit in 32 bits
	orrs	ip, A_1, B_1
	beq	L_div_32_32

L_div_64_64:
/* CLZ only exists in ARM architecture version 5 and above. */
#ifdef HAVE_CLZ
	mov	C_0, #1
	mov	C_1, #0
	@ D_0 = clz A  (clz of the 64-bit value: if the high word is 0,
	@ clz A_1 gives 32 and the clz of the low word is added in)
	teq	A_1, #0
	clz	D_0, A_1
	clzeq	ip, A_0
	addeq	D_0, D_0, ip
	@ D_1 = clz B  (same 64-bit clz idiom)
	teq	B_1, #0
	clz	D_1, B_1
	clzeq	ip, B_0
	addeq	D_1, D_1, ip
	@ if clz B - clz A > 0, align B's top bit with A's top bit
	subs	D_0, D_1, D_0
	bls	L_done_shift
	@ B <<= (clz B - clz A)
	@ D_1 = shift - 32 (PL when shift >= 32), ip = 32 - shift;
	@ MI path handles shift < 32, PL path shift >= 32.
	subs	D_1, D_0, #32
	rsb	ip, D_0, #32
	movmi	B_1, B_1, lsl D_0
ARM(	orrmi	B_1, B_1, B_0, lsr ip	)
THUMB(	lsrmi	TMP, B_0, ip		)
THUMB(	orrmi	B_1, B_1, TMP		)
	movpl	B_1, B_0, lsl D_1
	mov	B_0, B_0, lsl D_0
	@ C = 1 << (clz B - clz A), the quotient bit matching B's position
	movmi	C_1, C_1, lsl D_0
ARM(	orrmi	C_1, C_1, C_0, lsr ip	)
THUMB(	lsrmi	TMP, C_0, ip		)
THUMB(	orrmi	C_1, C_1, TMP	)
	movpl	C_1, C_0, lsl D_1
	mov	C_0, C_0, lsl D_0
L_done_shift:
	mov	D_0, #0
	mov	D_1, #0
	@ C: current bit; D: result
#else
	@ No CLZ: align B to A by repeated shifting instead.
	@ C: current bit; D: result
	mov	C_0, #1
	mov	C_1, #0
	mov	D_0, #0
	mov	D_1, #0
	@ Shift B (and the quotient bit C) left 4 at a time while
	@ B < A and B's top nibble is still clear.
L_lsl_4:
	cmp	B_1, #0x10000000
	cmpcc	B_1, A_1
	cmpeq	B_0, A_0
	bcs	L_lsl_1
	@ B <<= 4
	mov	B_1, B_1, lsl #4
	orr	B_1, B_1, B_0, lsr #28
	mov	B_0, B_0, lsl #4
	@ C <<= 4
	mov	C_1, C_1, lsl #4
	orr	C_1, C_1, C_0, lsr #28
	mov	C_0, C_0, lsl #4
	b	L_lsl_4
	@ Then single-bit shifts until B's top bit is set or B >= A.
L_lsl_1:
	cmp	B_1, #0x80000000
	cmpcc	B_1, A_1
	cmpeq	B_0, A_0
	bcs	L_subtract
	@ B <<= 1
	mov	B_1, B_1, lsl #1
	orr	B_1, B_1, B_0, lsr #31
	mov	B_0, B_0, lsl #1
	@ C <<= 1
	mov	C_1, C_1, lsl #1
	orr	C_1, C_1, C_0, lsr #31
	mov	C_0, C_0, lsl #1
	b	L_lsl_1
#endif
	@ Main long-division loop: at each step subtract B from A if it
	@ fits (setting quotient bit C in D), then shift B and C right.
L_subtract:
	@ if A >= B
	cmp	A_1, B_1
	cmpeq	A_0, B_0
	bcc	L_update
	@ A -= B
	subs	A_0, A_0, B_0
	sbc	A_1, A_1, B_1
	@ D |= C
	orr	D_0, D_0, C_0
	orr	D_1, D_1, C_1
L_update:
	@ if A == 0: break
	orrs	ip, A_1, A_0
	beq	L_exit
	@ C >>= 1 (64-bit: rrx pulls the carry shifted out of C_1 into C_0)
	movs	C_1, C_1, lsr #1
	movs	C_0, C_0, rrx
	@ if C == 0: break
	orrs	ip, C_1, C_0
	beq	L_exit
	@ B >>= 1 (same lsr + rrx carry trick)
	movs	B_1, B_1, lsr #1
	mov	B_0, B_0, rrx
	b	L_subtract
L_exit:
	@ Note: A, B & Q, R are aliases
	mov	R_0, A_0
	mov	R_1, A_1
	mov	Q_0, D_0
	mov	Q_1, D_1
	ldmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}

	@ Both operands fit in 32 bits: delegate to the 32-bit helper.
	@ Per AEABI, __aeabi_uidivmod returns quotient in r0, remainder
	@ in r1; widen both results back to 64 bits.
L_div_32_32:
	@ Note: A_0 & r0 are aliases
	@       Q_1 r1
	mov	r1, B_0
	bl	__aeabi_uidivmod
	mov	R_0, r1
	mov	R_1, #0
	mov	Q_1, #0
	ldmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}

	@ B is a nonzero power of 2: R = A & (B - 1), Q = A >> log2(B).
	@ C still holds B - 1 from the power-of-2 test above.
L_pow2:
#ifdef HAVE_CLZ
	@ Note: A, B and Q, R are aliases
	@ R = A & (B - 1)
	and	C_0, A_0, C_0
	and	C_1, A_1, C_1
	@ Q = A >> log2(B)
	@ Note: B must not be 0 here!
	@ D_0 = 31 - clz B_0 = log2(B) if the set bit is in the low word
	@ (PL); otherwise (MI) recompute from B_1 and add 32.
	clz	D_0, B_0
	add	D_1, D_0, #1
	rsbs	D_0, D_0, #31
	bpl	L_1
	clz	D_0, B_1
	rsb	D_0, D_0, #31
	mov	A_0, A_1, lsr D_0
	add	D_0, D_0, #32
L_1:
	@ 64-bit right shift by D_0 < 32: D_1 = 32 - D_0.
	@ NOTE(review): when B == 1, D_1 is 32 — relies on ARM
	@ register-specified shifts by 32 producing 0.
	movpl	A_0, A_0, lsr D_0
ARM(	orrpl	A_0, A_0, A_1, lsl D_1	)
THUMB(	lslpl	TMP, A_1, D_1		)
THUMB(	orrpl	A_0, A_0, TMP		)
	mov	A_1, A_1, lsr D_0
	@ Move remainder (C) back into R
	mov	R_0, C_0
	mov	R_1, C_1
	ldmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}
#else
	@ Note: A, B and Q, R are aliases
	@ R = A & (B - 1)
	and	C_0, A_0, C_0
	and	C_1, A_1, C_1
	@ Q = A >> log2(B)
	@ Note: B must not be 0 here!
	@ Compute log2(B) in D_0 without CLZ, by binary search on the
	@ position of B's single set bit.
	mov	D_0, #0
	orrs	B_0, B_0, B_0
	@ If the low word of B is zero, the set bit is in B_1:
	@ shift A and B right by 32 first.
	moveq	A_0, A_1
	moveq	A_1, #0
	moveq	B_0, B_1
	@ Binary-search the remaining bit position into D_0.
	movs	B_1, B_0, lsl #16
	addeq	D_0, #16
	moveq	B_0, B_0, lsr #16
	tst	B_0, #0xff
	addeq	D_0, #8
	moveq	B_0, B_0, lsr #8
	tst	B_0, #0xf
	addeq	D_0, #4
	moveq	B_0, B_0, lsr #4
	tst	B_0, #0x3
	addeq	D_0, #2
	moveq	B_0, B_0, lsr #2
	tst	B_0, #0x1
	addeq	D_0, #1
	@ Shift A right by D_0 (64-bit shift; D_1 = 32 - D_0).
	@ NOTE(review): when D_0 == 0, D_1 is 32 — relies on ARM
	@ register-specified shifts by 32 producing 0.
	rsb	D_1, D_0, #32
	mov	Q_0, A_0, lsr D_0
	orr	Q_0, A_1, lsl D_1
	mov	Q_1, A_1, lsr D_0
	@ Move remainder (C) into R
	mov	R_0, C_0
	mov	R_1, C_1
	ldmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}
#endif

	@ Division by zero: let __div0 report it, then return all zeros.
L_div_by_0:
	bl	__div0
	@ As wrong as it could be
	mov	Q_0, #0
	mov	Q_1, #0
	mov	R_0, #0
	mov	R_1, #0
	ldmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}
ENDPROC(__aeabi_uldivmod)