/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Divide a 64-bit unsigned number by a 32-bit unsigned number.
 * This routine assumes that the top 32 bits of the dividend are
 * non-zero to start with.
 * On entry, r3 points to the dividend, which get overwritten with
 * the 64-bit quotient, and r4 contains the divisor.
 * On exit, r3 contains the remainder.
 *
 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
 */
#include "ppc_asm.h"

/*
 * Register roles inside __div64_32:
 *   r5/r6  running dividend (hi/lo); r5 becomes the partial remainder hi
 *   r7     quotient hi word
 *   r8     quotient lo word (accumulated from estimates)
 *   r11    current quotient-digit estimate
 *   r9/r10 scratch (rounded shifted divisor / product lo, masks)
 * Algorithm: while the remainder still has non-zero high bits, divide a
 * right-shifted dividend by a right-shifted, rounded-UP divisor to get an
 * estimate that can only be too small, multiply back, subtract, and loop.
 */
	.globl __div64_32
__div64_32:
	lwz	r5,0(r3)	# get the dividend into r5/r6
	lwz	r6,4(r3)
	cmplw	r5,r4
	li	r7,0
	li	r8,0
	blt	1f
	divwu	r7,r5,r4	# if dividend.hi >= divisor,
	mullw	r0,r7,r4	# quotient.hi = dividend.hi / divisor
	subf.	r5,r0,r5	# dividend.hi %= divisor
	beq	3f		# hi remainder zero -> only 32-bit work left

1:	mr	r11,r5		# here dividend.hi != 0
	andis.	r0,r5,0xc000	# top 2 bits set? then hi word alone is a
	bne	2f		# good (small) estimate; skip the shifting
	cntlzw	r0,r5		# we are shifting the dividend right
	li	r10,-1		# to make it < 2^32, and shifting
	srw	r10,r10,r0	# the divisor right the same amount,
	addc	r9,r4,r10	# rounding up (so the estimate cannot
	andc	r11,r6,r10	# ever be too large, only too small)
	andc	r9,r9,r10
	addze	r9,r9		# carry from addc completes the round-up
	or	r11,r5,r11	# merge hi bits with kept lo bits
	rotlw	r9,r9,r0	# rotlw by r0 == shift right by (32-r0)
	rotlw	r11,r11,r0
	divwu	r11,r11,r9	# then we divide the shifted quantities
2:	mullw	r10,r11,r4	# to get an estimate of the quotient,
	mulhwu	r9,r11,r4	# multiply the estimate by the divisor,
	subfc	r6,r10,r6	# take the product from the divisor,
	add	r8,r8,r11	# and add the estimate to the accumulated
	subfe.	r5,r9,r5	# quotient
	bne	1b		# loop while remainder hi word != 0

3:	cmplw	r6,r4		# remainder now fits in 32 bits
	blt	4f
	divwu	r0,r6,r4	# perform the remaining 32-bit division
	mullw	r10,r0,r4	# and get the remainder
	add	r8,r8,r0
	subf	r6,r10,r6
4:	stw	r7,0(r3)	# return the quotient in *r3
	stw	r8,4(r3)
	mr	r3,r6		# return the remainder in r3
	blr

/*
 * Extended precision shifts.
 *
 * Updated to be valid for shift counts from 0 to 63 inclusive.
 * -- Gabriel
 *
 * R3/R4 has 64 bit value
 * R5 has shift count
 * result in R3/R4
 *
 * ashrdi3: arithmetic right shift (sign propagation)
 * lshrdi3: logical right shift
 * ashldi3: left shift
 *
 * These are branch-free: they rely on PPC32 slw/srw taking a 6-bit
 * shift amount and producing 0 for counts 32..63, so the "count >= 32"
 * and "count < 32" halves each contribute their term and the other
 * half contributes 0, then the two are OR-ed together.
 */
	.globl __ashrdi3
__ashrdi3:
	subfic	r6,r5,32	# r6 = 32 - count
	srw	r4,r4,r5	# LSW = count > 31 ? 0 : LSW >> count
	addi	r7,r5,32	# could be xori, or addi with -32
	slw	r6,r3,r6	# t1 = count > 31 ? 0 : MSW << (32-count)
	rlwinm	r8,r7,0,32	# t3 = (count < 32) ? 32 : 0
	sraw	r7,r3,r7	# t2 = MSW >> (count-32)
	or	r4,r4,r6	# LSW |= t1
	slw	r7,r7,r8	# t2 = (count < 32) ? 0 : t2
				# (kills the sign-fill t2 when count < 32,
				#  which sraw alone would not do)
	sraw	r3,r3,r5	# MSW = MSW >> count
	or	r4,r4,r7	# LSW |= t2
	blr

	.globl __ashldi3
__ashldi3:
	subfic	r6,r5,32	# r6 = 32 - count
	slw	r3,r3,r5	# MSW = count > 31 ? 0 : MSW << count
	addi	r7,r5,32	# could be xori, or addi with -32
	srw	r6,r4,r6	# t1 = count > 31 ? 0 : LSW >> (32-count)
	slw	r7,r4,r7	# t2 = count < 32 ? 0 : LSW << (count-32)
	or	r3,r3,r6	# MSW |= t1
	slw	r4,r4,r5	# LSW = LSW << count
	or	r3,r3,r7	# MSW |= t2
	blr

	.globl __lshrdi3
__lshrdi3:
	subfic	r6,r5,32	# r6 = 32 - count
	srw	r4,r4,r5	# LSW = count > 31 ? 0 : LSW >> count
	addi	r7,r5,32	# could be xori, or addi with -32
	slw	r6,r3,r6	# t1 = count > 31 ? 0 : MSW << (32-count)
	srw	r7,r3,r7	# t2 = count < 32 ? 0 : MSW >> (count-32)
	or	r4,r4,r6	# LSW |= t1
	srw	r3,r3,r5	# MSW = MSW >> count
	or	r4,r4,r7	# LSW |= t2
	blr