1*0e14a7fbSdp-arm /* ===-- udivmoddi4.c - Implement __udivmoddi4 -----------------------------=== 2*0e14a7fbSdp-arm * 3*0e14a7fbSdp-arm * The LLVM Compiler Infrastructure 4*0e14a7fbSdp-arm * 5*0e14a7fbSdp-arm * This file is dual licensed under the MIT and the University of Illinois Open 6*0e14a7fbSdp-arm * Source Licenses. See LICENSE.TXT for details. 7*0e14a7fbSdp-arm * 8*0e14a7fbSdp-arm * ===----------------------------------------------------------------------=== 9*0e14a7fbSdp-arm * 10*0e14a7fbSdp-arm * This file implements __udivmoddi4 for the compiler_rt library. 11*0e14a7fbSdp-arm * 12*0e14a7fbSdp-arm * ===----------------------------------------------------------------------=== 13*0e14a7fbSdp-arm */ 14*0e14a7fbSdp-arm 15*0e14a7fbSdp-arm #include "int_lib.h" 16*0e14a7fbSdp-arm 17*0e14a7fbSdp-arm /* Effects: if rem != 0, *rem = a % b 18*0e14a7fbSdp-arm * Returns: a / b 19*0e14a7fbSdp-arm */ 20*0e14a7fbSdp-arm 21*0e14a7fbSdp-arm /* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */ 22*0e14a7fbSdp-arm 23*0e14a7fbSdp-arm COMPILER_RT_ABI du_int 24*0e14a7fbSdp-arm __udivmoddi4(du_int a, du_int b, du_int* rem) 25*0e14a7fbSdp-arm { 26*0e14a7fbSdp-arm const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT; 27*0e14a7fbSdp-arm const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT; 28*0e14a7fbSdp-arm udwords n; 29*0e14a7fbSdp-arm n.all = a; 30*0e14a7fbSdp-arm udwords d; 31*0e14a7fbSdp-arm d.all = b; 32*0e14a7fbSdp-arm udwords q; 33*0e14a7fbSdp-arm udwords r; 34*0e14a7fbSdp-arm unsigned sr; 35*0e14a7fbSdp-arm /* special cases, X is unknown, K != 0 */ 36*0e14a7fbSdp-arm if (n.s.high == 0) 37*0e14a7fbSdp-arm { 38*0e14a7fbSdp-arm if (d.s.high == 0) 39*0e14a7fbSdp-arm { 40*0e14a7fbSdp-arm /* 0 X 41*0e14a7fbSdp-arm * --- 42*0e14a7fbSdp-arm * 0 X 43*0e14a7fbSdp-arm */ 44*0e14a7fbSdp-arm if (rem) 45*0e14a7fbSdp-arm *rem = n.s.low % d.s.low; 46*0e14a7fbSdp-arm return n.s.low / d.s.low; 47*0e14a7fbSdp-arm } 48*0e14a7fbSdp-arm /* 0 X 49*0e14a7fbSdp-arm * --- 50*0e14a7fbSdp-arm * K X 51*0e14a7fbSdp-arm */ 52*0e14a7fbSdp-arm if (rem) 53*0e14a7fbSdp-arm *rem = n.s.low; 54*0e14a7fbSdp-arm return 0; 55*0e14a7fbSdp-arm } 56*0e14a7fbSdp-arm /* n.s.high != 0 */ 57*0e14a7fbSdp-arm if (d.s.low == 0) 58*0e14a7fbSdp-arm { 59*0e14a7fbSdp-arm if (d.s.high == 0) 60*0e14a7fbSdp-arm { 61*0e14a7fbSdp-arm /* K X 62*0e14a7fbSdp-arm * --- 63*0e14a7fbSdp-arm * 0 0 64*0e14a7fbSdp-arm */ 65*0e14a7fbSdp-arm if (rem) 66*0e14a7fbSdp-arm *rem = n.s.high % d.s.low; 67*0e14a7fbSdp-arm return n.s.high / d.s.low; 68*0e14a7fbSdp-arm } 69*0e14a7fbSdp-arm /* d.s.high != 0 */ 70*0e14a7fbSdp-arm if (n.s.low == 0) 71*0e14a7fbSdp-arm { 72*0e14a7fbSdp-arm /* K 0 73*0e14a7fbSdp-arm * --- 74*0e14a7fbSdp-arm * K 0 75*0e14a7fbSdp-arm */ 76*0e14a7fbSdp-arm if (rem) 77*0e14a7fbSdp-arm { 78*0e14a7fbSdp-arm r.s.high = n.s.high % d.s.high; 79*0e14a7fbSdp-arm r.s.low = 0; 80*0e14a7fbSdp-arm *rem = r.all; 81*0e14a7fbSdp-arm } 82*0e14a7fbSdp-arm return n.s.high / d.s.high; 83*0e14a7fbSdp-arm } 84*0e14a7fbSdp-arm /* K K 85*0e14a7fbSdp-arm * --- 86*0e14a7fbSdp-arm * K 0 87*0e14a7fbSdp-arm */ 88*0e14a7fbSdp-arm if ((d.s.high & (d.s.high - 1)) == 0) /* if d is a power of 2 */ 89*0e14a7fbSdp-arm { 90*0e14a7fbSdp-arm if (rem) 91*0e14a7fbSdp-arm { 92*0e14a7fbSdp-arm r.s.low = n.s.low; 93*0e14a7fbSdp-arm r.s.high = n.s.high & (d.s.high - 1); 94*0e14a7fbSdp-arm *rem = r.all; 95*0e14a7fbSdp-arm } 96*0e14a7fbSdp-arm return n.s.high >> __builtin_ctz(d.s.high); 97*0e14a7fbSdp-arm } 98*0e14a7fbSdp-arm /* K K 99*0e14a7fbSdp-arm * --- 100*0e14a7fbSdp-arm * K 0 101*0e14a7fbSdp-arm */ 102*0e14a7fbSdp-arm sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high); 103*0e14a7fbSdp-arm /* 0 <= sr <= n_uword_bits - 2 or sr large */ 104*0e14a7fbSdp-arm if (sr > n_uword_bits - 2) 105*0e14a7fbSdp-arm { 106*0e14a7fbSdp-arm if (rem) 107*0e14a7fbSdp-arm *rem = n.all; 108*0e14a7fbSdp-arm return 0; 109*0e14a7fbSdp-arm } 110*0e14a7fbSdp-arm ++sr; 111*0e14a7fbSdp-arm /* 1 <= sr <= n_uword_bits - 1 */ 112*0e14a7fbSdp-arm /* q.all = n.all << (n_udword_bits - sr); */ 113*0e14a7fbSdp-arm q.s.low = 0; 114*0e14a7fbSdp-arm q.s.high = n.s.low << (n_uword_bits - sr); 115*0e14a7fbSdp-arm /* r.all = n.all >> sr; */ 116*0e14a7fbSdp-arm r.s.high = n.s.high >> sr; 117*0e14a7fbSdp-arm r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); 118*0e14a7fbSdp-arm } 119*0e14a7fbSdp-arm else /* d.s.low != 0 */ 120*0e14a7fbSdp-arm { 121*0e14a7fbSdp-arm if (d.s.high == 0) 122*0e14a7fbSdp-arm { 123*0e14a7fbSdp-arm /* K X 124*0e14a7fbSdp-arm * --- 125*0e14a7fbSdp-arm * 0 K 126*0e14a7fbSdp-arm */ 127*0e14a7fbSdp-arm if ((d.s.low & (d.s.low - 1)) == 0) /* if d is a power of 2 */ 128*0e14a7fbSdp-arm { 129*0e14a7fbSdp-arm if (rem) 130*0e14a7fbSdp-arm *rem = n.s.low & (d.s.low - 1); 131*0e14a7fbSdp-arm if (d.s.low == 1) 132*0e14a7fbSdp-arm return n.all; 133*0e14a7fbSdp-arm sr = __builtin_ctz(d.s.low); 134*0e14a7fbSdp-arm q.s.high = n.s.high >> sr; 135*0e14a7fbSdp-arm q.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); 136*0e14a7fbSdp-arm return q.all; 137*0e14a7fbSdp-arm } 138*0e14a7fbSdp-arm /* K X 139*0e14a7fbSdp-arm * --- 140*0e14a7fbSdp-arm * 0 K 141*0e14a7fbSdp-arm */ 142*0e14a7fbSdp-arm sr = 1 + n_uword_bits + __builtin_clz(d.s.low) - __builtin_clz(n.s.high); 143*0e14a7fbSdp-arm /* 2 <= sr <= n_udword_bits - 1 144*0e14a7fbSdp-arm * q.all = n.all << (n_udword_bits - sr); 145*0e14a7fbSdp-arm * r.all = n.all >> sr; 146*0e14a7fbSdp-arm */ 147*0e14a7fbSdp-arm if (sr == n_uword_bits) 148*0e14a7fbSdp-arm { 149*0e14a7fbSdp-arm q.s.low = 0; 150*0e14a7fbSdp-arm q.s.high = n.s.low; 151*0e14a7fbSdp-arm r.s.high = 0; 152*0e14a7fbSdp-arm r.s.low = n.s.high; 153*0e14a7fbSdp-arm } 154*0e14a7fbSdp-arm else if (sr < n_uword_bits) // 2 <= sr <= n_uword_bits - 1 155*0e14a7fbSdp-arm { 156*0e14a7fbSdp-arm q.s.low = 0; 157*0e14a7fbSdp-arm q.s.high = n.s.low << (n_uword_bits - sr); 158*0e14a7fbSdp-arm r.s.high = n.s.high >> sr; 159*0e14a7fbSdp-arm r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); 160*0e14a7fbSdp-arm } 161*0e14a7fbSdp-arm else // n_uword_bits + 1 <= sr <= n_udword_bits - 1 162*0e14a7fbSdp-arm { 163*0e14a7fbSdp-arm q.s.low = n.s.low << (n_udword_bits - sr); 164*0e14a7fbSdp-arm q.s.high = (n.s.high << (n_udword_bits - sr)) | 165*0e14a7fbSdp-arm (n.s.low >> (sr - n_uword_bits)); 166*0e14a7fbSdp-arm r.s.high = 0; 167*0e14a7fbSdp-arm r.s.low = n.s.high >> (sr - n_uword_bits); 168*0e14a7fbSdp-arm } 169*0e14a7fbSdp-arm } 170*0e14a7fbSdp-arm else 171*0e14a7fbSdp-arm { 172*0e14a7fbSdp-arm /* K X 173*0e14a7fbSdp-arm * --- 174*0e14a7fbSdp-arm * K K 175*0e14a7fbSdp-arm */ 176*0e14a7fbSdp-arm sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high); 177*0e14a7fbSdp-arm /* 0 <= sr <= n_uword_bits - 1 or sr large */ 178*0e14a7fbSdp-arm if (sr > n_uword_bits - 1) 179*0e14a7fbSdp-arm { 180*0e14a7fbSdp-arm if (rem) 181*0e14a7fbSdp-arm *rem = n.all; 182*0e14a7fbSdp-arm return 0; 183*0e14a7fbSdp-arm } 184*0e14a7fbSdp-arm ++sr; 185*0e14a7fbSdp-arm /* 1 <= sr <= n_uword_bits */ 186*0e14a7fbSdp-arm /* q.all = n.all << (n_udword_bits - sr); */ 187*0e14a7fbSdp-arm q.s.low = 0; 188*0e14a7fbSdp-arm if (sr == n_uword_bits) 189*0e14a7fbSdp-arm { 190*0e14a7fbSdp-arm q.s.high = n.s.low; 191*0e14a7fbSdp-arm r.s.high = 0; 192*0e14a7fbSdp-arm r.s.low = n.s.high; 193*0e14a7fbSdp-arm } 194*0e14a7fbSdp-arm else 195*0e14a7fbSdp-arm { 196*0e14a7fbSdp-arm q.s.high = n.s.low << (n_uword_bits - sr); 197*0e14a7fbSdp-arm r.s.high = n.s.high >> sr; 198*0e14a7fbSdp-arm r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); 199*0e14a7fbSdp-arm } 200*0e14a7fbSdp-arm } 201*0e14a7fbSdp-arm } 202*0e14a7fbSdp-arm /* Not a special case 203*0e14a7fbSdp-arm * q and r are initialized with: 204*0e14a7fbSdp-arm * q.all = n.all << (n_udword_bits - sr); 205*0e14a7fbSdp-arm * r.all = n.all >> sr; 206*0e14a7fbSdp-arm * 1 <= sr <= n_udword_bits - 1 207*0e14a7fbSdp-arm */ 208*0e14a7fbSdp-arm su_int carry = 0; 209*0e14a7fbSdp-arm for (; sr > 0; --sr) 210*0e14a7fbSdp-arm { 211*0e14a7fbSdp-arm /* r:q = ((r:q) << 1) | carry */ 212*0e14a7fbSdp-arm r.s.high = (r.s.high << 1) | (r.s.low >> (n_uword_bits - 1)); 213*0e14a7fbSdp-arm r.s.low = (r.s.low << 1) | (q.s.high >> (n_uword_bits - 1)); 214*0e14a7fbSdp-arm q.s.high = (q.s.high << 1) | (q.s.low >> (n_uword_bits - 1)); 215*0e14a7fbSdp-arm q.s.low = (q.s.low << 1) | carry; 216*0e14a7fbSdp-arm /* carry = 0; 217*0e14a7fbSdp-arm * if (r.all >= d.all) 218*0e14a7fbSdp-arm * { 219*0e14a7fbSdp-arm * r.all -= d.all; 220*0e14a7fbSdp-arm * carry = 1; 221*0e14a7fbSdp-arm * } 222*0e14a7fbSdp-arm */ 223*0e14a7fbSdp-arm const di_int s = (di_int)(d.all - r.all - 1) >> (n_udword_bits - 1); 224*0e14a7fbSdp-arm carry = s & 1; 225*0e14a7fbSdp-arm r.all -= d.all & s; 226*0e14a7fbSdp-arm } 227*0e14a7fbSdp-arm q.all = (q.all << 1) | carry; 228*0e14a7fbSdp-arm if (rem) 229*0e14a7fbSdp-arm *rem = r.all; 230*0e14a7fbSdp-arm return q.all; 231*0e14a7fbSdp-arm } 232