1*8a6a9560SDaniel Boulby //===-- udivmoddi4.c - Implement __udivmoddi4 -----------------------------===// 2*8a6a9560SDaniel Boulby // 3*8a6a9560SDaniel Boulby // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*8a6a9560SDaniel Boulby // See https://llvm.org/LICENSE.txt for license information. 5*8a6a9560SDaniel Boulby // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*8a6a9560SDaniel Boulby // 7*8a6a9560SDaniel Boulby //===----------------------------------------------------------------------===// 8*8a6a9560SDaniel Boulby // 9*8a6a9560SDaniel Boulby // This file implements __udivmoddi4 for the compiler_rt library. 10*8a6a9560SDaniel Boulby // 11*8a6a9560SDaniel Boulby //===----------------------------------------------------------------------===// 120e14a7fbSdp-arm 130e14a7fbSdp-arm #include "int_lib.h" 140e14a7fbSdp-arm 15*8a6a9560SDaniel Boulby // Effects: if rem != 0, *rem = a % b 16*8a6a9560SDaniel Boulby // Returns: a / b 170e14a7fbSdp-arm 18*8a6a9560SDaniel Boulby // Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide 190e14a7fbSdp-arm 20*8a6a9560SDaniel Boulby #if defined(_MSC_VER) && !defined(__clang__) 21*8a6a9560SDaniel Boulby // MSVC throws a warning about mod 0 here, disable it for builds that 22*8a6a9560SDaniel Boulby // warn-as-error 23*8a6a9560SDaniel Boulby #pragma warning(push) 24*8a6a9560SDaniel Boulby #pragma warning(disable : 4723 4724) 25*8a6a9560SDaniel Boulby #endif 26*8a6a9560SDaniel Boulby 27*8a6a9560SDaniel Boulby COMPILER_RT_ABI du_int __udivmoddi4(du_int a, du_int b, du_int *rem) { 280e14a7fbSdp-arm const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT; 290e14a7fbSdp-arm const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT; 300e14a7fbSdp-arm udwords n; 310e14a7fbSdp-arm n.all = a; 320e14a7fbSdp-arm udwords d; 330e14a7fbSdp-arm d.all = b; 340e14a7fbSdp-arm udwords q; 350e14a7fbSdp-arm udwords r; 360e14a7fbSdp-arm unsigned sr; 37*8a6a9560SDaniel Boulby // special cases, X is unknown, K != 0 38*8a6a9560SDaniel Boulby if (n.s.high == 0) { 39*8a6a9560SDaniel Boulby if (d.s.high == 0) { 40*8a6a9560SDaniel Boulby // 0 X 41*8a6a9560SDaniel Boulby // --- 42*8a6a9560SDaniel Boulby // 0 X 430e14a7fbSdp-arm if (rem) 440e14a7fbSdp-arm *rem = n.s.low % d.s.low; 450e14a7fbSdp-arm return n.s.low / d.s.low; 460e14a7fbSdp-arm } 47*8a6a9560SDaniel Boulby // 0 X 48*8a6a9560SDaniel Boulby // --- 49*8a6a9560SDaniel Boulby // K X 500e14a7fbSdp-arm if (rem) 510e14a7fbSdp-arm *rem = n.s.low; 520e14a7fbSdp-arm return 0; 530e14a7fbSdp-arm } 54*8a6a9560SDaniel Boulby // n.s.high != 0 55*8a6a9560SDaniel Boulby if (d.s.low == 0) { 56*8a6a9560SDaniel Boulby if (d.s.high == 0) { 57*8a6a9560SDaniel Boulby // K X 58*8a6a9560SDaniel Boulby // --- 59*8a6a9560SDaniel Boulby // 0 0 600e14a7fbSdp-arm if (rem) 610e14a7fbSdp-arm *rem = n.s.high % d.s.low; 620e14a7fbSdp-arm return n.s.high / d.s.low; 630e14a7fbSdp-arm } 64*8a6a9560SDaniel Boulby // d.s.high != 0 65*8a6a9560SDaniel Boulby if (n.s.low == 0) { 66*8a6a9560SDaniel Boulby // K 0 67*8a6a9560SDaniel Boulby // --- 68*8a6a9560SDaniel Boulby // K 0 69*8a6a9560SDaniel Boulby if (rem) { 700e14a7fbSdp-arm r.s.high = n.s.high % d.s.high; 710e14a7fbSdp-arm r.s.low = 0; 720e14a7fbSdp-arm *rem = r.all; 730e14a7fbSdp-arm } 740e14a7fbSdp-arm return n.s.high / d.s.high; 750e14a7fbSdp-arm } 76*8a6a9560SDaniel Boulby // K K 77*8a6a9560SDaniel Boulby // --- 78*8a6a9560SDaniel Boulby // K 0 79*8a6a9560SDaniel Boulby if ((d.s.high & (d.s.high - 1)) == 0) /* if d is a power of 2 */ { 80*8a6a9560SDaniel Boulby if (rem) { 810e14a7fbSdp-arm r.s.low = n.s.low; 820e14a7fbSdp-arm r.s.high = n.s.high & (d.s.high - 1); 830e14a7fbSdp-arm *rem = r.all; 840e14a7fbSdp-arm } 85*8a6a9560SDaniel Boulby return n.s.high >> ctzsi(d.s.high); 860e14a7fbSdp-arm } 87*8a6a9560SDaniel Boulby // K K 88*8a6a9560SDaniel Boulby // --- 89*8a6a9560SDaniel Boulby // K 0 90*8a6a9560SDaniel Boulby sr = clzsi(d.s.high) - clzsi(n.s.high); 91*8a6a9560SDaniel Boulby // 0 <= sr <= n_uword_bits - 2 or sr large 92*8a6a9560SDaniel Boulby if (sr > n_uword_bits - 2) { 930e14a7fbSdp-arm if (rem) 940e14a7fbSdp-arm *rem = n.all; 950e14a7fbSdp-arm return 0; 960e14a7fbSdp-arm } 970e14a7fbSdp-arm ++sr; 98*8a6a9560SDaniel Boulby // 1 <= sr <= n_uword_bits - 1 99*8a6a9560SDaniel Boulby // q.all = n.all << (n_udword_bits - sr); 1000e14a7fbSdp-arm q.s.low = 0; 1010e14a7fbSdp-arm q.s.high = n.s.low << (n_uword_bits - sr); 102*8a6a9560SDaniel Boulby // r.all = n.all >> sr; 1030e14a7fbSdp-arm r.s.high = n.s.high >> sr; 1040e14a7fbSdp-arm r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); 105*8a6a9560SDaniel Boulby } else /* d.s.low != 0 */ { 106*8a6a9560SDaniel Boulby if (d.s.high == 0) { 107*8a6a9560SDaniel Boulby // K X 108*8a6a9560SDaniel Boulby // --- 109*8a6a9560SDaniel Boulby // 0 K 110*8a6a9560SDaniel Boulby if ((d.s.low & (d.s.low - 1)) == 0) /* if d is a power of 2 */ { 1110e14a7fbSdp-arm if (rem) 1120e14a7fbSdp-arm *rem = n.s.low & (d.s.low - 1); 1130e14a7fbSdp-arm if (d.s.low == 1) 1140e14a7fbSdp-arm return n.all; 115*8a6a9560SDaniel Boulby sr = ctzsi(d.s.low); 1160e14a7fbSdp-arm q.s.high = n.s.high >> sr; 1170e14a7fbSdp-arm q.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); 1180e14a7fbSdp-arm return q.all; 1190e14a7fbSdp-arm } 120*8a6a9560SDaniel Boulby // K X 121*8a6a9560SDaniel Boulby // --- 122*8a6a9560SDaniel Boulby // 0 K 123*8a6a9560SDaniel Boulby sr = 1 + n_uword_bits + clzsi(d.s.low) - clzsi(n.s.high); 124*8a6a9560SDaniel Boulby // 2 <= sr <= n_udword_bits - 1 125*8a6a9560SDaniel Boulby // q.all = n.all << (n_udword_bits - sr); 126*8a6a9560SDaniel Boulby // r.all = n.all >> sr; 127*8a6a9560SDaniel Boulby if (sr == n_uword_bits) { 1280e14a7fbSdp-arm q.s.low = 0; 1290e14a7fbSdp-arm q.s.high = n.s.low; 1300e14a7fbSdp-arm r.s.high = 0; 1310e14a7fbSdp-arm r.s.low = n.s.high; 132*8a6a9560SDaniel Boulby } else if (sr < n_uword_bits) /* 2 <= sr <= n_uword_bits - 1 */ { 1330e14a7fbSdp-arm q.s.low = 0; 1340e14a7fbSdp-arm q.s.high = n.s.low << (n_uword_bits - sr); 1350e14a7fbSdp-arm r.s.high = n.s.high >> sr; 1360e14a7fbSdp-arm r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); 137*8a6a9560SDaniel Boulby } else /* n_uword_bits + 1 <= sr <= n_udword_bits - 1 */ { 1380e14a7fbSdp-arm q.s.low = n.s.low << (n_udword_bits - sr); 1390e14a7fbSdp-arm q.s.high = (n.s.high << (n_udword_bits - sr)) | 1400e14a7fbSdp-arm (n.s.low >> (sr - n_uword_bits)); 1410e14a7fbSdp-arm r.s.high = 0; 1420e14a7fbSdp-arm r.s.low = n.s.high >> (sr - n_uword_bits); 1430e14a7fbSdp-arm } 144*8a6a9560SDaniel Boulby } else { 145*8a6a9560SDaniel Boulby // K X 146*8a6a9560SDaniel Boulby // --- 147*8a6a9560SDaniel Boulby // K K 148*8a6a9560SDaniel Boulby sr = clzsi(d.s.high) - clzsi(n.s.high); 149*8a6a9560SDaniel Boulby // 0 <= sr <= n_uword_bits - 1 or sr large 150*8a6a9560SDaniel Boulby if (sr > n_uword_bits - 1) { 1510e14a7fbSdp-arm if (rem) 1520e14a7fbSdp-arm *rem = n.all; 1530e14a7fbSdp-arm return 0; 1540e14a7fbSdp-arm } 1550e14a7fbSdp-arm ++sr; 156*8a6a9560SDaniel Boulby // 1 <= sr <= n_uword_bits 157*8a6a9560SDaniel Boulby // q.all = n.all << (n_udword_bits - sr); 1580e14a7fbSdp-arm q.s.low = 0; 159*8a6a9560SDaniel Boulby if (sr == n_uword_bits) { 1600e14a7fbSdp-arm q.s.high = n.s.low; 1610e14a7fbSdp-arm r.s.high = 0; 1620e14a7fbSdp-arm r.s.low = n.s.high; 163*8a6a9560SDaniel Boulby } else { 1640e14a7fbSdp-arm q.s.high = n.s.low << (n_uword_bits - sr); 1650e14a7fbSdp-arm r.s.high = n.s.high >> sr; 1660e14a7fbSdp-arm r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); 1670e14a7fbSdp-arm } 1680e14a7fbSdp-arm } 1690e14a7fbSdp-arm } 170*8a6a9560SDaniel Boulby // Not a special case 171*8a6a9560SDaniel Boulby // q and r are initialized with: 172*8a6a9560SDaniel Boulby // q.all = n.all << (n_udword_bits - sr); 173*8a6a9560SDaniel Boulby // r.all = n.all >> sr; 174*8a6a9560SDaniel Boulby // 1 <= sr <= n_udword_bits - 1 1750e14a7fbSdp-arm su_int carry = 0; 176*8a6a9560SDaniel Boulby for (; sr > 0; --sr) { 177*8a6a9560SDaniel Boulby // r:q = ((r:q) << 1) | carry 1780e14a7fbSdp-arm r.s.high = (r.s.high << 1) | (r.s.low >> (n_uword_bits - 1)); 1790e14a7fbSdp-arm r.s.low = (r.s.low << 1) | (q.s.high >> (n_uword_bits - 1)); 1800e14a7fbSdp-arm q.s.high = (q.s.high << 1) | (q.s.low >> (n_uword_bits - 1)); 1810e14a7fbSdp-arm q.s.low = (q.s.low << 1) | carry; 182*8a6a9560SDaniel Boulby // carry = 0; 183*8a6a9560SDaniel Boulby // if (r.all >= d.all) 184*8a6a9560SDaniel Boulby // { 185*8a6a9560SDaniel Boulby // r.all -= d.all; 186*8a6a9560SDaniel Boulby // carry = 1; 187*8a6a9560SDaniel Boulby // } 1880e14a7fbSdp-arm const di_int s = (di_int)(d.all - r.all - 1) >> (n_udword_bits - 1); 1890e14a7fbSdp-arm carry = s & 1; 1900e14a7fbSdp-arm r.all -= d.all & s; 1910e14a7fbSdp-arm } 1920e14a7fbSdp-arm q.all = (q.all << 1) | carry; 1930e14a7fbSdp-arm if (rem) 1940e14a7fbSdp-arm *rem = r.all; 1950e14a7fbSdp-arm return q.all; 1960e14a7fbSdp-arm } 197*8a6a9560SDaniel Boulby 198*8a6a9560SDaniel Boulby #if defined(_MSC_VER) && !defined(__clang__) 199*8a6a9560SDaniel Boulby #pragma warning(pop) 200*8a6a9560SDaniel Boulby #endif 201