xref: /rk3399_ARM-atf/lib/compiler-rt/builtins/udivmoddi4.c (revision 0e14a7fbeb3014e719302c9b7f6a24c4030dfaf0)
1*0e14a7fbSdp-arm /* ===-- udivmoddi4.c - Implement __udivmoddi4 -----------------------------===
2*0e14a7fbSdp-arm  *
3*0e14a7fbSdp-arm  *                     The LLVM Compiler Infrastructure
4*0e14a7fbSdp-arm  *
5*0e14a7fbSdp-arm  * This file is dual licensed under the MIT and the University of Illinois Open
6*0e14a7fbSdp-arm  * Source Licenses. See LICENSE.TXT for details.
7*0e14a7fbSdp-arm  *
8*0e14a7fbSdp-arm  * ===----------------------------------------------------------------------===
9*0e14a7fbSdp-arm  *
10*0e14a7fbSdp-arm  * This file implements __udivmoddi4 for the compiler_rt library.
11*0e14a7fbSdp-arm  *
12*0e14a7fbSdp-arm  * ===----------------------------------------------------------------------===
13*0e14a7fbSdp-arm  */
14*0e14a7fbSdp-arm 
15*0e14a7fbSdp-arm #include "int_lib.h"
16*0e14a7fbSdp-arm 
17*0e14a7fbSdp-arm /* Effects: if rem != 0, *rem = a % b
18*0e14a7fbSdp-arm  * Returns: a / b
19*0e14a7fbSdp-arm  */
20*0e14a7fbSdp-arm 
21*0e14a7fbSdp-arm /* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */
22*0e14a7fbSdp-arm 
23*0e14a7fbSdp-arm COMPILER_RT_ABI du_int
24*0e14a7fbSdp-arm __udivmoddi4(du_int a, du_int b, du_int* rem)
25*0e14a7fbSdp-arm {
26*0e14a7fbSdp-arm     const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT;
27*0e14a7fbSdp-arm     const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
28*0e14a7fbSdp-arm     udwords n;
29*0e14a7fbSdp-arm     n.all = a;
30*0e14a7fbSdp-arm     udwords d;
31*0e14a7fbSdp-arm     d.all = b;
32*0e14a7fbSdp-arm     udwords q;
33*0e14a7fbSdp-arm     udwords r;
34*0e14a7fbSdp-arm     unsigned sr;
35*0e14a7fbSdp-arm     /* special cases, X is unknown, K != 0 */
36*0e14a7fbSdp-arm     if (n.s.high == 0)
37*0e14a7fbSdp-arm     {
38*0e14a7fbSdp-arm         if (d.s.high == 0)
39*0e14a7fbSdp-arm         {
40*0e14a7fbSdp-arm             /* 0 X
41*0e14a7fbSdp-arm              * ---
42*0e14a7fbSdp-arm              * 0 X
43*0e14a7fbSdp-arm              */
44*0e14a7fbSdp-arm             if (rem)
45*0e14a7fbSdp-arm                 *rem = n.s.low % d.s.low;
46*0e14a7fbSdp-arm             return n.s.low / d.s.low;
47*0e14a7fbSdp-arm         }
48*0e14a7fbSdp-arm         /* 0 X
49*0e14a7fbSdp-arm          * ---
50*0e14a7fbSdp-arm          * K X
51*0e14a7fbSdp-arm          */
52*0e14a7fbSdp-arm         if (rem)
53*0e14a7fbSdp-arm             *rem = n.s.low;
54*0e14a7fbSdp-arm         return 0;
55*0e14a7fbSdp-arm     }
56*0e14a7fbSdp-arm     /* n.s.high != 0 */
57*0e14a7fbSdp-arm     if (d.s.low == 0)
58*0e14a7fbSdp-arm     {
59*0e14a7fbSdp-arm         if (d.s.high == 0)
60*0e14a7fbSdp-arm         {
61*0e14a7fbSdp-arm             /* K X
62*0e14a7fbSdp-arm              * ---
63*0e14a7fbSdp-arm              * 0 0
64*0e14a7fbSdp-arm              */
65*0e14a7fbSdp-arm             if (rem)
66*0e14a7fbSdp-arm                 *rem = n.s.high % d.s.low;
67*0e14a7fbSdp-arm             return n.s.high / d.s.low;
68*0e14a7fbSdp-arm         }
69*0e14a7fbSdp-arm         /* d.s.high != 0 */
70*0e14a7fbSdp-arm         if (n.s.low == 0)
71*0e14a7fbSdp-arm         {
72*0e14a7fbSdp-arm             /* K 0
73*0e14a7fbSdp-arm              * ---
74*0e14a7fbSdp-arm              * K 0
75*0e14a7fbSdp-arm              */
76*0e14a7fbSdp-arm             if (rem)
77*0e14a7fbSdp-arm             {
78*0e14a7fbSdp-arm                 r.s.high = n.s.high % d.s.high;
79*0e14a7fbSdp-arm                 r.s.low = 0;
80*0e14a7fbSdp-arm                 *rem = r.all;
81*0e14a7fbSdp-arm             }
82*0e14a7fbSdp-arm             return n.s.high / d.s.high;
83*0e14a7fbSdp-arm         }
84*0e14a7fbSdp-arm         /* K K
85*0e14a7fbSdp-arm          * ---
86*0e14a7fbSdp-arm          * K 0
87*0e14a7fbSdp-arm          */
88*0e14a7fbSdp-arm         if ((d.s.high & (d.s.high - 1)) == 0)     /* if d is a power of 2 */
89*0e14a7fbSdp-arm         {
90*0e14a7fbSdp-arm             if (rem)
91*0e14a7fbSdp-arm             {
92*0e14a7fbSdp-arm                 r.s.low = n.s.low;
93*0e14a7fbSdp-arm                 r.s.high = n.s.high & (d.s.high - 1);
94*0e14a7fbSdp-arm                 *rem = r.all;
95*0e14a7fbSdp-arm             }
96*0e14a7fbSdp-arm             return n.s.high >> __builtin_ctz(d.s.high);
97*0e14a7fbSdp-arm         }
98*0e14a7fbSdp-arm         /* K K
99*0e14a7fbSdp-arm          * ---
100*0e14a7fbSdp-arm          * K 0
101*0e14a7fbSdp-arm          */
102*0e14a7fbSdp-arm         sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high);
103*0e14a7fbSdp-arm         /* 0 <= sr <= n_uword_bits - 2 or sr large */
104*0e14a7fbSdp-arm         if (sr > n_uword_bits - 2)
105*0e14a7fbSdp-arm         {
106*0e14a7fbSdp-arm            if (rem)
107*0e14a7fbSdp-arm                 *rem = n.all;
108*0e14a7fbSdp-arm             return 0;
109*0e14a7fbSdp-arm         }
110*0e14a7fbSdp-arm         ++sr;
111*0e14a7fbSdp-arm         /* 1 <= sr <= n_uword_bits - 1 */
112*0e14a7fbSdp-arm         /* q.all = n.all << (n_udword_bits - sr); */
113*0e14a7fbSdp-arm         q.s.low = 0;
114*0e14a7fbSdp-arm         q.s.high = n.s.low << (n_uword_bits - sr);
115*0e14a7fbSdp-arm         /* r.all = n.all >> sr; */
116*0e14a7fbSdp-arm         r.s.high = n.s.high >> sr;
117*0e14a7fbSdp-arm         r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
118*0e14a7fbSdp-arm     }
119*0e14a7fbSdp-arm     else  /* d.s.low != 0 */
120*0e14a7fbSdp-arm     {
121*0e14a7fbSdp-arm         if (d.s.high == 0)
122*0e14a7fbSdp-arm         {
123*0e14a7fbSdp-arm             /* K X
124*0e14a7fbSdp-arm              * ---
125*0e14a7fbSdp-arm              * 0 K
126*0e14a7fbSdp-arm              */
127*0e14a7fbSdp-arm             if ((d.s.low & (d.s.low - 1)) == 0)     /* if d is a power of 2 */
128*0e14a7fbSdp-arm             {
129*0e14a7fbSdp-arm                 if (rem)
130*0e14a7fbSdp-arm                     *rem = n.s.low & (d.s.low - 1);
131*0e14a7fbSdp-arm                 if (d.s.low == 1)
132*0e14a7fbSdp-arm                     return n.all;
133*0e14a7fbSdp-arm                 sr = __builtin_ctz(d.s.low);
134*0e14a7fbSdp-arm                 q.s.high = n.s.high >> sr;
135*0e14a7fbSdp-arm                 q.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
136*0e14a7fbSdp-arm                 return q.all;
137*0e14a7fbSdp-arm             }
138*0e14a7fbSdp-arm             /* K X
139*0e14a7fbSdp-arm              * ---
140*0e14a7fbSdp-arm              * 0 K
141*0e14a7fbSdp-arm              */
142*0e14a7fbSdp-arm             sr = 1 + n_uword_bits + __builtin_clz(d.s.low) - __builtin_clz(n.s.high);
143*0e14a7fbSdp-arm             /* 2 <= sr <= n_udword_bits - 1
144*0e14a7fbSdp-arm              * q.all = n.all << (n_udword_bits - sr);
145*0e14a7fbSdp-arm              * r.all = n.all >> sr;
146*0e14a7fbSdp-arm              */
147*0e14a7fbSdp-arm             if (sr == n_uword_bits)
148*0e14a7fbSdp-arm             {
149*0e14a7fbSdp-arm                 q.s.low = 0;
150*0e14a7fbSdp-arm                 q.s.high = n.s.low;
151*0e14a7fbSdp-arm                 r.s.high = 0;
152*0e14a7fbSdp-arm                 r.s.low = n.s.high;
153*0e14a7fbSdp-arm             }
154*0e14a7fbSdp-arm             else if (sr < n_uword_bits)  // 2 <= sr <= n_uword_bits - 1
155*0e14a7fbSdp-arm             {
156*0e14a7fbSdp-arm                 q.s.low = 0;
157*0e14a7fbSdp-arm                 q.s.high = n.s.low << (n_uword_bits - sr);
158*0e14a7fbSdp-arm                 r.s.high = n.s.high >> sr;
159*0e14a7fbSdp-arm                 r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
160*0e14a7fbSdp-arm             }
161*0e14a7fbSdp-arm             else              // n_uword_bits + 1 <= sr <= n_udword_bits - 1
162*0e14a7fbSdp-arm             {
163*0e14a7fbSdp-arm                 q.s.low = n.s.low << (n_udword_bits - sr);
164*0e14a7fbSdp-arm                 q.s.high = (n.s.high << (n_udword_bits - sr)) |
165*0e14a7fbSdp-arm                            (n.s.low >> (sr - n_uword_bits));
166*0e14a7fbSdp-arm                 r.s.high = 0;
167*0e14a7fbSdp-arm                 r.s.low = n.s.high >> (sr - n_uword_bits);
168*0e14a7fbSdp-arm             }
169*0e14a7fbSdp-arm         }
170*0e14a7fbSdp-arm         else
171*0e14a7fbSdp-arm         {
172*0e14a7fbSdp-arm             /* K X
173*0e14a7fbSdp-arm              * ---
174*0e14a7fbSdp-arm              * K K
175*0e14a7fbSdp-arm              */
176*0e14a7fbSdp-arm             sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high);
177*0e14a7fbSdp-arm             /* 0 <= sr <= n_uword_bits - 1 or sr large */
178*0e14a7fbSdp-arm             if (sr > n_uword_bits - 1)
179*0e14a7fbSdp-arm             {
180*0e14a7fbSdp-arm                 if (rem)
181*0e14a7fbSdp-arm                     *rem = n.all;
182*0e14a7fbSdp-arm                 return 0;
183*0e14a7fbSdp-arm             }
184*0e14a7fbSdp-arm             ++sr;
185*0e14a7fbSdp-arm             /* 1 <= sr <= n_uword_bits */
186*0e14a7fbSdp-arm             /*  q.all = n.all << (n_udword_bits - sr); */
187*0e14a7fbSdp-arm             q.s.low = 0;
188*0e14a7fbSdp-arm             if (sr == n_uword_bits)
189*0e14a7fbSdp-arm             {
190*0e14a7fbSdp-arm                 q.s.high = n.s.low;
191*0e14a7fbSdp-arm                 r.s.high = 0;
192*0e14a7fbSdp-arm                 r.s.low = n.s.high;
193*0e14a7fbSdp-arm             }
194*0e14a7fbSdp-arm             else
195*0e14a7fbSdp-arm             {
196*0e14a7fbSdp-arm                 q.s.high = n.s.low << (n_uword_bits - sr);
197*0e14a7fbSdp-arm                 r.s.high = n.s.high >> sr;
198*0e14a7fbSdp-arm                 r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
199*0e14a7fbSdp-arm             }
200*0e14a7fbSdp-arm         }
201*0e14a7fbSdp-arm     }
202*0e14a7fbSdp-arm     /* Not a special case
203*0e14a7fbSdp-arm      * q and r are initialized with:
204*0e14a7fbSdp-arm      * q.all = n.all << (n_udword_bits - sr);
205*0e14a7fbSdp-arm      * r.all = n.all >> sr;
206*0e14a7fbSdp-arm      * 1 <= sr <= n_udword_bits - 1
207*0e14a7fbSdp-arm      */
208*0e14a7fbSdp-arm     su_int carry = 0;
209*0e14a7fbSdp-arm     for (; sr > 0; --sr)
210*0e14a7fbSdp-arm     {
211*0e14a7fbSdp-arm         /* r:q = ((r:q)  << 1) | carry */
212*0e14a7fbSdp-arm         r.s.high = (r.s.high << 1) | (r.s.low  >> (n_uword_bits - 1));
213*0e14a7fbSdp-arm         r.s.low  = (r.s.low  << 1) | (q.s.high >> (n_uword_bits - 1));
214*0e14a7fbSdp-arm         q.s.high = (q.s.high << 1) | (q.s.low  >> (n_uword_bits - 1));
215*0e14a7fbSdp-arm         q.s.low  = (q.s.low  << 1) | carry;
216*0e14a7fbSdp-arm         /* carry = 0;
217*0e14a7fbSdp-arm          * if (r.all >= d.all)
218*0e14a7fbSdp-arm          * {
219*0e14a7fbSdp-arm          *      r.all -= d.all;
220*0e14a7fbSdp-arm          *      carry = 1;
221*0e14a7fbSdp-arm          * }
222*0e14a7fbSdp-arm          */
223*0e14a7fbSdp-arm         const di_int s = (di_int)(d.all - r.all - 1) >> (n_udword_bits - 1);
224*0e14a7fbSdp-arm         carry = s & 1;
225*0e14a7fbSdp-arm         r.all -= d.all & s;
226*0e14a7fbSdp-arm     }
227*0e14a7fbSdp-arm     q.all = (q.all << 1) | carry;
228*0e14a7fbSdp-arm     if (rem)
229*0e14a7fbSdp-arm         *rem = r.all;
230*0e14a7fbSdp-arm     return q.all;
231*0e14a7fbSdp-arm }
232