xref: /OK3568_Linux_fs/kernel/arch/arm/nwfpe/softfloat-macros (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun
2*4882a593Smuzhiyun/*
3*4882a593Smuzhiyun===============================================================================
4*4882a593Smuzhiyun
5*4882a593SmuzhiyunThis C source fragment is part of the SoftFloat IEC/IEEE Floating-point
6*4882a593SmuzhiyunArithmetic Package, Release 2.
7*4882a593Smuzhiyun
8*4882a593SmuzhiyunWritten by John R. Hauser.  This work was made possible in part by the
9*4882a593SmuzhiyunInternational Computer Science Institute, located at Suite 600, 1947 Center
10*4882a593SmuzhiyunStreet, Berkeley, California 94704.  Funding was partially provided by the
11*4882a593SmuzhiyunNational Science Foundation under grant MIP-9311980.  The original version
12*4882a593Smuzhiyunof this code was written as part of a project to build a fixed-point vector
13*4882a593Smuzhiyunprocessor in collaboration with the University of California at Berkeley,
14*4882a593Smuzhiyunoverseen by Profs. Nelson Morgan and John Wawrzynek.  More information
15*4882a593Smuzhiyunis available through the web page
16*4882a593Smuzhiyunhttp://www.jhauser.us/arithmetic/SoftFloat-2b/SoftFloat-source.txt
17*4882a593Smuzhiyun
18*4882a593SmuzhiyunTHIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
19*4882a593Smuzhiyunhas been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
20*4882a593SmuzhiyunTIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
21*4882a593SmuzhiyunPERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
22*4882a593SmuzhiyunAND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
23*4882a593Smuzhiyun
24*4882a593SmuzhiyunDerivative works are acceptable, even for commercial purposes, so long as
25*4882a593Smuzhiyun(1) they include prominent notice that the work is derivative, and (2) they
26*4882a593Smuzhiyuninclude prominent notice akin to these three paragraphs for those parts of
27*4882a593Smuzhiyunthis code that are retained.
28*4882a593Smuzhiyun
29*4882a593Smuzhiyun===============================================================================
30*4882a593Smuzhiyun*/
31*4882a593Smuzhiyun
32*4882a593Smuzhiyun/*
33*4882a593Smuzhiyun-------------------------------------------------------------------------------
34*4882a593SmuzhiyunShifts `a' right by the number of bits given in `count'.  If any nonzero
35*4882a593Smuzhiyunbits are shifted off, they are ``jammed'' into the least significant bit of
36*4882a593Smuzhiyunthe result by setting the least significant bit to 1.  The value of `count'
37*4882a593Smuzhiyuncan be arbitrarily large; in particular, if `count' is greater than 32, the
38*4882a593Smuzhiyunresult will be either 0 or 1, depending on whether `a' is zero or nonzero.
39*4882a593SmuzhiyunThe result is stored in the location pointed to by `zPtr'.
40*4882a593Smuzhiyun-------------------------------------------------------------------------------
41*4882a593Smuzhiyun*/
42*4882a593SmuzhiyunINLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
43*4882a593Smuzhiyun{
44*4882a593Smuzhiyun    bits32 z;
45*4882a593Smuzhiyun    if ( count == 0 ) {
46*4882a593Smuzhiyun        z = a;
47*4882a593Smuzhiyun    }
48*4882a593Smuzhiyun    else if ( count < 32 ) {
49*4882a593Smuzhiyun        z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
50*4882a593Smuzhiyun    }
51*4882a593Smuzhiyun    else {
52*4882a593Smuzhiyun        z = ( a != 0 );
53*4882a593Smuzhiyun    }
54*4882a593Smuzhiyun    *zPtr = z;
55*4882a593Smuzhiyun}
56*4882a593Smuzhiyun
57*4882a593Smuzhiyun/*
58*4882a593Smuzhiyun-------------------------------------------------------------------------------
59*4882a593SmuzhiyunShifts `a' right by the number of bits given in `count'.  If any nonzero
60*4882a593Smuzhiyunbits are shifted off, they are ``jammed'' into the least significant bit of
61*4882a593Smuzhiyunthe result by setting the least significant bit to 1.  The value of `count'
62*4882a593Smuzhiyuncan be arbitrarily large; in particular, if `count' is greater than 64, the
63*4882a593Smuzhiyunresult will be either 0 or 1, depending on whether `a' is zero or nonzero.
64*4882a593SmuzhiyunThe result is stored in the location pointed to by `zPtr'.
65*4882a593Smuzhiyun-------------------------------------------------------------------------------
66*4882a593Smuzhiyun*/
67*4882a593SmuzhiyunINLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr )
68*4882a593Smuzhiyun{
69*4882a593Smuzhiyun    bits64 z;
70*4882a593Smuzhiyun
71*4882a593Smuzhiyun __asm__("@shift64RightJamming -- start");
72*4882a593Smuzhiyun    if ( count == 0 ) {
73*4882a593Smuzhiyun        z = a;
74*4882a593Smuzhiyun    }
75*4882a593Smuzhiyun    else if ( count < 64 ) {
76*4882a593Smuzhiyun        z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
77*4882a593Smuzhiyun    }
78*4882a593Smuzhiyun    else {
79*4882a593Smuzhiyun        z = ( a != 0 );
80*4882a593Smuzhiyun    }
81*4882a593Smuzhiyun __asm__("@shift64RightJamming -- end");
82*4882a593Smuzhiyun    *zPtr = z;
83*4882a593Smuzhiyun}
84*4882a593Smuzhiyun
85*4882a593Smuzhiyun/*
86*4882a593Smuzhiyun-------------------------------------------------------------------------------
87*4882a593SmuzhiyunShifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
88*4882a593Smuzhiyun_plus_ the number of bits given in `count'.  The shifted result is at most
89*4882a593Smuzhiyun64 nonzero bits; this is stored at the location pointed to by `z0Ptr'.  The
90*4882a593Smuzhiyunbits shifted off form a second 64-bit result as follows:  The _last_ bit
91*4882a593Smuzhiyunshifted off is the most-significant bit of the extra result, and the other
92*4882a593Smuzhiyun63 bits of the extra result are all zero if and only if _all_but_the_last_
93*4882a593Smuzhiyunbits shifted off were all zero.  This extra result is stored in the location
94*4882a593Smuzhiyunpointed to by `z1Ptr'.  The value of `count' can be arbitrarily large.
95*4882a593Smuzhiyun    (This routine makes more sense if `a0' and `a1' are considered to form a
96*4882a593Smuzhiyunfixed-point value with binary point between `a0' and `a1'.  This fixed-point
97*4882a593Smuzhiyunvalue is shifted right by the number of bits given in `count', and the
98*4882a593Smuzhiyuninteger part of the result is returned at the location pointed to by
99*4882a593Smuzhiyun`z0Ptr'.  The fractional part of the result may be slightly corrupted as
100*4882a593Smuzhiyundescribed above, and is returned at the location pointed to by `z1Ptr'.)
101*4882a593Smuzhiyun-------------------------------------------------------------------------------
102*4882a593Smuzhiyun*/
103*4882a593SmuzhiyunINLINE void
104*4882a593Smuzhiyun shift64ExtraRightJamming(
105*4882a593Smuzhiyun     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
106*4882a593Smuzhiyun{
107*4882a593Smuzhiyun    bits64 z0, z1;
108*4882a593Smuzhiyun    int8 negCount = ( - count ) & 63;
109*4882a593Smuzhiyun
110*4882a593Smuzhiyun    if ( count == 0 ) {
111*4882a593Smuzhiyun        z1 = a1;
112*4882a593Smuzhiyun        z0 = a0;
113*4882a593Smuzhiyun    }
114*4882a593Smuzhiyun    else if ( count < 64 ) {
115*4882a593Smuzhiyun        z1 = ( a0<<negCount ) | ( a1 != 0 );
116*4882a593Smuzhiyun        z0 = a0>>count;
117*4882a593Smuzhiyun    }
118*4882a593Smuzhiyun    else {
119*4882a593Smuzhiyun        if ( count == 64 ) {
120*4882a593Smuzhiyun            z1 = a0 | ( a1 != 0 );
121*4882a593Smuzhiyun        }
122*4882a593Smuzhiyun        else {
123*4882a593Smuzhiyun            z1 = ( ( a0 | a1 ) != 0 );
124*4882a593Smuzhiyun        }
125*4882a593Smuzhiyun        z0 = 0;
126*4882a593Smuzhiyun    }
127*4882a593Smuzhiyun    *z1Ptr = z1;
128*4882a593Smuzhiyun    *z0Ptr = z0;
129*4882a593Smuzhiyun
130*4882a593Smuzhiyun}
131*4882a593Smuzhiyun
132*4882a593Smuzhiyun/*
133*4882a593Smuzhiyun-------------------------------------------------------------------------------
134*4882a593SmuzhiyunShifts the 128-bit value formed by concatenating `a0' and `a1' right by the
135*4882a593Smuzhiyunnumber of bits given in `count'.  Any bits shifted off are lost.  The value
136*4882a593Smuzhiyunof `count' can be arbitrarily large; in particular, if `count' is greater
137*4882a593Smuzhiyunthan 128, the result will be 0.  The result is broken into two 64-bit pieces
138*4882a593Smuzhiyunwhich are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
139*4882a593Smuzhiyun-------------------------------------------------------------------------------
140*4882a593Smuzhiyun*/
141*4882a593SmuzhiyunINLINE void
142*4882a593Smuzhiyun shift128Right(
143*4882a593Smuzhiyun     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
144*4882a593Smuzhiyun{
145*4882a593Smuzhiyun    bits64 z0, z1;
146*4882a593Smuzhiyun    int8 negCount = ( - count ) & 63;
147*4882a593Smuzhiyun
148*4882a593Smuzhiyun    if ( count == 0 ) {
149*4882a593Smuzhiyun        z1 = a1;
150*4882a593Smuzhiyun        z0 = a0;
151*4882a593Smuzhiyun    }
152*4882a593Smuzhiyun    else if ( count < 64 ) {
153*4882a593Smuzhiyun        z1 = ( a0<<negCount ) | ( a1>>count );
154*4882a593Smuzhiyun        z0 = a0>>count;
155*4882a593Smuzhiyun    }
156*4882a593Smuzhiyun    else {
157*4882a593Smuzhiyun        z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0;
158*4882a593Smuzhiyun        z0 = 0;
159*4882a593Smuzhiyun    }
160*4882a593Smuzhiyun    *z1Ptr = z1;
161*4882a593Smuzhiyun    *z0Ptr = z0;
162*4882a593Smuzhiyun
163*4882a593Smuzhiyun}
164*4882a593Smuzhiyun
165*4882a593Smuzhiyun/*
166*4882a593Smuzhiyun-------------------------------------------------------------------------------
167*4882a593SmuzhiyunShifts the 128-bit value formed by concatenating `a0' and `a1' right by the
168*4882a593Smuzhiyunnumber of bits given in `count'.  If any nonzero bits are shifted off, they
169*4882a593Smuzhiyunare ``jammed'' into the least significant bit of the result by setting the
170*4882a593Smuzhiyunleast significant bit to 1.  The value of `count' can be arbitrarily large;
171*4882a593Smuzhiyunin particular, if `count' is greater than 128, the result will be either 0
172*4882a593Smuzhiyunor 1, depending on whether the concatenation of `a0' and `a1' is zero or
173*4882a593Smuzhiyunnonzero.  The result is broken into two 64-bit pieces which are stored at
174*4882a593Smuzhiyunthe locations pointed to by `z0Ptr' and `z1Ptr'.
175*4882a593Smuzhiyun-------------------------------------------------------------------------------
176*4882a593Smuzhiyun*/
177*4882a593SmuzhiyunINLINE void
178*4882a593Smuzhiyun shift128RightJamming(
179*4882a593Smuzhiyun     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
180*4882a593Smuzhiyun{
181*4882a593Smuzhiyun    bits64 z0, z1;
182*4882a593Smuzhiyun    int8 negCount = ( - count ) & 63;
183*4882a593Smuzhiyun
184*4882a593Smuzhiyun    if ( count == 0 ) {
185*4882a593Smuzhiyun        z1 = a1;
186*4882a593Smuzhiyun        z0 = a0;
187*4882a593Smuzhiyun    }
188*4882a593Smuzhiyun    else if ( count < 64 ) {
189*4882a593Smuzhiyun        z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
190*4882a593Smuzhiyun        z0 = a0>>count;
191*4882a593Smuzhiyun    }
192*4882a593Smuzhiyun    else {
193*4882a593Smuzhiyun        if ( count == 64 ) {
194*4882a593Smuzhiyun            z1 = a0 | ( a1 != 0 );
195*4882a593Smuzhiyun        }
196*4882a593Smuzhiyun        else if ( count < 128 ) {
197*4882a593Smuzhiyun            z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
198*4882a593Smuzhiyun        }
199*4882a593Smuzhiyun        else {
200*4882a593Smuzhiyun            z1 = ( ( a0 | a1 ) != 0 );
201*4882a593Smuzhiyun        }
202*4882a593Smuzhiyun        z0 = 0;
203*4882a593Smuzhiyun    }
204*4882a593Smuzhiyun    *z1Ptr = z1;
205*4882a593Smuzhiyun    *z0Ptr = z0;
206*4882a593Smuzhiyun
207*4882a593Smuzhiyun}
208*4882a593Smuzhiyun
209*4882a593Smuzhiyun/*
210*4882a593Smuzhiyun-------------------------------------------------------------------------------
211*4882a593SmuzhiyunShifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
212*4882a593Smuzhiyunby 64 _plus_ the number of bits given in `count'.  The shifted result is
213*4882a593Smuzhiyunat most 128 nonzero bits; these are broken into two 64-bit pieces which are
214*4882a593Smuzhiyunstored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
215*4882a593Smuzhiyunoff form a third 64-bit result as follows:  The _last_ bit shifted off is
216*4882a593Smuzhiyunthe most-significant bit of the extra result, and the other 63 bits of the
217*4882a593Smuzhiyunextra result are all zero if and only if _all_but_the_last_ bits shifted off
218*4882a593Smuzhiyunwere all zero.  This extra result is stored in the location pointed to by
219*4882a593Smuzhiyun`z2Ptr'.  The value of `count' can be arbitrarily large.
220*4882a593Smuzhiyun    (This routine makes more sense if `a0', `a1', and `a2' are considered
221*4882a593Smuzhiyunto form a fixed-point value with binary point between `a1' and `a2'.  This
222*4882a593Smuzhiyunfixed-point value is shifted right by the number of bits given in `count',
223*4882a593Smuzhiyunand the integer part of the result is returned at the locations pointed to
224*4882a593Smuzhiyunby `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
225*4882a593Smuzhiyuncorrupted as described above, and is returned at the location pointed to by
226*4882a593Smuzhiyun`z2Ptr'.)
227*4882a593Smuzhiyun-------------------------------------------------------------------------------
228*4882a593Smuzhiyun*/
229*4882a593SmuzhiyunINLINE void
230*4882a593Smuzhiyun shift128ExtraRightJamming(
231*4882a593Smuzhiyun     bits64 a0,
232*4882a593Smuzhiyun     bits64 a1,
233*4882a593Smuzhiyun     bits64 a2,
234*4882a593Smuzhiyun     int16 count,
235*4882a593Smuzhiyun     bits64 *z0Ptr,
236*4882a593Smuzhiyun     bits64 *z1Ptr,
237*4882a593Smuzhiyun     bits64 *z2Ptr
238*4882a593Smuzhiyun )
239*4882a593Smuzhiyun{
240*4882a593Smuzhiyun    bits64 z0, z1, z2;
241*4882a593Smuzhiyun    int8 negCount = ( - count ) & 63;
242*4882a593Smuzhiyun
243*4882a593Smuzhiyun    if ( count == 0 ) {
244*4882a593Smuzhiyun        z2 = a2;
245*4882a593Smuzhiyun        z1 = a1;
246*4882a593Smuzhiyun        z0 = a0;
247*4882a593Smuzhiyun    }
248*4882a593Smuzhiyun    else {
249*4882a593Smuzhiyun        if ( count < 64 ) {
250*4882a593Smuzhiyun            z2 = a1<<negCount;
251*4882a593Smuzhiyun            z1 = ( a0<<negCount ) | ( a1>>count );
252*4882a593Smuzhiyun            z0 = a0>>count;
253*4882a593Smuzhiyun        }
254*4882a593Smuzhiyun        else {
255*4882a593Smuzhiyun            if ( count == 64 ) {
256*4882a593Smuzhiyun                z2 = a1;
257*4882a593Smuzhiyun                z1 = a0;
258*4882a593Smuzhiyun            }
259*4882a593Smuzhiyun            else {
260*4882a593Smuzhiyun                a2 |= a1;
261*4882a593Smuzhiyun                if ( count < 128 ) {
262*4882a593Smuzhiyun                    z2 = a0<<negCount;
263*4882a593Smuzhiyun                    z1 = a0>>( count & 63 );
264*4882a593Smuzhiyun                }
265*4882a593Smuzhiyun                else {
266*4882a593Smuzhiyun                    z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
267*4882a593Smuzhiyun                    z1 = 0;
268*4882a593Smuzhiyun                }
269*4882a593Smuzhiyun            }
270*4882a593Smuzhiyun            z0 = 0;
271*4882a593Smuzhiyun        }
272*4882a593Smuzhiyun        z2 |= ( a2 != 0 );
273*4882a593Smuzhiyun    }
274*4882a593Smuzhiyun    *z2Ptr = z2;
275*4882a593Smuzhiyun    *z1Ptr = z1;
276*4882a593Smuzhiyun    *z0Ptr = z0;
277*4882a593Smuzhiyun
278*4882a593Smuzhiyun}
279*4882a593Smuzhiyun
280*4882a593Smuzhiyun/*
281*4882a593Smuzhiyun-------------------------------------------------------------------------------
282*4882a593SmuzhiyunShifts the 128-bit value formed by concatenating `a0' and `a1' left by the
283*4882a593Smuzhiyunnumber of bits given in `count'.  Any bits shifted off are lost.  The value
284*4882a593Smuzhiyunof `count' must be less than 64.  The result is broken into two 64-bit
285*4882a593Smuzhiyunpieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
286*4882a593Smuzhiyun-------------------------------------------------------------------------------
287*4882a593Smuzhiyun*/
288*4882a593SmuzhiyunINLINE void
289*4882a593Smuzhiyun shortShift128Left(
290*4882a593Smuzhiyun     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
291*4882a593Smuzhiyun{
292*4882a593Smuzhiyun
293*4882a593Smuzhiyun    *z1Ptr = a1<<count;
294*4882a593Smuzhiyun    *z0Ptr =
295*4882a593Smuzhiyun        ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
296*4882a593Smuzhiyun
297*4882a593Smuzhiyun}
298*4882a593Smuzhiyun
299*4882a593Smuzhiyun/*
300*4882a593Smuzhiyun-------------------------------------------------------------------------------
301*4882a593SmuzhiyunShifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
302*4882a593Smuzhiyunby the number of bits given in `count'.  Any bits shifted off are lost.
303*4882a593SmuzhiyunThe value of `count' must be less than 64.  The result is broken into three
304*4882a593Smuzhiyun64-bit pieces which are stored at the locations pointed to by `z0Ptr',
305*4882a593Smuzhiyun`z1Ptr', and `z2Ptr'.
306*4882a593Smuzhiyun-------------------------------------------------------------------------------
307*4882a593Smuzhiyun*/
308*4882a593SmuzhiyunINLINE void
309*4882a593Smuzhiyun shortShift192Left(
310*4882a593Smuzhiyun     bits64 a0,
311*4882a593Smuzhiyun     bits64 a1,
312*4882a593Smuzhiyun     bits64 a2,
313*4882a593Smuzhiyun     int16 count,
314*4882a593Smuzhiyun     bits64 *z0Ptr,
315*4882a593Smuzhiyun     bits64 *z1Ptr,
316*4882a593Smuzhiyun     bits64 *z2Ptr
317*4882a593Smuzhiyun )
318*4882a593Smuzhiyun{
319*4882a593Smuzhiyun    bits64 z0, z1, z2;
320*4882a593Smuzhiyun    int8 negCount;
321*4882a593Smuzhiyun
322*4882a593Smuzhiyun    z2 = a2<<count;
323*4882a593Smuzhiyun    z1 = a1<<count;
324*4882a593Smuzhiyun    z0 = a0<<count;
325*4882a593Smuzhiyun    if ( 0 < count ) {
326*4882a593Smuzhiyun        negCount = ( ( - count ) & 63 );
327*4882a593Smuzhiyun        z1 |= a2>>negCount;
328*4882a593Smuzhiyun        z0 |= a1>>negCount;
329*4882a593Smuzhiyun    }
330*4882a593Smuzhiyun    *z2Ptr = z2;
331*4882a593Smuzhiyun    *z1Ptr = z1;
332*4882a593Smuzhiyun    *z0Ptr = z0;
333*4882a593Smuzhiyun
334*4882a593Smuzhiyun}
335*4882a593Smuzhiyun
336*4882a593Smuzhiyun/*
337*4882a593Smuzhiyun-------------------------------------------------------------------------------
338*4882a593SmuzhiyunAdds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
339*4882a593Smuzhiyunvalue formed by concatenating `b0' and `b1'.  Addition is modulo 2^128, so
340*4882a593Smuzhiyunany carry out is lost.  The result is broken into two 64-bit pieces which
341*4882a593Smuzhiyunare stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
342*4882a593Smuzhiyun-------------------------------------------------------------------------------
343*4882a593Smuzhiyun*/
344*4882a593SmuzhiyunINLINE void
345*4882a593Smuzhiyun add128(
346*4882a593Smuzhiyun     bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
347*4882a593Smuzhiyun{
348*4882a593Smuzhiyun    bits64 z1;
349*4882a593Smuzhiyun
350*4882a593Smuzhiyun    z1 = a1 + b1;
351*4882a593Smuzhiyun    *z1Ptr = z1;
352*4882a593Smuzhiyun    *z0Ptr = a0 + b0 + ( z1 < a1 );
353*4882a593Smuzhiyun
354*4882a593Smuzhiyun}
355*4882a593Smuzhiyun
356*4882a593Smuzhiyun/*
357*4882a593Smuzhiyun-------------------------------------------------------------------------------
358*4882a593SmuzhiyunAdds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
359*4882a593Smuzhiyun192-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
360*4882a593Smuzhiyunmodulo 2^192, so any carry out is lost.  The result is broken into three
361*4882a593Smuzhiyun64-bit pieces which are stored at the locations pointed to by `z0Ptr',
362*4882a593Smuzhiyun`z1Ptr', and `z2Ptr'.
363*4882a593Smuzhiyun-------------------------------------------------------------------------------
364*4882a593Smuzhiyun*/
365*4882a593SmuzhiyunINLINE void
366*4882a593Smuzhiyun add192(
367*4882a593Smuzhiyun     bits64 a0,
368*4882a593Smuzhiyun     bits64 a1,
369*4882a593Smuzhiyun     bits64 a2,
370*4882a593Smuzhiyun     bits64 b0,
371*4882a593Smuzhiyun     bits64 b1,
372*4882a593Smuzhiyun     bits64 b2,
373*4882a593Smuzhiyun     bits64 *z0Ptr,
374*4882a593Smuzhiyun     bits64 *z1Ptr,
375*4882a593Smuzhiyun     bits64 *z2Ptr
376*4882a593Smuzhiyun )
377*4882a593Smuzhiyun{
378*4882a593Smuzhiyun    bits64 z0, z1, z2;
379*4882a593Smuzhiyun    int8 carry0, carry1;
380*4882a593Smuzhiyun
381*4882a593Smuzhiyun    z2 = a2 + b2;
382*4882a593Smuzhiyun    carry1 = ( z2 < a2 );
383*4882a593Smuzhiyun    z1 = a1 + b1;
384*4882a593Smuzhiyun    carry0 = ( z1 < a1 );
385*4882a593Smuzhiyun    z0 = a0 + b0;
386*4882a593Smuzhiyun    z1 += carry1;
387*4882a593Smuzhiyun    z0 += ( z1 < carry1 );
388*4882a593Smuzhiyun    z0 += carry0;
389*4882a593Smuzhiyun    *z2Ptr = z2;
390*4882a593Smuzhiyun    *z1Ptr = z1;
391*4882a593Smuzhiyun    *z0Ptr = z0;
392*4882a593Smuzhiyun
393*4882a593Smuzhiyun}
394*4882a593Smuzhiyun
395*4882a593Smuzhiyun/*
396*4882a593Smuzhiyun-------------------------------------------------------------------------------
397*4882a593SmuzhiyunSubtracts the 128-bit value formed by concatenating `b0' and `b1' from the
398*4882a593Smuzhiyun128-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
399*4882a593Smuzhiyun2^128, so any borrow out (carry out) is lost.  The result is broken into two
400*4882a593Smuzhiyun64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
401*4882a593Smuzhiyun`z1Ptr'.
402*4882a593Smuzhiyun-------------------------------------------------------------------------------
403*4882a593Smuzhiyun*/
404*4882a593SmuzhiyunINLINE void
405*4882a593Smuzhiyun sub128(
406*4882a593Smuzhiyun     bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
407*4882a593Smuzhiyun{
408*4882a593Smuzhiyun
409*4882a593Smuzhiyun    *z1Ptr = a1 - b1;
410*4882a593Smuzhiyun    *z0Ptr = a0 - b0 - ( a1 < b1 );
411*4882a593Smuzhiyun
412*4882a593Smuzhiyun}
413*4882a593Smuzhiyun
414*4882a593Smuzhiyun/*
415*4882a593Smuzhiyun-------------------------------------------------------------------------------
416*4882a593SmuzhiyunSubtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
417*4882a593Smuzhiyunfrom the 192-bit value formed by concatenating `a0', `a1', and `a2'.
418*4882a593SmuzhiyunSubtraction is modulo 2^192, so any borrow out (carry out) is lost.  The
419*4882a593Smuzhiyunresult is broken into three 64-bit pieces which are stored at the locations
420*4882a593Smuzhiyunpointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
421*4882a593Smuzhiyun-------------------------------------------------------------------------------
422*4882a593Smuzhiyun*/
423*4882a593SmuzhiyunINLINE void
424*4882a593Smuzhiyun sub192(
425*4882a593Smuzhiyun     bits64 a0,
426*4882a593Smuzhiyun     bits64 a1,
427*4882a593Smuzhiyun     bits64 a2,
428*4882a593Smuzhiyun     bits64 b0,
429*4882a593Smuzhiyun     bits64 b1,
430*4882a593Smuzhiyun     bits64 b2,
431*4882a593Smuzhiyun     bits64 *z0Ptr,
432*4882a593Smuzhiyun     bits64 *z1Ptr,
433*4882a593Smuzhiyun     bits64 *z2Ptr
434*4882a593Smuzhiyun )
435*4882a593Smuzhiyun{
436*4882a593Smuzhiyun    bits64 z0, z1, z2;
437*4882a593Smuzhiyun    int8 borrow0, borrow1;
438*4882a593Smuzhiyun
439*4882a593Smuzhiyun    z2 = a2 - b2;
440*4882a593Smuzhiyun    borrow1 = ( a2 < b2 );
441*4882a593Smuzhiyun    z1 = a1 - b1;
442*4882a593Smuzhiyun    borrow0 = ( a1 < b1 );
443*4882a593Smuzhiyun    z0 = a0 - b0;
444*4882a593Smuzhiyun    z0 -= ( z1 < borrow1 );
445*4882a593Smuzhiyun    z1 -= borrow1;
446*4882a593Smuzhiyun    z0 -= borrow0;
447*4882a593Smuzhiyun    *z2Ptr = z2;
448*4882a593Smuzhiyun    *z1Ptr = z1;
449*4882a593Smuzhiyun    *z0Ptr = z0;
450*4882a593Smuzhiyun
451*4882a593Smuzhiyun}
452*4882a593Smuzhiyun
453*4882a593Smuzhiyun/*
454*4882a593Smuzhiyun-------------------------------------------------------------------------------
455*4882a593SmuzhiyunMultiplies `a' by `b' to obtain a 128-bit product.  The product is broken
456*4882a593Smuzhiyuninto two 64-bit pieces which are stored at the locations pointed to by
457*4882a593Smuzhiyun`z0Ptr' and `z1Ptr'.
458*4882a593Smuzhiyun-------------------------------------------------------------------------------
459*4882a593Smuzhiyun*/
460*4882a593SmuzhiyunINLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr )
461*4882a593Smuzhiyun{
462*4882a593Smuzhiyun    bits32 aHigh, aLow, bHigh, bLow;
463*4882a593Smuzhiyun    bits64 z0, zMiddleA, zMiddleB, z1;
464*4882a593Smuzhiyun
465*4882a593Smuzhiyun    aLow = a;
466*4882a593Smuzhiyun    aHigh = a>>32;
467*4882a593Smuzhiyun    bLow = b;
468*4882a593Smuzhiyun    bHigh = b>>32;
469*4882a593Smuzhiyun    z1 = ( (bits64) aLow ) * bLow;
470*4882a593Smuzhiyun    zMiddleA = ( (bits64) aLow ) * bHigh;
471*4882a593Smuzhiyun    zMiddleB = ( (bits64) aHigh ) * bLow;
472*4882a593Smuzhiyun    z0 = ( (bits64) aHigh ) * bHigh;
473*4882a593Smuzhiyun    zMiddleA += zMiddleB;
474*4882a593Smuzhiyun    z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
475*4882a593Smuzhiyun    zMiddleA <<= 32;
476*4882a593Smuzhiyun    z1 += zMiddleA;
477*4882a593Smuzhiyun    z0 += ( z1 < zMiddleA );
478*4882a593Smuzhiyun    *z1Ptr = z1;
479*4882a593Smuzhiyun    *z0Ptr = z0;
480*4882a593Smuzhiyun
481*4882a593Smuzhiyun}
482*4882a593Smuzhiyun
483*4882a593Smuzhiyun/*
484*4882a593Smuzhiyun-------------------------------------------------------------------------------
485*4882a593SmuzhiyunMultiplies the 128-bit value formed by concatenating `a0' and `a1' by `b' to
486*4882a593Smuzhiyunobtain a 192-bit product.  The product is broken into three 64-bit pieces
487*4882a593Smuzhiyunwhich are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
488*4882a593Smuzhiyun`z2Ptr'.
489*4882a593Smuzhiyun-------------------------------------------------------------------------------
490*4882a593Smuzhiyun*/
491*4882a593SmuzhiyunINLINE void
492*4882a593Smuzhiyun mul128By64To192(
493*4882a593Smuzhiyun     bits64 a0,
494*4882a593Smuzhiyun     bits64 a1,
495*4882a593Smuzhiyun     bits64 b,
496*4882a593Smuzhiyun     bits64 *z0Ptr,
497*4882a593Smuzhiyun     bits64 *z1Ptr,
498*4882a593Smuzhiyun     bits64 *z2Ptr
499*4882a593Smuzhiyun )
500*4882a593Smuzhiyun{
501*4882a593Smuzhiyun    bits64 z0, z1, z2, more1;
502*4882a593Smuzhiyun
503*4882a593Smuzhiyun    mul64To128( a1, b, &z1, &z2 );
504*4882a593Smuzhiyun    mul64To128( a0, b, &z0, &more1 );
505*4882a593Smuzhiyun    add128( z0, more1, 0, z1, &z0, &z1 );
506*4882a593Smuzhiyun    *z2Ptr = z2;
507*4882a593Smuzhiyun    *z1Ptr = z1;
508*4882a593Smuzhiyun    *z0Ptr = z0;
509*4882a593Smuzhiyun
510*4882a593Smuzhiyun}
511*4882a593Smuzhiyun
512*4882a593Smuzhiyun/*
513*4882a593Smuzhiyun-------------------------------------------------------------------------------
514*4882a593SmuzhiyunMultiplies the 128-bit value formed by concatenating `a0' and `a1' to the
515*4882a593Smuzhiyun128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
516*4882a593Smuzhiyunproduct.  The product is broken into four 64-bit pieces which are stored at
517*4882a593Smuzhiyunthe locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
518*4882a593Smuzhiyun-------------------------------------------------------------------------------
519*4882a593Smuzhiyun*/
520*4882a593SmuzhiyunINLINE void
521*4882a593Smuzhiyun mul128To256(
522*4882a593Smuzhiyun     bits64 a0,
523*4882a593Smuzhiyun     bits64 a1,
524*4882a593Smuzhiyun     bits64 b0,
525*4882a593Smuzhiyun     bits64 b1,
526*4882a593Smuzhiyun     bits64 *z0Ptr,
527*4882a593Smuzhiyun     bits64 *z1Ptr,
528*4882a593Smuzhiyun     bits64 *z2Ptr,
529*4882a593Smuzhiyun     bits64 *z3Ptr
530*4882a593Smuzhiyun )
531*4882a593Smuzhiyun{
532*4882a593Smuzhiyun    bits64 z0, z1, z2, z3;
533*4882a593Smuzhiyun    bits64 more1, more2;
534*4882a593Smuzhiyun
535*4882a593Smuzhiyun    mul64To128( a1, b1, &z2, &z3 );
536*4882a593Smuzhiyun    mul64To128( a1, b0, &z1, &more2 );
537*4882a593Smuzhiyun    add128( z1, more2, 0, z2, &z1, &z2 );
538*4882a593Smuzhiyun    mul64To128( a0, b0, &z0, &more1 );
539*4882a593Smuzhiyun    add128( z0, more1, 0, z1, &z0, &z1 );
540*4882a593Smuzhiyun    mul64To128( a0, b1, &more1, &more2 );
541*4882a593Smuzhiyun    add128( more1, more2, 0, z2, &more1, &z2 );
542*4882a593Smuzhiyun    add128( z0, z1, 0, more1, &z0, &z1 );
543*4882a593Smuzhiyun    *z3Ptr = z3;
544*4882a593Smuzhiyun    *z2Ptr = z2;
545*4882a593Smuzhiyun    *z1Ptr = z1;
546*4882a593Smuzhiyun    *z0Ptr = z0;
547*4882a593Smuzhiyun
548*4882a593Smuzhiyun}
549*4882a593Smuzhiyun
550*4882a593Smuzhiyun/*
551*4882a593Smuzhiyun-------------------------------------------------------------------------------
552*4882a593SmuzhiyunReturns an approximation to the 64-bit integer quotient obtained by dividing
553*4882a593Smuzhiyun`b' into the 128-bit value formed by concatenating `a0' and `a1'.  The
554*4882a593Smuzhiyundivisor `b' must be at least 2^63.  If q is the exact quotient truncated
555*4882a593Smuzhiyuntoward zero, the approximation returned lies between q and q + 2 inclusive.
556*4882a593SmuzhiyunIf the exact quotient q is larger than 64 bits, the maximum positive 64-bit
557*4882a593Smuzhiyununsigned integer is returned.
558*4882a593Smuzhiyun-------------------------------------------------------------------------------
559*4882a593Smuzhiyun*/
560*4882a593Smuzhiyunstatic bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b )
561*4882a593Smuzhiyun{
562*4882a593Smuzhiyun    bits64 b0, b1;
563*4882a593Smuzhiyun    bits64 rem0, rem1, term0, term1;
564*4882a593Smuzhiyun    bits64 z;
565*4882a593Smuzhiyun    if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
566*4882a593Smuzhiyun    b0 = b>>32;  /* hence b0 is 32 bits wide now */
567*4882a593Smuzhiyun    if ( b0<<32 <= a0 ) {
568*4882a593Smuzhiyun        z = LIT64( 0xFFFFFFFF00000000 );
569*4882a593Smuzhiyun    }  else {
570*4882a593Smuzhiyun        z = a0;
571*4882a593Smuzhiyun        do_div( z, b0 );
572*4882a593Smuzhiyun        z <<= 32;
573*4882a593Smuzhiyun    }
574*4882a593Smuzhiyun    mul64To128( b, z, &term0, &term1 );
575*4882a593Smuzhiyun    sub128( a0, a1, term0, term1, &rem0, &rem1 );
576*4882a593Smuzhiyun    while ( ( (sbits64) rem0 ) < 0 ) {
577*4882a593Smuzhiyun        z -= LIT64( 0x100000000 );
578*4882a593Smuzhiyun        b1 = b<<32;
579*4882a593Smuzhiyun        add128( rem0, rem1, b0, b1, &rem0, &rem1 );
580*4882a593Smuzhiyun    }
581*4882a593Smuzhiyun    rem0 = ( rem0<<32 ) | ( rem1>>32 );
582*4882a593Smuzhiyun    if ( b0<<32 <= rem0 ) {
583*4882a593Smuzhiyun        z |= 0xFFFFFFFF;
584*4882a593Smuzhiyun    } else {
585*4882a593Smuzhiyun        do_div( rem0, b0 );
586*4882a593Smuzhiyun        z |= rem0;
587*4882a593Smuzhiyun    }
588*4882a593Smuzhiyun    return z;
589*4882a593Smuzhiyun
590*4882a593Smuzhiyun}
591*4882a593Smuzhiyun
592*4882a593Smuzhiyun/*
593*4882a593Smuzhiyun-------------------------------------------------------------------------------
594*4882a593SmuzhiyunReturns an approximation to the square root of the 32-bit significand given
595*4882a593Smuzhiyunby `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
596*4882a593Smuzhiyun`aExp' (the least significant bit) is 1, the integer returned approximates
597*4882a593Smuzhiyun2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
598*4882a593Smuzhiyunis 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
599*4882a593Smuzhiyuncase, the approximation returned lies strictly within +/-2 of the exact
600*4882a593Smuzhiyunvalue.
601*4882a593Smuzhiyun-------------------------------------------------------------------------------
602*4882a593Smuzhiyun*/
603*4882a593Smuzhiyunstatic bits32 estimateSqrt32( int16 aExp, bits32 a )
604*4882a593Smuzhiyun{
605*4882a593Smuzhiyun    static const bits16 sqrtOddAdjustments[] = {
606*4882a593Smuzhiyun        0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
607*4882a593Smuzhiyun        0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
608*4882a593Smuzhiyun    };
609*4882a593Smuzhiyun    static const bits16 sqrtEvenAdjustments[] = {
610*4882a593Smuzhiyun        0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
611*4882a593Smuzhiyun        0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
612*4882a593Smuzhiyun    };
613*4882a593Smuzhiyun    int8 index;
614*4882a593Smuzhiyun    bits32 z;
615*4882a593Smuzhiyun    bits64 A;
616*4882a593Smuzhiyun
617*4882a593Smuzhiyun    index = ( a>>27 ) & 15;
618*4882a593Smuzhiyun    if ( aExp & 1 ) {
619*4882a593Smuzhiyun        z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ];
620*4882a593Smuzhiyun        z = ( ( a / z )<<14 ) + ( z<<15 );
621*4882a593Smuzhiyun        a >>= 1;
622*4882a593Smuzhiyun    }
623*4882a593Smuzhiyun    else {
624*4882a593Smuzhiyun        z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ];
625*4882a593Smuzhiyun        z = a / z + z;
626*4882a593Smuzhiyun        z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
627*4882a593Smuzhiyun        if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 );
628*4882a593Smuzhiyun    }
629*4882a593Smuzhiyun    A = ( (bits64) a )<<31;
630*4882a593Smuzhiyun    do_div( A, z );
631*4882a593Smuzhiyun    return ( (bits32) A ) + ( z>>1 );
632*4882a593Smuzhiyun
633*4882a593Smuzhiyun}
634*4882a593Smuzhiyun
635*4882a593Smuzhiyun/*
636*4882a593Smuzhiyun-------------------------------------------------------------------------------
637*4882a593SmuzhiyunReturns the number of leading 0 bits before the most-significant 1 bit
638*4882a593Smuzhiyunof `a'.  If `a' is zero, 32 is returned.
639*4882a593Smuzhiyun-------------------------------------------------------------------------------
640*4882a593Smuzhiyun*/
641*4882a593Smuzhiyunstatic int8 countLeadingZeros32( bits32 a )
642*4882a593Smuzhiyun{
643*4882a593Smuzhiyun    static const int8 countLeadingZerosHigh[] = {
644*4882a593Smuzhiyun        8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
645*4882a593Smuzhiyun        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
646*4882a593Smuzhiyun        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
647*4882a593Smuzhiyun        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
648*4882a593Smuzhiyun        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
649*4882a593Smuzhiyun        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
650*4882a593Smuzhiyun        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
651*4882a593Smuzhiyun        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
652*4882a593Smuzhiyun        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
653*4882a593Smuzhiyun        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
654*4882a593Smuzhiyun        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
655*4882a593Smuzhiyun        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
656*4882a593Smuzhiyun        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
657*4882a593Smuzhiyun        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
658*4882a593Smuzhiyun        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
659*4882a593Smuzhiyun        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
660*4882a593Smuzhiyun    };
661*4882a593Smuzhiyun    int8 shiftCount;
662*4882a593Smuzhiyun
663*4882a593Smuzhiyun    shiftCount = 0;
664*4882a593Smuzhiyun    if ( a < 0x10000 ) {
665*4882a593Smuzhiyun        shiftCount += 16;
666*4882a593Smuzhiyun        a <<= 16;
667*4882a593Smuzhiyun    }
668*4882a593Smuzhiyun    if ( a < 0x1000000 ) {
669*4882a593Smuzhiyun        shiftCount += 8;
670*4882a593Smuzhiyun        a <<= 8;
671*4882a593Smuzhiyun    }
672*4882a593Smuzhiyun    shiftCount += countLeadingZerosHigh[ a>>24 ];
673*4882a593Smuzhiyun    return shiftCount;
674*4882a593Smuzhiyun
675*4882a593Smuzhiyun}
676*4882a593Smuzhiyun
677*4882a593Smuzhiyun/*
678*4882a593Smuzhiyun-------------------------------------------------------------------------------
679*4882a593SmuzhiyunReturns the number of leading 0 bits before the most-significant 1 bit
680*4882a593Smuzhiyunof `a'.  If `a' is zero, 64 is returned.
681*4882a593Smuzhiyun-------------------------------------------------------------------------------
682*4882a593Smuzhiyun*/
683*4882a593Smuzhiyunstatic int8 countLeadingZeros64( bits64 a )
684*4882a593Smuzhiyun{
685*4882a593Smuzhiyun    int8 shiftCount;
686*4882a593Smuzhiyun
687*4882a593Smuzhiyun    shiftCount = 0;
688*4882a593Smuzhiyun    if ( a < ( (bits64) 1 )<<32 ) {
689*4882a593Smuzhiyun        shiftCount += 32;
690*4882a593Smuzhiyun    }
691*4882a593Smuzhiyun    else {
692*4882a593Smuzhiyun        a >>= 32;
693*4882a593Smuzhiyun    }
694*4882a593Smuzhiyun    shiftCount += countLeadingZeros32( a );
695*4882a593Smuzhiyun    return shiftCount;
696*4882a593Smuzhiyun
697*4882a593Smuzhiyun}
698*4882a593Smuzhiyun
699*4882a593Smuzhiyun/*
700*4882a593Smuzhiyun-------------------------------------------------------------------------------
701*4882a593SmuzhiyunReturns 1 if the 128-bit value formed by concatenating `a0' and `a1'
702*4882a593Smuzhiyunis equal to the 128-bit value formed by concatenating `b0' and `b1'.
703*4882a593SmuzhiyunOtherwise, returns 0.
704*4882a593Smuzhiyun-------------------------------------------------------------------------------
705*4882a593Smuzhiyun*/
706*4882a593SmuzhiyunINLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
707*4882a593Smuzhiyun{
708*4882a593Smuzhiyun
709*4882a593Smuzhiyun    return ( a0 == b0 ) && ( a1 == b1 );
710*4882a593Smuzhiyun
711*4882a593Smuzhiyun}
712*4882a593Smuzhiyun
713*4882a593Smuzhiyun/*
714*4882a593Smuzhiyun-------------------------------------------------------------------------------
715*4882a593SmuzhiyunReturns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
716*4882a593Smuzhiyunthan or equal to the 128-bit value formed by concatenating `b0' and `b1'.
717*4882a593SmuzhiyunOtherwise, returns 0.
718*4882a593Smuzhiyun-------------------------------------------------------------------------------
719*4882a593Smuzhiyun*/
720*4882a593SmuzhiyunINLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
721*4882a593Smuzhiyun{
722*4882a593Smuzhiyun
723*4882a593Smuzhiyun    return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
724*4882a593Smuzhiyun
725*4882a593Smuzhiyun}
726*4882a593Smuzhiyun
727*4882a593Smuzhiyun/*
728*4882a593Smuzhiyun-------------------------------------------------------------------------------
729*4882a593SmuzhiyunReturns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
730*4882a593Smuzhiyunthan the 128-bit value formed by concatenating `b0' and `b1'.  Otherwise,
731*4882a593Smuzhiyunreturns 0.
732*4882a593Smuzhiyun-------------------------------------------------------------------------------
733*4882a593Smuzhiyun*/
734*4882a593SmuzhiyunINLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
735*4882a593Smuzhiyun{
736*4882a593Smuzhiyun
737*4882a593Smuzhiyun    return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
738*4882a593Smuzhiyun
739*4882a593Smuzhiyun}
740*4882a593Smuzhiyun
741*4882a593Smuzhiyun/*
742*4882a593Smuzhiyun-------------------------------------------------------------------------------
743*4882a593SmuzhiyunReturns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
744*4882a593Smuzhiyunnot equal to the 128-bit value formed by concatenating `b0' and `b1'.
745*4882a593SmuzhiyunOtherwise, returns 0.
746*4882a593Smuzhiyun-------------------------------------------------------------------------------
747*4882a593Smuzhiyun*/
748*4882a593SmuzhiyunINLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
749*4882a593Smuzhiyun{
750*4882a593Smuzhiyun
751*4882a593Smuzhiyun    return ( a0 != b0 ) || ( a1 != b1 );
752*4882a593Smuzhiyun
753*4882a593Smuzhiyun}
754*4882a593Smuzhiyun
755