1*4882a593Smuzhiyun 2*4882a593Smuzhiyun/* 3*4882a593Smuzhiyun=============================================================================== 4*4882a593Smuzhiyun 5*4882a593SmuzhiyunThis C source fragment is part of the SoftFloat IEC/IEEE Floating-point 6*4882a593SmuzhiyunArithmetic Package, Release 2. 7*4882a593Smuzhiyun 8*4882a593SmuzhiyunWritten by John R. Hauser. This work was made possible in part by the 9*4882a593SmuzhiyunInternational Computer Science Institute, located at Suite 600, 1947 Center 10*4882a593SmuzhiyunStreet, Berkeley, California 94704. Funding was partially provided by the 11*4882a593SmuzhiyunNational Science Foundation under grant MIP-9311980. The original version 12*4882a593Smuzhiyunof this code was written as part of a project to build a fixed-point vector 13*4882a593Smuzhiyunprocessor in collaboration with the University of California at Berkeley, 14*4882a593Smuzhiyunoverseen by Profs. Nelson Morgan and John Wawrzynek. More information 15*4882a593Smuzhiyunis available through the web page 16*4882a593Smuzhiyunhttp://www.jhauser.us/arithmetic/SoftFloat-2b/SoftFloat-source.txt 17*4882a593Smuzhiyun 18*4882a593SmuzhiyunTHIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort 19*4882a593Smuzhiyunhas been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT 20*4882a593SmuzhiyunTIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO 21*4882a593SmuzhiyunPERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY 22*4882a593SmuzhiyunAND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. 23*4882a593Smuzhiyun 24*4882a593SmuzhiyunDerivative works are acceptable, even for commercial purposes, so long as 25*4882a593Smuzhiyun(1) they include prominent notice that the work is derivative, and (2) they 26*4882a593Smuzhiyuninclude prominent notice akin to these three paragraphs for those parts of 27*4882a593Smuzhiyunthis code that are retained. 28*4882a593Smuzhiyun 29*4882a593Smuzhiyun=============================================================================== 30*4882a593Smuzhiyun*/ 31*4882a593Smuzhiyun 32*4882a593Smuzhiyun/* 33*4882a593Smuzhiyun------------------------------------------------------------------------------- 34*4882a593SmuzhiyunShifts `a' right by the number of bits given in `count'. If any nonzero 35*4882a593Smuzhiyunbits are shifted off, they are ``jammed'' into the least significant bit of 36*4882a593Smuzhiyunthe result by setting the least significant bit to 1. The value of `count' 37*4882a593Smuzhiyuncan be arbitrarily large; in particular, if `count' is greater than 32, the 38*4882a593Smuzhiyunresult will be either 0 or 1, depending on whether `a' is zero or nonzero. 39*4882a593SmuzhiyunThe result is stored in the location pointed to by `zPtr'. 40*4882a593Smuzhiyun------------------------------------------------------------------------------- 41*4882a593Smuzhiyun*/ 42*4882a593SmuzhiyunINLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr ) 43*4882a593Smuzhiyun{ 44*4882a593Smuzhiyun bits32 z; 45*4882a593Smuzhiyun if ( count == 0 ) { 46*4882a593Smuzhiyun z = a; 47*4882a593Smuzhiyun } 48*4882a593Smuzhiyun else if ( count < 32 ) { 49*4882a593Smuzhiyun z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 ); 50*4882a593Smuzhiyun } 51*4882a593Smuzhiyun else { 52*4882a593Smuzhiyun z = ( a != 0 ); 53*4882a593Smuzhiyun } 54*4882a593Smuzhiyun *zPtr = z; 55*4882a593Smuzhiyun} 56*4882a593Smuzhiyun 57*4882a593Smuzhiyun/* 58*4882a593Smuzhiyun------------------------------------------------------------------------------- 59*4882a593SmuzhiyunShifts `a' right by the number of bits given in `count'. If any nonzero 60*4882a593Smuzhiyunbits are shifted off, they are ``jammed'' into the least significant bit of 61*4882a593Smuzhiyunthe result by setting the least significant bit to 1. The value of `count' 62*4882a593Smuzhiyuncan be arbitrarily large; in particular, if `count' is greater than 64, the 63*4882a593Smuzhiyunresult will be either 0 or 1, depending on whether `a' is zero or nonzero. 64*4882a593SmuzhiyunThe result is stored in the location pointed to by `zPtr'. 65*4882a593Smuzhiyun------------------------------------------------------------------------------- 66*4882a593Smuzhiyun*/ 67*4882a593SmuzhiyunINLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr ) 68*4882a593Smuzhiyun{ 69*4882a593Smuzhiyun bits64 z; 70*4882a593Smuzhiyun 71*4882a593Smuzhiyun __asm__("@shift64RightJamming -- start"); 72*4882a593Smuzhiyun if ( count == 0 ) { 73*4882a593Smuzhiyun z = a; 74*4882a593Smuzhiyun } 75*4882a593Smuzhiyun else if ( count < 64 ) { 76*4882a593Smuzhiyun z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 ); 77*4882a593Smuzhiyun } 78*4882a593Smuzhiyun else { 79*4882a593Smuzhiyun z = ( a != 0 ); 80*4882a593Smuzhiyun } 81*4882a593Smuzhiyun __asm__("@shift64RightJamming -- end"); 82*4882a593Smuzhiyun *zPtr = z; 83*4882a593Smuzhiyun} 84*4882a593Smuzhiyun 85*4882a593Smuzhiyun/* 86*4882a593Smuzhiyun------------------------------------------------------------------------------- 87*4882a593SmuzhiyunShifts the 128-bit value formed by concatenating `a0' and `a1' right by 64 88*4882a593Smuzhiyun_plus_ the number of bits given in `count'. The shifted result is at most 89*4882a593Smuzhiyun64 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The 90*4882a593Smuzhiyunbits shifted off form a second 64-bit result as follows: The _last_ bit 91*4882a593Smuzhiyunshifted off is the most-significant bit of the extra result, and the other 92*4882a593Smuzhiyun63 bits of the extra result are all zero if and only if _all_but_the_last_ 93*4882a593Smuzhiyunbits shifted off were all zero. This extra result is stored in the location 94*4882a593Smuzhiyunpointed to by `z1Ptr'. The value of `count' can be arbitrarily large. 95*4882a593Smuzhiyun (This routine makes more sense if `a0' and `a1' are considered to form a 96*4882a593Smuzhiyunfixed-point value with binary point between `a0' and `a1'. This fixed-point 97*4882a593Smuzhiyunvalue is shifted right by the number of bits given in `count', and the 98*4882a593Smuzhiyuninteger part of the result is returned at the location pointed to by 99*4882a593Smuzhiyun`z0Ptr'. The fractional part of the result may be slightly corrupted as 100*4882a593Smuzhiyundescribed above, and is returned at the location pointed to by `z1Ptr'.) 101*4882a593Smuzhiyun------------------------------------------------------------------------------- 102*4882a593Smuzhiyun*/ 103*4882a593SmuzhiyunINLINE void 104*4882a593Smuzhiyun shift64ExtraRightJamming( 105*4882a593Smuzhiyun bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) 106*4882a593Smuzhiyun{ 107*4882a593Smuzhiyun bits64 z0, z1; 108*4882a593Smuzhiyun int8 negCount = ( - count ) & 63; 109*4882a593Smuzhiyun 110*4882a593Smuzhiyun if ( count == 0 ) { 111*4882a593Smuzhiyun z1 = a1; 112*4882a593Smuzhiyun z0 = a0; 113*4882a593Smuzhiyun } 114*4882a593Smuzhiyun else if ( count < 64 ) { 115*4882a593Smuzhiyun z1 = ( a0<<negCount ) | ( a1 != 0 ); 116*4882a593Smuzhiyun z0 = a0>>count; 117*4882a593Smuzhiyun } 118*4882a593Smuzhiyun else { 119*4882a593Smuzhiyun if ( count == 64 ) { 120*4882a593Smuzhiyun z1 = a0 | ( a1 != 0 ); 121*4882a593Smuzhiyun } 122*4882a593Smuzhiyun else { 123*4882a593Smuzhiyun z1 = ( ( a0 | a1 ) != 0 ); 124*4882a593Smuzhiyun } 125*4882a593Smuzhiyun z0 = 0; 126*4882a593Smuzhiyun } 127*4882a593Smuzhiyun *z1Ptr = z1; 128*4882a593Smuzhiyun *z0Ptr = z0; 129*4882a593Smuzhiyun 130*4882a593Smuzhiyun} 131*4882a593Smuzhiyun 132*4882a593Smuzhiyun/* 133*4882a593Smuzhiyun------------------------------------------------------------------------------- 134*4882a593SmuzhiyunShifts the 128-bit value formed by concatenating `a0' and `a1' right by the 135*4882a593Smuzhiyunnumber of bits given in `count'. Any bits shifted off are lost. The value 136*4882a593Smuzhiyunof `count' can be arbitrarily large; in particular, if `count' is greater 137*4882a593Smuzhiyunthan 128, the result will be 0. The result is broken into two 64-bit pieces 138*4882a593Smuzhiyunwhich are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. 139*4882a593Smuzhiyun------------------------------------------------------------------------------- 140*4882a593Smuzhiyun*/ 141*4882a593SmuzhiyunINLINE void 142*4882a593Smuzhiyun shift128Right( 143*4882a593Smuzhiyun bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) 144*4882a593Smuzhiyun{ 145*4882a593Smuzhiyun bits64 z0, z1; 146*4882a593Smuzhiyun int8 negCount = ( - count ) & 63; 147*4882a593Smuzhiyun 148*4882a593Smuzhiyun if ( count == 0 ) { 149*4882a593Smuzhiyun z1 = a1; 150*4882a593Smuzhiyun z0 = a0; 151*4882a593Smuzhiyun } 152*4882a593Smuzhiyun else if ( count < 64 ) { 153*4882a593Smuzhiyun z1 = ( a0<<negCount ) | ( a1>>count ); 154*4882a593Smuzhiyun z0 = a0>>count; 155*4882a593Smuzhiyun } 156*4882a593Smuzhiyun else { 157*4882a593Smuzhiyun z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0; 158*4882a593Smuzhiyun z0 = 0; 159*4882a593Smuzhiyun } 160*4882a593Smuzhiyun *z1Ptr = z1; 161*4882a593Smuzhiyun *z0Ptr = z0; 162*4882a593Smuzhiyun 163*4882a593Smuzhiyun} 164*4882a593Smuzhiyun 165*4882a593Smuzhiyun/* 166*4882a593Smuzhiyun------------------------------------------------------------------------------- 167*4882a593SmuzhiyunShifts the 128-bit value formed by concatenating `a0' and `a1' right by the 168*4882a593Smuzhiyunnumber of bits given in `count'. If any nonzero bits are shifted off, they 169*4882a593Smuzhiyunare ``jammed'' into the least significant bit of the result by setting the 170*4882a593Smuzhiyunleast significant bit to 1. The value of `count' can be arbitrarily large; 171*4882a593Smuzhiyunin particular, if `count' is greater than 128, the result will be either 0 172*4882a593Smuzhiyunor 1, depending on whether the concatenation of `a0' and `a1' is zero or 173*4882a593Smuzhiyunnonzero. The result is broken into two 64-bit pieces which are stored at 174*4882a593Smuzhiyunthe locations pointed to by `z0Ptr' and `z1Ptr'. 175*4882a593Smuzhiyun------------------------------------------------------------------------------- 176*4882a593Smuzhiyun*/ 177*4882a593SmuzhiyunINLINE void 178*4882a593Smuzhiyun shift128RightJamming( 179*4882a593Smuzhiyun bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) 180*4882a593Smuzhiyun{ 181*4882a593Smuzhiyun bits64 z0, z1; 182*4882a593Smuzhiyun int8 negCount = ( - count ) & 63; 183*4882a593Smuzhiyun 184*4882a593Smuzhiyun if ( count == 0 ) { 185*4882a593Smuzhiyun z1 = a1; 186*4882a593Smuzhiyun z0 = a0; 187*4882a593Smuzhiyun } 188*4882a593Smuzhiyun else if ( count < 64 ) { 189*4882a593Smuzhiyun z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 ); 190*4882a593Smuzhiyun z0 = a0>>count; 191*4882a593Smuzhiyun } 192*4882a593Smuzhiyun else { 193*4882a593Smuzhiyun if ( count == 64 ) { 194*4882a593Smuzhiyun z1 = a0 | ( a1 != 0 ); 195*4882a593Smuzhiyun } 196*4882a593Smuzhiyun else if ( count < 128 ) { 197*4882a593Smuzhiyun z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 ); 198*4882a593Smuzhiyun } 199*4882a593Smuzhiyun else { 200*4882a593Smuzhiyun z1 = ( ( a0 | a1 ) != 0 ); 201*4882a593Smuzhiyun } 202*4882a593Smuzhiyun z0 = 0; 203*4882a593Smuzhiyun } 204*4882a593Smuzhiyun *z1Ptr = z1; 205*4882a593Smuzhiyun *z0Ptr = z0; 206*4882a593Smuzhiyun 207*4882a593Smuzhiyun} 208*4882a593Smuzhiyun 209*4882a593Smuzhiyun/* 210*4882a593Smuzhiyun------------------------------------------------------------------------------- 211*4882a593SmuzhiyunShifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right 212*4882a593Smuzhiyunby 64 _plus_ the number of bits given in `count'. The shifted result is 213*4882a593Smuzhiyunat most 128 nonzero bits; these are broken into two 64-bit pieces which are 214*4882a593Smuzhiyunstored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted 215*4882a593Smuzhiyunoff form a third 64-bit result as follows: The _last_ bit shifted off is 216*4882a593Smuzhiyunthe most-significant bit of the extra result, and the other 63 bits of the 217*4882a593Smuzhiyunextra result are all zero if and only if _all_but_the_last_ bits shifted off 218*4882a593Smuzhiyunwere all zero. This extra result is stored in the location pointed to by 219*4882a593Smuzhiyun`z2Ptr'. The value of `count' can be arbitrarily large. 220*4882a593Smuzhiyun (This routine makes more sense if `a0', `a1', and `a2' are considered 221*4882a593Smuzhiyunto form a fixed-point value with binary point between `a1' and `a2'. This 222*4882a593Smuzhiyunfixed-point value is shifted right by the number of bits given in `count', 223*4882a593Smuzhiyunand the integer part of the result is returned at the locations pointed to 224*4882a593Smuzhiyunby `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly 225*4882a593Smuzhiyuncorrupted as described above, and is returned at the location pointed to by 226*4882a593Smuzhiyun`z2Ptr'.) 227*4882a593Smuzhiyun------------------------------------------------------------------------------- 228*4882a593Smuzhiyun*/ 229*4882a593SmuzhiyunINLINE void 230*4882a593Smuzhiyun shift128ExtraRightJamming( 231*4882a593Smuzhiyun bits64 a0, 232*4882a593Smuzhiyun bits64 a1, 233*4882a593Smuzhiyun bits64 a2, 234*4882a593Smuzhiyun int16 count, 235*4882a593Smuzhiyun bits64 *z0Ptr, 236*4882a593Smuzhiyun bits64 *z1Ptr, 237*4882a593Smuzhiyun bits64 *z2Ptr 238*4882a593Smuzhiyun ) 239*4882a593Smuzhiyun{ 240*4882a593Smuzhiyun bits64 z0, z1, z2; 241*4882a593Smuzhiyun int8 negCount = ( - count ) & 63; 242*4882a593Smuzhiyun 243*4882a593Smuzhiyun if ( count == 0 ) { 244*4882a593Smuzhiyun z2 = a2; 245*4882a593Smuzhiyun z1 = a1; 246*4882a593Smuzhiyun z0 = a0; 247*4882a593Smuzhiyun } 248*4882a593Smuzhiyun else { 249*4882a593Smuzhiyun if ( count < 64 ) { 250*4882a593Smuzhiyun z2 = a1<<negCount; 251*4882a593Smuzhiyun z1 = ( a0<<negCount ) | ( a1>>count ); 252*4882a593Smuzhiyun z0 = a0>>count; 253*4882a593Smuzhiyun } 254*4882a593Smuzhiyun else { 255*4882a593Smuzhiyun if ( count == 64 ) { 256*4882a593Smuzhiyun z2 = a1; 257*4882a593Smuzhiyun z1 = a0; 258*4882a593Smuzhiyun } 259*4882a593Smuzhiyun else { 260*4882a593Smuzhiyun a2 |= a1; 261*4882a593Smuzhiyun if ( count < 128 ) { 262*4882a593Smuzhiyun z2 = a0<<negCount; 263*4882a593Smuzhiyun z1 = a0>>( count & 63 ); 264*4882a593Smuzhiyun } 265*4882a593Smuzhiyun else { 266*4882a593Smuzhiyun z2 = ( count == 128 ) ? a0 : ( a0 != 0 ); 267*4882a593Smuzhiyun z1 = 0; 268*4882a593Smuzhiyun } 269*4882a593Smuzhiyun } 270*4882a593Smuzhiyun z0 = 0; 271*4882a593Smuzhiyun } 272*4882a593Smuzhiyun z2 |= ( a2 != 0 ); 273*4882a593Smuzhiyun } 274*4882a593Smuzhiyun *z2Ptr = z2; 275*4882a593Smuzhiyun *z1Ptr = z1; 276*4882a593Smuzhiyun *z0Ptr = z0; 277*4882a593Smuzhiyun 278*4882a593Smuzhiyun} 279*4882a593Smuzhiyun 280*4882a593Smuzhiyun/* 281*4882a593Smuzhiyun------------------------------------------------------------------------------- 282*4882a593SmuzhiyunShifts the 128-bit value formed by concatenating `a0' and `a1' left by the 283*4882a593Smuzhiyunnumber of bits given in `count'. Any bits shifted off are lost. The value 284*4882a593Smuzhiyunof `count' must be less than 64. The result is broken into two 64-bit 285*4882a593Smuzhiyunpieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. 286*4882a593Smuzhiyun------------------------------------------------------------------------------- 287*4882a593Smuzhiyun*/ 288*4882a593SmuzhiyunINLINE void 289*4882a593Smuzhiyun shortShift128Left( 290*4882a593Smuzhiyun bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) 291*4882a593Smuzhiyun{ 292*4882a593Smuzhiyun 293*4882a593Smuzhiyun *z1Ptr = a1<<count; 294*4882a593Smuzhiyun *z0Ptr = 295*4882a593Smuzhiyun ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) ); 296*4882a593Smuzhiyun 297*4882a593Smuzhiyun} 298*4882a593Smuzhiyun 299*4882a593Smuzhiyun/* 300*4882a593Smuzhiyun------------------------------------------------------------------------------- 301*4882a593SmuzhiyunShifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left 302*4882a593Smuzhiyunby the number of bits given in `count'. Any bits shifted off are lost. 303*4882a593SmuzhiyunThe value of `count' must be less than 64. The result is broken into three 304*4882a593Smuzhiyun64-bit pieces which are stored at the locations pointed to by `z0Ptr', 305*4882a593Smuzhiyun`z1Ptr', and `z2Ptr'. 306*4882a593Smuzhiyun------------------------------------------------------------------------------- 307*4882a593Smuzhiyun*/ 308*4882a593SmuzhiyunINLINE void 309*4882a593Smuzhiyun shortShift192Left( 310*4882a593Smuzhiyun bits64 a0, 311*4882a593Smuzhiyun bits64 a1, 312*4882a593Smuzhiyun bits64 a2, 313*4882a593Smuzhiyun int16 count, 314*4882a593Smuzhiyun bits64 *z0Ptr, 315*4882a593Smuzhiyun bits64 *z1Ptr, 316*4882a593Smuzhiyun bits64 *z2Ptr 317*4882a593Smuzhiyun ) 318*4882a593Smuzhiyun{ 319*4882a593Smuzhiyun bits64 z0, z1, z2; 320*4882a593Smuzhiyun int8 negCount; 321*4882a593Smuzhiyun 322*4882a593Smuzhiyun z2 = a2<<count; 323*4882a593Smuzhiyun z1 = a1<<count; 324*4882a593Smuzhiyun z0 = a0<<count; 325*4882a593Smuzhiyun if ( 0 < count ) { 326*4882a593Smuzhiyun negCount = ( ( - count ) & 63 ); 327*4882a593Smuzhiyun z1 |= a2>>negCount; 328*4882a593Smuzhiyun z0 |= a1>>negCount; 329*4882a593Smuzhiyun } 330*4882a593Smuzhiyun *z2Ptr = z2; 331*4882a593Smuzhiyun *z1Ptr = z1; 332*4882a593Smuzhiyun *z0Ptr = z0; 333*4882a593Smuzhiyun 334*4882a593Smuzhiyun} 335*4882a593Smuzhiyun 336*4882a593Smuzhiyun/* 337*4882a593Smuzhiyun------------------------------------------------------------------------------- 338*4882a593SmuzhiyunAdds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit 339*4882a593Smuzhiyunvalue formed by concatenating `b0' and `b1'. Addition is modulo 2^128, so 340*4882a593Smuzhiyunany carry out is lost. The result is broken into two 64-bit pieces which 341*4882a593Smuzhiyunare stored at the locations pointed to by `z0Ptr' and `z1Ptr'. 342*4882a593Smuzhiyun------------------------------------------------------------------------------- 343*4882a593Smuzhiyun*/ 344*4882a593SmuzhiyunINLINE void 345*4882a593Smuzhiyun add128( 346*4882a593Smuzhiyun bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr ) 347*4882a593Smuzhiyun{ 348*4882a593Smuzhiyun bits64 z1; 349*4882a593Smuzhiyun 350*4882a593Smuzhiyun z1 = a1 + b1; 351*4882a593Smuzhiyun *z1Ptr = z1; 352*4882a593Smuzhiyun *z0Ptr = a0 + b0 + ( z1 < a1 ); 353*4882a593Smuzhiyun 354*4882a593Smuzhiyun} 355*4882a593Smuzhiyun 356*4882a593Smuzhiyun/* 357*4882a593Smuzhiyun------------------------------------------------------------------------------- 358*4882a593SmuzhiyunAdds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the 359*4882a593Smuzhiyun192-bit value formed by concatenating `b0', `b1', and `b2'. Addition is 360*4882a593Smuzhiyunmodulo 2^192, so any carry out is lost. The result is broken into three 361*4882a593Smuzhiyun64-bit pieces which are stored at the locations pointed to by `z0Ptr', 362*4882a593Smuzhiyun`z1Ptr', and `z2Ptr'. 363*4882a593Smuzhiyun------------------------------------------------------------------------------- 364*4882a593Smuzhiyun*/ 365*4882a593SmuzhiyunINLINE void 366*4882a593Smuzhiyun add192( 367*4882a593Smuzhiyun bits64 a0, 368*4882a593Smuzhiyun bits64 a1, 369*4882a593Smuzhiyun bits64 a2, 370*4882a593Smuzhiyun bits64 b0, 371*4882a593Smuzhiyun bits64 b1, 372*4882a593Smuzhiyun bits64 b2, 373*4882a593Smuzhiyun bits64 *z0Ptr, 374*4882a593Smuzhiyun bits64 *z1Ptr, 375*4882a593Smuzhiyun bits64 *z2Ptr 376*4882a593Smuzhiyun ) 377*4882a593Smuzhiyun{ 378*4882a593Smuzhiyun bits64 z0, z1, z2; 379*4882a593Smuzhiyun int8 carry0, carry1; 380*4882a593Smuzhiyun 381*4882a593Smuzhiyun z2 = a2 + b2; 382*4882a593Smuzhiyun carry1 = ( z2 < a2 ); 383*4882a593Smuzhiyun z1 = a1 + b1; 384*4882a593Smuzhiyun carry0 = ( z1 < a1 ); 385*4882a593Smuzhiyun z0 = a0 + b0; 386*4882a593Smuzhiyun z1 += carry1; 387*4882a593Smuzhiyun z0 += ( z1 < carry1 ); 388*4882a593Smuzhiyun z0 += carry0; 389*4882a593Smuzhiyun *z2Ptr = z2; 390*4882a593Smuzhiyun *z1Ptr = z1; 391*4882a593Smuzhiyun *z0Ptr = z0; 392*4882a593Smuzhiyun 393*4882a593Smuzhiyun} 394*4882a593Smuzhiyun 395*4882a593Smuzhiyun/* 396*4882a593Smuzhiyun------------------------------------------------------------------------------- 397*4882a593SmuzhiyunSubtracts the 128-bit value formed by concatenating `b0' and `b1' from the 398*4882a593Smuzhiyun128-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo 399*4882a593Smuzhiyun2^128, so any borrow out (carry out) is lost. The result is broken into two 400*4882a593Smuzhiyun64-bit pieces which are stored at the locations pointed to by `z0Ptr' and 401*4882a593Smuzhiyun`z1Ptr'. 402*4882a593Smuzhiyun------------------------------------------------------------------------------- 403*4882a593Smuzhiyun*/ 404*4882a593SmuzhiyunINLINE void 405*4882a593Smuzhiyun sub128( 406*4882a593Smuzhiyun bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr ) 407*4882a593Smuzhiyun{ 408*4882a593Smuzhiyun 409*4882a593Smuzhiyun *z1Ptr = a1 - b1; 410*4882a593Smuzhiyun *z0Ptr = a0 - b0 - ( a1 < b1 ); 411*4882a593Smuzhiyun 412*4882a593Smuzhiyun} 413*4882a593Smuzhiyun 414*4882a593Smuzhiyun/* 415*4882a593Smuzhiyun------------------------------------------------------------------------------- 416*4882a593SmuzhiyunSubtracts the 192-bit value formed by concatenating `b0', `b1', and `b2' 417*4882a593Smuzhiyunfrom the 192-bit value formed by concatenating `a0', `a1', and `a2'. 418*4882a593SmuzhiyunSubtraction is modulo 2^192, so any borrow out (carry out) is lost. The 419*4882a593Smuzhiyunresult is broken into three 64-bit pieces which are stored at the locations 420*4882a593Smuzhiyunpointed to by `z0Ptr', `z1Ptr', and `z2Ptr'. 421*4882a593Smuzhiyun------------------------------------------------------------------------------- 422*4882a593Smuzhiyun*/ 423*4882a593SmuzhiyunINLINE void 424*4882a593Smuzhiyun sub192( 425*4882a593Smuzhiyun bits64 a0, 426*4882a593Smuzhiyun bits64 a1, 427*4882a593Smuzhiyun bits64 a2, 428*4882a593Smuzhiyun bits64 b0, 429*4882a593Smuzhiyun bits64 b1, 430*4882a593Smuzhiyun bits64 b2, 431*4882a593Smuzhiyun bits64 *z0Ptr, 432*4882a593Smuzhiyun bits64 *z1Ptr, 433*4882a593Smuzhiyun bits64 *z2Ptr 434*4882a593Smuzhiyun ) 435*4882a593Smuzhiyun{ 436*4882a593Smuzhiyun bits64 z0, z1, z2; 437*4882a593Smuzhiyun int8 borrow0, borrow1; 438*4882a593Smuzhiyun 439*4882a593Smuzhiyun z2 = a2 - b2; 440*4882a593Smuzhiyun borrow1 = ( a2 < b2 ); 441*4882a593Smuzhiyun z1 = a1 - b1; 442*4882a593Smuzhiyun borrow0 = ( a1 < b1 ); 443*4882a593Smuzhiyun z0 = a0 - b0; 444*4882a593Smuzhiyun z0 -= ( z1 < borrow1 ); 445*4882a593Smuzhiyun z1 -= borrow1; 446*4882a593Smuzhiyun z0 -= borrow0; 447*4882a593Smuzhiyun *z2Ptr = z2; 448*4882a593Smuzhiyun *z1Ptr = z1; 449*4882a593Smuzhiyun *z0Ptr = z0; 450*4882a593Smuzhiyun 451*4882a593Smuzhiyun} 452*4882a593Smuzhiyun 453*4882a593Smuzhiyun/* 454*4882a593Smuzhiyun------------------------------------------------------------------------------- 455*4882a593SmuzhiyunMultiplies `a' by `b' to obtain a 128-bit product. The product is broken 456*4882a593Smuzhiyuninto two 64-bit pieces which are stored at the locations pointed to by 457*4882a593Smuzhiyun`z0Ptr' and `z1Ptr'. 458*4882a593Smuzhiyun------------------------------------------------------------------------------- 459*4882a593Smuzhiyun*/ 460*4882a593SmuzhiyunINLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr ) 461*4882a593Smuzhiyun{ 462*4882a593Smuzhiyun bits32 aHigh, aLow, bHigh, bLow; 463*4882a593Smuzhiyun bits64 z0, zMiddleA, zMiddleB, z1; 464*4882a593Smuzhiyun 465*4882a593Smuzhiyun aLow = a; 466*4882a593Smuzhiyun aHigh = a>>32; 467*4882a593Smuzhiyun bLow = b; 468*4882a593Smuzhiyun bHigh = b>>32; 469*4882a593Smuzhiyun z1 = ( (bits64) aLow ) * bLow; 470*4882a593Smuzhiyun zMiddleA = ( (bits64) aLow ) * bHigh; 471*4882a593Smuzhiyun zMiddleB = ( (bits64) aHigh ) * bLow; 472*4882a593Smuzhiyun z0 = ( (bits64) aHigh ) * bHigh; 473*4882a593Smuzhiyun zMiddleA += zMiddleB; 474*4882a593Smuzhiyun z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 ); 475*4882a593Smuzhiyun zMiddleA <<= 32; 476*4882a593Smuzhiyun z1 += zMiddleA; 477*4882a593Smuzhiyun z0 += ( z1 < zMiddleA ); 478*4882a593Smuzhiyun *z1Ptr = z1; 479*4882a593Smuzhiyun *z0Ptr = z0; 480*4882a593Smuzhiyun 481*4882a593Smuzhiyun} 482*4882a593Smuzhiyun 483*4882a593Smuzhiyun/* 484*4882a593Smuzhiyun------------------------------------------------------------------------------- 485*4882a593SmuzhiyunMultiplies the 128-bit value formed by concatenating `a0' and `a1' by `b' to 486*4882a593Smuzhiyunobtain a 192-bit product. The product is broken into three 64-bit pieces 487*4882a593Smuzhiyunwhich are stored at the locations pointed to by `z0Ptr', `z1Ptr', and 488*4882a593Smuzhiyun`z2Ptr'. 489*4882a593Smuzhiyun------------------------------------------------------------------------------- 490*4882a593Smuzhiyun*/ 491*4882a593SmuzhiyunINLINE void 492*4882a593Smuzhiyun mul128By64To192( 493*4882a593Smuzhiyun bits64 a0, 494*4882a593Smuzhiyun bits64 a1, 495*4882a593Smuzhiyun bits64 b, 496*4882a593Smuzhiyun bits64 *z0Ptr, 497*4882a593Smuzhiyun bits64 *z1Ptr, 498*4882a593Smuzhiyun bits64 *z2Ptr 499*4882a593Smuzhiyun ) 500*4882a593Smuzhiyun{ 501*4882a593Smuzhiyun bits64 z0, z1, z2, more1; 502*4882a593Smuzhiyun 503*4882a593Smuzhiyun mul64To128( a1, b, &z1, &z2 ); 504*4882a593Smuzhiyun mul64To128( a0, b, &z0, &more1 ); 505*4882a593Smuzhiyun add128( z0, more1, 0, z1, &z0, &z1 ); 506*4882a593Smuzhiyun *z2Ptr = z2; 507*4882a593Smuzhiyun *z1Ptr = z1; 508*4882a593Smuzhiyun *z0Ptr = z0; 509*4882a593Smuzhiyun 510*4882a593Smuzhiyun} 511*4882a593Smuzhiyun 512*4882a593Smuzhiyun/* 513*4882a593Smuzhiyun------------------------------------------------------------------------------- 514*4882a593SmuzhiyunMultiplies the 128-bit value formed by concatenating `a0' and `a1' to the 515*4882a593Smuzhiyun128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit 516*4882a593Smuzhiyunproduct. The product is broken into four 64-bit pieces which are stored at 517*4882a593Smuzhiyunthe locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'. 518*4882a593Smuzhiyun------------------------------------------------------------------------------- 519*4882a593Smuzhiyun*/ 520*4882a593SmuzhiyunINLINE void 521*4882a593Smuzhiyun mul128To256( 522*4882a593Smuzhiyun bits64 a0, 523*4882a593Smuzhiyun bits64 a1, 524*4882a593Smuzhiyun bits64 b0, 525*4882a593Smuzhiyun bits64 b1, 526*4882a593Smuzhiyun bits64 *z0Ptr, 527*4882a593Smuzhiyun bits64 *z1Ptr, 528*4882a593Smuzhiyun bits64 *z2Ptr, 529*4882a593Smuzhiyun bits64 *z3Ptr 530*4882a593Smuzhiyun ) 531*4882a593Smuzhiyun{ 532*4882a593Smuzhiyun bits64 z0, z1, z2, z3; 533*4882a593Smuzhiyun bits64 more1, more2; 534*4882a593Smuzhiyun 535*4882a593Smuzhiyun mul64To128( a1, b1, &z2, &z3 ); 536*4882a593Smuzhiyun mul64To128( a1, b0, &z1, &more2 ); 537*4882a593Smuzhiyun add128( z1, more2, 0, z2, &z1, &z2 ); 538*4882a593Smuzhiyun mul64To128( a0, b0, &z0, &more1 ); 539*4882a593Smuzhiyun add128( z0, more1, 0, z1, &z0, &z1 ); 540*4882a593Smuzhiyun mul64To128( a0, b1, &more1, &more2 ); 541*4882a593Smuzhiyun add128( more1, more2, 0, z2, &more1, &z2 ); 542*4882a593Smuzhiyun add128( z0, z1, 0, more1, &z0, &z1 ); 543*4882a593Smuzhiyun *z3Ptr = z3; 544*4882a593Smuzhiyun *z2Ptr = z2; 545*4882a593Smuzhiyun *z1Ptr = z1; 546*4882a593Smuzhiyun *z0Ptr = z0; 547*4882a593Smuzhiyun 548*4882a593Smuzhiyun} 549*4882a593Smuzhiyun 550*4882a593Smuzhiyun/* 551*4882a593Smuzhiyun------------------------------------------------------------------------------- 552*4882a593SmuzhiyunReturns an approximation to the 64-bit integer quotient obtained by dividing 553*4882a593Smuzhiyun`b' into the 128-bit value formed by concatenating `a0' and `a1'. The 554*4882a593Smuzhiyundivisor `b' must be at least 2^63. If q is the exact quotient truncated 555*4882a593Smuzhiyuntoward zero, the approximation returned lies between q and q + 2 inclusive. 556*4882a593SmuzhiyunIf the exact quotient q is larger than 64 bits, the maximum positive 64-bit 557*4882a593Smuzhiyununsigned integer is returned. 558*4882a593Smuzhiyun------------------------------------------------------------------------------- 559*4882a593Smuzhiyun*/ 560*4882a593Smuzhiyunstatic bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b ) 561*4882a593Smuzhiyun{ 562*4882a593Smuzhiyun bits64 b0, b1; 563*4882a593Smuzhiyun bits64 rem0, rem1, term0, term1; 564*4882a593Smuzhiyun bits64 z; 565*4882a593Smuzhiyun if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF ); 566*4882a593Smuzhiyun b0 = b>>32; /* hence b0 is 32 bits wide now */ 567*4882a593Smuzhiyun if ( b0<<32 <= a0 ) { 568*4882a593Smuzhiyun z = LIT64( 0xFFFFFFFF00000000 ); 569*4882a593Smuzhiyun } else { 570*4882a593Smuzhiyun z = a0; 571*4882a593Smuzhiyun do_div( z, b0 ); 572*4882a593Smuzhiyun z <<= 32; 573*4882a593Smuzhiyun } 574*4882a593Smuzhiyun mul64To128( b, z, &term0, &term1 ); 575*4882a593Smuzhiyun sub128( a0, a1, term0, term1, &rem0, &rem1 ); 576*4882a593Smuzhiyun while ( ( (sbits64) rem0 ) < 0 ) { 577*4882a593Smuzhiyun z -= LIT64( 0x100000000 ); 578*4882a593Smuzhiyun b1 = b<<32; 579*4882a593Smuzhiyun add128( rem0, rem1, b0, b1, &rem0, &rem1 ); 580*4882a593Smuzhiyun } 581*4882a593Smuzhiyun rem0 = ( rem0<<32 ) | ( rem1>>32 ); 582*4882a593Smuzhiyun if ( b0<<32 <= rem0 ) { 583*4882a593Smuzhiyun z |= 0xFFFFFFFF; 584*4882a593Smuzhiyun } else { 585*4882a593Smuzhiyun do_div( rem0, b0 ); 586*4882a593Smuzhiyun z |= rem0; 587*4882a593Smuzhiyun } 588*4882a593Smuzhiyun return z; 589*4882a593Smuzhiyun 590*4882a593Smuzhiyun} 591*4882a593Smuzhiyun 592*4882a593Smuzhiyun/* 593*4882a593Smuzhiyun------------------------------------------------------------------------------- 594*4882a593SmuzhiyunReturns an approximation to the square root of the 32-bit significand given 595*4882a593Smuzhiyunby `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of 596*4882a593Smuzhiyun`aExp' (the least significant bit) is 1, the integer returned approximates 597*4882a593Smuzhiyun2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp' 598*4882a593Smuzhiyunis 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either 599*4882a593Smuzhiyuncase, the approximation returned lies strictly within +/-2 of the exact 600*4882a593Smuzhiyunvalue. 601*4882a593Smuzhiyun------------------------------------------------------------------------------- 602*4882a593Smuzhiyun*/ 603*4882a593Smuzhiyunstatic bits32 estimateSqrt32( int16 aExp, bits32 a ) 604*4882a593Smuzhiyun{ 605*4882a593Smuzhiyun static const bits16 sqrtOddAdjustments[] = { 606*4882a593Smuzhiyun 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0, 607*4882a593Smuzhiyun 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67 608*4882a593Smuzhiyun }; 609*4882a593Smuzhiyun static const bits16 sqrtEvenAdjustments[] = { 610*4882a593Smuzhiyun 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E, 611*4882a593Smuzhiyun 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002 612*4882a593Smuzhiyun }; 613*4882a593Smuzhiyun int8 index; 614*4882a593Smuzhiyun bits32 z; 615*4882a593Smuzhiyun bits64 A; 616*4882a593Smuzhiyun 617*4882a593Smuzhiyun index = ( a>>27 ) & 15; 618*4882a593Smuzhiyun if ( aExp & 1 ) { 619*4882a593Smuzhiyun z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ]; 620*4882a593Smuzhiyun z = ( ( a / z )<<14 ) + ( z<<15 ); 621*4882a593Smuzhiyun a >>= 1; 622*4882a593Smuzhiyun } 623*4882a593Smuzhiyun else { 624*4882a593Smuzhiyun z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ]; 625*4882a593Smuzhiyun z = a / z + z; 626*4882a593Smuzhiyun z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 ); 627*4882a593Smuzhiyun if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 ); 628*4882a593Smuzhiyun } 629*4882a593Smuzhiyun A = ( (bits64) a )<<31; 630*4882a593Smuzhiyun do_div( A, z ); 631*4882a593Smuzhiyun return ( (bits32) A ) + ( z>>1 ); 632*4882a593Smuzhiyun 633*4882a593Smuzhiyun} 634*4882a593Smuzhiyun 635*4882a593Smuzhiyun/* 636*4882a593Smuzhiyun------------------------------------------------------------------------------- 637*4882a593SmuzhiyunReturns the number of leading 0 bits before the most-significant 1 bit 638*4882a593Smuzhiyunof `a'. If `a' is zero, 32 is returned. 639*4882a593Smuzhiyun------------------------------------------------------------------------------- 640*4882a593Smuzhiyun*/ 641*4882a593Smuzhiyunstatic int8 countLeadingZeros32( bits32 a ) 642*4882a593Smuzhiyun{ 643*4882a593Smuzhiyun static const int8 countLeadingZerosHigh[] = { 644*4882a593Smuzhiyun 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 645*4882a593Smuzhiyun 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 646*4882a593Smuzhiyun 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 647*4882a593Smuzhiyun 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 648*4882a593Smuzhiyun 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 649*4882a593Smuzhiyun 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 650*4882a593Smuzhiyun 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 651*4882a593Smuzhiyun 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 652*4882a593Smuzhiyun 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 653*4882a593Smuzhiyun 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 654*4882a593Smuzhiyun 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 655*4882a593Smuzhiyun 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 656*4882a593Smuzhiyun 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 657*4882a593Smuzhiyun 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 658*4882a593Smuzhiyun 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 659*4882a593Smuzhiyun 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 660*4882a593Smuzhiyun }; 661*4882a593Smuzhiyun int8 shiftCount; 662*4882a593Smuzhiyun 663*4882a593Smuzhiyun shiftCount = 0; 664*4882a593Smuzhiyun if ( a < 0x10000 ) { 665*4882a593Smuzhiyun shiftCount += 16; 666*4882a593Smuzhiyun a <<= 16; 667*4882a593Smuzhiyun } 668*4882a593Smuzhiyun if ( a < 0x1000000 ) { 669*4882a593Smuzhiyun shiftCount += 8; 670*4882a593Smuzhiyun a <<= 8; 671*4882a593Smuzhiyun } 672*4882a593Smuzhiyun shiftCount += countLeadingZerosHigh[ a>>24 ]; 673*4882a593Smuzhiyun return shiftCount; 674*4882a593Smuzhiyun 675*4882a593Smuzhiyun} 676*4882a593Smuzhiyun 677*4882a593Smuzhiyun/* 678*4882a593Smuzhiyun------------------------------------------------------------------------------- 679*4882a593SmuzhiyunReturns the number of leading 0 bits before the most-significant 1 bit 680*4882a593Smuzhiyunof `a'. If `a' is zero, 64 is returned. 681*4882a593Smuzhiyun------------------------------------------------------------------------------- 682*4882a593Smuzhiyun*/ 683*4882a593Smuzhiyunstatic int8 countLeadingZeros64( bits64 a ) 684*4882a593Smuzhiyun{ 685*4882a593Smuzhiyun int8 shiftCount; 686*4882a593Smuzhiyun 687*4882a593Smuzhiyun shiftCount = 0; 688*4882a593Smuzhiyun if ( a < ( (bits64) 1 )<<32 ) { 689*4882a593Smuzhiyun shiftCount += 32; 690*4882a593Smuzhiyun } 691*4882a593Smuzhiyun else { 692*4882a593Smuzhiyun a >>= 32; 693*4882a593Smuzhiyun } 694*4882a593Smuzhiyun shiftCount += countLeadingZeros32( a ); 695*4882a593Smuzhiyun return shiftCount; 696*4882a593Smuzhiyun 697*4882a593Smuzhiyun} 698*4882a593Smuzhiyun 699*4882a593Smuzhiyun/* 700*4882a593Smuzhiyun------------------------------------------------------------------------------- 701*4882a593SmuzhiyunReturns 1 if the 128-bit value formed by concatenating `a0' and `a1' 702*4882a593Smuzhiyunis equal to the 128-bit value formed by concatenating `b0' and `b1'. 703*4882a593SmuzhiyunOtherwise, returns 0. 704*4882a593Smuzhiyun------------------------------------------------------------------------------- 705*4882a593Smuzhiyun*/ 706*4882a593SmuzhiyunINLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) 707*4882a593Smuzhiyun{ 708*4882a593Smuzhiyun 709*4882a593Smuzhiyun return ( a0 == b0 ) && ( a1 == b1 ); 710*4882a593Smuzhiyun 711*4882a593Smuzhiyun} 712*4882a593Smuzhiyun 713*4882a593Smuzhiyun/* 714*4882a593Smuzhiyun------------------------------------------------------------------------------- 715*4882a593SmuzhiyunReturns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less 716*4882a593Smuzhiyunthan or equal to the 128-bit value formed by concatenating `b0' and `b1'. 717*4882a593SmuzhiyunOtherwise, returns 0. 718*4882a593Smuzhiyun------------------------------------------------------------------------------- 719*4882a593Smuzhiyun*/ 720*4882a593SmuzhiyunINLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) 721*4882a593Smuzhiyun{ 722*4882a593Smuzhiyun 723*4882a593Smuzhiyun return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) ); 724*4882a593Smuzhiyun 725*4882a593Smuzhiyun} 726*4882a593Smuzhiyun 727*4882a593Smuzhiyun/* 728*4882a593Smuzhiyun------------------------------------------------------------------------------- 729*4882a593SmuzhiyunReturns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less 730*4882a593Smuzhiyunthan the 128-bit value formed by concatenating `b0' and `b1'. Otherwise, 731*4882a593Smuzhiyunreturns 0. 732*4882a593Smuzhiyun------------------------------------------------------------------------------- 733*4882a593Smuzhiyun*/ 734*4882a593SmuzhiyunINLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) 735*4882a593Smuzhiyun{ 736*4882a593Smuzhiyun 737*4882a593Smuzhiyun return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) ); 738*4882a593Smuzhiyun 739*4882a593Smuzhiyun} 740*4882a593Smuzhiyun 741*4882a593Smuzhiyun/* 742*4882a593Smuzhiyun------------------------------------------------------------------------------- 743*4882a593SmuzhiyunReturns 1 if the 128-bit value formed by concatenating `a0' and `a1' is 744*4882a593Smuzhiyunnot equal to the 128-bit value formed by concatenating `b0' and `b1'. 745*4882a593SmuzhiyunOtherwise, returns 0. 746*4882a593Smuzhiyun------------------------------------------------------------------------------- 747*4882a593Smuzhiyun*/ 748*4882a593SmuzhiyunINLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) 749*4882a593Smuzhiyun{ 750*4882a593Smuzhiyun 751*4882a593Smuzhiyun return ( a0 != b0 ) || ( a1 != b1 ); 752*4882a593Smuzhiyun 753*4882a593Smuzhiyun} 754*4882a593Smuzhiyun 755