1 2 /*============================================================================ 3 4 This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic 5 Package, Release 3a, by John R. Hauser. 6 7 Copyright 2011, 2012, 2013, 2014 The Regents of the University of California. 8 All rights reserved. 9 10 Redistribution and use in source and binary forms, with or without 11 modification, are permitted provided that the following conditions are met: 12 13 1. Redistributions of source code must retain the above copyright notice, 14 this list of conditions, and the following disclaimer. 15 16 2. Redistributions in binary form must reproduce the above copyright notice, 17 this list of conditions, and the following disclaimer in the documentation 18 and/or other materials provided with the distribution. 19 20 3. Neither the name of the University nor the names of its contributors may 21 be used to endorse or promote products derived from this software without 22 specific prior written permission. 23 24 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY 25 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 26 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE 27 DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY 28 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 29 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 30 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 31 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 32 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 33 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34 35 =============================================================================*/ 36 37 #include <stdbool.h> 38 #include <stdint.h> 39 #include "platform.h" 40 #include "internals.h" 41 #include "specialize.h" 42 #include "softfloat.h" 43 44 float128_t 45 softfloat_mulAddF128( 46 uint_fast64_t uiA64, 47 uint_fast64_t uiA0, 48 uint_fast64_t uiB64, 49 uint_fast64_t uiB0, 50 uint_fast64_t uiC64, 51 uint_fast64_t uiC0, 52 uint_fast8_t op 53 ) 54 { 55 bool signA; 56 int_fast32_t expA; 57 struct uint128 sigA; 58 bool signB; 59 int_fast32_t expB; 60 struct uint128 sigB; 61 bool signC; 62 int_fast32_t expC; 63 struct uint128 sigC; 64 bool signZ; 65 uint_fast64_t magBits; 66 struct uint128 uiZ; 67 struct exp32_sig128 normExpSig; 68 int_fast32_t expZ; 69 uint64_t sig256Z[4]; 70 struct uint128 sigZ; 71 int_fast32_t shiftCount, expDiff; 72 struct uint128 x128; 73 uint64_t sig256C[4]; 74 static uint64_t zero256[4] = INIT_UINTM4( 0, 0, 0, 0 ); 75 uint_fast64_t sigZExtra, sig256Z0; 76 union ui128_f128 uZ; 77 78 /*------------------------------------------------------------------------ 79 *------------------------------------------------------------------------*/ 80 signA = signF128UI64( uiA64 ); 81 expA = expF128UI64( uiA64 ); 82 sigA.v64 = fracF128UI64( uiA64 ); 83 sigA.v0 = uiA0; 84 signB = signF128UI64( uiB64 ); 85 expB = expF128UI64( uiB64 ); 86 sigB.v64 = fracF128UI64( uiB64 ); 87 sigB.v0 = uiB0; 88 signC = signF128UI64( uiC64 ) ^ (op == softfloat_mulAdd_subC); 89 expC = expF128UI64( uiC64 ); 90 sigC.v64 = fracF128UI64( uiC64 ); 91 sigC.v0 = uiC0; 92 signZ = signA ^ signB ^ (op == softfloat_mulAdd_subProd); 93 /*------------------------------------------------------------------------ 94 *------------------------------------------------------------------------*/ 95 if ( expA == 0x7FFF ) { 96 if ( 97 (sigA.v64 | sigA.v0) || ((expB == 0x7FFF) && (sigB.v64 | sigB.v0)) 98 ) { 99 goto propagateNaN_ABC; 100 } 101 magBits = expB | sigB.v64 | sigB.v0; 102 goto infProdArg; 103 } 104 if ( expB == 0x7FFF ) { 105 if ( sigB.v64 | sigB.v0 ) goto propagateNaN_ABC; 106 magBits = expA | sigA.v64 | sigA.v0; 107 goto infProdArg; 108 } 109 if ( expC == 0x7FFF ) { 110 if ( sigC.v64 | sigC.v0 ) { 111 uiZ.v64 = 0; 112 uiZ.v0 = 0; 113 goto propagateNaN_ZC; 114 } 115 uiZ.v64 = uiC64; 116 uiZ.v0 = uiC0; 117 goto uiZ; 118 } 119 /*------------------------------------------------------------------------ 120 *------------------------------------------------------------------------*/ 121 if ( ! expA ) { 122 if ( ! (sigA.v64 | sigA.v0) ) goto zeroProd; 123 normExpSig = softfloat_normSubnormalF128Sig( sigA.v64, sigA.v0 ); 124 expA = normExpSig.exp; 125 sigA = normExpSig.sig; 126 } 127 if ( ! expB ) { 128 if ( ! (sigB.v64 | sigB.v0) ) goto zeroProd; 129 normExpSig = softfloat_normSubnormalF128Sig( sigB.v64, sigB.v0 ); 130 expB = normExpSig.exp; 131 sigB = normExpSig.sig; 132 } 133 /*------------------------------------------------------------------------ 134 *------------------------------------------------------------------------*/ 135 expZ = expA + expB - 0x3FFE; 136 sigA.v64 |= UINT64_C( 0x0001000000000000 ); 137 sigB.v64 |= UINT64_C( 0x0001000000000000 ); 138 sigA = softfloat_shortShiftLeft128( sigA.v64, sigA.v0, 8 ); 139 sigB = softfloat_shortShiftLeft128( sigB.v64, sigB.v0, 15 ); 140 softfloat_mul128To256M( sigA.v64, sigA.v0, sigB.v64, sigB.v0, sig256Z ); 141 sigZ.v64 = sig256Z[indexWord( 4, 3 )]; 142 sigZ.v0 = sig256Z[indexWord( 4, 2 )]; 143 shiftCount = 0; 144 if ( ! (sigZ.v64 & UINT64_C( 0x0100000000000000 )) ) { 145 --expZ; 146 shiftCount = -1; 147 } 148 if ( ! expC ) { 149 if ( ! (sigC.v64 | sigC.v0) ) { 150 shiftCount += 8; 151 goto sigZ; 152 } 153 normExpSig = softfloat_normSubnormalF128Sig( sigC.v64, sigC.v0 ); 154 expC = normExpSig.exp; 155 sigC = normExpSig.sig; 156 } 157 sigC.v64 |= UINT64_C( 0x0001000000000000 ); 158 sigC = softfloat_shortShiftLeft128( sigC.v64, sigC.v0, 8 ); 159 /*------------------------------------------------------------------------ 160 *------------------------------------------------------------------------*/ 161 expDiff = expZ - expC; 162 if ( expDiff < 0 ) { 163 expZ = expC; 164 if ( (signZ == signC) || (expDiff < -1) ) { 165 shiftCount -= expDiff; 166 if ( shiftCount ) { 167 sigZ = 168 softfloat_shiftRightJam128( 169 sigZ.v64, sigZ.v0, shiftCount ); 170 } 171 } else { 172 if ( ! shiftCount ) { 173 x128 = 174 softfloat_shortShiftRight128( 175 sig256Z[indexWord( 4, 1 )], sig256Z[indexWord( 4, 0 )], 176 1 177 ); 178 sig256Z[indexWord( 4, 1 )] = (sigZ.v0<<63) | x128.v64; 179 sig256Z[indexWord( 4, 0 )] = x128.v0; 180 sigZ = softfloat_shortShiftRight128( sigZ.v64, sigZ.v0, 1 ); 181 sig256Z[indexWord( 4, 3 )] = sigZ.v64; 182 sig256Z[indexWord( 4, 2 )] = sigZ.v0; 183 } 184 } 185 } else { 186 if ( shiftCount ) softfloat_add256M( sig256Z, sig256Z, sig256Z ); 187 if ( ! expDiff ) { 188 sigZ.v64 = sig256Z[indexWord( 4, 3 )]; 189 sigZ.v0 = sig256Z[indexWord( 4, 2 )]; 190 } else { 191 sig256C[indexWord( 4, 3 )] = sigC.v64; 192 sig256C[indexWord( 4, 2 )] = sigC.v0; 193 sig256C[indexWord( 4, 1 )] = 0; 194 sig256C[indexWord( 4, 0 )] = 0; 195 softfloat_shiftRightJam256M( sig256C, expDiff, sig256C ); 196 } 197 } 198 /*------------------------------------------------------------------------ 199 *------------------------------------------------------------------------*/ 200 shiftCount = 8; 201 if ( signZ == signC ) { 202 /*-------------------------------------------------------------------- 203 *--------------------------------------------------------------------*/ 204 if ( expDiff <= 0 ) { 205 sigZ = softfloat_add128( sigC.v64, sigC.v0, sigZ.v64, sigZ.v0 ); 206 } else { 207 softfloat_add256M( sig256Z, sig256C, sig256Z ); 208 sigZ.v64 = sig256Z[indexWord( 4, 3 )]; 209 sigZ.v0 = sig256Z[indexWord( 4, 2 )]; 210 } 211 if ( sigZ.v64 & UINT64_C( 0x0200000000000000 ) ) { 212 ++expZ; 213 shiftCount = 9; 214 } 215 } else { 216 /*-------------------------------------------------------------------- 217 *--------------------------------------------------------------------*/ 218 if ( expDiff < 0 ) { 219 signZ = signC; 220 if ( expDiff < -1 ) { 221 sigZ = 222 softfloat_sub128( sigC.v64, sigC.v0, sigZ.v64, sigZ.v0 ); 223 sigZExtra = 224 sig256Z[indexWord( 4, 1 )] | sig256Z[indexWord( 4, 0 )]; 225 if ( sigZExtra ) { 226 sigZ = softfloat_sub128( sigZ.v64, sigZ.v0, 0, 1 ); 227 } 228 if ( ! (sigZ.v64 & UINT64_C( 0x0100000000000000 )) ) { 229 --expZ; 230 shiftCount = 7; 231 } 232 goto shiftRightRoundPack; 233 } else { 234 sig256C[indexWord( 4, 3 )] = sigC.v64; 235 sig256C[indexWord( 4, 2 )] = sigC.v0; 236 sig256C[indexWord( 4, 1 )] = 0; 237 sig256C[indexWord( 4, 0 )] = 0; 238 softfloat_sub256M( sig256C, sig256Z, sig256Z ); 239 } 240 } else if ( ! expDiff ) { 241 sigZ = softfloat_sub128( sigZ.v64, sigZ.v0, sigC.v64, sigC.v0 ); 242 if ( 243 ! (sigZ.v64 | sigZ.v0) && ! sig256Z[indexWord( 4, 1 )] 244 && ! sig256Z[indexWord( 4, 0 )] 245 ) { 246 goto completeCancellation; 247 } 248 sig256Z[indexWord( 4, 3 )] = sigZ.v64; 249 sig256Z[indexWord( 4, 2 )] = sigZ.v0; 250 if ( sigZ.v64 & UINT64_C( 0x8000000000000000 ) ) { 251 signZ ^= 1; 252 softfloat_sub256M( zero256, sig256Z, sig256Z ); 253 } 254 } else { 255 softfloat_sub256M( sig256Z, sig256C, sig256Z ); 256 if ( 1 < expDiff ) { 257 sigZ.v64 = sig256Z[indexWord( 4, 3 )]; 258 sigZ.v0 = sig256Z[indexWord( 4, 2 )]; 259 if ( ! (sigZ.v64 & UINT64_C( 0x0100000000000000 )) ) { 260 --expZ; 261 shiftCount = 7; 262 } 263 goto sigZ; 264 } 265 } 266 /*-------------------------------------------------------------------- 267 *--------------------------------------------------------------------*/ 268 sigZ.v64 = sig256Z[indexWord( 4, 3 )]; 269 sigZ.v0 = sig256Z[indexWord( 4, 2 )]; 270 sigZExtra = sig256Z[indexWord( 4, 1 )]; 271 sig256Z0 = sig256Z[indexWord( 4, 0 )]; 272 if ( sigZ.v64 ) { 273 if ( sig256Z0 ) sigZExtra |= 1; 274 } else { 275 expZ -= 64; 276 sigZ.v64 = sigZ.v0; 277 sigZ.v0 = sigZExtra; 278 sigZExtra = sig256Z0; 279 if ( ! sigZ.v64 ) { 280 expZ -= 64; 281 sigZ.v64 = sigZ.v0; 282 sigZ.v0 = sigZExtra; 283 sigZExtra = 0; 284 if ( ! sigZ.v64 ) { 285 expZ -= 64; 286 sigZ.v64 = sigZ.v0; 287 sigZ.v0 = 0; 288 } 289 } 290 } 291 shiftCount = softfloat_countLeadingZeros64( sigZ.v64 ); 292 expZ += 7 - shiftCount; 293 shiftCount = 15 - shiftCount; 294 if ( 0 < shiftCount ) goto shiftRightRoundPack; 295 if ( shiftCount ) { 296 shiftCount = -shiftCount; 297 sigZ = 298 softfloat_shortShiftLeft128( sigZ.v64, sigZ.v0, shiftCount ); 299 x128 = softfloat_shortShiftLeft128( 0, sigZExtra, shiftCount ); 300 sigZ.v0 |= x128.v64; 301 sigZExtra = x128.v0; 302 } 303 goto roundPack; 304 } 305 sigZ: 306 sigZExtra = sig256Z[indexWord( 4, 1 )] | sig256Z[indexWord( 4, 0 )]; 307 shiftRightRoundPack: 308 sigZExtra = (uint64_t) (sigZ.v0<<(64 - shiftCount)) | (sigZExtra != 0); 309 sigZ = softfloat_shortShiftRight128( sigZ.v64, sigZ.v0, shiftCount ); 310 roundPack: 311 return 312 softfloat_roundPackToF128( 313 signZ, expZ - 1, sigZ.v64, sigZ.v0, sigZExtra ); 314 /*------------------------------------------------------------------------ 315 *------------------------------------------------------------------------*/ 316 propagateNaN_ABC: 317 uiZ = softfloat_propagateNaNF128UI( uiA64, uiA0, uiB64, uiB0 ); 318 goto propagateNaN_ZC; 319 /*------------------------------------------------------------------------ 320 *------------------------------------------------------------------------*/ 321 infProdArg: 322 if ( magBits ) { 323 uiZ.v64 = packToF128UI64( signZ, 0x7FFF, 0 ); 324 uiZ.v0 = 0; 325 if ( expC != 0x7FFF ) goto uiZ; 326 if ( sigC.v64 | sigC.v0 ) goto propagateNaN_ZC; 327 if ( signZ == signC ) goto uiZ; 328 } 329 invalid: 330 softfloat_raiseFlags( softfloat_flag_invalid ); 331 uiZ.v64 = defaultNaNF128UI64; 332 uiZ.v0 = defaultNaNF128UI0; 333 propagateNaN_ZC: 334 uiZ = softfloat_propagateNaNF128UI( uiZ.v64, uiZ.v0, uiC64, uiC0 ); 335 goto uiZ; 336 /*------------------------------------------------------------------------ 337 *------------------------------------------------------------------------*/ 338 zeroProd: 339 uiZ.v64 = uiC64; 340 uiZ.v0 = uiC0; 341 if ( ! (expC | sigC.v64 | sigC.v0) && (signZ != signC) ) { 342 completeCancellation: 343 uiZ.v64 = 344 packToF128UI64( 345 softfloat_roundingMode == softfloat_round_min, 0, 0 ); 346 uiZ.v0 = 0; 347 } 348 uiZ: 349 uZ.ui = uiZ; 350 return uZ.f; 351 352 } 353 354