xref: /optee_os/lib/libutils/isoc/arch/arm/softfloat/source/s_mulAddF128.c (revision 9403c583381528e7fb391e3769644cc9653cfbb6)
1 
2 /*============================================================================
3 
4 This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
5 Package, Release 3a, by John R. Hauser.
6 
7 Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
8 All rights reserved.
9 
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12 
13  1. Redistributions of source code must retain the above copyright notice,
14     this list of conditions, and the following disclaimer.
15 
16  2. Redistributions in binary form must reproduce the above copyright notice,
17     this list of conditions, and the following disclaimer in the documentation
18     and/or other materials provided with the distribution.
19 
20  3. Neither the name of the University nor the names of its contributors may
21     be used to endorse or promote products derived from this software without
22     specific prior written permission.
23 
24 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
25 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
26 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
27 DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
28 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
30 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
31 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 
35 =============================================================================*/
36 
37 #include <stdbool.h>
38 #include <stdint.h>
39 #include "platform.h"
40 #include "internals.h"
41 #include "specialize.h"
42 #include "softfloat.h"
43 
44 float128_t
45  softfloat_mulAddF128(
46      uint_fast64_t uiA64,
47      uint_fast64_t uiA0,
48      uint_fast64_t uiB64,
49      uint_fast64_t uiB0,
50      uint_fast64_t uiC64,
51      uint_fast64_t uiC0,
52      uint_fast8_t op
53  )
54 {
55     bool signA;
56     int_fast32_t expA;
57     struct uint128 sigA;
58     bool signB;
59     int_fast32_t expB;
60     struct uint128 sigB;
61     bool signC;
62     int_fast32_t expC;
63     struct uint128 sigC;
64     bool signZ;
65     uint_fast64_t magBits;
66     struct uint128 uiZ;
67     struct exp32_sig128 normExpSig;
68     int_fast32_t expZ;
69     uint64_t sig256Z[4];
70     struct uint128 sigZ;
71     int_fast32_t shiftCount, expDiff;
72     struct uint128 x128;
73     uint64_t sig256C[4];
74     static uint64_t zero256[4] = INIT_UINTM4( 0, 0, 0, 0 );
75     uint_fast64_t sigZExtra, sig256Z0;
76     union ui128_f128 uZ;
77 
78     /*------------------------------------------------------------------------
79     *------------------------------------------------------------------------*/
80     signA = signF128UI64( uiA64 );
81     expA  = expF128UI64( uiA64 );
82     sigA.v64 = fracF128UI64( uiA64 );
83     sigA.v0  = uiA0;
84     signB = signF128UI64( uiB64 );
85     expB  = expF128UI64( uiB64 );
86     sigB.v64 = fracF128UI64( uiB64 );
87     sigB.v0  = uiB0;
88     signC = signF128UI64( uiC64 ) ^ (op == softfloat_mulAdd_subC);
89     expC  = expF128UI64( uiC64 );
90     sigC.v64 = fracF128UI64( uiC64 );
91     sigC.v0  = uiC0;
92     signZ = signA ^ signB ^ (op == softfloat_mulAdd_subProd);
93     /*------------------------------------------------------------------------
94     *------------------------------------------------------------------------*/
95     if ( expA == 0x7FFF ) {
96         if (
97             (sigA.v64 | sigA.v0) || ((expB == 0x7FFF) && (sigB.v64 | sigB.v0))
98         ) {
99             goto propagateNaN_ABC;
100         }
101         magBits = expB | sigB.v64 | sigB.v0;
102         goto infProdArg;
103     }
104     if ( expB == 0x7FFF ) {
105         if ( sigB.v64 | sigB.v0 ) goto propagateNaN_ABC;
106         magBits = expA | sigA.v64 | sigA.v0;
107         goto infProdArg;
108     }
109     if ( expC == 0x7FFF ) {
110         if ( sigC.v64 | sigC.v0 ) {
111             uiZ.v64 = 0;
112             uiZ.v0  = 0;
113             goto propagateNaN_ZC;
114         }
115         uiZ.v64 = uiC64;
116         uiZ.v0  = uiC0;
117         goto uiZ;
118     }
119     /*------------------------------------------------------------------------
120     *------------------------------------------------------------------------*/
121     if ( ! expA ) {
122         if ( ! (sigA.v64 | sigA.v0) ) goto zeroProd;
123         normExpSig = softfloat_normSubnormalF128Sig( sigA.v64, sigA.v0 );
124         expA = normExpSig.exp;
125         sigA = normExpSig.sig;
126     }
127     if ( ! expB ) {
128         if ( ! (sigB.v64 | sigB.v0) ) goto zeroProd;
129         normExpSig = softfloat_normSubnormalF128Sig( sigB.v64, sigB.v0 );
130         expB = normExpSig.exp;
131         sigB = normExpSig.sig;
132     }
133     /*------------------------------------------------------------------------
134     *------------------------------------------------------------------------*/
135     expZ = expA + expB - 0x3FFE;
136     sigA.v64 |= UINT64_C( 0x0001000000000000 );
137     sigB.v64 |= UINT64_C( 0x0001000000000000 );
138     sigA = softfloat_shortShiftLeft128( sigA.v64, sigA.v0, 8 );
139     sigB = softfloat_shortShiftLeft128( sigB.v64, sigB.v0, 15 );
140     softfloat_mul128To256M( sigA.v64, sigA.v0, sigB.v64, sigB.v0, sig256Z );
141     sigZ.v64 = sig256Z[indexWord( 4, 3 )];
142     sigZ.v0  = sig256Z[indexWord( 4, 2 )];
143     shiftCount = 0;
144     if ( ! (sigZ.v64 & UINT64_C( 0x0100000000000000 )) ) {
145         --expZ;
146         shiftCount = -1;
147     }
148     if ( ! expC ) {
149         if ( ! (sigC.v64 | sigC.v0) ) {
150             shiftCount += 8;
151             goto sigZ;
152         }
153         normExpSig = softfloat_normSubnormalF128Sig( sigC.v64, sigC.v0 );
154         expC = normExpSig.exp;
155         sigC = normExpSig.sig;
156     }
157     sigC.v64 |= UINT64_C( 0x0001000000000000 );
158     sigC = softfloat_shortShiftLeft128( sigC.v64, sigC.v0, 8 );
159     /*------------------------------------------------------------------------
160     *------------------------------------------------------------------------*/
161     expDiff = expZ - expC;
162     if ( expDiff < 0 ) {
163         expZ = expC;
164         if ( (signZ == signC) || (expDiff < -1) ) {
165             shiftCount -= expDiff;
166             if ( shiftCount ) {
167                 sigZ =
168                     softfloat_shiftRightJam128(
169                         sigZ.v64, sigZ.v0, shiftCount );
170             }
171         } else {
172             if ( ! shiftCount ) {
173                 x128 =
174                     softfloat_shortShiftRight128(
175                         sig256Z[indexWord( 4, 1 )], sig256Z[indexWord( 4, 0 )],
176                         1
177                     );
178                 sig256Z[indexWord( 4, 1 )] = (sigZ.v0<<63) | x128.v64;
179                 sig256Z[indexWord( 4, 0 )] = x128.v0;
180                 sigZ = softfloat_shortShiftRight128( sigZ.v64, sigZ.v0, 1 );
181                 sig256Z[indexWord( 4, 3 )] = sigZ.v64;
182                 sig256Z[indexWord( 4, 2 )] = sigZ.v0;
183             }
184         }
185     } else {
186         if ( shiftCount ) softfloat_add256M( sig256Z, sig256Z, sig256Z );
187         if ( ! expDiff ) {
188             sigZ.v64 = sig256Z[indexWord( 4, 3 )];
189             sigZ.v0  = sig256Z[indexWord( 4, 2 )];
190         } else {
191             sig256C[indexWord( 4, 3 )] = sigC.v64;
192             sig256C[indexWord( 4, 2 )] = sigC.v0;
193             sig256C[indexWord( 4, 1 )] = 0;
194             sig256C[indexWord( 4, 0 )] = 0;
195             softfloat_shiftRightJam256M( sig256C, expDiff, sig256C );
196         }
197     }
198     /*------------------------------------------------------------------------
199     *------------------------------------------------------------------------*/
200     shiftCount = 8;
201     if ( signZ == signC ) {
202         /*--------------------------------------------------------------------
203         *--------------------------------------------------------------------*/
204         if ( expDiff <= 0 ) {
205             sigZ = softfloat_add128( sigC.v64, sigC.v0, sigZ.v64, sigZ.v0 );
206         } else {
207             softfloat_add256M( sig256Z, sig256C, sig256Z );
208             sigZ.v64 = sig256Z[indexWord( 4, 3 )];
209             sigZ.v0  = sig256Z[indexWord( 4, 2 )];
210         }
211         if ( sigZ.v64 & UINT64_C( 0x0200000000000000 ) ) {
212             ++expZ;
213             shiftCount = 9;
214         }
215     } else {
216         /*--------------------------------------------------------------------
217         *--------------------------------------------------------------------*/
218         if ( expDiff < 0 ) {
219             signZ = signC;
220             if ( expDiff < -1 ) {
221                 sigZ =
222                     softfloat_sub128( sigC.v64, sigC.v0, sigZ.v64, sigZ.v0 );
223                 sigZExtra =
224                     sig256Z[indexWord( 4, 1 )] | sig256Z[indexWord( 4, 0 )];
225                 if ( sigZExtra ) {
226                     sigZ = softfloat_sub128( sigZ.v64, sigZ.v0, 0, 1 );
227                 }
228                 if ( ! (sigZ.v64 & UINT64_C( 0x0100000000000000 )) ) {
229                     --expZ;
230                     shiftCount = 7;
231                 }
232                 goto shiftRightRoundPack;
233             } else {
234                 sig256C[indexWord( 4, 3 )] = sigC.v64;
235                 sig256C[indexWord( 4, 2 )] = sigC.v0;
236                 sig256C[indexWord( 4, 1 )] = 0;
237                 sig256C[indexWord( 4, 0 )] = 0;
238                 softfloat_sub256M( sig256C, sig256Z, sig256Z );
239             }
240         } else if ( ! expDiff ) {
241             sigZ = softfloat_sub128( sigZ.v64, sigZ.v0, sigC.v64, sigC.v0 );
242             if (
243                 ! (sigZ.v64 | sigZ.v0) && ! sig256Z[indexWord( 4, 1 )]
244                     && ! sig256Z[indexWord( 4, 0 )]
245             ) {
246                 goto completeCancellation;
247             }
248             sig256Z[indexWord( 4, 3 )] = sigZ.v64;
249             sig256Z[indexWord( 4, 2 )] = sigZ.v0;
250             if ( sigZ.v64 & UINT64_C( 0x8000000000000000 ) ) {
251                 signZ ^= 1;
252                 softfloat_sub256M( zero256, sig256Z, sig256Z );
253             }
254         } else {
255             softfloat_sub256M( sig256Z, sig256C, sig256Z );
256             if ( 1 < expDiff ) {
257                 sigZ.v64 = sig256Z[indexWord( 4, 3 )];
258                 sigZ.v0  = sig256Z[indexWord( 4, 2 )];
259                 if ( ! (sigZ.v64 & UINT64_C( 0x0100000000000000 )) ) {
260                     --expZ;
261                     shiftCount = 7;
262                 }
263                 goto sigZ;
264             }
265         }
266         /*--------------------------------------------------------------------
267         *--------------------------------------------------------------------*/
268         sigZ.v64  = sig256Z[indexWord( 4, 3 )];
269         sigZ.v0   = sig256Z[indexWord( 4, 2 )];
270         sigZExtra = sig256Z[indexWord( 4, 1 )];
271         sig256Z0  = sig256Z[indexWord( 4, 0 )];
272         if ( sigZ.v64 ) {
273             if ( sig256Z0 ) sigZExtra |= 1;
274         } else {
275             expZ -= 64;
276             sigZ.v64  = sigZ.v0;
277             sigZ.v0   = sigZExtra;
278             sigZExtra = sig256Z0;
279             if ( ! sigZ.v64 ) {
280                 expZ -= 64;
281                 sigZ.v64  = sigZ.v0;
282                 sigZ.v0   = sigZExtra;
283                 sigZExtra = 0;
284                 if ( ! sigZ.v64 ) {
285                     expZ -= 64;
286                     sigZ.v64 = sigZ.v0;
287                     sigZ.v0  = 0;
288                 }
289             }
290         }
291         shiftCount = softfloat_countLeadingZeros64( sigZ.v64 );
292         expZ += 7 - shiftCount;
293         shiftCount = 15 - shiftCount;
294         if ( 0 < shiftCount ) goto shiftRightRoundPack;
295         if ( shiftCount ) {
296             shiftCount = -shiftCount;
297             sigZ =
298                 softfloat_shortShiftLeft128( sigZ.v64, sigZ.v0, shiftCount );
299             x128 = softfloat_shortShiftLeft128( 0, sigZExtra, shiftCount );
300             sigZ.v0 |= x128.v64;
301             sigZExtra = x128.v0;
302         }
303         goto roundPack;
304     }
305  sigZ:
306     sigZExtra = sig256Z[indexWord( 4, 1 )] | sig256Z[indexWord( 4, 0 )];
307  shiftRightRoundPack:
308     sigZExtra = (uint64_t) (sigZ.v0<<(64 - shiftCount)) | (sigZExtra != 0);
309     sigZ = softfloat_shortShiftRight128( sigZ.v64, sigZ.v0, shiftCount );
310  roundPack:
311     return
312         softfloat_roundPackToF128(
313             signZ, expZ - 1, sigZ.v64, sigZ.v0, sigZExtra );
314     /*------------------------------------------------------------------------
315     *------------------------------------------------------------------------*/
316  propagateNaN_ABC:
317     uiZ = softfloat_propagateNaNF128UI( uiA64, uiA0, uiB64, uiB0 );
318     goto propagateNaN_ZC;
319     /*------------------------------------------------------------------------
320     *------------------------------------------------------------------------*/
321  infProdArg:
322     if ( magBits ) {
323         uiZ.v64 = packToF128UI64( signZ, 0x7FFF, 0 );
324         uiZ.v0 = 0;
325         if ( expC != 0x7FFF ) goto uiZ;
326         if ( sigC.v64 | sigC.v0 ) goto propagateNaN_ZC;
327         if ( signZ == signC ) goto uiZ;
328     }
329  invalid:
330     softfloat_raiseFlags( softfloat_flag_invalid );
331     uiZ.v64 = defaultNaNF128UI64;
332     uiZ.v0  = defaultNaNF128UI0;
333  propagateNaN_ZC:
334     uiZ = softfloat_propagateNaNF128UI( uiZ.v64, uiZ.v0, uiC64, uiC0 );
335     goto uiZ;
336     /*------------------------------------------------------------------------
337     *------------------------------------------------------------------------*/
338  zeroProd:
339     uiZ.v64 = uiC64;
340     uiZ.v0  = uiC0;
341     if ( ! (expC | sigC.v64 | sigC.v0) && (signZ != signC) ) {
342  completeCancellation:
343         uiZ.v64 =
344             packToF128UI64(
345                 softfloat_roundingMode == softfloat_round_min, 0, 0 );
346         uiZ.v0 = 0;
347     }
348  uiZ:
349     uZ.ui = uiZ;
350     return uZ.f;
351 
352 }
353 
354