xref: /optee_os/lib/libutils/isoc/arch/arm/softfloat/source/f128M_sqrt.c (revision 9403c583381528e7fb391e3769644cc9653cfbb6)
1 
2 /*============================================================================
3 
4 This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
5 Package, Release 3a, by John R. Hauser.
6 
7 Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
8 All rights reserved.
9 
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12 
13  1. Redistributions of source code must retain the above copyright notice,
14     this list of conditions, and the following disclaimer.
15 
16  2. Redistributions in binary form must reproduce the above copyright notice,
17     this list of conditions, and the following disclaimer in the documentation
18     and/or other materials provided with the distribution.
19 
20  3. Neither the name of the University nor the names of its contributors may
21     be used to endorse or promote products derived from this software without
22     specific prior written permission.
23 
24 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
25 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
26 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
27 DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
28 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
30 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
31 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 
35 =============================================================================*/
36 
37 #include <stdbool.h>
38 #include <stdint.h>
39 #include "platform.h"
40 #include "internals.h"
41 #include "specialize.h"
42 #include "softfloat.h"
43 
44 #ifdef SOFTFLOAT_FAST_INT64
45 
46 void f128M_sqrt( const float128_t *aPtr, float128_t *zPtr )
47 {
48 
49     *zPtr = f128_sqrt( *aPtr );
50 
51 }
52 
53 #else
54 
55 void f128M_sqrt( const float128_t *aPtr, float128_t *zPtr )
56 {
57     const uint32_t *aWPtr;
58     uint32_t *zWPtr;
59     uint32_t uiA96;
60     bool signA;
61     int32_t rawExpA;
62     uint32_t rem[6];
63     int32_t expA, expZ;
64     uint64_t rem64;
65     uint32_t sig32A, recipSqrt32, sig32Z, qs[3], q;
66     uint64_t sig64Z, x64;
67     uint32_t term[5], y[5], rem32;
68 
69     /*------------------------------------------------------------------------
70     *------------------------------------------------------------------------*/
71     aWPtr = (const uint32_t *) aPtr;
72     zWPtr = (uint32_t *) zPtr;
73     /*------------------------------------------------------------------------
74     *------------------------------------------------------------------------*/
75     uiA96 = aWPtr[indexWordHi( 4 )];
76     signA = signF128UI96( uiA96 );
77     rawExpA  = expF128UI96( uiA96 );
78     /*------------------------------------------------------------------------
79     *------------------------------------------------------------------------*/
80     if ( rawExpA == 0x7FFF ) {
81         if (
82             fracF128UI96( uiA96 )
83                 || (aWPtr[indexWord( 4, 2 )] | aWPtr[indexWord( 4, 1 )]
84                         | aWPtr[indexWord( 4, 0 )])
85         ) {
86             softfloat_propagateNaNF128M( aWPtr, 0, zWPtr );
87             return;
88         }
89         if ( ! signA ) goto copyA;
90         goto invalid;
91     }
92     /*------------------------------------------------------------------------
93     *------------------------------------------------------------------------*/
94     expA = softfloat_shiftNormSigF128M( aWPtr, 13 - (rawExpA & 1), rem );
95     if ( expA == -128 ) goto copyA;
96     if ( signA ) goto invalid;
97     /*------------------------------------------------------------------------
98     | (`sig32Z' is guaranteed to be a lower bound on the square root of
99     | `sig32A', which makes `sig32Z' also a lower bound on the square root of
100     | `sigA'.)
101     *------------------------------------------------------------------------*/
102     expZ = ((expA - 0x3FFF)>>1) + 0x3FFE;
103     expA &= 1;
104     rem64 = (uint64_t) rem[indexWord( 4, 3 )]<<32 | rem[indexWord( 4, 2 )];
105     if ( expA ) {
106         if ( ! rawExpA ) {
107             softfloat_shortShiftRight128M( rem, 1, rem );
108             rem64 >>= 1;
109         }
110         sig32A = rem64>>29;
111     } else {
112         sig32A = rem64>>30;
113     }
114     recipSqrt32 = softfloat_approxRecipSqrt32_1( expA, sig32A );
115     sig32Z = ((uint64_t) sig32A * recipSqrt32)>>32;
116     if ( expA ) sig32Z >>= 1;
117     qs[2] = sig32Z;
118     rem64 -= (uint64_t) sig32Z * sig32Z;
119     rem[indexWord( 4, 3 )] = rem64>>32;
120     rem[indexWord( 4, 2 )] = rem64;
121     /*------------------------------------------------------------------------
122     *------------------------------------------------------------------------*/
123     q = ((uint32_t) (rem64>>2) * (uint64_t) recipSqrt32)>>32;
124     qs[1] = q;
125     sig64Z = ((uint64_t) sig32Z<<32) + ((uint64_t) q<<3);
126     x64 = ((uint64_t) sig32Z<<32) + sig64Z;
127     term[indexWord( 4, 3 )] = 0;
128     term[indexWord( 4, 2 )] = x64>>32;
129     term[indexWord( 4, 1 )] = x64;
130     term[indexWord( 4, 0 )] = 0;
131     softfloat_remStep128MBy32( rem, 29, term, q, y );
132     rem64 = (uint64_t) y[indexWord( 4, 3 )]<<32 | y[indexWord( 4, 2 )];
133     /*------------------------------------------------------------------------
134     *------------------------------------------------------------------------*/
135     q = ((uint32_t) (rem64>>2) * (uint64_t) recipSqrt32)>>32;
136     sig64Z <<= 1;
137     /*------------------------------------------------------------------------
138     | (Repeating this loop is a rare occurrence.)
139     *------------------------------------------------------------------------*/
140     for (;;) {
141         x64 = sig64Z + (q>>26);
142         term[indexWord( 4, 2 )] = x64>>32;
143         term[indexWord( 4, 1 )] = x64;
144         term[indexWord( 4, 0 )] = q<<6;
145         term[indexWord( 4, 3 )] = 0;
146         softfloat_remStep128MBy32(
147             y, 29, term, q, &rem[indexMultiwordHi( 6, 4 )] );
148         rem32 = rem[indexWordHi( 6 )];
149         if ( ! (rem32 & 0x80000000) ) break;
150         --q;
151     }
152     qs[0] = q;
153     rem64 = (uint64_t) rem32<<32 | rem[indexWord( 6, 4 )];
154     /*------------------------------------------------------------------------
155     *------------------------------------------------------------------------*/
156     q = (((uint32_t) (rem64>>2) * (uint64_t) recipSqrt32)>>32) + 2;
157     x64 = (uint64_t) q<<27;
158     y[indexWord( 5, 0 )] = x64;
159     x64 = ((uint64_t) qs[0]<<24) + (x64>>32);
160     y[indexWord( 5, 1 )] = x64;
161     x64 = ((uint64_t) qs[1]<<21) + (x64>>32);
162     y[indexWord( 5, 2 )] = x64;
163     x64 = ((uint64_t) qs[2]<<18) + (x64>>32);
164     y[indexWord( 5, 3 )] = x64;
165     y[indexWord( 5, 4 )] = x64>>32;
166     /*------------------------------------------------------------------------
167     *------------------------------------------------------------------------*/
168     if ( (q & 0xF) <= 2 ) {
169         q &= ~3;
170         y[indexWordLo( 5 )] = q<<27;
171         term[indexWord( 5, 4 )] = 0;
172         term[indexWord( 5, 3 )] = 0;
173         term[indexWord( 5, 2 )] = 0;
174         term[indexWord( 5, 1 )] = q>>6;
175         term[indexWord( 5, 0 )] = q<<26;
176         softfloat_sub160M( y, term, term );
177         rem[indexWord( 6, 1 )] = 0;
178         rem[indexWord( 6, 0 )] = 0;
179         softfloat_remStep160MBy32(
180             &rem[indexMultiwordLo( 6, 5 )],
181             14,
182             term,
183             q,
184             &rem[indexMultiwordLo( 6, 5 )]
185         );
186         rem32 = rem[indexWord( 6, 4 )];
187         if ( rem32 & 0x80000000 ) {
188             softfloat_sub1X160M( y );
189         } else {
190             if (
191                 rem32 || rem[indexWord( 6, 0 )] || rem[indexWord( 6, 1 )]
192                     || (rem[indexWord( 6, 3 )] | rem[indexWord( 6, 2 )])
193             ) {
194                 y[indexWordLo( 5 )] |= 1;
195             }
196         }
197     }
198     softfloat_roundPackMToF128M( 0, expZ, y, zWPtr );
199     return;
200     /*------------------------------------------------------------------------
201     *------------------------------------------------------------------------*/
202  invalid:
203     softfloat_invalidF128M( zWPtr );
204     return;
205     /*------------------------------------------------------------------------
206     *------------------------------------------------------------------------*/
207  copyA:
208     zWPtr[indexWordHi( 4 )] = uiA96;
209     zWPtr[indexWord( 4, 2 )] = aWPtr[indexWord( 4, 2 )];
210     zWPtr[indexWord( 4, 1 )] = aWPtr[indexWord( 4, 1 )];
211     zWPtr[indexWord( 4, 0 )] = aWPtr[indexWord( 4, 0 )];
212 
213 }
214 
215 #endif
216 
217