// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Linux/PA-RISC Project (http://www.parisc-linux.org/)
 *
 * Floating-point emulation code
 *  Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org>
 */
/*
 * BEGIN_DESC
 *
 *  File:
 *	@(#)	pa/spmath/dfsqrt.c		$Revision: 1.1 $
 *
 *  Purpose:
 *	Double Floating-point Square Root
 *
 *  External Interfaces:
 *	dbl_fsqrt(srcptr,nullptr,dstptr,status)
 *
 *  Internal Interfaces:
 *
 *  Theory:
 *	<<please update with an overview of the operation of this file>>
 *
 * END_DESC
 */
27*4882a593Smuzhiyun
28*4882a593Smuzhiyun
29*4882a593Smuzhiyun #include "float.h"
30*4882a593Smuzhiyun #include "dbl_float.h"
31*4882a593Smuzhiyun
/*
 *  Double Floating-point Square Root
 */

/*ARGSUSED*/
37*4882a593Smuzhiyun unsigned int
dbl_fsqrt(dbl_floating_point * srcptr,unsigned int * nullptr,dbl_floating_point * dstptr,unsigned int * status)38*4882a593Smuzhiyun dbl_fsqrt(
39*4882a593Smuzhiyun dbl_floating_point *srcptr,
40*4882a593Smuzhiyun unsigned int *nullptr,
41*4882a593Smuzhiyun dbl_floating_point *dstptr,
42*4882a593Smuzhiyun unsigned int *status)
43*4882a593Smuzhiyun {
44*4882a593Smuzhiyun register unsigned int srcp1, srcp2, resultp1, resultp2;
45*4882a593Smuzhiyun register unsigned int newbitp1, newbitp2, sump1, sump2;
46*4882a593Smuzhiyun register int src_exponent;
47*4882a593Smuzhiyun register boolean guardbit = FALSE, even_exponent;
48*4882a593Smuzhiyun
49*4882a593Smuzhiyun Dbl_copyfromptr(srcptr,srcp1,srcp2);
50*4882a593Smuzhiyun /*
51*4882a593Smuzhiyun * check source operand for NaN or infinity
52*4882a593Smuzhiyun */
53*4882a593Smuzhiyun if ((src_exponent = Dbl_exponent(srcp1)) == DBL_INFINITY_EXPONENT) {
54*4882a593Smuzhiyun /*
55*4882a593Smuzhiyun * is signaling NaN?
56*4882a593Smuzhiyun */
57*4882a593Smuzhiyun if (Dbl_isone_signaling(srcp1)) {
58*4882a593Smuzhiyun /* trap if INVALIDTRAP enabled */
59*4882a593Smuzhiyun if (Is_invalidtrap_enabled()) return(INVALIDEXCEPTION);
60*4882a593Smuzhiyun /* make NaN quiet */
61*4882a593Smuzhiyun Set_invalidflag();
62*4882a593Smuzhiyun Dbl_set_quiet(srcp1);
63*4882a593Smuzhiyun }
64*4882a593Smuzhiyun /*
65*4882a593Smuzhiyun * Return quiet NaN or positive infinity.
66*4882a593Smuzhiyun * Fall through to negative test if negative infinity.
67*4882a593Smuzhiyun */
68*4882a593Smuzhiyun if (Dbl_iszero_sign(srcp1) ||
69*4882a593Smuzhiyun Dbl_isnotzero_mantissa(srcp1,srcp2)) {
70*4882a593Smuzhiyun Dbl_copytoptr(srcp1,srcp2,dstptr);
71*4882a593Smuzhiyun return(NOEXCEPTION);
72*4882a593Smuzhiyun }
73*4882a593Smuzhiyun }
74*4882a593Smuzhiyun
75*4882a593Smuzhiyun /*
76*4882a593Smuzhiyun * check for zero source operand
77*4882a593Smuzhiyun */
78*4882a593Smuzhiyun if (Dbl_iszero_exponentmantissa(srcp1,srcp2)) {
79*4882a593Smuzhiyun Dbl_copytoptr(srcp1,srcp2,dstptr);
80*4882a593Smuzhiyun return(NOEXCEPTION);
81*4882a593Smuzhiyun }
82*4882a593Smuzhiyun
83*4882a593Smuzhiyun /*
84*4882a593Smuzhiyun * check for negative source operand
85*4882a593Smuzhiyun */
86*4882a593Smuzhiyun if (Dbl_isone_sign(srcp1)) {
87*4882a593Smuzhiyun /* trap if INVALIDTRAP enabled */
88*4882a593Smuzhiyun if (Is_invalidtrap_enabled()) return(INVALIDEXCEPTION);
89*4882a593Smuzhiyun /* make NaN quiet */
90*4882a593Smuzhiyun Set_invalidflag();
91*4882a593Smuzhiyun Dbl_makequietnan(srcp1,srcp2);
92*4882a593Smuzhiyun Dbl_copytoptr(srcp1,srcp2,dstptr);
93*4882a593Smuzhiyun return(NOEXCEPTION);
94*4882a593Smuzhiyun }
95*4882a593Smuzhiyun
96*4882a593Smuzhiyun /*
97*4882a593Smuzhiyun * Generate result
98*4882a593Smuzhiyun */
99*4882a593Smuzhiyun if (src_exponent > 0) {
100*4882a593Smuzhiyun even_exponent = Dbl_hidden(srcp1);
101*4882a593Smuzhiyun Dbl_clear_signexponent_set_hidden(srcp1);
102*4882a593Smuzhiyun }
103*4882a593Smuzhiyun else {
104*4882a593Smuzhiyun /* normalize operand */
105*4882a593Smuzhiyun Dbl_clear_signexponent(srcp1);
106*4882a593Smuzhiyun src_exponent++;
107*4882a593Smuzhiyun Dbl_normalize(srcp1,srcp2,src_exponent);
108*4882a593Smuzhiyun even_exponent = src_exponent & 1;
109*4882a593Smuzhiyun }
110*4882a593Smuzhiyun if (even_exponent) {
111*4882a593Smuzhiyun /* exponent is even */
112*4882a593Smuzhiyun /* Add comment here. Explain why odd exponent needs correction */
113*4882a593Smuzhiyun Dbl_leftshiftby1(srcp1,srcp2);
114*4882a593Smuzhiyun }
115*4882a593Smuzhiyun /*
116*4882a593Smuzhiyun * Add comment here. Explain following algorithm.
117*4882a593Smuzhiyun *
118*4882a593Smuzhiyun * Trust me, it works.
119*4882a593Smuzhiyun *
120*4882a593Smuzhiyun */
121*4882a593Smuzhiyun Dbl_setzero(resultp1,resultp2);
122*4882a593Smuzhiyun Dbl_allp1(newbitp1) = 1 << (DBL_P - 32);
123*4882a593Smuzhiyun Dbl_setzero_mantissap2(newbitp2);
124*4882a593Smuzhiyun while (Dbl_isnotzero(newbitp1,newbitp2) && Dbl_isnotzero(srcp1,srcp2)) {
125*4882a593Smuzhiyun Dbl_addition(resultp1,resultp2,newbitp1,newbitp2,sump1,sump2);
126*4882a593Smuzhiyun if(Dbl_isnotgreaterthan(sump1,sump2,srcp1,srcp2)) {
127*4882a593Smuzhiyun Dbl_leftshiftby1(newbitp1,newbitp2);
128*4882a593Smuzhiyun /* update result */
129*4882a593Smuzhiyun Dbl_addition(resultp1,resultp2,newbitp1,newbitp2,
130*4882a593Smuzhiyun resultp1,resultp2);
131*4882a593Smuzhiyun Dbl_subtract(srcp1,srcp2,sump1,sump2,srcp1,srcp2);
132*4882a593Smuzhiyun Dbl_rightshiftby2(newbitp1,newbitp2);
133*4882a593Smuzhiyun }
134*4882a593Smuzhiyun else {
135*4882a593Smuzhiyun Dbl_rightshiftby1(newbitp1,newbitp2);
136*4882a593Smuzhiyun }
137*4882a593Smuzhiyun Dbl_leftshiftby1(srcp1,srcp2);
138*4882a593Smuzhiyun }
139*4882a593Smuzhiyun /* correct exponent for pre-shift */
140*4882a593Smuzhiyun if (even_exponent) {
141*4882a593Smuzhiyun Dbl_rightshiftby1(resultp1,resultp2);
142*4882a593Smuzhiyun }
143*4882a593Smuzhiyun
144*4882a593Smuzhiyun /* check for inexact */
145*4882a593Smuzhiyun if (Dbl_isnotzero(srcp1,srcp2)) {
146*4882a593Smuzhiyun if (!even_exponent && Dbl_islessthan(resultp1,resultp2,srcp1,srcp2)) {
147*4882a593Smuzhiyun Dbl_increment(resultp1,resultp2);
148*4882a593Smuzhiyun }
149*4882a593Smuzhiyun guardbit = Dbl_lowmantissap2(resultp2);
150*4882a593Smuzhiyun Dbl_rightshiftby1(resultp1,resultp2);
151*4882a593Smuzhiyun
152*4882a593Smuzhiyun /* now round result */
153*4882a593Smuzhiyun switch (Rounding_mode()) {
154*4882a593Smuzhiyun case ROUNDPLUS:
155*4882a593Smuzhiyun Dbl_increment(resultp1,resultp2);
156*4882a593Smuzhiyun break;
157*4882a593Smuzhiyun case ROUNDNEAREST:
158*4882a593Smuzhiyun /* stickybit is always true, so guardbit
159*4882a593Smuzhiyun * is enough to determine rounding */
160*4882a593Smuzhiyun if (guardbit) {
161*4882a593Smuzhiyun Dbl_increment(resultp1,resultp2);
162*4882a593Smuzhiyun }
163*4882a593Smuzhiyun break;
164*4882a593Smuzhiyun }
165*4882a593Smuzhiyun /* increment result exponent by 1 if mantissa overflowed */
166*4882a593Smuzhiyun if (Dbl_isone_hiddenoverflow(resultp1)) src_exponent+=2;
167*4882a593Smuzhiyun
168*4882a593Smuzhiyun if (Is_inexacttrap_enabled()) {
169*4882a593Smuzhiyun Dbl_set_exponent(resultp1,
170*4882a593Smuzhiyun ((src_exponent-DBL_BIAS)>>1)+DBL_BIAS);
171*4882a593Smuzhiyun Dbl_copytoptr(resultp1,resultp2,dstptr);
172*4882a593Smuzhiyun return(INEXACTEXCEPTION);
173*4882a593Smuzhiyun }
174*4882a593Smuzhiyun else Set_inexactflag();
175*4882a593Smuzhiyun }
176*4882a593Smuzhiyun else {
177*4882a593Smuzhiyun Dbl_rightshiftby1(resultp1,resultp2);
178*4882a593Smuzhiyun }
179*4882a593Smuzhiyun Dbl_set_exponent(resultp1,((src_exponent-DBL_BIAS)>>1)+DBL_BIAS);
180*4882a593Smuzhiyun Dbl_copytoptr(resultp1,resultp2,dstptr);
181*4882a593Smuzhiyun return(NOEXCEPTION);
182*4882a593Smuzhiyun }
183