xref: /OK3568_Linux_fs/kernel/arch/parisc/math-emu/dfsqrt.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-or-later
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun  * Linux/PA-RISC Project (http://www.parisc-linux.org/)
4*4882a593Smuzhiyun  *
5*4882a593Smuzhiyun  * Floating-point emulation code
6*4882a593Smuzhiyun  *  Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org>
7*4882a593Smuzhiyun  */
8*4882a593Smuzhiyun /*
9*4882a593Smuzhiyun  * BEGIN_DESC
10*4882a593Smuzhiyun  *
11*4882a593Smuzhiyun  *  File:
12*4882a593Smuzhiyun  *	@(#)	pa/spmath/dfsqrt.c		$Revision: 1.1 $
13*4882a593Smuzhiyun  *
14*4882a593Smuzhiyun  *  Purpose:
15*4882a593Smuzhiyun  *	Double Floating-point Square Root
16*4882a593Smuzhiyun  *
17*4882a593Smuzhiyun  *  External Interfaces:
18*4882a593Smuzhiyun  *	dbl_fsqrt(srcptr,nullptr,dstptr,status)
19*4882a593Smuzhiyun  *
20*4882a593Smuzhiyun  *  Internal Interfaces:
21*4882a593Smuzhiyun  *
22*4882a593Smuzhiyun  *  Theory:
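23*4882a593Smuzhiyun  *	Special operands (NaN, infinity, zero, negative) are answered
 *	directly.  For any other operand the mantissa is normalized, the
 *	root is extracted by a shift-and-subtract loop that moves its trial
 *	bit down one position per iteration, the result is rounded
 *	according to the current rounding mode, and the result exponent is
 *	formed by halving the unbiased source exponent and re-biasing it.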
24*4882a593Smuzhiyun  *
25*4882a593Smuzhiyun  * END_DESC
26*4882a593Smuzhiyun */
27*4882a593Smuzhiyun 
28*4882a593Smuzhiyun 
29*4882a593Smuzhiyun #include "float.h"
30*4882a593Smuzhiyun #include "dbl_float.h"
31*4882a593Smuzhiyun 
32*4882a593Smuzhiyun /*
33*4882a593Smuzhiyun  *  Double Floating-point Square Root
 *
 *  dbl_fsqrt() loads the double-precision operand at srcptr as a pair of
 *  unsigned int words, computes its square root and stores the result
 *  through dstptr.
 *
 *  Arguments:
 *	srcptr  - operand to take the root of
 *	nullptr - never referenced in the function body (see ARGSUSED)
 *	dstptr  - where the result is stored
 *	status  - not referenced by name in the function body.
 *		  NOTE(review): presumably consumed inside the trap, flag
 *		  and rounding-mode macros from float.h -- confirm.
 *
 *  Return value:
 *	NOEXCEPTION	 - a result was stored through dstptr
 *	INVALIDEXCEPTION - the invalid trap is enabled and the operand is a
 *			   signaling NaN or a negative non-zero number
 *			   (negative infinity included); dstptr is not
 *			   written
 *	INEXACTEXCEPTION - the inexact trap is enabled and the root is not
 *			   exact; the rounded result is stored first
34*4882a593Smuzhiyun  */
35*4882a593Smuzhiyun 
36*4882a593Smuzhiyun /*ARGSUSED*/
37*4882a593Smuzhiyun unsigned int
dbl_fsqrt(dbl_floating_point * srcptr,unsigned int * nullptr,dbl_floating_point * dstptr,unsigned int * status)38*4882a593Smuzhiyun dbl_fsqrt(
39*4882a593Smuzhiyun 	    dbl_floating_point *srcptr,
40*4882a593Smuzhiyun 	    unsigned int *nullptr,
41*4882a593Smuzhiyun 	    dbl_floating_point *dstptr,
42*4882a593Smuzhiyun 	    unsigned int *status)
43*4882a593Smuzhiyun {
	/*
	 * Operand (src) and root (result), each kept as a p1/p2 pair of
	 * words.  All sign and exponent tests below are made on the p1 word.
	 */
44*4882a593Smuzhiyun 	register unsigned int srcp1, srcp2, resultp1, resultp2;
	/* newbit: trial bit of the extraction loop; sum: result + newbit */
45*4882a593Smuzhiyun 	register unsigned int newbitp1, newbitp2, sump1, sump2;
46*4882a593Smuzhiyun 	register int src_exponent;
	/* guardbit stays FALSE unless the inexact path below samples it */
47*4882a593Smuzhiyun 	register boolean guardbit = FALSE, even_exponent;
48*4882a593Smuzhiyun 
49*4882a593Smuzhiyun 	Dbl_copyfromptr(srcptr,srcp1,srcp2);
50*4882a593Smuzhiyun         /*
51*4882a593Smuzhiyun          * check source operand for NaN or infinity
         * (src_exponent is captured here and reused by the code below)
52*4882a593Smuzhiyun          */
53*4882a593Smuzhiyun         if ((src_exponent = Dbl_exponent(srcp1)) == DBL_INFINITY_EXPONENT) {
54*4882a593Smuzhiyun                 /*
55*4882a593Smuzhiyun                  * is signaling NaN?
56*4882a593Smuzhiyun                  */
57*4882a593Smuzhiyun                 if (Dbl_isone_signaling(srcp1)) {
58*4882a593Smuzhiyun                         /* trap if INVALIDTRAP enabled */
59*4882a593Smuzhiyun                         if (Is_invalidtrap_enabled()) return(INVALIDEXCEPTION);
60*4882a593Smuzhiyun                         /* make NaN quiet */
61*4882a593Smuzhiyun                         Set_invalidflag();
62*4882a593Smuzhiyun                         Dbl_set_quiet(srcp1);
63*4882a593Smuzhiyun                 }
64*4882a593Smuzhiyun                 /*
65*4882a593Smuzhiyun                  * Return quiet NaN or positive infinity.
66*4882a593Smuzhiyun 		 *  Fall through to negative test if negative infinity.
		 *  (A non-zero mantissa means NaN and is returned whatever
		 *  its sign; only sign set + zero mantissa falls through.)
67*4882a593Smuzhiyun                  */
68*4882a593Smuzhiyun 		if (Dbl_iszero_sign(srcp1) ||
69*4882a593Smuzhiyun 		    Dbl_isnotzero_mantissa(srcp1,srcp2)) {
70*4882a593Smuzhiyun                 	Dbl_copytoptr(srcp1,srcp2,dstptr);
71*4882a593Smuzhiyun                 	return(NOEXCEPTION);
72*4882a593Smuzhiyun 		}
73*4882a593Smuzhiyun         }
74*4882a593Smuzhiyun 
75*4882a593Smuzhiyun         /*
76*4882a593Smuzhiyun          * check for zero source operand
         *
         * The operand words are stored back unchanged.  This test comes
         * before the sign test below, so a zero never reaches the
         * negative-operand path.  NOTE(review): going by the macro name
         * only exponent and mantissa are examined, so a zero of either
         * sign presumably takes this exit -- confirm in dbl_float.h.
77*4882a593Smuzhiyun          */
78*4882a593Smuzhiyun 	if (Dbl_iszero_exponentmantissa(srcp1,srcp2)) {
79*4882a593Smuzhiyun 		Dbl_copytoptr(srcp1,srcp2,dstptr);
80*4882a593Smuzhiyun 		return(NOEXCEPTION);
81*4882a593Smuzhiyun 	}
82*4882a593Smuzhiyun 
83*4882a593Smuzhiyun         /*
84*4882a593Smuzhiyun          * check for negative source operand
         * (negative infinity arrives here from the first test above)
85*4882a593Smuzhiyun          */
86*4882a593Smuzhiyun 	if (Dbl_isone_sign(srcp1)) {
87*4882a593Smuzhiyun 		/* trap if INVALIDTRAP enabled */
88*4882a593Smuzhiyun 		if (Is_invalidtrap_enabled()) return(INVALIDEXCEPTION);
89*4882a593Smuzhiyun 		/* no trap: raise the invalid flag and return a quiet NaN */
90*4882a593Smuzhiyun 		Set_invalidflag();
91*4882a593Smuzhiyun 		Dbl_makequietnan(srcp1,srcp2);
92*4882a593Smuzhiyun 		Dbl_copytoptr(srcp1,srcp2,dstptr);
93*4882a593Smuzhiyun 		return(NOEXCEPTION);
94*4882a593Smuzhiyun 	}
95*4882a593Smuzhiyun 
96*4882a593Smuzhiyun 	/*
97*4882a593Smuzhiyun 	 * Generate result
	 *
	 * First reduce the operand to a bare mantissa in srcp1/srcp2 and
	 * decide even_exponent.
	 *
	 * Non-zero exponent field: even_exponent is read with Dbl_hidden()
	 * before the sign/exponent bits are cleared and the hidden bit set.
	 * NOTE(review): Dbl_hidden() presumably reads the bit position the
	 * hidden bit is about to occupy, i.e. the low bit of the biased
	 * exponent, which would match "src_exponent & 1" in the else
	 * branch -- confirm in dbl_float.h.
	 * NOTE(review): the flag is therefore set when the biased exponent
	 * is odd; with an odd DBL_BIAS that is an even unbiased exponent,
	 * which would explain the name.  DBL_BIAS is not defined in this
	 * file -- confirm.
98*4882a593Smuzhiyun 	 */
99*4882a593Smuzhiyun 	if (src_exponent > 0) {
100*4882a593Smuzhiyun 		even_exponent = Dbl_hidden(srcp1);
101*4882a593Smuzhiyun 		Dbl_clear_signexponent_set_hidden(srcp1);
102*4882a593Smuzhiyun 	}
103*4882a593Smuzhiyun 	else {
104*4882a593Smuzhiyun 		/* normalize operand
		 *
		 * The exponent field is zero and zero operands were already
		 * returned above, so the mantissa is non-zero here.
		 * src_exponent is bumped by one and handed to
		 * Dbl_normalize() together with the mantissa words.
		 * NOTE(review): presumably the macro shifts the mantissa up
		 * until the hidden bit is set and lowers src_exponent once
		 * per shift -- confirm in dbl_float.h.
		 * even_exponent is the low bit of the adjusted exponent.
		 */
105*4882a593Smuzhiyun 		Dbl_clear_signexponent(srcp1);
106*4882a593Smuzhiyun 		src_exponent++;
107*4882a593Smuzhiyun 		Dbl_normalize(srcp1,srcp2,src_exponent);
108*4882a593Smuzhiyun 		even_exponent = src_exponent & 1;
109*4882a593Smuzhiyun 	}
110*4882a593Smuzhiyun 	if (even_exponent) {
111*4882a593Smuzhiyun 		/* even_exponent set: pre-shift (double) the mantissa */
112*4882a593Smuzhiyun 		/* The matching right shift of the result is done after the
		 * extraction loop ("correct exponent for pre-shift").
		 * NOTE(review): the reason is not stated in this file.
		 * Apparently the loop is scaled so that an unshifted
		 * mantissa m yields sqrt(2*m), which is what is needed when
		 * halving the unbiased exponent with >>1 drops a low bit of
		 * one; otherwise the input is doubled, giving 2*sqrt(m), and
		 * the result is halved again afterwards.  Derived by
		 * reading the code, not from a specification -- confirm.
		 */
113*4882a593Smuzhiyun 		Dbl_leftshiftby1(srcp1,srcp2);
114*4882a593Smuzhiyun 	}
115*4882a593Smuzhiyun 	/*
116*4882a593Smuzhiyun 	 * Root extraction by shift and subtract.
117*4882a593Smuzhiyun 	 *
	 * result starts at zero and accumulates the root; src doubles as
	 * the running remainder; newbit is a single trial bit that starts
	 * at 1 << (DBL_P - 32) in the p1 word with p2 cleared.
	 *
	 * Each pass (macro semantics assumed from their names):
	 *   sum = result + newbit
	 *   if sum <= src:   the trial bit is accepted:
	 *                    result += 2 * newbit   (newbit shifted left 1)
	 *                    src    -= sum
	 *                    newbit moves down one position (right 2)
	 *   otherwise:       newbit moves down one position (right 1)
	 *   finally src is shifted left by one.
	 *
	 * The loop ends when the trial bit has been shifted out (newbit is
	 * zero) or when the remainder reaches zero, whichever comes first.
	 * A non-zero remainder afterwards is what marks the result inexact.
	 *
118*4882a593Smuzhiyun 	 * The original author's only remark here: "Trust me, it works."
119*4882a593Smuzhiyun 	 *
120*4882a593Smuzhiyun 	 */
121*4882a593Smuzhiyun 	Dbl_setzero(resultp1,resultp2);
122*4882a593Smuzhiyun 	Dbl_allp1(newbitp1) = 1 << (DBL_P - 32);
123*4882a593Smuzhiyun 	Dbl_setzero_mantissap2(newbitp2);
124*4882a593Smuzhiyun 	while (Dbl_isnotzero(newbitp1,newbitp2) && Dbl_isnotzero(srcp1,srcp2)) {
125*4882a593Smuzhiyun 		Dbl_addition(resultp1,resultp2,newbitp1,newbitp2,sump1,sump2);
126*4882a593Smuzhiyun 		if(Dbl_isnotgreaterthan(sump1,sump2,srcp1,srcp2)) {
127*4882a593Smuzhiyun 			Dbl_leftshiftby1(newbitp1,newbitp2);
128*4882a593Smuzhiyun 			/* update result */
129*4882a593Smuzhiyun 			Dbl_addition(resultp1,resultp2,newbitp1,newbitp2,
130*4882a593Smuzhiyun 			 resultp1,resultp2);
131*4882a593Smuzhiyun 			Dbl_subtract(srcp1,srcp2,sump1,sump2,srcp1,srcp2);
132*4882a593Smuzhiyun 			Dbl_rightshiftby2(newbitp1,newbitp2);
133*4882a593Smuzhiyun 		}
134*4882a593Smuzhiyun 		else {
135*4882a593Smuzhiyun 			Dbl_rightshiftby1(newbitp1,newbitp2);
136*4882a593Smuzhiyun 		}
137*4882a593Smuzhiyun 		Dbl_leftshiftby1(srcp1,srcp2);
138*4882a593Smuzhiyun 	}
139*4882a593Smuzhiyun 	/* correct exponent for pre-shift: undo, on the result, the
	 * mantissa doubling applied before the loop */
140*4882a593Smuzhiyun 	if (even_exponent) {
141*4882a593Smuzhiyun 		Dbl_rightshiftby1(resultp1,resultp2);
142*4882a593Smuzhiyun 	}
143*4882a593Smuzhiyun 
144*4882a593Smuzhiyun 	/* check for inexact: a non-zero remainder is left in src */
145*4882a593Smuzhiyun 	if (Dbl_isnotzero(srcp1,srcp2)) {
		/*
		 * NOTE(review): when no pre-shift was done and the result
		 * compares less than the remainder, the result is bumped by
		 * one before the guard bit is sampled.  The justification
		 * is not stated anywhere in this file.
		 */
146*4882a593Smuzhiyun 		if (!even_exponent && Dbl_islessthan(resultp1,resultp2,srcp1,srcp2)) {
147*4882a593Smuzhiyun 			Dbl_increment(resultp1,resultp2);
148*4882a593Smuzhiyun 		}
		/* remember the lowest result bit, then shift it out */
149*4882a593Smuzhiyun 		guardbit = Dbl_lowmantissap2(resultp2);
150*4882a593Smuzhiyun 		Dbl_rightshiftby1(resultp1,resultp2);
151*4882a593Smuzhiyun 
152*4882a593Smuzhiyun 		/*  now round result
		 *
		 *  Only ROUNDPLUS and ROUNDNEAREST have a case; any other
		 *  rounding mode leaves the truncated result as it is.
		 *  ROUNDPLUS increments unconditionally on this (inexact)
		 *  path.
		 */
153*4882a593Smuzhiyun 		switch (Rounding_mode()) {
154*4882a593Smuzhiyun 		case ROUNDPLUS:
155*4882a593Smuzhiyun 		     Dbl_increment(resultp1,resultp2);
156*4882a593Smuzhiyun 		     break;
157*4882a593Smuzhiyun 		case ROUNDNEAREST:
158*4882a593Smuzhiyun 		     /* stickybit is always true, so guardbit
159*4882a593Smuzhiyun 		      * is enough to determine rounding */
160*4882a593Smuzhiyun 		     if (guardbit) {
161*4882a593Smuzhiyun 			    Dbl_increment(resultp1,resultp2);
162*4882a593Smuzhiyun 		     }
163*4882a593Smuzhiyun 		     break;
164*4882a593Smuzhiyun 		}
165*4882a593Smuzhiyun 		/* increment result exponent by 1 if mantissa overflowed:
		 * src_exponent is halved when the result exponent is formed
		 * below, so adding 2 here raises the result exponent by 1 */
166*4882a593Smuzhiyun 		if (Dbl_isone_hiddenoverflow(resultp1)) src_exponent+=2;
167*4882a593Smuzhiyun 
		/*
		 * Inexact trap enabled: finish and store the rounded result,
		 * then report the exception.  Otherwise only raise the
		 * inexact flag and fall through to the common tail.
		 */
168*4882a593Smuzhiyun 		if (Is_inexacttrap_enabled()) {
169*4882a593Smuzhiyun 			Dbl_set_exponent(resultp1,
170*4882a593Smuzhiyun 			 ((src_exponent-DBL_BIAS)>>1)+DBL_BIAS);
171*4882a593Smuzhiyun 			Dbl_copytoptr(resultp1,resultp2,dstptr);
172*4882a593Smuzhiyun 			return(INEXACTEXCEPTION);
173*4882a593Smuzhiyun 		}
174*4882a593Smuzhiyun 		else Set_inexactflag();
175*4882a593Smuzhiyun 	}
176*4882a593Smuzhiyun 	else {
		/* exact root: same final right shift as on the inexact
		 * path, but no rounding and no inexact flag */
177*4882a593Smuzhiyun 		Dbl_rightshiftby1(resultp1,resultp2);
178*4882a593Smuzhiyun 	}
	/* result exponent: remove the bias, halve with >>1, re-apply bias */
179*4882a593Smuzhiyun 	Dbl_set_exponent(resultp1,((src_exponent-DBL_BIAS)>>1)+DBL_BIAS);
180*4882a593Smuzhiyun 	Dbl_copytoptr(resultp1,resultp2,dstptr);
181*4882a593Smuzhiyun 	return(NOEXCEPTION);
182*4882a593Smuzhiyun }
183