xref: /OK3568_Linux_fs/kernel/arch/x86/math-emu/poly_l2.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
// SPDX-License-Identifier: GPL-2.0
/*---------------------------------------------------------------------------+
 |  poly_l2.c                                                                |
 |                                                                           |
 | Compute the base 2 log of a FPU_REG, using a polynomial approximation.    |
 |                                                                           |
 | Copyright (C) 1992,1993,1994,1997                                         |
 |                  W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
 |                  E-mail   billm@suburbia.net                              |
 |                                                                           |
 |                                                                           |
 +---------------------------------------------------------------------------*/
13*4882a593Smuzhiyun 
14*4882a593Smuzhiyun #include "exception.h"
15*4882a593Smuzhiyun #include "reg_constant.h"
16*4882a593Smuzhiyun #include "fpu_emu.h"
17*4882a593Smuzhiyun #include "fpu_system.h"
18*4882a593Smuzhiyun #include "control_w.h"
19*4882a593Smuzhiyun #include "poly.h"
20*4882a593Smuzhiyun 
21*4882a593Smuzhiyun static void log2_kernel(FPU_REG const *arg, u_char argsign,
22*4882a593Smuzhiyun 			Xsig * accum_result, long int *expon);
23*4882a593Smuzhiyun 
24*4882a593Smuzhiyun /*--- poly_l2() -------------------------------------------------------------+
25*4882a593Smuzhiyun  |   Base 2 logarithm by a polynomial approximation.                         |
26*4882a593Smuzhiyun  +---------------------------------------------------------------------------*/
poly_l2(FPU_REG * st0_ptr,FPU_REG * st1_ptr,u_char st1_sign)27*4882a593Smuzhiyun void poly_l2(FPU_REG *st0_ptr, FPU_REG *st1_ptr, u_char st1_sign)
28*4882a593Smuzhiyun {
29*4882a593Smuzhiyun 	long int exponent, expon, expon_expon;
30*4882a593Smuzhiyun 	Xsig accumulator, expon_accum, yaccum;
31*4882a593Smuzhiyun 	u_char sign, argsign;
32*4882a593Smuzhiyun 	FPU_REG x;
33*4882a593Smuzhiyun 	int tag;
34*4882a593Smuzhiyun 
35*4882a593Smuzhiyun 	exponent = exponent16(st0_ptr);
36*4882a593Smuzhiyun 
37*4882a593Smuzhiyun 	/* From st0_ptr, make a number > sqrt(2)/2 and < sqrt(2) */
38*4882a593Smuzhiyun 	if (st0_ptr->sigh > (unsigned)0xb504f334) {
39*4882a593Smuzhiyun 		/* Treat as  sqrt(2)/2 < st0_ptr < 1 */
40*4882a593Smuzhiyun 		significand(&x) = -significand(st0_ptr);
41*4882a593Smuzhiyun 		setexponent16(&x, -1);
42*4882a593Smuzhiyun 		exponent++;
43*4882a593Smuzhiyun 		argsign = SIGN_NEG;
44*4882a593Smuzhiyun 	} else {
45*4882a593Smuzhiyun 		/* Treat as  1 <= st0_ptr < sqrt(2) */
46*4882a593Smuzhiyun 		x.sigh = st0_ptr->sigh - 0x80000000;
47*4882a593Smuzhiyun 		x.sigl = st0_ptr->sigl;
48*4882a593Smuzhiyun 		setexponent16(&x, 0);
49*4882a593Smuzhiyun 		argsign = SIGN_POS;
50*4882a593Smuzhiyun 	}
51*4882a593Smuzhiyun 	tag = FPU_normalize_nuo(&x);
52*4882a593Smuzhiyun 
53*4882a593Smuzhiyun 	if (tag == TAG_Zero) {
54*4882a593Smuzhiyun 		expon = 0;
55*4882a593Smuzhiyun 		accumulator.msw = accumulator.midw = accumulator.lsw = 0;
56*4882a593Smuzhiyun 	} else {
57*4882a593Smuzhiyun 		log2_kernel(&x, argsign, &accumulator, &expon);
58*4882a593Smuzhiyun 	}
59*4882a593Smuzhiyun 
60*4882a593Smuzhiyun 	if (exponent < 0) {
61*4882a593Smuzhiyun 		sign = SIGN_NEG;
62*4882a593Smuzhiyun 		exponent = -exponent;
63*4882a593Smuzhiyun 	} else
64*4882a593Smuzhiyun 		sign = SIGN_POS;
65*4882a593Smuzhiyun 	expon_accum.msw = exponent;
66*4882a593Smuzhiyun 	expon_accum.midw = expon_accum.lsw = 0;
67*4882a593Smuzhiyun 	if (exponent) {
68*4882a593Smuzhiyun 		expon_expon = 31 + norm_Xsig(&expon_accum);
69*4882a593Smuzhiyun 		shr_Xsig(&accumulator, expon_expon - expon);
70*4882a593Smuzhiyun 
71*4882a593Smuzhiyun 		if (sign ^ argsign)
72*4882a593Smuzhiyun 			negate_Xsig(&accumulator);
73*4882a593Smuzhiyun 		add_Xsig_Xsig(&accumulator, &expon_accum);
74*4882a593Smuzhiyun 	} else {
75*4882a593Smuzhiyun 		expon_expon = expon;
76*4882a593Smuzhiyun 		sign = argsign;
77*4882a593Smuzhiyun 	}
78*4882a593Smuzhiyun 
79*4882a593Smuzhiyun 	yaccum.lsw = 0;
80*4882a593Smuzhiyun 	XSIG_LL(yaccum) = significand(st1_ptr);
81*4882a593Smuzhiyun 	mul_Xsig_Xsig(&accumulator, &yaccum);
82*4882a593Smuzhiyun 
83*4882a593Smuzhiyun 	expon_expon += round_Xsig(&accumulator);
84*4882a593Smuzhiyun 
85*4882a593Smuzhiyun 	if (accumulator.msw == 0) {
86*4882a593Smuzhiyun 		FPU_copy_to_reg1(&CONST_Z, TAG_Zero);
87*4882a593Smuzhiyun 		return;
88*4882a593Smuzhiyun 	}
89*4882a593Smuzhiyun 
90*4882a593Smuzhiyun 	significand(st1_ptr) = XSIG_LL(accumulator);
91*4882a593Smuzhiyun 	setexponent16(st1_ptr, expon_expon + exponent16(st1_ptr) + 1);
92*4882a593Smuzhiyun 
93*4882a593Smuzhiyun 	tag = FPU_round(st1_ptr, 1, 0, FULL_PRECISION, sign ^ st1_sign);
94*4882a593Smuzhiyun 	FPU_settagi(1, tag);
95*4882a593Smuzhiyun 
96*4882a593Smuzhiyun 	set_precision_flag_up();	/* 80486 appears to always do this */
97*4882a593Smuzhiyun 
98*4882a593Smuzhiyun 	return;
99*4882a593Smuzhiyun 
100*4882a593Smuzhiyun }
101*4882a593Smuzhiyun 
102*4882a593Smuzhiyun /*--- poly_l2p1() -----------------------------------------------------------+
103*4882a593Smuzhiyun  |   Base 2 logarithm by a polynomial approximation.                         |
104*4882a593Smuzhiyun  |   log2(x+1)                                                               |
105*4882a593Smuzhiyun  +---------------------------------------------------------------------------*/
poly_l2p1(u_char sign0,u_char sign1,FPU_REG * st0_ptr,FPU_REG * st1_ptr,FPU_REG * dest)106*4882a593Smuzhiyun int poly_l2p1(u_char sign0, u_char sign1,
107*4882a593Smuzhiyun 	      FPU_REG * st0_ptr, FPU_REG * st1_ptr, FPU_REG * dest)
108*4882a593Smuzhiyun {
109*4882a593Smuzhiyun 	u_char tag;
110*4882a593Smuzhiyun 	long int exponent;
111*4882a593Smuzhiyun 	Xsig accumulator, yaccum;
112*4882a593Smuzhiyun 
113*4882a593Smuzhiyun 	if (exponent16(st0_ptr) < 0) {
114*4882a593Smuzhiyun 		log2_kernel(st0_ptr, sign0, &accumulator, &exponent);
115*4882a593Smuzhiyun 
116*4882a593Smuzhiyun 		yaccum.lsw = 0;
117*4882a593Smuzhiyun 		XSIG_LL(yaccum) = significand(st1_ptr);
118*4882a593Smuzhiyun 		mul_Xsig_Xsig(&accumulator, &yaccum);
119*4882a593Smuzhiyun 
120*4882a593Smuzhiyun 		exponent += round_Xsig(&accumulator);
121*4882a593Smuzhiyun 
122*4882a593Smuzhiyun 		exponent += exponent16(st1_ptr) + 1;
123*4882a593Smuzhiyun 		if (exponent < EXP_WAY_UNDER)
124*4882a593Smuzhiyun 			exponent = EXP_WAY_UNDER;
125*4882a593Smuzhiyun 
126*4882a593Smuzhiyun 		significand(dest) = XSIG_LL(accumulator);
127*4882a593Smuzhiyun 		setexponent16(dest, exponent);
128*4882a593Smuzhiyun 
129*4882a593Smuzhiyun 		tag = FPU_round(dest, 1, 0, FULL_PRECISION, sign0 ^ sign1);
130*4882a593Smuzhiyun 		FPU_settagi(1, tag);
131*4882a593Smuzhiyun 
132*4882a593Smuzhiyun 		if (tag == TAG_Valid)
133*4882a593Smuzhiyun 			set_precision_flag_up();	/* 80486 appears to always do this */
134*4882a593Smuzhiyun 	} else {
135*4882a593Smuzhiyun 		/* The magnitude of st0_ptr is far too large. */
136*4882a593Smuzhiyun 
137*4882a593Smuzhiyun 		if (sign0 != SIGN_POS) {
138*4882a593Smuzhiyun 			/* Trying to get the log of a negative number. */
139*4882a593Smuzhiyun #ifdef PECULIAR_486		/* Stupid 80486 doesn't worry about log(negative). */
140*4882a593Smuzhiyun 			changesign(st1_ptr);
141*4882a593Smuzhiyun #else
142*4882a593Smuzhiyun 			if (arith_invalid(1) < 0)
143*4882a593Smuzhiyun 				return 1;
144*4882a593Smuzhiyun #endif /* PECULIAR_486 */
145*4882a593Smuzhiyun 		}
146*4882a593Smuzhiyun 
147*4882a593Smuzhiyun 		/* 80486 appears to do this */
148*4882a593Smuzhiyun 		if (sign0 == SIGN_NEG)
149*4882a593Smuzhiyun 			set_precision_flag_down();
150*4882a593Smuzhiyun 		else
151*4882a593Smuzhiyun 			set_precision_flag_up();
152*4882a593Smuzhiyun 	}
153*4882a593Smuzhiyun 
154*4882a593Smuzhiyun 	if (exponent(dest) <= EXP_UNDER)
155*4882a593Smuzhiyun 		EXCEPTION(EX_Underflow);
156*4882a593Smuzhiyun 
157*4882a593Smuzhiyun 	return 0;
158*4882a593Smuzhiyun 
159*4882a593Smuzhiyun }
160*4882a593Smuzhiyun 
#undef HIPOWER
#define	HIPOWER	10

/*
 * Fixed point coefficients handed to polynomial_Xsig() by log2_kernel(),
 * which evaluates them in the squared argument and passes HIPOWER - 1 as
 * the term count argument.
 */
static const unsigned long long logterms[HIPOWER] = {
	0x2a8eca5705fc2ef0ULL,
	0xf6384ee1d01febceULL,
	0x093bb62877cdf642ULL,
	0x006985d8a9ec439bULL,
	0x0005212c4f55a9c8ULL,
	0x00004326a16927f0ULL,
	0x0000038d1d80a0e7ULL,
	0x0000003141cc80c6ULL,
	0x00000002b1668c9fULL,
	0x000000002c7a46aaULL
};

/*
 * 32 bit multiplier applied by log2_kernel() (via mul32_Xsig()) to the
 * un-normalized argument; that product is then added to the polynomial
 * term.
 */
static const unsigned long leadterm = 0xb8000000;
177*4882a593Smuzhiyun 
178*4882a593Smuzhiyun /*--- log2_kernel() ---------------------------------------------------------+
179*4882a593Smuzhiyun  |   Base 2 logarithm by a polynomial approximation.                         |
180*4882a593Smuzhiyun  |   log2(x+1)                                                               |
181*4882a593Smuzhiyun  +---------------------------------------------------------------------------*/
log2_kernel(FPU_REG const * arg,u_char argsign,Xsig * accum_result,long int * expon)182*4882a593Smuzhiyun static void log2_kernel(FPU_REG const *arg, u_char argsign, Xsig *accum_result,
183*4882a593Smuzhiyun 			long int *expon)
184*4882a593Smuzhiyun {
185*4882a593Smuzhiyun 	long int exponent, adj;
186*4882a593Smuzhiyun 	unsigned long long Xsq;
187*4882a593Smuzhiyun 	Xsig accumulator, Numer, Denom, argSignif, arg_signif;
188*4882a593Smuzhiyun 
189*4882a593Smuzhiyun 	exponent = exponent16(arg);
190*4882a593Smuzhiyun 	Numer.lsw = Denom.lsw = 0;
191*4882a593Smuzhiyun 	XSIG_LL(Numer) = XSIG_LL(Denom) = significand(arg);
192*4882a593Smuzhiyun 	if (argsign == SIGN_POS) {
193*4882a593Smuzhiyun 		shr_Xsig(&Denom, 2 - (1 + exponent));
194*4882a593Smuzhiyun 		Denom.msw |= 0x80000000;
195*4882a593Smuzhiyun 		div_Xsig(&Numer, &Denom, &argSignif);
196*4882a593Smuzhiyun 	} else {
197*4882a593Smuzhiyun 		shr_Xsig(&Denom, 1 - (1 + exponent));
198*4882a593Smuzhiyun 		negate_Xsig(&Denom);
199*4882a593Smuzhiyun 		if (Denom.msw & 0x80000000) {
200*4882a593Smuzhiyun 			div_Xsig(&Numer, &Denom, &argSignif);
201*4882a593Smuzhiyun 			exponent++;
202*4882a593Smuzhiyun 		} else {
203*4882a593Smuzhiyun 			/* Denom must be 1.0 */
204*4882a593Smuzhiyun 			argSignif.lsw = Numer.lsw;
205*4882a593Smuzhiyun 			argSignif.midw = Numer.midw;
206*4882a593Smuzhiyun 			argSignif.msw = Numer.msw;
207*4882a593Smuzhiyun 		}
208*4882a593Smuzhiyun 	}
209*4882a593Smuzhiyun 
210*4882a593Smuzhiyun #ifndef PECULIAR_486
211*4882a593Smuzhiyun 	/* Should check here that  |local_arg|  is within the valid range */
212*4882a593Smuzhiyun 	if (exponent >= -2) {
213*4882a593Smuzhiyun 		if ((exponent > -2) || (argSignif.msw > (unsigned)0xafb0ccc0)) {
214*4882a593Smuzhiyun 			/* The argument is too large */
215*4882a593Smuzhiyun 		}
216*4882a593Smuzhiyun 	}
217*4882a593Smuzhiyun #endif /* PECULIAR_486 */
218*4882a593Smuzhiyun 
219*4882a593Smuzhiyun 	arg_signif.lsw = argSignif.lsw;
220*4882a593Smuzhiyun 	XSIG_LL(arg_signif) = XSIG_LL(argSignif);
221*4882a593Smuzhiyun 	adj = norm_Xsig(&argSignif);
222*4882a593Smuzhiyun 	accumulator.lsw = argSignif.lsw;
223*4882a593Smuzhiyun 	XSIG_LL(accumulator) = XSIG_LL(argSignif);
224*4882a593Smuzhiyun 	mul_Xsig_Xsig(&accumulator, &accumulator);
225*4882a593Smuzhiyun 	shr_Xsig(&accumulator, 2 * (-1 - (1 + exponent + adj)));
226*4882a593Smuzhiyun 	Xsq = XSIG_LL(accumulator);
227*4882a593Smuzhiyun 	if (accumulator.lsw & 0x80000000)
228*4882a593Smuzhiyun 		Xsq++;
229*4882a593Smuzhiyun 
230*4882a593Smuzhiyun 	accumulator.msw = accumulator.midw = accumulator.lsw = 0;
231*4882a593Smuzhiyun 	/* Do the basic fixed point polynomial evaluation */
232*4882a593Smuzhiyun 	polynomial_Xsig(&accumulator, &Xsq, logterms, HIPOWER - 1);
233*4882a593Smuzhiyun 
234*4882a593Smuzhiyun 	mul_Xsig_Xsig(&accumulator, &argSignif);
235*4882a593Smuzhiyun 	shr_Xsig(&accumulator, 6 - adj);
236*4882a593Smuzhiyun 
237*4882a593Smuzhiyun 	mul32_Xsig(&arg_signif, leadterm);
238*4882a593Smuzhiyun 	add_two_Xsig(&accumulator, &arg_signif, &exponent);
239*4882a593Smuzhiyun 
240*4882a593Smuzhiyun 	*expon = exponent + 1;
241*4882a593Smuzhiyun 	accum_result->lsw = accumulator.lsw;
242*4882a593Smuzhiyun 	accum_result->midw = accumulator.midw;
243*4882a593Smuzhiyun 	accum_result->msw = accumulator.msw;
244*4882a593Smuzhiyun 
245*4882a593Smuzhiyun }
246