xref: /OK3568_Linux_fs/kernel/arch/mips/math-emu/dp_mul.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-only
2*4882a593Smuzhiyun /* IEEE754 floating point arithmetic
3*4882a593Smuzhiyun  * double precision: common utilities
4*4882a593Smuzhiyun  */
5*4882a593Smuzhiyun /*
6*4882a593Smuzhiyun  * MIPS floating point support
7*4882a593Smuzhiyun  * Copyright (C) 1994-2000 Algorithmics Ltd.
8*4882a593Smuzhiyun  */
9*4882a593Smuzhiyun 
10*4882a593Smuzhiyun #include "ieee754dp.h"
11*4882a593Smuzhiyun 
ieee754dp_mul(union ieee754dp x,union ieee754dp y)12*4882a593Smuzhiyun union ieee754dp ieee754dp_mul(union ieee754dp x, union ieee754dp y)
13*4882a593Smuzhiyun {
14*4882a593Smuzhiyun 	int re;
15*4882a593Smuzhiyun 	int rs;
16*4882a593Smuzhiyun 	u64 rm;
17*4882a593Smuzhiyun 	unsigned int lxm;
18*4882a593Smuzhiyun 	unsigned int hxm;
19*4882a593Smuzhiyun 	unsigned int lym;
20*4882a593Smuzhiyun 	unsigned int hym;
21*4882a593Smuzhiyun 	u64 lrm;
22*4882a593Smuzhiyun 	u64 hrm;
23*4882a593Smuzhiyun 	u64 t;
24*4882a593Smuzhiyun 	u64 at;
25*4882a593Smuzhiyun 
26*4882a593Smuzhiyun 	COMPXDP;
27*4882a593Smuzhiyun 	COMPYDP;
28*4882a593Smuzhiyun 
29*4882a593Smuzhiyun 	EXPLODEXDP;
30*4882a593Smuzhiyun 	EXPLODEYDP;
31*4882a593Smuzhiyun 
32*4882a593Smuzhiyun 	ieee754_clearcx();
33*4882a593Smuzhiyun 
34*4882a593Smuzhiyun 	FLUSHXDP;
35*4882a593Smuzhiyun 	FLUSHYDP;
36*4882a593Smuzhiyun 
37*4882a593Smuzhiyun 	switch (CLPAIR(xc, yc)) {
38*4882a593Smuzhiyun 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN):
39*4882a593Smuzhiyun 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN):
40*4882a593Smuzhiyun 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN):
41*4882a593Smuzhiyun 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN):
42*4882a593Smuzhiyun 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN):
43*4882a593Smuzhiyun 		return ieee754dp_nanxcpt(y);
44*4882a593Smuzhiyun 
45*4882a593Smuzhiyun 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN):
46*4882a593Smuzhiyun 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN):
47*4882a593Smuzhiyun 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO):
48*4882a593Smuzhiyun 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM):
49*4882a593Smuzhiyun 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM):
50*4882a593Smuzhiyun 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
51*4882a593Smuzhiyun 		return ieee754dp_nanxcpt(x);
52*4882a593Smuzhiyun 
53*4882a593Smuzhiyun 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
54*4882a593Smuzhiyun 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
55*4882a593Smuzhiyun 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN):
56*4882a593Smuzhiyun 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN):
57*4882a593Smuzhiyun 		return y;
58*4882a593Smuzhiyun 
59*4882a593Smuzhiyun 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
60*4882a593Smuzhiyun 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO):
61*4882a593Smuzhiyun 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM):
62*4882a593Smuzhiyun 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM):
63*4882a593Smuzhiyun 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF):
64*4882a593Smuzhiyun 		return x;
65*4882a593Smuzhiyun 
66*4882a593Smuzhiyun 
67*4882a593Smuzhiyun 	/*
68*4882a593Smuzhiyun 	 * Infinity handling
69*4882a593Smuzhiyun 	 */
70*4882a593Smuzhiyun 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
71*4882a593Smuzhiyun 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
72*4882a593Smuzhiyun 		ieee754_setcx(IEEE754_INVALID_OPERATION);
73*4882a593Smuzhiyun 		return ieee754dp_indef();
74*4882a593Smuzhiyun 
75*4882a593Smuzhiyun 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
76*4882a593Smuzhiyun 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
77*4882a593Smuzhiyun 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
78*4882a593Smuzhiyun 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
79*4882a593Smuzhiyun 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
80*4882a593Smuzhiyun 		return ieee754dp_inf(xs ^ ys);
81*4882a593Smuzhiyun 
82*4882a593Smuzhiyun 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
83*4882a593Smuzhiyun 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
84*4882a593Smuzhiyun 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
85*4882a593Smuzhiyun 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
86*4882a593Smuzhiyun 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
87*4882a593Smuzhiyun 		return ieee754dp_zero(xs ^ ys);
88*4882a593Smuzhiyun 
89*4882a593Smuzhiyun 
90*4882a593Smuzhiyun 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
91*4882a593Smuzhiyun 		DPDNORMX;
92*4882a593Smuzhiyun 		fallthrough;
93*4882a593Smuzhiyun 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
94*4882a593Smuzhiyun 		DPDNORMY;
95*4882a593Smuzhiyun 		break;
96*4882a593Smuzhiyun 
97*4882a593Smuzhiyun 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
98*4882a593Smuzhiyun 		DPDNORMX;
99*4882a593Smuzhiyun 		break;
100*4882a593Smuzhiyun 
101*4882a593Smuzhiyun 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
102*4882a593Smuzhiyun 		break;
103*4882a593Smuzhiyun 	}
104*4882a593Smuzhiyun 	/* rm = xm * ym, re = xe+ye basically */
105*4882a593Smuzhiyun 	assert(xm & DP_HIDDEN_BIT);
106*4882a593Smuzhiyun 	assert(ym & DP_HIDDEN_BIT);
107*4882a593Smuzhiyun 
108*4882a593Smuzhiyun 	re = xe + ye;
109*4882a593Smuzhiyun 	rs = xs ^ ys;
110*4882a593Smuzhiyun 
111*4882a593Smuzhiyun 	/* shunt to top of word */
112*4882a593Smuzhiyun 	xm <<= 64 - (DP_FBITS + 1);
113*4882a593Smuzhiyun 	ym <<= 64 - (DP_FBITS + 1);
114*4882a593Smuzhiyun 
115*4882a593Smuzhiyun 	/*
116*4882a593Smuzhiyun 	 * Multiply 64 bits xm, ym to give high 64 bits rm with stickness.
117*4882a593Smuzhiyun 	 */
118*4882a593Smuzhiyun 
119*4882a593Smuzhiyun 	lxm = xm;
120*4882a593Smuzhiyun 	hxm = xm >> 32;
121*4882a593Smuzhiyun 	lym = ym;
122*4882a593Smuzhiyun 	hym = ym >> 32;
123*4882a593Smuzhiyun 
124*4882a593Smuzhiyun 	lrm = DPXMULT(lxm, lym);
125*4882a593Smuzhiyun 	hrm = DPXMULT(hxm, hym);
126*4882a593Smuzhiyun 
127*4882a593Smuzhiyun 	t = DPXMULT(lxm, hym);
128*4882a593Smuzhiyun 
129*4882a593Smuzhiyun 	at = lrm + (t << 32);
130*4882a593Smuzhiyun 	hrm += at < lrm;
131*4882a593Smuzhiyun 	lrm = at;
132*4882a593Smuzhiyun 
133*4882a593Smuzhiyun 	hrm = hrm + (t >> 32);
134*4882a593Smuzhiyun 
135*4882a593Smuzhiyun 	t = DPXMULT(hxm, lym);
136*4882a593Smuzhiyun 
137*4882a593Smuzhiyun 	at = lrm + (t << 32);
138*4882a593Smuzhiyun 	hrm += at < lrm;
139*4882a593Smuzhiyun 	lrm = at;
140*4882a593Smuzhiyun 
141*4882a593Smuzhiyun 	hrm = hrm + (t >> 32);
142*4882a593Smuzhiyun 
143*4882a593Smuzhiyun 	rm = hrm | (lrm != 0);
144*4882a593Smuzhiyun 
145*4882a593Smuzhiyun 	/*
146*4882a593Smuzhiyun 	 * Sticky shift down to normal rounding precision.
147*4882a593Smuzhiyun 	 */
148*4882a593Smuzhiyun 	if ((s64) rm < 0) {
149*4882a593Smuzhiyun 		rm = (rm >> (64 - (DP_FBITS + 1 + 3))) |
150*4882a593Smuzhiyun 		     ((rm << (DP_FBITS + 1 + 3)) != 0);
151*4882a593Smuzhiyun 		re++;
152*4882a593Smuzhiyun 	} else {
153*4882a593Smuzhiyun 		rm = (rm >> (64 - (DP_FBITS + 1 + 3 + 1))) |
154*4882a593Smuzhiyun 		     ((rm << (DP_FBITS + 1 + 3 + 1)) != 0);
155*4882a593Smuzhiyun 	}
156*4882a593Smuzhiyun 	assert(rm & (DP_HIDDEN_BIT << 3));
157*4882a593Smuzhiyun 
158*4882a593Smuzhiyun 	return ieee754dp_format(rs, re, rm);
159*4882a593Smuzhiyun }
160