xref: /OK3568_Linux_fs/kernel/arch/mips/math-emu/sp_maddf.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-only
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun  * IEEE754 floating point arithmetic
4*4882a593Smuzhiyun  * single precision: MADDF.f (Fused Multiply Add)
5*4882a593Smuzhiyun  * MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft])
6*4882a593Smuzhiyun  *
7*4882a593Smuzhiyun  * MIPS floating point support
8*4882a593Smuzhiyun  * Copyright (C) 2015 Imagination Technologies, Ltd.
9*4882a593Smuzhiyun  * Author: Markos Chandras <markos.chandras@imgtec.com>
10*4882a593Smuzhiyun  */
11*4882a593Smuzhiyun 
12*4882a593Smuzhiyun #include "ieee754sp.h"
13*4882a593Smuzhiyun 
14*4882a593Smuzhiyun 
_sp_maddf(union ieee754sp z,union ieee754sp x,union ieee754sp y,enum maddf_flags flags)15*4882a593Smuzhiyun static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
16*4882a593Smuzhiyun 				 union ieee754sp y, enum maddf_flags flags)
17*4882a593Smuzhiyun {
18*4882a593Smuzhiyun 	int re;
19*4882a593Smuzhiyun 	int rs;
20*4882a593Smuzhiyun 	unsigned int rm;
21*4882a593Smuzhiyun 	u64 rm64;
22*4882a593Smuzhiyun 	u64 zm64;
23*4882a593Smuzhiyun 	int s;
24*4882a593Smuzhiyun 
25*4882a593Smuzhiyun 	COMPXSP;
26*4882a593Smuzhiyun 	COMPYSP;
27*4882a593Smuzhiyun 	COMPZSP;
28*4882a593Smuzhiyun 
29*4882a593Smuzhiyun 	EXPLODEXSP;
30*4882a593Smuzhiyun 	EXPLODEYSP;
31*4882a593Smuzhiyun 	EXPLODEZSP;
32*4882a593Smuzhiyun 
33*4882a593Smuzhiyun 	FLUSHXSP;
34*4882a593Smuzhiyun 	FLUSHYSP;
35*4882a593Smuzhiyun 	FLUSHZSP;
36*4882a593Smuzhiyun 
37*4882a593Smuzhiyun 	ieee754_clearcx();
38*4882a593Smuzhiyun 
39*4882a593Smuzhiyun 	rs = xs ^ ys;
40*4882a593Smuzhiyun 	if (flags & MADDF_NEGATE_PRODUCT)
41*4882a593Smuzhiyun 		rs ^= 1;
42*4882a593Smuzhiyun 	if (flags & MADDF_NEGATE_ADDITION)
43*4882a593Smuzhiyun 		zs ^= 1;
44*4882a593Smuzhiyun 
45*4882a593Smuzhiyun 	/*
46*4882a593Smuzhiyun 	 * Handle the cases when at least one of x, y or z is a NaN.
47*4882a593Smuzhiyun 	 * Order of precedence is sNaN, qNaN and z, x, y.
48*4882a593Smuzhiyun 	 */
49*4882a593Smuzhiyun 	if (zc == IEEE754_CLASS_SNAN)
50*4882a593Smuzhiyun 		return ieee754sp_nanxcpt(z);
51*4882a593Smuzhiyun 	if (xc == IEEE754_CLASS_SNAN)
52*4882a593Smuzhiyun 		return ieee754sp_nanxcpt(x);
53*4882a593Smuzhiyun 	if (yc == IEEE754_CLASS_SNAN)
54*4882a593Smuzhiyun 		return ieee754sp_nanxcpt(y);
55*4882a593Smuzhiyun 	if (zc == IEEE754_CLASS_QNAN)
56*4882a593Smuzhiyun 		return z;
57*4882a593Smuzhiyun 	if (xc == IEEE754_CLASS_QNAN)
58*4882a593Smuzhiyun 		return x;
59*4882a593Smuzhiyun 	if (yc == IEEE754_CLASS_QNAN)
60*4882a593Smuzhiyun 		return y;
61*4882a593Smuzhiyun 
62*4882a593Smuzhiyun 	if (zc == IEEE754_CLASS_DNORM)
63*4882a593Smuzhiyun 		SPDNORMZ;
64*4882a593Smuzhiyun 	/* ZERO z cases are handled separately below */
65*4882a593Smuzhiyun 
66*4882a593Smuzhiyun 	switch (CLPAIR(xc, yc)) {
67*4882a593Smuzhiyun 
68*4882a593Smuzhiyun 
69*4882a593Smuzhiyun 	/*
70*4882a593Smuzhiyun 	 * Infinity handling
71*4882a593Smuzhiyun 	 */
72*4882a593Smuzhiyun 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
73*4882a593Smuzhiyun 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
74*4882a593Smuzhiyun 		ieee754_setcx(IEEE754_INVALID_OPERATION);
75*4882a593Smuzhiyun 		return ieee754sp_indef();
76*4882a593Smuzhiyun 
77*4882a593Smuzhiyun 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
78*4882a593Smuzhiyun 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
79*4882a593Smuzhiyun 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
80*4882a593Smuzhiyun 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
81*4882a593Smuzhiyun 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
82*4882a593Smuzhiyun 		if ((zc == IEEE754_CLASS_INF) && (zs != rs)) {
83*4882a593Smuzhiyun 			/*
84*4882a593Smuzhiyun 			 * Cases of addition of infinities with opposite signs
85*4882a593Smuzhiyun 			 * or subtraction of infinities with same signs.
86*4882a593Smuzhiyun 			 */
87*4882a593Smuzhiyun 			ieee754_setcx(IEEE754_INVALID_OPERATION);
88*4882a593Smuzhiyun 			return ieee754sp_indef();
89*4882a593Smuzhiyun 		}
90*4882a593Smuzhiyun 		/*
91*4882a593Smuzhiyun 		 * z is here either not an infinity, or an infinity having the
92*4882a593Smuzhiyun 		 * same sign as product (x*y). The result must be an infinity,
93*4882a593Smuzhiyun 		 * and its sign is determined only by the sign of product (x*y).
94*4882a593Smuzhiyun 		 */
95*4882a593Smuzhiyun 		return ieee754sp_inf(rs);
96*4882a593Smuzhiyun 
97*4882a593Smuzhiyun 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
98*4882a593Smuzhiyun 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
99*4882a593Smuzhiyun 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
100*4882a593Smuzhiyun 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
101*4882a593Smuzhiyun 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
102*4882a593Smuzhiyun 		if (zc == IEEE754_CLASS_INF)
103*4882a593Smuzhiyun 			return ieee754sp_inf(zs);
104*4882a593Smuzhiyun 		if (zc == IEEE754_CLASS_ZERO) {
105*4882a593Smuzhiyun 			/* Handle cases +0 + (-0) and similar ones. */
106*4882a593Smuzhiyun 			if (zs == rs)
107*4882a593Smuzhiyun 				/*
108*4882a593Smuzhiyun 				 * Cases of addition of zeros of equal signs
109*4882a593Smuzhiyun 				 * or subtraction of zeroes of opposite signs.
110*4882a593Smuzhiyun 				 * The sign of the resulting zero is in any
111*4882a593Smuzhiyun 				 * such case determined only by the sign of z.
112*4882a593Smuzhiyun 				 */
113*4882a593Smuzhiyun 				return z;
114*4882a593Smuzhiyun 
115*4882a593Smuzhiyun 			return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD);
116*4882a593Smuzhiyun 		}
117*4882a593Smuzhiyun 		/* x*y is here 0, and z is not 0, so just return z */
118*4882a593Smuzhiyun 		return z;
119*4882a593Smuzhiyun 
120*4882a593Smuzhiyun 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
121*4882a593Smuzhiyun 		SPDNORMX;
122*4882a593Smuzhiyun 		fallthrough;
123*4882a593Smuzhiyun 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
124*4882a593Smuzhiyun 		if (zc == IEEE754_CLASS_INF)
125*4882a593Smuzhiyun 			return ieee754sp_inf(zs);
126*4882a593Smuzhiyun 		SPDNORMY;
127*4882a593Smuzhiyun 		break;
128*4882a593Smuzhiyun 
129*4882a593Smuzhiyun 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
130*4882a593Smuzhiyun 		if (zc == IEEE754_CLASS_INF)
131*4882a593Smuzhiyun 			return ieee754sp_inf(zs);
132*4882a593Smuzhiyun 		SPDNORMX;
133*4882a593Smuzhiyun 		break;
134*4882a593Smuzhiyun 
135*4882a593Smuzhiyun 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
136*4882a593Smuzhiyun 		if (zc == IEEE754_CLASS_INF)
137*4882a593Smuzhiyun 			return ieee754sp_inf(zs);
138*4882a593Smuzhiyun 		/* continue to real computations */
139*4882a593Smuzhiyun 	}
140*4882a593Smuzhiyun 
141*4882a593Smuzhiyun 	/* Finally get to do some computation */
142*4882a593Smuzhiyun 
143*4882a593Smuzhiyun 	/*
144*4882a593Smuzhiyun 	 * Do the multiplication bit first
145*4882a593Smuzhiyun 	 *
146*4882a593Smuzhiyun 	 * rm = xm * ym, re = xe + ye basically
147*4882a593Smuzhiyun 	 *
148*4882a593Smuzhiyun 	 * At this point xm and ym should have been normalized.
149*4882a593Smuzhiyun 	 */
150*4882a593Smuzhiyun 
151*4882a593Smuzhiyun 	/* rm = xm * ym, re = xe+ye basically */
152*4882a593Smuzhiyun 	assert(xm & SP_HIDDEN_BIT);
153*4882a593Smuzhiyun 	assert(ym & SP_HIDDEN_BIT);
154*4882a593Smuzhiyun 
155*4882a593Smuzhiyun 	re = xe + ye;
156*4882a593Smuzhiyun 
157*4882a593Smuzhiyun 	/* Multiple 24 bit xm and ym to give 48 bit results */
158*4882a593Smuzhiyun 	rm64 = (uint64_t)xm * ym;
159*4882a593Smuzhiyun 
160*4882a593Smuzhiyun 	/* Shunt to top of word */
161*4882a593Smuzhiyun 	rm64 = rm64 << 16;
162*4882a593Smuzhiyun 
163*4882a593Smuzhiyun 	/* Put explicit bit at bit 62 if necessary */
164*4882a593Smuzhiyun 	if ((int64_t) rm64 < 0) {
165*4882a593Smuzhiyun 		rm64 = rm64 >> 1;
166*4882a593Smuzhiyun 		re++;
167*4882a593Smuzhiyun 	}
168*4882a593Smuzhiyun 
169*4882a593Smuzhiyun 	assert(rm64 & (1 << 62));
170*4882a593Smuzhiyun 
171*4882a593Smuzhiyun 	if (zc == IEEE754_CLASS_ZERO) {
172*4882a593Smuzhiyun 		/*
173*4882a593Smuzhiyun 		 * Move explicit bit from bit 62 to bit 26 since the
174*4882a593Smuzhiyun 		 * ieee754sp_format code expects the mantissa to be
175*4882a593Smuzhiyun 		 * 27 bits wide (24 + 3 rounding bits).
176*4882a593Smuzhiyun 		 */
177*4882a593Smuzhiyun 		rm = XSPSRS64(rm64, (62 - 26));
178*4882a593Smuzhiyun 		return ieee754sp_format(rs, re, rm);
179*4882a593Smuzhiyun 	}
180*4882a593Smuzhiyun 
181*4882a593Smuzhiyun 	/* Move explicit bit from bit 23 to bit 62 */
182*4882a593Smuzhiyun 	zm64 = (uint64_t)zm << (62 - 23);
183*4882a593Smuzhiyun 	assert(zm64 & (1 << 62));
184*4882a593Smuzhiyun 
185*4882a593Smuzhiyun 	/* Make the exponents the same */
186*4882a593Smuzhiyun 	if (ze > re) {
187*4882a593Smuzhiyun 		/*
188*4882a593Smuzhiyun 		 * Have to shift r fraction right to align.
189*4882a593Smuzhiyun 		 */
190*4882a593Smuzhiyun 		s = ze - re;
191*4882a593Smuzhiyun 		rm64 = XSPSRS64(rm64, s);
192*4882a593Smuzhiyun 		re += s;
193*4882a593Smuzhiyun 	} else if (re > ze) {
194*4882a593Smuzhiyun 		/*
195*4882a593Smuzhiyun 		 * Have to shift z fraction right to align.
196*4882a593Smuzhiyun 		 */
197*4882a593Smuzhiyun 		s = re - ze;
198*4882a593Smuzhiyun 		zm64 = XSPSRS64(zm64, s);
199*4882a593Smuzhiyun 		ze += s;
200*4882a593Smuzhiyun 	}
201*4882a593Smuzhiyun 	assert(ze == re);
202*4882a593Smuzhiyun 	assert(ze <= SP_EMAX);
203*4882a593Smuzhiyun 
204*4882a593Smuzhiyun 	/* Do the addition */
205*4882a593Smuzhiyun 	if (zs == rs) {
206*4882a593Smuzhiyun 		/*
207*4882a593Smuzhiyun 		 * Generate 64 bit result by adding two 63 bit numbers
208*4882a593Smuzhiyun 		 * leaving result in zm64, zs and ze.
209*4882a593Smuzhiyun 		 */
210*4882a593Smuzhiyun 		zm64 = zm64 + rm64;
211*4882a593Smuzhiyun 		if ((int64_t)zm64 < 0) {	/* carry out */
212*4882a593Smuzhiyun 			zm64 = XSPSRS1(zm64);
213*4882a593Smuzhiyun 			ze++;
214*4882a593Smuzhiyun 		}
215*4882a593Smuzhiyun 	} else {
216*4882a593Smuzhiyun 		if (zm64 >= rm64) {
217*4882a593Smuzhiyun 			zm64 = zm64 - rm64;
218*4882a593Smuzhiyun 		} else {
219*4882a593Smuzhiyun 			zm64 = rm64 - zm64;
220*4882a593Smuzhiyun 			zs = rs;
221*4882a593Smuzhiyun 		}
222*4882a593Smuzhiyun 		if (zm64 == 0)
223*4882a593Smuzhiyun 			return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD);
224*4882a593Smuzhiyun 
225*4882a593Smuzhiyun 		/*
226*4882a593Smuzhiyun 		 * Put explicit bit at bit 62 if necessary.
227*4882a593Smuzhiyun 		 */
228*4882a593Smuzhiyun 		while ((zm64 >> 62) == 0) {
229*4882a593Smuzhiyun 			zm64 <<= 1;
230*4882a593Smuzhiyun 			ze--;
231*4882a593Smuzhiyun 		}
232*4882a593Smuzhiyun 	}
233*4882a593Smuzhiyun 
234*4882a593Smuzhiyun 	/*
235*4882a593Smuzhiyun 	 * Move explicit bit from bit 62 to bit 26 since the
236*4882a593Smuzhiyun 	 * ieee754sp_format code expects the mantissa to be
237*4882a593Smuzhiyun 	 * 27 bits wide (24 + 3 rounding bits).
238*4882a593Smuzhiyun 	 */
239*4882a593Smuzhiyun 	zm = XSPSRS64(zm64, (62 - 26));
240*4882a593Smuzhiyun 
241*4882a593Smuzhiyun 	return ieee754sp_format(zs, ze, zm);
242*4882a593Smuzhiyun }
243*4882a593Smuzhiyun 
ieee754sp_maddf(union ieee754sp z,union ieee754sp x,union ieee754sp y)244*4882a593Smuzhiyun union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x,
245*4882a593Smuzhiyun 				union ieee754sp y)
246*4882a593Smuzhiyun {
247*4882a593Smuzhiyun 	return _sp_maddf(z, x, y, 0);
248*4882a593Smuzhiyun }
249*4882a593Smuzhiyun 
ieee754sp_msubf(union ieee754sp z,union ieee754sp x,union ieee754sp y)250*4882a593Smuzhiyun union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x,
251*4882a593Smuzhiyun 				union ieee754sp y)
252*4882a593Smuzhiyun {
253*4882a593Smuzhiyun 	return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
254*4882a593Smuzhiyun }
255*4882a593Smuzhiyun 
ieee754sp_madd(union ieee754sp z,union ieee754sp x,union ieee754sp y)256*4882a593Smuzhiyun union ieee754sp ieee754sp_madd(union ieee754sp z, union ieee754sp x,
257*4882a593Smuzhiyun 				union ieee754sp y)
258*4882a593Smuzhiyun {
259*4882a593Smuzhiyun 	return _sp_maddf(z, x, y, 0);
260*4882a593Smuzhiyun }
261*4882a593Smuzhiyun 
ieee754sp_msub(union ieee754sp z,union ieee754sp x,union ieee754sp y)262*4882a593Smuzhiyun union ieee754sp ieee754sp_msub(union ieee754sp z, union ieee754sp x,
263*4882a593Smuzhiyun 				union ieee754sp y)
264*4882a593Smuzhiyun {
265*4882a593Smuzhiyun 	return _sp_maddf(z, x, y, MADDF_NEGATE_ADDITION);
266*4882a593Smuzhiyun }
267*4882a593Smuzhiyun 
ieee754sp_nmadd(union ieee754sp z,union ieee754sp x,union ieee754sp y)268*4882a593Smuzhiyun union ieee754sp ieee754sp_nmadd(union ieee754sp z, union ieee754sp x,
269*4882a593Smuzhiyun 				union ieee754sp y)
270*4882a593Smuzhiyun {
271*4882a593Smuzhiyun 	return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT|MADDF_NEGATE_ADDITION);
272*4882a593Smuzhiyun }
273*4882a593Smuzhiyun 
ieee754sp_nmsub(union ieee754sp z,union ieee754sp x,union ieee754sp y)274*4882a593Smuzhiyun union ieee754sp ieee754sp_nmsub(union ieee754sp z, union ieee754sp x,
275*4882a593Smuzhiyun 				union ieee754sp y)
276*4882a593Smuzhiyun {
277*4882a593Smuzhiyun 	return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
278*4882a593Smuzhiyun }
279