1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-only
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun * IEEE754 floating point arithmetic
4*4882a593Smuzhiyun * single precision: MADDF.f (Fused Multiply Add)
5*4882a593Smuzhiyun * MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft])
6*4882a593Smuzhiyun *
7*4882a593Smuzhiyun * MIPS floating point support
8*4882a593Smuzhiyun * Copyright (C) 2015 Imagination Technologies, Ltd.
9*4882a593Smuzhiyun * Author: Markos Chandras <markos.chandras@imgtec.com>
10*4882a593Smuzhiyun */
11*4882a593Smuzhiyun
12*4882a593Smuzhiyun #include "ieee754sp.h"
13*4882a593Smuzhiyun
14*4882a593Smuzhiyun
_sp_maddf(union ieee754sp z,union ieee754sp x,union ieee754sp y,enum maddf_flags flags)15*4882a593Smuzhiyun static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
16*4882a593Smuzhiyun union ieee754sp y, enum maddf_flags flags)
17*4882a593Smuzhiyun {
18*4882a593Smuzhiyun int re;
19*4882a593Smuzhiyun int rs;
20*4882a593Smuzhiyun unsigned int rm;
21*4882a593Smuzhiyun u64 rm64;
22*4882a593Smuzhiyun u64 zm64;
23*4882a593Smuzhiyun int s;
24*4882a593Smuzhiyun
25*4882a593Smuzhiyun COMPXSP;
26*4882a593Smuzhiyun COMPYSP;
27*4882a593Smuzhiyun COMPZSP;
28*4882a593Smuzhiyun
29*4882a593Smuzhiyun EXPLODEXSP;
30*4882a593Smuzhiyun EXPLODEYSP;
31*4882a593Smuzhiyun EXPLODEZSP;
32*4882a593Smuzhiyun
33*4882a593Smuzhiyun FLUSHXSP;
34*4882a593Smuzhiyun FLUSHYSP;
35*4882a593Smuzhiyun FLUSHZSP;
36*4882a593Smuzhiyun
37*4882a593Smuzhiyun ieee754_clearcx();
38*4882a593Smuzhiyun
39*4882a593Smuzhiyun rs = xs ^ ys;
40*4882a593Smuzhiyun if (flags & MADDF_NEGATE_PRODUCT)
41*4882a593Smuzhiyun rs ^= 1;
42*4882a593Smuzhiyun if (flags & MADDF_NEGATE_ADDITION)
43*4882a593Smuzhiyun zs ^= 1;
44*4882a593Smuzhiyun
45*4882a593Smuzhiyun /*
46*4882a593Smuzhiyun * Handle the cases when at least one of x, y or z is a NaN.
47*4882a593Smuzhiyun * Order of precedence is sNaN, qNaN and z, x, y.
48*4882a593Smuzhiyun */
49*4882a593Smuzhiyun if (zc == IEEE754_CLASS_SNAN)
50*4882a593Smuzhiyun return ieee754sp_nanxcpt(z);
51*4882a593Smuzhiyun if (xc == IEEE754_CLASS_SNAN)
52*4882a593Smuzhiyun return ieee754sp_nanxcpt(x);
53*4882a593Smuzhiyun if (yc == IEEE754_CLASS_SNAN)
54*4882a593Smuzhiyun return ieee754sp_nanxcpt(y);
55*4882a593Smuzhiyun if (zc == IEEE754_CLASS_QNAN)
56*4882a593Smuzhiyun return z;
57*4882a593Smuzhiyun if (xc == IEEE754_CLASS_QNAN)
58*4882a593Smuzhiyun return x;
59*4882a593Smuzhiyun if (yc == IEEE754_CLASS_QNAN)
60*4882a593Smuzhiyun return y;
61*4882a593Smuzhiyun
62*4882a593Smuzhiyun if (zc == IEEE754_CLASS_DNORM)
63*4882a593Smuzhiyun SPDNORMZ;
64*4882a593Smuzhiyun /* ZERO z cases are handled separately below */
65*4882a593Smuzhiyun
66*4882a593Smuzhiyun switch (CLPAIR(xc, yc)) {
67*4882a593Smuzhiyun
68*4882a593Smuzhiyun
69*4882a593Smuzhiyun /*
70*4882a593Smuzhiyun * Infinity handling
71*4882a593Smuzhiyun */
72*4882a593Smuzhiyun case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
73*4882a593Smuzhiyun case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
74*4882a593Smuzhiyun ieee754_setcx(IEEE754_INVALID_OPERATION);
75*4882a593Smuzhiyun return ieee754sp_indef();
76*4882a593Smuzhiyun
77*4882a593Smuzhiyun case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
78*4882a593Smuzhiyun case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
79*4882a593Smuzhiyun case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
80*4882a593Smuzhiyun case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
81*4882a593Smuzhiyun case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
82*4882a593Smuzhiyun if ((zc == IEEE754_CLASS_INF) && (zs != rs)) {
83*4882a593Smuzhiyun /*
84*4882a593Smuzhiyun * Cases of addition of infinities with opposite signs
85*4882a593Smuzhiyun * or subtraction of infinities with same signs.
86*4882a593Smuzhiyun */
87*4882a593Smuzhiyun ieee754_setcx(IEEE754_INVALID_OPERATION);
88*4882a593Smuzhiyun return ieee754sp_indef();
89*4882a593Smuzhiyun }
90*4882a593Smuzhiyun /*
91*4882a593Smuzhiyun * z is here either not an infinity, or an infinity having the
92*4882a593Smuzhiyun * same sign as product (x*y). The result must be an infinity,
93*4882a593Smuzhiyun * and its sign is determined only by the sign of product (x*y).
94*4882a593Smuzhiyun */
95*4882a593Smuzhiyun return ieee754sp_inf(rs);
96*4882a593Smuzhiyun
97*4882a593Smuzhiyun case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
98*4882a593Smuzhiyun case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
99*4882a593Smuzhiyun case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
100*4882a593Smuzhiyun case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
101*4882a593Smuzhiyun case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
102*4882a593Smuzhiyun if (zc == IEEE754_CLASS_INF)
103*4882a593Smuzhiyun return ieee754sp_inf(zs);
104*4882a593Smuzhiyun if (zc == IEEE754_CLASS_ZERO) {
105*4882a593Smuzhiyun /* Handle cases +0 + (-0) and similar ones. */
106*4882a593Smuzhiyun if (zs == rs)
107*4882a593Smuzhiyun /*
108*4882a593Smuzhiyun * Cases of addition of zeros of equal signs
109*4882a593Smuzhiyun * or subtraction of zeroes of opposite signs.
110*4882a593Smuzhiyun * The sign of the resulting zero is in any
111*4882a593Smuzhiyun * such case determined only by the sign of z.
112*4882a593Smuzhiyun */
113*4882a593Smuzhiyun return z;
114*4882a593Smuzhiyun
115*4882a593Smuzhiyun return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD);
116*4882a593Smuzhiyun }
117*4882a593Smuzhiyun /* x*y is here 0, and z is not 0, so just return z */
118*4882a593Smuzhiyun return z;
119*4882a593Smuzhiyun
120*4882a593Smuzhiyun case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
121*4882a593Smuzhiyun SPDNORMX;
122*4882a593Smuzhiyun fallthrough;
123*4882a593Smuzhiyun case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
124*4882a593Smuzhiyun if (zc == IEEE754_CLASS_INF)
125*4882a593Smuzhiyun return ieee754sp_inf(zs);
126*4882a593Smuzhiyun SPDNORMY;
127*4882a593Smuzhiyun break;
128*4882a593Smuzhiyun
129*4882a593Smuzhiyun case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
130*4882a593Smuzhiyun if (zc == IEEE754_CLASS_INF)
131*4882a593Smuzhiyun return ieee754sp_inf(zs);
132*4882a593Smuzhiyun SPDNORMX;
133*4882a593Smuzhiyun break;
134*4882a593Smuzhiyun
135*4882a593Smuzhiyun case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
136*4882a593Smuzhiyun if (zc == IEEE754_CLASS_INF)
137*4882a593Smuzhiyun return ieee754sp_inf(zs);
138*4882a593Smuzhiyun /* continue to real computations */
139*4882a593Smuzhiyun }
140*4882a593Smuzhiyun
141*4882a593Smuzhiyun /* Finally get to do some computation */
142*4882a593Smuzhiyun
143*4882a593Smuzhiyun /*
144*4882a593Smuzhiyun * Do the multiplication bit first
145*4882a593Smuzhiyun *
146*4882a593Smuzhiyun * rm = xm * ym, re = xe + ye basically
147*4882a593Smuzhiyun *
148*4882a593Smuzhiyun * At this point xm and ym should have been normalized.
149*4882a593Smuzhiyun */
150*4882a593Smuzhiyun
151*4882a593Smuzhiyun /* rm = xm * ym, re = xe+ye basically */
152*4882a593Smuzhiyun assert(xm & SP_HIDDEN_BIT);
153*4882a593Smuzhiyun assert(ym & SP_HIDDEN_BIT);
154*4882a593Smuzhiyun
155*4882a593Smuzhiyun re = xe + ye;
156*4882a593Smuzhiyun
157*4882a593Smuzhiyun /* Multiple 24 bit xm and ym to give 48 bit results */
158*4882a593Smuzhiyun rm64 = (uint64_t)xm * ym;
159*4882a593Smuzhiyun
160*4882a593Smuzhiyun /* Shunt to top of word */
161*4882a593Smuzhiyun rm64 = rm64 << 16;
162*4882a593Smuzhiyun
163*4882a593Smuzhiyun /* Put explicit bit at bit 62 if necessary */
164*4882a593Smuzhiyun if ((int64_t) rm64 < 0) {
165*4882a593Smuzhiyun rm64 = rm64 >> 1;
166*4882a593Smuzhiyun re++;
167*4882a593Smuzhiyun }
168*4882a593Smuzhiyun
169*4882a593Smuzhiyun assert(rm64 & (1 << 62));
170*4882a593Smuzhiyun
171*4882a593Smuzhiyun if (zc == IEEE754_CLASS_ZERO) {
172*4882a593Smuzhiyun /*
173*4882a593Smuzhiyun * Move explicit bit from bit 62 to bit 26 since the
174*4882a593Smuzhiyun * ieee754sp_format code expects the mantissa to be
175*4882a593Smuzhiyun * 27 bits wide (24 + 3 rounding bits).
176*4882a593Smuzhiyun */
177*4882a593Smuzhiyun rm = XSPSRS64(rm64, (62 - 26));
178*4882a593Smuzhiyun return ieee754sp_format(rs, re, rm);
179*4882a593Smuzhiyun }
180*4882a593Smuzhiyun
181*4882a593Smuzhiyun /* Move explicit bit from bit 23 to bit 62 */
182*4882a593Smuzhiyun zm64 = (uint64_t)zm << (62 - 23);
183*4882a593Smuzhiyun assert(zm64 & (1 << 62));
184*4882a593Smuzhiyun
185*4882a593Smuzhiyun /* Make the exponents the same */
186*4882a593Smuzhiyun if (ze > re) {
187*4882a593Smuzhiyun /*
188*4882a593Smuzhiyun * Have to shift r fraction right to align.
189*4882a593Smuzhiyun */
190*4882a593Smuzhiyun s = ze - re;
191*4882a593Smuzhiyun rm64 = XSPSRS64(rm64, s);
192*4882a593Smuzhiyun re += s;
193*4882a593Smuzhiyun } else if (re > ze) {
194*4882a593Smuzhiyun /*
195*4882a593Smuzhiyun * Have to shift z fraction right to align.
196*4882a593Smuzhiyun */
197*4882a593Smuzhiyun s = re - ze;
198*4882a593Smuzhiyun zm64 = XSPSRS64(zm64, s);
199*4882a593Smuzhiyun ze += s;
200*4882a593Smuzhiyun }
201*4882a593Smuzhiyun assert(ze == re);
202*4882a593Smuzhiyun assert(ze <= SP_EMAX);
203*4882a593Smuzhiyun
204*4882a593Smuzhiyun /* Do the addition */
205*4882a593Smuzhiyun if (zs == rs) {
206*4882a593Smuzhiyun /*
207*4882a593Smuzhiyun * Generate 64 bit result by adding two 63 bit numbers
208*4882a593Smuzhiyun * leaving result in zm64, zs and ze.
209*4882a593Smuzhiyun */
210*4882a593Smuzhiyun zm64 = zm64 + rm64;
211*4882a593Smuzhiyun if ((int64_t)zm64 < 0) { /* carry out */
212*4882a593Smuzhiyun zm64 = XSPSRS1(zm64);
213*4882a593Smuzhiyun ze++;
214*4882a593Smuzhiyun }
215*4882a593Smuzhiyun } else {
216*4882a593Smuzhiyun if (zm64 >= rm64) {
217*4882a593Smuzhiyun zm64 = zm64 - rm64;
218*4882a593Smuzhiyun } else {
219*4882a593Smuzhiyun zm64 = rm64 - zm64;
220*4882a593Smuzhiyun zs = rs;
221*4882a593Smuzhiyun }
222*4882a593Smuzhiyun if (zm64 == 0)
223*4882a593Smuzhiyun return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD);
224*4882a593Smuzhiyun
225*4882a593Smuzhiyun /*
226*4882a593Smuzhiyun * Put explicit bit at bit 62 if necessary.
227*4882a593Smuzhiyun */
228*4882a593Smuzhiyun while ((zm64 >> 62) == 0) {
229*4882a593Smuzhiyun zm64 <<= 1;
230*4882a593Smuzhiyun ze--;
231*4882a593Smuzhiyun }
232*4882a593Smuzhiyun }
233*4882a593Smuzhiyun
234*4882a593Smuzhiyun /*
235*4882a593Smuzhiyun * Move explicit bit from bit 62 to bit 26 since the
236*4882a593Smuzhiyun * ieee754sp_format code expects the mantissa to be
237*4882a593Smuzhiyun * 27 bits wide (24 + 3 rounding bits).
238*4882a593Smuzhiyun */
239*4882a593Smuzhiyun zm = XSPSRS64(zm64, (62 - 26));
240*4882a593Smuzhiyun
241*4882a593Smuzhiyun return ieee754sp_format(zs, ze, zm);
242*4882a593Smuzhiyun }
243*4882a593Smuzhiyun
ieee754sp_maddf(union ieee754sp z,union ieee754sp x,union ieee754sp y)244*4882a593Smuzhiyun union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x,
245*4882a593Smuzhiyun union ieee754sp y)
246*4882a593Smuzhiyun {
247*4882a593Smuzhiyun return _sp_maddf(z, x, y, 0);
248*4882a593Smuzhiyun }
249*4882a593Smuzhiyun
ieee754sp_msubf(union ieee754sp z,union ieee754sp x,union ieee754sp y)250*4882a593Smuzhiyun union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x,
251*4882a593Smuzhiyun union ieee754sp y)
252*4882a593Smuzhiyun {
253*4882a593Smuzhiyun return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
254*4882a593Smuzhiyun }
255*4882a593Smuzhiyun
ieee754sp_madd(union ieee754sp z,union ieee754sp x,union ieee754sp y)256*4882a593Smuzhiyun union ieee754sp ieee754sp_madd(union ieee754sp z, union ieee754sp x,
257*4882a593Smuzhiyun union ieee754sp y)
258*4882a593Smuzhiyun {
259*4882a593Smuzhiyun return _sp_maddf(z, x, y, 0);
260*4882a593Smuzhiyun }
261*4882a593Smuzhiyun
ieee754sp_msub(union ieee754sp z,union ieee754sp x,union ieee754sp y)262*4882a593Smuzhiyun union ieee754sp ieee754sp_msub(union ieee754sp z, union ieee754sp x,
263*4882a593Smuzhiyun union ieee754sp y)
264*4882a593Smuzhiyun {
265*4882a593Smuzhiyun return _sp_maddf(z, x, y, MADDF_NEGATE_ADDITION);
266*4882a593Smuzhiyun }
267*4882a593Smuzhiyun
ieee754sp_nmadd(union ieee754sp z,union ieee754sp x,union ieee754sp y)268*4882a593Smuzhiyun union ieee754sp ieee754sp_nmadd(union ieee754sp z, union ieee754sp x,
269*4882a593Smuzhiyun union ieee754sp y)
270*4882a593Smuzhiyun {
271*4882a593Smuzhiyun return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT|MADDF_NEGATE_ADDITION);
272*4882a593Smuzhiyun }
273*4882a593Smuzhiyun
ieee754sp_nmsub(union ieee754sp z,union ieee754sp x,union ieee754sp y)274*4882a593Smuzhiyun union ieee754sp ieee754sp_nmsub(union ieee754sp z, union ieee754sp x,
275*4882a593Smuzhiyun union ieee754sp y)
276*4882a593Smuzhiyun {
277*4882a593Smuzhiyun return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
278*4882a593Smuzhiyun }
279