1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-only
2*4882a593Smuzhiyun /* IEEE754 floating point arithmetic
3*4882a593Smuzhiyun * double precision: common utilities
4*4882a593Smuzhiyun */
5*4882a593Smuzhiyun /*
6*4882a593Smuzhiyun * MIPS floating point support
7*4882a593Smuzhiyun * Copyright (C) 1994-2000 Algorithmics Ltd.
8*4882a593Smuzhiyun */
9*4882a593Smuzhiyun
10*4882a593Smuzhiyun #include "ieee754dp.h"
11*4882a593Smuzhiyun
ieee754dp_mul(union ieee754dp x,union ieee754dp y)12*4882a593Smuzhiyun union ieee754dp ieee754dp_mul(union ieee754dp x, union ieee754dp y)
13*4882a593Smuzhiyun {
14*4882a593Smuzhiyun int re;
15*4882a593Smuzhiyun int rs;
16*4882a593Smuzhiyun u64 rm;
17*4882a593Smuzhiyun unsigned int lxm;
18*4882a593Smuzhiyun unsigned int hxm;
19*4882a593Smuzhiyun unsigned int lym;
20*4882a593Smuzhiyun unsigned int hym;
21*4882a593Smuzhiyun u64 lrm;
22*4882a593Smuzhiyun u64 hrm;
23*4882a593Smuzhiyun u64 t;
24*4882a593Smuzhiyun u64 at;
25*4882a593Smuzhiyun
26*4882a593Smuzhiyun COMPXDP;
27*4882a593Smuzhiyun COMPYDP;
28*4882a593Smuzhiyun
29*4882a593Smuzhiyun EXPLODEXDP;
30*4882a593Smuzhiyun EXPLODEYDP;
31*4882a593Smuzhiyun
32*4882a593Smuzhiyun ieee754_clearcx();
33*4882a593Smuzhiyun
34*4882a593Smuzhiyun FLUSHXDP;
35*4882a593Smuzhiyun FLUSHYDP;
36*4882a593Smuzhiyun
37*4882a593Smuzhiyun switch (CLPAIR(xc, yc)) {
38*4882a593Smuzhiyun case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN):
39*4882a593Smuzhiyun case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN):
40*4882a593Smuzhiyun case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN):
41*4882a593Smuzhiyun case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN):
42*4882a593Smuzhiyun case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN):
43*4882a593Smuzhiyun return ieee754dp_nanxcpt(y);
44*4882a593Smuzhiyun
45*4882a593Smuzhiyun case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN):
46*4882a593Smuzhiyun case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN):
47*4882a593Smuzhiyun case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO):
48*4882a593Smuzhiyun case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM):
49*4882a593Smuzhiyun case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM):
50*4882a593Smuzhiyun case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
51*4882a593Smuzhiyun return ieee754dp_nanxcpt(x);
52*4882a593Smuzhiyun
53*4882a593Smuzhiyun case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
54*4882a593Smuzhiyun case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
55*4882a593Smuzhiyun case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN):
56*4882a593Smuzhiyun case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN):
57*4882a593Smuzhiyun return y;
58*4882a593Smuzhiyun
59*4882a593Smuzhiyun case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
60*4882a593Smuzhiyun case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO):
61*4882a593Smuzhiyun case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM):
62*4882a593Smuzhiyun case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM):
63*4882a593Smuzhiyun case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF):
64*4882a593Smuzhiyun return x;
65*4882a593Smuzhiyun
66*4882a593Smuzhiyun
67*4882a593Smuzhiyun /*
68*4882a593Smuzhiyun * Infinity handling
69*4882a593Smuzhiyun */
70*4882a593Smuzhiyun case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
71*4882a593Smuzhiyun case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
72*4882a593Smuzhiyun ieee754_setcx(IEEE754_INVALID_OPERATION);
73*4882a593Smuzhiyun return ieee754dp_indef();
74*4882a593Smuzhiyun
75*4882a593Smuzhiyun case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
76*4882a593Smuzhiyun case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
77*4882a593Smuzhiyun case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
78*4882a593Smuzhiyun case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
79*4882a593Smuzhiyun case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
80*4882a593Smuzhiyun return ieee754dp_inf(xs ^ ys);
81*4882a593Smuzhiyun
82*4882a593Smuzhiyun case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
83*4882a593Smuzhiyun case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
84*4882a593Smuzhiyun case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
85*4882a593Smuzhiyun case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
86*4882a593Smuzhiyun case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
87*4882a593Smuzhiyun return ieee754dp_zero(xs ^ ys);
88*4882a593Smuzhiyun
89*4882a593Smuzhiyun
90*4882a593Smuzhiyun case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
91*4882a593Smuzhiyun DPDNORMX;
92*4882a593Smuzhiyun fallthrough;
93*4882a593Smuzhiyun case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
94*4882a593Smuzhiyun DPDNORMY;
95*4882a593Smuzhiyun break;
96*4882a593Smuzhiyun
97*4882a593Smuzhiyun case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
98*4882a593Smuzhiyun DPDNORMX;
99*4882a593Smuzhiyun break;
100*4882a593Smuzhiyun
101*4882a593Smuzhiyun case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
102*4882a593Smuzhiyun break;
103*4882a593Smuzhiyun }
104*4882a593Smuzhiyun /* rm = xm * ym, re = xe+ye basically */
105*4882a593Smuzhiyun assert(xm & DP_HIDDEN_BIT);
106*4882a593Smuzhiyun assert(ym & DP_HIDDEN_BIT);
107*4882a593Smuzhiyun
108*4882a593Smuzhiyun re = xe + ye;
109*4882a593Smuzhiyun rs = xs ^ ys;
110*4882a593Smuzhiyun
111*4882a593Smuzhiyun /* shunt to top of word */
112*4882a593Smuzhiyun xm <<= 64 - (DP_FBITS + 1);
113*4882a593Smuzhiyun ym <<= 64 - (DP_FBITS + 1);
114*4882a593Smuzhiyun
115*4882a593Smuzhiyun /*
116*4882a593Smuzhiyun * Multiply 64 bits xm, ym to give high 64 bits rm with stickness.
117*4882a593Smuzhiyun */
118*4882a593Smuzhiyun
119*4882a593Smuzhiyun lxm = xm;
120*4882a593Smuzhiyun hxm = xm >> 32;
121*4882a593Smuzhiyun lym = ym;
122*4882a593Smuzhiyun hym = ym >> 32;
123*4882a593Smuzhiyun
124*4882a593Smuzhiyun lrm = DPXMULT(lxm, lym);
125*4882a593Smuzhiyun hrm = DPXMULT(hxm, hym);
126*4882a593Smuzhiyun
127*4882a593Smuzhiyun t = DPXMULT(lxm, hym);
128*4882a593Smuzhiyun
129*4882a593Smuzhiyun at = lrm + (t << 32);
130*4882a593Smuzhiyun hrm += at < lrm;
131*4882a593Smuzhiyun lrm = at;
132*4882a593Smuzhiyun
133*4882a593Smuzhiyun hrm = hrm + (t >> 32);
134*4882a593Smuzhiyun
135*4882a593Smuzhiyun t = DPXMULT(hxm, lym);
136*4882a593Smuzhiyun
137*4882a593Smuzhiyun at = lrm + (t << 32);
138*4882a593Smuzhiyun hrm += at < lrm;
139*4882a593Smuzhiyun lrm = at;
140*4882a593Smuzhiyun
141*4882a593Smuzhiyun hrm = hrm + (t >> 32);
142*4882a593Smuzhiyun
143*4882a593Smuzhiyun rm = hrm | (lrm != 0);
144*4882a593Smuzhiyun
145*4882a593Smuzhiyun /*
146*4882a593Smuzhiyun * Sticky shift down to normal rounding precision.
147*4882a593Smuzhiyun */
148*4882a593Smuzhiyun if ((s64) rm < 0) {
149*4882a593Smuzhiyun rm = (rm >> (64 - (DP_FBITS + 1 + 3))) |
150*4882a593Smuzhiyun ((rm << (DP_FBITS + 1 + 3)) != 0);
151*4882a593Smuzhiyun re++;
152*4882a593Smuzhiyun } else {
153*4882a593Smuzhiyun rm = (rm >> (64 - (DP_FBITS + 1 + 3 + 1))) |
154*4882a593Smuzhiyun ((rm << (DP_FBITS + 1 + 3 + 1)) != 0);
155*4882a593Smuzhiyun }
156*4882a593Smuzhiyun assert(rm & (DP_HIDDEN_BIT << 3));
157*4882a593Smuzhiyun
158*4882a593Smuzhiyun return ieee754dp_format(rs, re, rm);
159*4882a593Smuzhiyun }
160