/*
 *  linux/arch/arm/vfp/vfpdouble.c
 *
 * This code is derived in part from John R. Hauser's SoftFloat library, which
 * carries the following notice:
 *
 * ===========================================================================
 * This C source file is part of the SoftFloat IEC/IEEE Floating-point
 * Arithmetic Package, Release 2.
 *
 * Written by John R. Hauser.  This work was made possible in part by the
 * International Computer Science Institute, located at Suite 600, 1947 Center
 * Street, Berkeley, California 94704.  Funding was partially provided by the
 * National Science Foundation under grant MIP-9311980.  The original version
 * of this code was written as part of a project to build a fixed-point vector
 * processor in collaboration with the University of California at Berkeley,
 * overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
 * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
 * arithmetic/softfloat.html'.
 *
 * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
 * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
 * TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
 * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
 * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
 *
 * Derivative works are acceptable, even for commercial purposes, so long as
 * (1) they include prominent notice that the work is derivative, and (2) they
 * include prominent notice akin to these three paragraphs for those parts of
 * this code that are retained.
 * ===========================================================================
 */
#include <linux/kernel.h>
#include <linux/bitops.h>

#include <asm/div64.h>
#include <asm/vfp.h>

#include "vfpinstr.h"
#include "vfp.h"

static struct vfp_double vfp_double_default_qnan = {
	.exponent	= 2047,
	.sign		= 0,
	.significand	= VFP_DOUBLE_SIGNIFICAND_QNAN,
};

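/*
 * Note on the working representation used throughout this file:
 * vfp_double_unpack() expands a packed IEEE 754 double into sign,
 * biased exponent and a left-aligned 64-bit significand, with the
 * (explicit) leading one of a normal number sitting well above the
 * 52 mantissa bits.  The spare low-order bits act as guard/round/
 * sticky bits, so rounding decisions can be taken after an operation
 * without losing information.  vfp_double_pack() performs the
 * inverse transformation.
 */
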
static void vfp_double_dump(const char *str, struct vfp_double *d)
{
	pr_debug("VFP: %s: sign=%d exponent=%d significand=%016llx\n",
		 str, d->sign != 0, d->exponent, d->significand);
}

static void vfp_double_normalise_denormal(struct vfp_double *vd)
{
	int bits = 31 - fls(vd->significand >> 32);
	if (bits == 31)
		bits = 63 - fls(vd->significand);

	vfp_double_dump("normalise_denormal: in", vd);

	if (bits) {
		vd->exponent -= bits - 1;
		vd->significand <<= bits;
	}

	vfp_double_dump("normalise_denormal: out", vd);
}

u32 vfp_double_normaliseround(int dd, struct vfp_double *vd, u32 fpscr, u32 exceptions, const char *func)
{
	u64 significand, incr;
	int exponent, shift, underflow;
	u32 rmode;

	vfp_double_dump("pack: in", vd);

	/*
	 * Infinities and NaNs are a special case.
	 */
	if (vd->exponent == 2047 && (vd->significand == 0 || exceptions))
		goto pack;

	/*
	 * Special-case zero.
	 */
	if (vd->significand == 0) {
		vd->exponent = 0;
		goto pack;
	}

	exponent = vd->exponent;
	significand = vd->significand;

	shift = 32 - fls(significand >> 32);
	if (shift == 32)
		shift = 64 - fls(significand);
	if (shift) {
		exponent -= shift;
		significand <<= shift;
	}

#ifdef DEBUG
	vd->exponent = exponent;
	vd->significand = significand;
	vfp_double_dump("pack: normalised", vd);
#endif

	/*
	 * Tiny number?
	 */
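	/*
	 * A result whose exponent has gone negative falls into the
	 * denormal range.  It is shifted right with "jamming" (any bits
	 * shifted out are ORed into the sticky bit) so the rounding step
	 * below still sees whether the value was exact.  The underflow
	 * flag is kept only if the shifted value is inexact in its low
	 * guard/sticky bits.
	 */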
	underflow = exponent < 0;
	if (underflow) {
		significand = vfp_shiftright64jamming(significand, -exponent);
		exponent = 0;
#ifdef DEBUG
		vd->exponent = exponent;
		vd->significand = significand;
		vfp_double_dump("pack: tiny number", vd);
#endif
		if (!(significand & ((1ULL << (VFP_DOUBLE_LOW_BITS + 1)) - 1)))
			underflow = 0;
	}

	/*
	 * Select rounding increment.
	 */
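	/*
	 * Round-to-nearest adds half a unit in the last place; ties are
	 * broken to even by dropping 1 from the increment when the
	 * result's LSB (bit VFP_DOUBLE_LOW_BITS + 1) is already clear,
	 * so an exactly-halfway value does not round up.  Round-to-zero
	 * adds nothing, and the directed modes add an all-ones increment
	 * only when rounding away from zero in the selected direction.
	 */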
	incr = 0;
	rmode = fpscr & FPSCR_RMODE_MASK;

	if (rmode == FPSCR_ROUND_NEAREST) {
		incr = 1ULL << VFP_DOUBLE_LOW_BITS;
		if ((significand & (1ULL << (VFP_DOUBLE_LOW_BITS + 1))) == 0)
			incr -= 1;
	} else if (rmode == FPSCR_ROUND_TOZERO) {
		incr = 0;
	} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vd->sign != 0))
		incr = (1ULL << (VFP_DOUBLE_LOW_BITS + 1)) - 1;

	pr_debug("VFP: rounding increment = 0x%08llx\n", incr);

	/*
	 * Is our rounding going to overflow?
	 */
	if ((significand + incr) < significand) {
		exponent += 1;
		significand = (significand >> 1) | (significand & 1);
		incr >>= 1;
#ifdef DEBUG
		vd->exponent = exponent;
		vd->significand = significand;
		vfp_double_dump("pack: overflow", vd);
#endif
	}

	/*
	 * If any of the low bits (which will be shifted out of the
	 * number) are non-zero, the result is inexact.
	 */
	if (significand & ((1 << (VFP_DOUBLE_LOW_BITS + 1)) - 1))
		exceptions |= FPSCR_IXC;

	/*
	 * Do our rounding.
	 */
	significand += incr;

	/*
	 * Infinity?
	 */
	if (exponent >= 2046) {
		exceptions |= FPSCR_OFC | FPSCR_IXC;
		if (incr == 0) {
			vd->exponent = 2045;
			vd->significand = 0x7fffffffffffffffULL;
		} else {
			vd->exponent = 2047;		/* infinity */
			vd->significand = 0;
		}
	} else {
		if (significand >> (VFP_DOUBLE_LOW_BITS + 1) == 0)
			exponent = 0;
		if (exponent || significand > 0x8000000000000000ULL)
			underflow = 0;
		if (underflow)
			exceptions |= FPSCR_UFC;
		vd->exponent = exponent;
		vd->significand = significand >> 1;
	}

 pack:
	vfp_double_dump("pack: final", vd);
	{
		s64 d = vfp_double_pack(vd);
		pr_debug("VFP: %s: d(d%d)=%016llx exceptions=%08x\n", func,
			 dd, d, exceptions);
		vfp_put_double(d, dd);
	}
	return exceptions;
}

/*
 * Propagate the NaN, setting exceptions if it is signalling.
 * 'n' is always a NaN.  'm' may be a number, NaN or infinity.
 */
static u32
vfp_propagate_nan(struct vfp_double *vdd, struct vfp_double *vdn,
		  struct vfp_double *vdm, u32 fpscr)
{
	struct vfp_double *nan;
	int tn, tm = 0;

	tn = vfp_double_type(vdn);

	if (vdm)
		tm = vfp_double_type(vdm);

	if (fpscr & FPSCR_DEFAULT_NAN)
		/*
		 * Default NaN mode - always returns a quiet NaN
		 */
		nan = &vfp_double_default_qnan;
	else {
		/*
		 * Contemporary mode - select the first signalling
		 * NAN, or if neither are signalling, the first
		 * quiet NAN.
		 */
		if (tn == VFP_SNAN || (tm != VFP_SNAN && tn == VFP_QNAN))
			nan = vdn;
		else
			nan = vdm;
		/*
		 * Make the NaN quiet.
		 */
		nan->significand |= VFP_DOUBLE_SIGNIFICAND_QNAN;
	}

	*vdd = *nan;

	/*
	 * If one was a signalling NAN, raise invalid operation.
	 */
	return tn == VFP_SNAN || tm == VFP_SNAN ? FPSCR_IOC : VFP_NAN_FLAG;
}

/*
 * Extended operations
 */
static u32 vfp_double_fabs(int dd, int unused, int dm, u32 fpscr)
{
	vfp_put_double(vfp_double_packed_abs(vfp_get_double(dm)), dd);
	return 0;
}

static u32 vfp_double_fcpy(int dd, int unused, int dm, u32 fpscr)
{
	vfp_put_double(vfp_get_double(dm), dd);
	return 0;
}

static u32 vfp_double_fneg(int dd, int unused, int dm, u32 fpscr)
{
	vfp_put_double(vfp_double_packed_negate(vfp_get_double(dm)), dd);
	return 0;
}

static u32 vfp_double_fsqrt(int dd, int unused, int dm, u32 fpscr)
{
	struct vfp_double vdm, vdd;
	int ret, tm;

	vfp_double_unpack(&vdm, vfp_get_double(dm));
	tm = vfp_double_type(&vdm);
	if (tm & (VFP_NAN|VFP_INFINITY)) {
		struct vfp_double *vdp = &vdd;

		if (tm & VFP_NAN)
			ret = vfp_propagate_nan(vdp, &vdm, NULL, fpscr);
		else if (vdm.sign == 0) {
 sqrt_copy:
			vdp = &vdm;
			ret = 0;
		} else {
 sqrt_invalid:
			vdp = &vfp_double_default_qnan;
			ret = FPSCR_IOC;
		}
		vfp_put_double(vfp_double_pack(vdp), dd);
		return ret;
	}

	/*
	 * sqrt(+/- 0) == +/- 0
	 */
	if (tm & VFP_ZERO)
		goto sqrt_copy;

	/*
	 * Normalise a denormalised number
	 */
	if (tm & VFP_DENORMAL)
		vfp_double_normalise_denormal(&vdm);

	/*
	 * sqrt(<0) = invalid
	 */
	if (vdm.sign)
		goto sqrt_invalid;

	vfp_double_dump("sqrt", &vdm);

	/*
	 * Estimate the square root.
	 */
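	/*
	 * The result exponent is (roughly) half the unbiased exponent of
	 * the operand, re-biased.  The significand is seeded from
	 * vfp_estimate_sqrt_significand() and then refined with an
	 * estimated division of the (suitably aligned) operand by the
	 * estimate - effectively one Newton-Raphson style correction,
	 * with the final halving done by the shift at the end.
	 */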
	vdd.sign = 0;
	vdd.exponent = ((vdm.exponent - 1023) >> 1) + 1023;
	vdd.significand = (u64)vfp_estimate_sqrt_significand(vdm.exponent, vdm.significand >> 32) << 31;

	vfp_double_dump("sqrt estimate1", &vdd);

	vdm.significand >>= 1 + (vdm.exponent & 1);
	vdd.significand += 2 + vfp_estimate_div128to64(vdm.significand, 0, vdd.significand);

	vfp_double_dump("sqrt estimate2", &vdd);

	/*
	 * And now adjust.
	 */
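	/*
	 * If the estimate's low bits are very small it may be off by one,
	 * so verify it: square the estimate, subtract it from the operand
	 * and decrement the estimate until the remainder is non-negative.
	 * Any non-zero remainder is folded into bit 0 as a sticky bit so
	 * that the final rounding is still correct.
	 */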
	if ((vdd.significand & VFP_DOUBLE_LOW_BITS_MASK) <= 5) {
		if (vdd.significand < 2) {
			vdd.significand = ~0ULL;
		} else {
			u64 termh, terml, remh, reml;
			vdm.significand <<= 2;
			mul64to128(&termh, &terml, vdd.significand, vdd.significand);
			sub128(&remh, &reml, vdm.significand, 0, termh, terml);
			while ((s64)remh < 0) {
				vdd.significand -= 1;
				shift64left(&termh, &terml, vdd.significand);
				terml |= 1;
				add128(&remh, &reml, remh, reml, termh, terml);
			}
			vdd.significand |= (remh | reml) != 0;
		}
	}
	vdd.significand = vfp_shiftright64jamming(vdd.significand, 1);

	return vfp_double_normaliseround(dd, &vdd, fpscr, 0, "fsqrt");
}

/*
 * Equal	:= ZC
 * Less than	:= N
 * Greater than	:= C
 * Unordered	:= CV
 */
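/*
 * These are the FPSCR N/Z/C/V values produced by the compare
 * instructions; FMSTAT later copies them to the CPSR flags so the
 * usual ARM condition codes can be used to test the result.
 */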
static u32 vfp_compare(int dd, int signal_on_qnan, int dm, u32 fpscr)
{
	s64 d, m;
	u32 ret = 0;

	m = vfp_get_double(dm);
	if (vfp_double_packed_exponent(m) == 2047 && vfp_double_packed_mantissa(m)) {
		ret |= FPSCR_C | FPSCR_V;
		if (signal_on_qnan || !(vfp_double_packed_mantissa(m) & (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1))))
			/*
			 * Signalling NaN, or signalling on quiet NaN
			 */
			ret |= FPSCR_IOC;
	}

	d = vfp_get_double(dd);
	if (vfp_double_packed_exponent(d) == 2047 && vfp_double_packed_mantissa(d)) {
		ret |= FPSCR_C | FPSCR_V;
		if (signal_on_qnan || !(vfp_double_packed_mantissa(d) & (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1))))
			/*
			 * Signalling NaN, or signalling on quiet NaN
			 */
			ret |= FPSCR_IOC;
	}

	if (ret == 0) {
		if (d == m || vfp_double_packed_abs(d | m) == 0) {
			/*
			 * equal
			 */
			ret |= FPSCR_Z | FPSCR_C;
		} else if (vfp_double_packed_sign(d ^ m)) {
			/*
			 * different signs
			 */
			if (vfp_double_packed_sign(d))
				/*
				 * d is negative, so d < m
				 */
				ret |= FPSCR_N;
			else
				/*
				 * d is positive, so d > m
				 */
				ret |= FPSCR_C;
		} else if ((vfp_double_packed_sign(d) != 0) ^ (d < m)) {
			/*
			 * d < m
			 */
			ret |= FPSCR_N;
		} else if ((vfp_double_packed_sign(d) != 0) ^ (d > m)) {
			/*
			 * d > m
			 */
			ret |= FPSCR_C;
		}
	}

	return ret;
}

static u32 vfp_double_fcmp(int dd, int unused, int dm, u32 fpscr)
{
	return vfp_compare(dd, 0, dm, fpscr);
}

static u32 vfp_double_fcmpe(int dd, int unused, int dm, u32 fpscr)
{
	return vfp_compare(dd, 1, dm, fpscr);
}

static u32 vfp_double_fcmpz(int dd, int unused, int dm, u32 fpscr)
{
	return vfp_compare(dd, 0, VFP_REG_ZERO, fpscr);
}

static u32 vfp_double_fcmpez(int dd, int unused, int dm, u32 fpscr)
{
	return vfp_compare(dd, 1, VFP_REG_ZERO, fpscr);
}

static u32 vfp_double_fcvts(int sd, int unused, int dm, u32 fpscr)
{
	struct vfp_double vdm;
	struct vfp_single vsd;
	int tm;
	u32 exceptions = 0;

	vfp_double_unpack(&vdm, vfp_get_double(dm));

	tm = vfp_double_type(&vdm);

	/*
	 * If we have a signalling NaN, signal invalid operation.
	 */
	if (tm == VFP_SNAN)
		exceptions = FPSCR_IOC;

	if (tm & VFP_DENORMAL)
		vfp_double_normalise_denormal(&vdm);

	vsd.sign = vdm.sign;
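	/*
	 * vfp_hi64to32jamming() narrows the 64-bit working significand to
	 * the 32 bits used by the single-precision path, ORing any
	 * discarded low bits into the bottom bit so that rounding in
	 * vfp_single_normaliseround() still sees the lost precision.
	 */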
	vsd.significand = vfp_hi64to32jamming(vdm.significand);

	/*
	 * If we have an infinity or a NaN, the exponent must be 255
	 */
	if (tm & (VFP_INFINITY|VFP_NAN)) {
		vsd.exponent = 255;
		if (tm == VFP_QNAN)
			vsd.significand |= VFP_SINGLE_SIGNIFICAND_QNAN;
		goto pack_nan;
	} else if (tm & VFP_ZERO)
		vsd.exponent = 0;
	else
		vsd.exponent = vdm.exponent - (1023 - 127);

	return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fcvts");

 pack_nan:
	vfp_put_float(vfp_single_pack(&vsd), sd);
	return exceptions;
}

static u32 vfp_double_fuito(int dd, int unused, int dm, u32 fpscr)
{
	struct vfp_double vdm;
	u32 m = vfp_get_float(dm);

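	/*
	 * Place the raw 32-bit integer in the significand and pick the
	 * exponent (1023 + 63 - 1) that gives each unit a weight of 2^0
	 * in the internal fixed-point format; normaliseround() then
	 * shifts the value into normal form and rounds.  The signed
	 * variant below does the same after taking the magnitude.
	 */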
	vdm.sign = 0;
	vdm.exponent = 1023 + 63 - 1;
	vdm.significand = (u64)m;

	return vfp_double_normaliseround(dd, &vdm, fpscr, 0, "fuito");
}

static u32 vfp_double_fsito(int dd, int unused, int dm, u32 fpscr)
{
	struct vfp_double vdm;
	u32 m = vfp_get_float(dm);

	vdm.sign = (m & 0x80000000) >> 16;
	vdm.exponent = 1023 + 63 - 1;
	vdm.significand = vdm.sign ? -m : m;

	return vfp_double_normaliseround(dd, &vdm, fpscr, 0, "fsito");
}

static u32 vfp_double_ftoui(int sd, int unused, int dm, u32 fpscr)
{
	struct vfp_double vdm;
	u32 d, exceptions = 0;
	int rmode = fpscr & FPSCR_RMODE_MASK;
	int tm;

	vfp_double_unpack(&vdm, vfp_get_double(dm));

	/*
	 * Do we have a denormalised number?
	 */
	tm = vfp_double_type(&vdm);
	if (tm & VFP_DENORMAL)
		exceptions |= FPSCR_IDC;

	if (tm & VFP_NAN)
		vdm.sign = 0;

	if (vdm.exponent >= 1023 + 32) {
		d = vdm.sign ? 0 : 0xffffffff;
		exceptions = FPSCR_IOC;
	} else if (vdm.exponent >= 1023 - 1) {
		int shift = 1023 + 63 - vdm.exponent;
		u64 rem, incr = 0;

		/*
		 * 2^0 <= m < 2^32-2^8
		 */
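		/*
		 * 'shift' is the number of significand bits that lie below
		 * the integer point for this exponent.  'd' receives the
		 * integer part; the discarded fraction is kept in 'rem',
		 * left-aligned in 64 bits, so the rounding test below can
		 * detect both the carry and any inexactness.
		 */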
		d = (vdm.significand << 1) >> shift;
		rem = vdm.significand << (65 - shift);

		if (rmode == FPSCR_ROUND_NEAREST) {
			incr = 0x8000000000000000ULL;
			if ((d & 1) == 0)
				incr -= 1;
		} else if (rmode == FPSCR_ROUND_TOZERO) {
			incr = 0;
		} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vdm.sign != 0)) {
			incr = ~0ULL;
		}

		if ((rem + incr) < rem) {
			if (d < 0xffffffff)
				d += 1;
			else
				exceptions |= FPSCR_IOC;
		}

		if (d && vdm.sign) {
			d = 0;
			exceptions |= FPSCR_IOC;
		} else if (rem)
			exceptions |= FPSCR_IXC;
	} else {
		d = 0;
		if (vdm.exponent | vdm.significand) {
			exceptions |= FPSCR_IXC;
			if (rmode == FPSCR_ROUND_PLUSINF && vdm.sign == 0)
				d = 1;
			else if (rmode == FPSCR_ROUND_MINUSINF && vdm.sign) {
				d = 0;
				exceptions |= FPSCR_IOC;
			}
		}
	}

	pr_debug("VFP: ftoui: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);

	vfp_put_float(d, sd);

	return exceptions;
}

static u32 vfp_double_ftouiz(int sd, int unused, int dm, u32 fpscr)
{
	return vfp_double_ftoui(sd, unused, dm, FPSCR_ROUND_TOZERO);
}

static u32 vfp_double_ftosi(int sd, int unused, int dm, u32 fpscr)
{
	struct vfp_double vdm;
	u32 d, exceptions = 0;
	int rmode = fpscr & FPSCR_RMODE_MASK;
	int tm;

	vfp_double_unpack(&vdm, vfp_get_double(dm));
	vfp_double_dump("VDM", &vdm);

	/*
	 * Do we have a denormalised number?
	 */
	tm = vfp_double_type(&vdm);
	if (tm & VFP_DENORMAL)
		exceptions |= FPSCR_IDC;

	if (tm & VFP_NAN) {
		d = 0;
		exceptions |= FPSCR_IOC;
	} else if (vdm.exponent >= 1023 + 32) {
		d = 0x7fffffff;
		if (vdm.sign)
			d = ~d;
		exceptions |= FPSCR_IOC;
	} else if (vdm.exponent >= 1023 - 1) {
		int shift = 1023 + 63 - vdm.exponent;	/* 58 */
		u64 rem, incr = 0;

		d = (vdm.significand << 1) >> shift;
		rem = vdm.significand << (65 - shift);

		if (rmode == FPSCR_ROUND_NEAREST) {
			incr = 0x8000000000000000ULL;
			if ((d & 1) == 0)
				incr -= 1;
		} else if (rmode == FPSCR_ROUND_TOZERO) {
			incr = 0;
		} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vdm.sign != 0)) {
			incr = ~0ULL;
		}

		if ((rem + incr) < rem && d < 0xffffffff)
			d += 1;
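		/*
		 * The signed 32-bit range is asymmetric: the largest
		 * positive magnitude is 0x7fffffff but the largest
		 * negative magnitude is 0x80000000, hence the extra
		 * "+ (vdm.sign != 0)" in the saturation test below.
		 */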
		if (d > 0x7fffffff + (vdm.sign != 0)) {
			d = 0x7fffffff + (vdm.sign != 0);
			exceptions |= FPSCR_IOC;
		} else if (rem)
			exceptions |= FPSCR_IXC;

		if (vdm.sign)
			d = -d;
	} else {
		d = 0;
		if (vdm.exponent | vdm.significand) {
			exceptions |= FPSCR_IXC;
			if (rmode == FPSCR_ROUND_PLUSINF && vdm.sign == 0)
				d = 1;
			else if (rmode == FPSCR_ROUND_MINUSINF && vdm.sign)
				d = -1;
		}
	}

	pr_debug("VFP: ftosi: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);

	vfp_put_float((s32)d, sd);

	return exceptions;
}

static u32 vfp_double_ftosiz(int dd, int unused, int dm, u32 fpscr)
{
	return vfp_double_ftosi(dd, unused, dm, FPSCR_ROUND_TOZERO);
}


static struct op fops_ext[32] = {
	[FEXT_TO_IDX(FEXT_FCPY)]	= { vfp_double_fcpy,   0 },
	[FEXT_TO_IDX(FEXT_FABS)]	= { vfp_double_fabs,   0 },
	[FEXT_TO_IDX(FEXT_FNEG)]	= { vfp_double_fneg,   0 },
	[FEXT_TO_IDX(FEXT_FSQRT)]	= { vfp_double_fsqrt,  0 },
	[FEXT_TO_IDX(FEXT_FCMP)]	= { vfp_double_fcmp,   OP_SCALAR },
	[FEXT_TO_IDX(FEXT_FCMPE)]	= { vfp_double_fcmpe,  OP_SCALAR },
	[FEXT_TO_IDX(FEXT_FCMPZ)]	= { vfp_double_fcmpz,  OP_SCALAR },
	[FEXT_TO_IDX(FEXT_FCMPEZ)]	= { vfp_double_fcmpez, OP_SCALAR },
	[FEXT_TO_IDX(FEXT_FCVT)]	= { vfp_double_fcvts,  OP_SCALAR|OP_SD },
	[FEXT_TO_IDX(FEXT_FUITO)]	= { vfp_double_fuito,  OP_SCALAR|OP_SM },
	[FEXT_TO_IDX(FEXT_FSITO)]	= { vfp_double_fsito,  OP_SCALAR|OP_SM },
	[FEXT_TO_IDX(FEXT_FTOUI)]	= { vfp_double_ftoui,  OP_SCALAR|OP_SD },
	[FEXT_TO_IDX(FEXT_FTOUIZ)]	= { vfp_double_ftouiz, OP_SCALAR|OP_SD },
	[FEXT_TO_IDX(FEXT_FTOSI)]	= { vfp_double_ftosi,  OP_SCALAR|OP_SD },
	[FEXT_TO_IDX(FEXT_FTOSIZ)]	= { vfp_double_ftosiz, OP_SCALAR|OP_SD },
};


static u32
vfp_double_fadd_nonnumber(struct vfp_double *vdd, struct vfp_double *vdn,
			  struct vfp_double *vdm, u32 fpscr)
{
	struct vfp_double *vdp;
	u32 exceptions = 0;
	int tn, tm;

	tn = vfp_double_type(vdn);
	tm = vfp_double_type(vdm);

	if (tn & tm & VFP_INFINITY) {
		/*
		 * Two infinities.  Are they different signs?
		 */
		if (vdn->sign ^ vdm->sign) {
			/*
			 * different signs -> invalid
			 */
			exceptions = FPSCR_IOC;
			vdp = &vfp_double_default_qnan;
		} else {
			/*
			 * same signs -> valid
			 */
			vdp = vdn;
		}
	} else if (tn & VFP_INFINITY && tm & VFP_NUMBER) {
		/*
		 * One infinity and one number -> infinity
		 */
		vdp = vdn;
	} else {
		/*
		 * 'n' is a NaN of some type
		 */
		return vfp_propagate_nan(vdd, vdn, vdm, fpscr);
	}
	*vdd = *vdp;
	return exceptions;
}

static u32
vfp_double_add(struct vfp_double *vdd, struct vfp_double *vdn,
	       struct vfp_double *vdm, u32 fpscr)
{
	u32 exp_diff;
	u64 m_sig;

	if (vdn->significand & (1ULL << 63) ||
	    vdm->significand & (1ULL << 63)) {
		pr_info("VFP: bad FP values in %s\n", __func__);
		vfp_double_dump("VDN", vdn);
		vfp_double_dump("VDM", vdm);
	}

	/*
	 * Ensure that 'n' is the largest magnitude number.  Note that
	 * if 'n' and 'm' have equal exponents, we do not swap them.
	 * This ensures that NaN propagation works correctly.
	 */
	if (vdn->exponent < vdm->exponent) {
		struct vfp_double *t = vdn;
		vdn = vdm;
		vdm = t;
	}

	/*
	 * Is 'n' an infinity or a NaN?  Note that 'm' may be a number,
	 * infinity or a NaN here.
	 */
	if (vdn->exponent == 2047)
		return vfp_double_fadd_nonnumber(vdd, vdn, vdm, fpscr);

	/*
	 * We have two proper numbers, where 'vdn' is the larger magnitude.
	 *
	 * Copy 'n' to 'd' before doing the arithmetic.
	 */
	*vdd = *vdn;

	/*
	 * Align 'm' with the result.
	 */
	exp_diff = vdn->exponent - vdm->exponent;
	m_sig = vfp_shiftright64jamming(vdm->significand, exp_diff);

	/*
	 * If the signs are different, we are really subtracting.
	 */
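	/*
	 * A negative aligned difference means 'm' was actually the larger
	 * magnitude, so negate the result and flip its sign.  An exact
	 * zero result takes the sign required by the rounding mode: it is
	 * -0 only in round-towards-minus-infinity mode.
	 */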
	if (vdn->sign ^ vdm->sign) {
		m_sig = vdn->significand - m_sig;
		if ((s64)m_sig < 0) {
			vdd->sign = vfp_sign_negate(vdd->sign);
			m_sig = -m_sig;
		} else if (m_sig == 0) {
			vdd->sign = (fpscr & FPSCR_RMODE_MASK) ==
				      FPSCR_ROUND_MINUSINF ? 0x8000 : 0;
		}
	} else {
		m_sig += vdn->significand;
	}
	vdd->significand = m_sig;

	return 0;
}

static u32
vfp_double_multiply(struct vfp_double *vdd, struct vfp_double *vdn,
		    struct vfp_double *vdm, u32 fpscr)
{
	vfp_double_dump("VDN", vdn);
	vfp_double_dump("VDM", vdm);

	/*
	 * Ensure that 'n' is the largest magnitude number.  Note that
	 * if 'n' and 'm' have equal exponents, we do not swap them.
	 * This ensures that NaN propagation works correctly.
	 */
	if (vdn->exponent < vdm->exponent) {
		struct vfp_double *t = vdn;
		vdn = vdm;
		vdm = t;
		pr_debug("VFP: swapping M <-> N\n");
	}

	vdd->sign = vdn->sign ^ vdm->sign;

	/*
	 * If 'n' is an infinity or NaN, handle it.  'm' may be anything.
	 */
	if (vdn->exponent == 2047) {
		if (vdn->significand || (vdm->exponent == 2047 && vdm->significand))
			return vfp_propagate_nan(vdd, vdn, vdm, fpscr);
		if ((vdm->exponent | vdm->significand) == 0) {
			*vdd = vfp_double_default_qnan;
			return FPSCR_IOC;
		}
		vdd->exponent = vdn->exponent;
		vdd->significand = 0;
		return 0;
	}

	/*
	 * If 'm' is zero, the result is always zero.  In this case,
	 * 'n' may be zero or a number, but it doesn't matter which.
	 */
	if ((vdm->exponent | vdm->significand) == 0) {
		vdd->exponent = 0;
		vdd->significand = 0;
		return 0;
	}

	/*
	 * We add 2 to the destination exponent for the same reason
	 * as the addition case - though this time we have +1 from
	 * each input operand.
	 */
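	/*
	 * vfp_hi64multiply64() forms the full 128-bit product of the two
	 * significands and returns the high 64 bits, folding any non-zero
	 * low bits into bit 0 as a sticky bit so that the subsequent
	 * rounding remains correct.
	 */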
	vdd->exponent = vdn->exponent + vdm->exponent - 1023 + 2;
	vdd->significand = vfp_hi64multiply64(vdn->significand, vdm->significand);

	vfp_double_dump("VDD", vdd);
	return 0;
}

#define NEG_MULTIPLY	(1 << 0)
#define NEG_SUBTRACT	(1 << 1)
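
/*
 * NEG_MULTIPLY negates the product (dn * dm); NEG_SUBTRACT negates the
 * accumulator dd before the addition.  Together they give the four
 * multiply-accumulate variants fmac, fnmac, fmsc and fnmsc below.
 */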

static u32
vfp_double_multiply_accumulate(int dd, int dn, int dm, u32 fpscr, u32 negate, char *func)
{
	struct vfp_double vdd, vdp, vdn, vdm;
	u32 exceptions;

	vfp_double_unpack(&vdn, vfp_get_double(dn));
	if (vdn.exponent == 0 && vdn.significand)
		vfp_double_normalise_denormal(&vdn);

	vfp_double_unpack(&vdm, vfp_get_double(dm));
	if (vdm.exponent == 0 && vdm.significand)
		vfp_double_normalise_denormal(&vdm);

	exceptions = vfp_double_multiply(&vdp, &vdn, &vdm, fpscr);
	if (negate & NEG_MULTIPLY)
		vdp.sign = vfp_sign_negate(vdp.sign);

	vfp_double_unpack(&vdn, vfp_get_double(dd));
	if (vdn.exponent == 0 && vdn.significand)
		vfp_double_normalise_denormal(&vdn);
	if (negate & NEG_SUBTRACT)
		vdn.sign = vfp_sign_negate(vdn.sign);

	exceptions |= vfp_double_add(&vdd, &vdn, &vdp, fpscr);

	return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, func);
}

/*
 * Standard operations
 */

/*
 * dd = dd + (dn * dm)
 */
static u32 vfp_double_fmac(int dd, int dn, int dm, u32 fpscr)
{
	return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, 0, "fmac");
}

/*
 * dd = dd - (dn * dm)
 */
static u32 vfp_double_fnmac(int dd, int dn, int dm, u32 fpscr)
{
	return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, NEG_MULTIPLY, "fnmac");
}

/*
 * dd = -dd + (dn * dm)
 */
static u32 vfp_double_fmsc(int dd, int dn, int dm, u32 fpscr)
{
	return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, NEG_SUBTRACT, "fmsc");
}

/*
 * dd = -dd - (dn * dm)
 */
static u32 vfp_double_fnmsc(int dd, int dn, int dm, u32 fpscr)
{
	return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, NEG_SUBTRACT | NEG_MULTIPLY, "fnmsc");
}

/*
 * dd = dn * dm
 */
static u32 vfp_double_fmul(int dd, int dn, int dm, u32 fpscr)
{
	struct vfp_double vdd, vdn, vdm;
	u32 exceptions;

	vfp_double_unpack(&vdn, vfp_get_double(dn));
	if (vdn.exponent == 0 && vdn.significand)
		vfp_double_normalise_denormal(&vdn);

	vfp_double_unpack(&vdm, vfp_get_double(dm));
	if (vdm.exponent == 0 && vdm.significand)
		vfp_double_normalise_denormal(&vdm);

	exceptions = vfp_double_multiply(&vdd, &vdn, &vdm, fpscr);
	return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fmul");
}

/*
 * dd = -(dn * dm)
 */
static u32 vfp_double_fnmul(int dd, int dn, int dm, u32 fpscr)
{
	struct vfp_double vdd, vdn, vdm;
	u32 exceptions;

	vfp_double_unpack(&vdn, vfp_get_double(dn));
	if (vdn.exponent == 0 && vdn.significand)
		vfp_double_normalise_denormal(&vdn);

	vfp_double_unpack(&vdm, vfp_get_double(dm));
	if (vdm.exponent == 0 && vdm.significand)
		vfp_double_normalise_denormal(&vdm);

	exceptions = vfp_double_multiply(&vdd, &vdn, &vdm, fpscr);
	vdd.sign = vfp_sign_negate(vdd.sign);

	return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fnmul");
}

/*
 * dd = dn + dm
 */
static u32 vfp_double_fadd(int dd, int dn, int dm, u32 fpscr)
{
	struct vfp_double vdd, vdn, vdm;
	u32 exceptions;

	vfp_double_unpack(&vdn, vfp_get_double(dn));
	if (vdn.exponent == 0 && vdn.significand)
		vfp_double_normalise_denormal(&vdn);

	vfp_double_unpack(&vdm, vfp_get_double(dm));
	if (vdm.exponent == 0 && vdm.significand)
		vfp_double_normalise_denormal(&vdm);

	exceptions = vfp_double_add(&vdd, &vdn, &vdm, fpscr);

	return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fadd");
}

/*
 * dd = dn - dm
 */
static u32 vfp_double_fsub(int dd, int dn, int dm, u32 fpscr)
{
	struct vfp_double vdd, vdn, vdm;
	u32 exceptions;

	vfp_double_unpack(&vdn, vfp_get_double(dn));
	if (vdn.exponent == 0 && vdn.significand)
		vfp_double_normalise_denormal(&vdn);

	vfp_double_unpack(&vdm, vfp_get_double(dm));
	if (vdm.exponent == 0 && vdm.significand)
		vfp_double_normalise_denormal(&vdm);

	/*
	 * Subtraction is like addition, but with a negated operand.
	 */
	vdm.sign = vfp_sign_negate(vdm.sign);

	exceptions = vfp_double_add(&vdd, &vdn, &vdm, fpscr);

	return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fsub");
}

/*
 * dd = dn / dm
 */
static u32 vfp_double_fdiv(int dd, int dn, int dm, u32 fpscr)
{
	struct vfp_double vdd, vdn, vdm;
	u32 exceptions = 0;
	int tm, tn;

	vfp_double_unpack(&vdn, vfp_get_double(dn));
	vfp_double_unpack(&vdm, vfp_get_double(dm));

	vdd.sign = vdn.sign ^ vdm.sign;

	tn = vfp_double_type(&vdn);
	tm = vfp_double_type(&vdm);

	/*
	 * Is n a NAN?
	 */
	if (tn & VFP_NAN)
		goto vdn_nan;

	/*
	 * Is m a NAN?
	 */
	if (tm & VFP_NAN)
		goto vdm_nan;

	/*
	 * If n and m are infinity, the result is invalid
	 * If n and m are zero, the result is invalid
	 */
	if (tm & tn & (VFP_INFINITY|VFP_ZERO))
		goto invalid;

	/*
	 * If n is infinity, the result is infinity
	 */
	if (tn & VFP_INFINITY)
		goto infinity;

	/*
	 * If m is zero, raise div0 exceptions
	 */
	if (tm & VFP_ZERO)
		goto divzero;

	/*
	 * If m is infinity, or n is zero, the result is zero
	 */
	if (tm & VFP_INFINITY || tn & VFP_ZERO)
		goto zero;

	if (tn & VFP_DENORMAL)
		vfp_double_normalise_denormal(&vdn);
	if (tm & VFP_DENORMAL)
		vfp_double_normalise_denormal(&vdm);

	/*
	 * Ok, we have two numbers, we can perform division.
	 */
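	/*
	 * The quotient exponent is the difference of the unbiased
	 * exponents, re-biased.  The operands are pre-scaled so the
	 * estimated quotient lands in range, then the significand is
	 * estimated with vfp_estimate_div128to64() and corrected by
	 * multiplying it back against the divisor and stepping it down
	 * until the remainder is non-negative; a non-zero remainder is
	 * kept as a sticky bit for rounding.
	 */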
	vdd.exponent = vdn.exponent - vdm.exponent + 1023 - 1;
	vdm.significand <<= 1;
	if (vdm.significand <= (2 * vdn.significand)) {
		vdn.significand >>= 1;
		vdd.exponent++;
	}
	vdd.significand = vfp_estimate_div128to64(vdn.significand, 0, vdm.significand);
	if ((vdd.significand & 0x1ff) <= 2) {
		u64 termh, terml, remh, reml;
		mul64to128(&termh, &terml, vdm.significand, vdd.significand);
		sub128(&remh, &reml, vdn.significand, 0, termh, terml);
		while ((s64)remh < 0) {
			vdd.significand -= 1;
			add128(&remh, &reml, remh, reml, 0, vdm.significand);
		}
		vdd.significand |= (reml != 0);
	}
	return vfp_double_normaliseround(dd, &vdd, fpscr, 0, "fdiv");

 vdn_nan:
	exceptions = vfp_propagate_nan(&vdd, &vdn, &vdm, fpscr);
 pack:
	vfp_put_double(vfp_double_pack(&vdd), dd);
	return exceptions;

 vdm_nan:
	exceptions = vfp_propagate_nan(&vdd, &vdm, &vdn, fpscr);
	goto pack;

 zero:
	vdd.exponent = 0;
	vdd.significand = 0;
	goto pack;

 divzero:
	exceptions = FPSCR_DZC;
 infinity:
	vdd.exponent = 2047;
	vdd.significand = 0;
	goto pack;

 invalid:
	vfp_put_double(vfp_double_pack(&vfp_double_default_qnan), dd);
	return FPSCR_IOC;
}

static struct op fops[16] = {
	[FOP_TO_IDX(FOP_FMAC)]	= { vfp_double_fmac,  0 },
	[FOP_TO_IDX(FOP_FNMAC)]	= { vfp_double_fnmac, 0 },
	[FOP_TO_IDX(FOP_FMSC)]	= { vfp_double_fmsc,  0 },
	[FOP_TO_IDX(FOP_FNMSC)]	= { vfp_double_fnmsc, 0 },
	[FOP_TO_IDX(FOP_FMUL)]	= { vfp_double_fmul,  0 },
	[FOP_TO_IDX(FOP_FNMUL)]	= { vfp_double_fnmul, 0 },
	[FOP_TO_IDX(FOP_FADD)]	= { vfp_double_fadd,  0 },
	[FOP_TO_IDX(FOP_FSUB)]	= { vfp_double_fsub,  0 },
	[FOP_TO_IDX(FOP_FDIV)]	= { vfp_double_fdiv,  0 },
};

#define FREG_BANK(x)	((x) & 0x0c)
#define FREG_IDX(x)	((x) & 3)
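
/*
 * The sixteen double registers are viewed as four banks of four for
 * short-vector operation: FREG_BANK() extracts the bank and FREG_IDX()
 * the index within it.  The loop below steps the index by the FPSCR
 * stride, wrapping modulo 4 so a vector never crosses its bank.
 */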

u32 vfp_double_cpdo(u32 inst, u32 fpscr)
{
	u32 op = inst & FOP_MASK;
	u32 exceptions = 0;
	unsigned int dest;
	unsigned int dn = vfp_get_dn(inst);
	unsigned int dm;
	unsigned int vecitr, veclen, vecstride;
	struct op *fop;

	vecstride = (1 + ((fpscr & FPSCR_STRIDE_MASK) == FPSCR_STRIDE_MASK));

	fop = (op == FOP_EXT) ? &fops_ext[FEXT_TO_IDX(inst)] : &fops[FOP_TO_IDX(op)];

	/*
	 * fcvtsd takes an sN register number as destination, not dN.
	 * It also always operates on scalars.
	 */
	if (fop->flags & OP_SD)
		dest = vfp_get_sd(inst);
	else
		dest = vfp_get_dd(inst);

	/*
	 * f[us]ito takes an sN operand, not a dN operand.
	 */
	if (fop->flags & OP_SM)
		dm = vfp_get_sm(inst);
	else
		dm = vfp_get_dm(inst);

	/*
	 * If destination bank is zero, vector length is always '1'.
	 * ARM DDI0100F C5.1.3, C5.3.2.
	 */
	if ((fop->flags & OP_SCALAR) || (FREG_BANK(dest) == 0))
		veclen = 0;
	else
		veclen = fpscr & FPSCR_LENGTH_MASK;

	pr_debug("VFP: vecstride=%u veclen=%u\n", vecstride,
		 (veclen >> FPSCR_LENGTH_BIT) + 1);

	if (!fop->fn)
		goto invalid;

	for (vecitr = 0; vecitr <= veclen; vecitr += 1 << FPSCR_LENGTH_BIT) {
		u32 except;
		char type;

		type = fop->flags & OP_SD ? 's' : 'd';
		if (op == FOP_EXT)
			pr_debug("VFP: itr%d (%c%u) = op[%u] (d%u)\n",
				 vecitr >> FPSCR_LENGTH_BIT,
				 type, dest, dn, dm);
		else
			pr_debug("VFP: itr%d (%c%u) = (d%u) op[%u] (d%u)\n",
				 vecitr >> FPSCR_LENGTH_BIT,
				 type, dest, dn, FOP_TO_IDX(op), dm);

		except = fop->fn(dest, dn, dm, fpscr);
		pr_debug("VFP: itr%d: exceptions=%08x\n",
			 vecitr >> FPSCR_LENGTH_BIT, except);

		exceptions |= except;

		/*
		 * CHECK: It appears to be undefined whether we stop when
		 * we encounter an exception.  We continue.
		 */
		dest = FREG_BANK(dest) + ((FREG_IDX(dest) + vecstride) & 3);
		dn = FREG_BANK(dn) + ((FREG_IDX(dn) + vecstride) & 3);
		if (FREG_BANK(dm) != 0)
			dm = FREG_BANK(dm) + ((FREG_IDX(dm) + vecstride) & 3);
	}
	return exceptions;

 invalid:
	return ~0;
}