xref: /OK3568_Linux_fs/kernel/arch/arm/vfp/vfpsingle.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun /*
2*4882a593Smuzhiyun  *  linux/arch/arm/vfp/vfpsingle.c
3*4882a593Smuzhiyun  *
4*4882a593Smuzhiyun  * This code is derived in part from John R. Housers softfloat library, which
5*4882a593Smuzhiyun  * carries the following notice:
6*4882a593Smuzhiyun  *
7*4882a593Smuzhiyun  * ===========================================================================
8*4882a593Smuzhiyun  * This C source file is part of the SoftFloat IEC/IEEE Floating-point
9*4882a593Smuzhiyun  * Arithmetic Package, Release 2.
10*4882a593Smuzhiyun  *
11*4882a593Smuzhiyun  * Written by John R. Hauser.  This work was made possible in part by the
12*4882a593Smuzhiyun  * International Computer Science Institute, located at Suite 600, 1947 Center
13*4882a593Smuzhiyun  * Street, Berkeley, California 94704.  Funding was partially provided by the
14*4882a593Smuzhiyun  * National Science Foundation under grant MIP-9311980.  The original version
15*4882a593Smuzhiyun  * of this code was written as part of a project to build a fixed-point vector
16*4882a593Smuzhiyun  * processor in collaboration with the University of California at Berkeley,
17*4882a593Smuzhiyun  * overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
18*4882a593Smuzhiyun  * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
19*4882a593Smuzhiyun  * arithmetic/softfloat.html'.
20*4882a593Smuzhiyun  *
21*4882a593Smuzhiyun  * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
22*4882a593Smuzhiyun  * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
23*4882a593Smuzhiyun  * TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
24*4882a593Smuzhiyun  * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
25*4882a593Smuzhiyun  * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
26*4882a593Smuzhiyun  *
27*4882a593Smuzhiyun  * Derivative works are acceptable, even for commercial purposes, so long as
28*4882a593Smuzhiyun  * (1) they include prominent notice that the work is derivative, and (2) they
29*4882a593Smuzhiyun  * include prominent notice akin to these three paragraphs for those parts of
30*4882a593Smuzhiyun  * this code that are retained.
31*4882a593Smuzhiyun  * ===========================================================================
32*4882a593Smuzhiyun  */
33*4882a593Smuzhiyun #include <linux/kernel.h>
34*4882a593Smuzhiyun #include <linux/bitops.h>
35*4882a593Smuzhiyun 
36*4882a593Smuzhiyun #include <asm/div64.h>
37*4882a593Smuzhiyun #include <asm/vfp.h>
38*4882a593Smuzhiyun 
39*4882a593Smuzhiyun #include "vfpinstr.h"
40*4882a593Smuzhiyun #include "vfp.h"
41*4882a593Smuzhiyun 
/*
 * Default quiet NaN: exponent all-ones, sign clear, quiet bit set.
 * Returned in FPSCR "default NaN" mode and for invalid operations
 * (e.g. sqrt of a negative number).
 */
static struct vfp_single vfp_single_default_qnan = {
	.exponent	= 255,
	.sign		= 0,
	.significand	= VFP_SINGLE_SIGNIFICAND_QNAN,
};
47*4882a593Smuzhiyun 
/*
 * Dump the unpacked fields of 's' to the kernel log, tagged with
 * 'str'.  Compiles to nothing unless pr_debug() is enabled.
 */
static void vfp_single_dump(const char *str, struct vfp_single *s)
{
	pr_debug("VFP: %s: sign=%d exponent=%d significand=%08x\n",
		 str, s->sign != 0, s->exponent, s->significand);
}
53*4882a593Smuzhiyun 
/*
 * Normalise a denormalised number in place: shift the significand up
 * so its most significant set bit lands at bit 30, and adjust the
 * exponent by (bits - 1) to compensate for the change of scale.
 */
static void vfp_single_normalise_denormal(struct vfp_single *vs)
{
	/* Number of left shifts needed to bring the leading one to bit 30. */
	int bits = 31 - fls(vs->significand);

	vfp_single_dump("normalise_denormal: in", vs);

	if (bits) {
		vs->exponent -= bits - 1;
		vs->significand <<= bits;
	}

	vfp_single_dump("normalise_denormal: out", vs);
}
67*4882a593Smuzhiyun 
/*
 * Normalise, round and pack the unpacked value *vs according to the
 * rounding mode held in fpscr, then write the packed result to single
 * register 'sd'.  'exceptions' carries flags already raised by the
 * caller; any flags raised here (IXC, OFC, UFC) are OR'd in and the
 * combined set is returned for the caller to merge into FPSCR.
 *
 * In non-DEBUG builds the trailing 'func' argument (used only for
 * logging) is discarded by the macro below.
 */
#ifndef DEBUG
#define vfp_single_normaliseround(sd,vsd,fpscr,except,func) __vfp_single_normaliseround(sd,vsd,fpscr,except)
u32 __vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions)
#else
u32 vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions, const char *func)
#endif
{
	u32 significand, incr, rmode;
	int exponent, shift, underflow;

	vfp_single_dump("pack: in", vs);

	/*
	 * Infinities and NaNs are a special case.
	 */
	if (vs->exponent == 255 && (vs->significand == 0 || exceptions))
		goto pack;

	/*
	 * Special-case zero.
	 */
	if (vs->significand == 0) {
		vs->exponent = 0;
		goto pack;
	}

	exponent = vs->exponent;
	significand = vs->significand;

	/*
	 * Normalise first.  Note that we shift the significand up to
	 * bit 31, so we have VFP_SINGLE_LOW_BITS + 1 below the least
	 * significant bit.
	 */
	shift = 32 - fls(significand);
	if (shift < 32 && shift) {
		exponent -= shift;
		significand <<= shift;
	}

#ifdef DEBUG
	vs->exponent = exponent;
	vs->significand = significand;
	vfp_single_dump("pack: normalised", vs);
#endif

	/*
	 * Tiny number?  Shift right by the (negative) exponent,
	 * "jamming" any shifted-out bits into bit 0 so rounding still
	 * sees that the value was inexact.
	 */
	underflow = exponent < 0;
	if (underflow) {
		significand = vfp_shiftright32jamming(significand, -exponent);
		exponent = 0;
#ifdef DEBUG
		vs->exponent = exponent;
		vs->significand = significand;
		vfp_single_dump("pack: tiny number", vs);
#endif
		/* If all guard bits are clear the result is exact: no underflow. */
		if (!(significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1)))
			underflow = 0;
	}

	/*
	 * Select rounding increment.
	 */
	incr = 0;
	rmode = fpscr & FPSCR_RMODE_MASK;

	if (rmode == FPSCR_ROUND_NEAREST) {
		/* Round to nearest; ties go to an even LSB. */
		incr = 1 << VFP_SINGLE_LOW_BITS;
		if ((significand & (1 << (VFP_SINGLE_LOW_BITS + 1))) == 0)
			incr -= 1;
	} else if (rmode == FPSCR_ROUND_TOZERO) {
		incr = 0;
	} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vs->sign != 0))
		/* Directed rounding away from zero for this sign. */
		incr = (1 << (VFP_SINGLE_LOW_BITS + 1)) - 1;

	pr_debug("VFP: rounding increment = 0x%08x\n", incr);

	/*
	 * Is our rounding going to overflow?
	 */
	if ((significand + incr) < significand) {
		exponent += 1;
		/* Halve the significand, preserving the sticky bit. */
		significand = (significand >> 1) | (significand & 1);
		incr >>= 1;
#ifdef DEBUG
		vs->exponent = exponent;
		vs->significand = significand;
		vfp_single_dump("pack: overflow", vs);
#endif
	}

	/*
	 * If any of the low bits (which will be shifted out of the
	 * number) are non-zero, the result is inexact.
	 */
	if (significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1))
		exceptions |= FPSCR_IXC;

	/*
	 * Do our rounding.
	 */
	significand += incr;

	/*
	 * Infinity?  254 is the largest finite biased exponent.
	 */
	if (exponent >= 254) {
		exceptions |= FPSCR_OFC | FPSCR_IXC;
		if (incr == 0) {
			/* Not rounding away from zero: saturate at max finite. */
			vs->exponent = 253;
			vs->significand = 0x7fffffff;
		} else {
			vs->exponent = 255;		/* infinity */
			vs->significand = 0;
		}
	} else {
		if (significand >> (VFP_SINGLE_LOW_BITS + 1) == 0)
			exponent = 0;
		/* Rounding may have pulled the value out of the tiny range. */
		if (exponent || significand > 0x80000000)
			underflow = 0;
		if (underflow)
			exceptions |= FPSCR_UFC;
		vs->exponent = exponent;
		vs->significand = significand >> 1;
	}

 pack:
	vfp_single_dump("pack: final", vs);
	{
		s32 d = vfp_single_pack(vs);
#ifdef DEBUG
		pr_debug("VFP: %s: d(s%d)=%08x exceptions=%08x\n", func,
			 sd, d, exceptions);
#endif
		vfp_put_float(d, sd);
	}

	return exceptions;
}
209*4882a593Smuzhiyun 
210*4882a593Smuzhiyun /*
211*4882a593Smuzhiyun  * Propagate the NaN, setting exceptions if it is signalling.
212*4882a593Smuzhiyun  * 'n' is always a NaN.  'm' may be a number, NaN or infinity.
213*4882a593Smuzhiyun  */
static u32
vfp_propagate_nan(struct vfp_single *vsd, struct vfp_single *vsn,
		  struct vfp_single *vsm, u32 fpscr)
{
	struct vfp_single *nan;
	int tn, tm = 0;

	tn = vfp_single_type(vsn);

	/* 'm' is optional: single-operand callers pass NULL. */
	if (vsm)
		tm = vfp_single_type(vsm);

	if (fpscr & FPSCR_DEFAULT_NAN)
		/*
		 * Default NaN mode - always returns a quiet NaN
		 */
		nan = &vfp_single_default_qnan;
	else {
		/*
		 * Contemporary mode - select the first signalling
		 * NAN, or if neither are signalling, the first
		 * quiet NAN.
		 */
		if (tn == VFP_SNAN || (tm != VFP_SNAN && tn == VFP_QNAN))
			nan = vsn;
		else
			nan = vsm;
		/*
		 * Make the NaN quiet.  Note: this modifies the chosen
		 * operand in place before it is copied out below.
		 */
		nan->significand |= VFP_SINGLE_SIGNIFICAND_QNAN;
	}

	*vsd = *nan;

	/*
	 * If one was a signalling NAN, raise invalid operation.
	 * Otherwise return VFP_NAN_FLAG so the caller knows a quiet
	 * NaN was propagated without an exception.
	 */
	return tn == VFP_SNAN || tm == VFP_SNAN ? FPSCR_IOC : VFP_NAN_FLAG;
}
254*4882a593Smuzhiyun 
255*4882a593Smuzhiyun 
256*4882a593Smuzhiyun /*
257*4882a593Smuzhiyun  * Extended operations
258*4882a593Smuzhiyun  */
/* FABS: write |m| (sign bit cleared on the packed value) to sd. */
static u32 vfp_single_fabs(int sd, int unused, s32 m, u32 fpscr)
{
	vfp_put_float(vfp_single_packed_abs(m), sd);
	return 0;
}
264*4882a593Smuzhiyun 
/* FCPY: copy the packed value m to register sd unchanged. */
static u32 vfp_single_fcpy(int sd, int unused, s32 m, u32 fpscr)
{
	vfp_put_float(m, sd);
	return 0;
}
270*4882a593Smuzhiyun 
/* FNEG: write -m (sign bit of the packed value flipped) to sd. */
static u32 vfp_single_fneg(int sd, int unused, s32 m, u32 fpscr)
{
	vfp_put_float(vfp_single_packed_negate(m), sd);
	return 0;
}
276*4882a593Smuzhiyun 
/*
 * Correction table for the initial square-root estimate when the
 * exponent is odd; indexed by the top four fraction bits.  Values
 * originate from the SoftFloat library this file derives from.
 */
static const u16 sqrt_oddadjust[] = {
	0x0004, 0x0022, 0x005d, 0x00b1, 0x011d, 0x019f, 0x0236, 0x02e0,
	0x039c, 0x0468, 0x0545, 0x0631, 0x072b, 0x0832, 0x0946, 0x0a67
};
281*4882a593Smuzhiyun 
/* As sqrt_oddadjust, but for even exponents. */
static const u16 sqrt_evenadjust[] = {
	0x0a2d, 0x08af, 0x075a, 0x0629, 0x051a, 0x0429, 0x0356, 0x029e,
	0x0200, 0x0179, 0x0109, 0x00af, 0x0068, 0x0034, 0x0012, 0x0002
};
286*4882a593Smuzhiyun 
/*
 * Estimate the square root of a significand whose top two bits are
 * 01 (i.e. 0x40000000 <= significand <= 0x7fffffff).  Derived from
 * SoftFloat's estimate routine: a table-based first guess followed by
 * one fixed-point refinement step, (a/z + z)/2, in the final block.
 */
u32 vfp_estimate_sqrt_significand(u32 exponent, u32 significand)
{
	int index;
	u32 z, a;

	/* Caller contract: significand must be normalised to 01xx... */
	if ((significand & 0xc0000000) != 0x40000000) {
		pr_warn("VFP: estimate_sqrt: invalid significand\n");
	}

	a = significand << 1;
	/* Top four fraction bits select the table correction. */
	index = (a >> 27) & 15;
	if (exponent & 1) {
		z = 0x4000 + (a >> 17) - sqrt_oddadjust[index];
		z = ((a / z) << 14) + (z << 15);
		a >>= 1;
	} else {
		z = 0x8000 + (a >> 17) - sqrt_evenadjust[index];
		z = a / z + z;
		/* Clamp the estimate before the final refinement. */
		z = (z >= 0x20000) ? 0xffff8000 : (z << 15);
		if (z <= a)
			return (s32)a >> 1;
	}
	{
		/* Refinement: v = (a << 31) / z, result v + z/2. */
		u64 v = (u64)a << 31;
		do_div(v, z);
		return v + (z >> 1);
	}
}
315*4882a593Smuzhiyun 
/*
 * FSQRT: compute sqrt(m) and write it to sd, returning any exception
 * flags.  Special cases: NaN operands are propagated, sqrt(+inf) and
 * sqrt(+/-0) copy the operand through, and sqrt of a negative value
 * yields the default QNaN with FPSCR_IOC.
 *
 * Note the 'sqrt_copy' and 'sqrt_invalid' labels sit inside the
 * NaN/infinity if-block and are entered by goto from the code below.
 */
static u32 vfp_single_fsqrt(int sd, int unused, s32 m, u32 fpscr)
{
	struct vfp_single vsm, vsd;
	int ret, tm;

	vfp_single_unpack(&vsm, m);
	tm = vfp_single_type(&vsm);
	if (tm & (VFP_NAN|VFP_INFINITY)) {
		struct vfp_single *vsp = &vsd;

		if (tm & VFP_NAN)
			ret = vfp_propagate_nan(vsp, &vsm, NULL, fpscr);
		else if (vsm.sign == 0) {
 sqrt_copy:
			/* sqrt(+inf) == +inf; also reached for +/-0 below. */
			vsp = &vsm;
			ret = 0;
		} else {
 sqrt_invalid:
			/* Negative operand: invalid operation, default QNaN. */
			vsp = &vfp_single_default_qnan;
			ret = FPSCR_IOC;
		}
		vfp_put_float(vfp_single_pack(vsp), sd);
		return ret;
	}

	/*
	 * sqrt(+/- 0) == +/- 0
	 */
	if (tm & VFP_ZERO)
		goto sqrt_copy;

	/*
	 * Normalise a denormalised number
	 */
	if (tm & VFP_DENORMAL)
		vfp_single_normalise_denormal(&vsm);

	/*
	 * sqrt(<0) = invalid
	 */
	if (vsm.sign)
		goto sqrt_invalid;

	vfp_single_dump("sqrt", &vsm);

	/*
	 * Estimate the square root.  Result exponent is half the
	 * unbiased input exponent, rebias 127.
	 */
	vsd.sign = 0;
	vsd.exponent = ((vsm.exponent - 127) >> 1) + 127;
	vsd.significand = vfp_estimate_sqrt_significand(vsm.exponent, vsm.significand) + 2;

	vfp_single_dump("sqrt estimate", &vsd);

	/*
	 * And now adjust.  Only estimates whose low bits are near the
	 * rounding boundary need the exact remainder correction.
	 */
	if ((vsd.significand & VFP_SINGLE_LOW_BITS_MASK) <= 5) {
		if (vsd.significand < 2) {
			vsd.significand = 0xffffffff;
		} else {
			u64 term;
			s64 rem;
			/* Align the operand for an even exponent. */
			vsm.significand <<= !(vsm.exponent & 1);
			term = (u64)vsd.significand * vsd.significand;
			rem = ((u64)vsm.significand << 32) - term;

			pr_debug("VFP: term=%016llx rem=%016llx\n", term, rem);

			/* Decrement the estimate until the remainder is non-negative. */
			while (rem < 0) {
				vsd.significand -= 1;
				rem += ((u64)vsd.significand << 1) | 1;
			}
			/* Sticky bit: record any non-zero remainder. */
			vsd.significand |= rem != 0;
		}
	}
	vsd.significand = vfp_shiftright32jamming(vsd.significand, 1);

	return vfp_single_normaliseround(sd, &vsd, fpscr, 0, "fsqrt");
}
396*4882a593Smuzhiyun 
397*4882a593Smuzhiyun /*
398*4882a593Smuzhiyun  * Equal	:= ZC
399*4882a593Smuzhiyun  * Less than	:= N
400*4882a593Smuzhiyun  * Greater than	:= C
401*4882a593Smuzhiyun  * Unordered	:= CV
402*4882a593Smuzhiyun  */
/*
 * Compare register sd with packed value m and return the FPSCR
 * condition flags encoding the result (see the table above), OR'd
 * with FPSCR_IOC when a NaN signals.  'signal_on_qnan' makes quiet
 * NaNs signal too (FCMPE/FCMPEZ behaviour).
 */
static u32 vfp_compare(int sd, int signal_on_qnan, s32 m, u32 fpscr)
{
	s32 d;
	u32 ret = 0;

	d = vfp_get_float(sd);
	/* NaN operand m: result is unordered (C and V set). */
	if (vfp_single_packed_exponent(m) == 255 && vfp_single_packed_mantissa(m)) {
		ret |= FPSCR_C | FPSCR_V;
		if (signal_on_qnan || !(vfp_single_packed_mantissa(m) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1))))
			/*
			 * Signalling NaN, or signalling on quiet NaN
			 */
			ret |= FPSCR_IOC;
	}

	/* NaN operand d: likewise unordered. */
	if (vfp_single_packed_exponent(d) == 255 && vfp_single_packed_mantissa(d)) {
		ret |= FPSCR_C | FPSCR_V;
		if (signal_on_qnan || !(vfp_single_packed_mantissa(d) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1))))
			/*
			 * Signalling NaN, or signalling on quiet NaN
			 */
			ret |= FPSCR_IOC;
	}

	/* Only an ordered comparison if neither operand was a NaN. */
	if (ret == 0) {
		if (d == m || vfp_single_packed_abs(d | m) == 0) {
			/*
			 * equal (bit-identical, or +0 compared with -0)
			 */
			ret |= FPSCR_Z | FPSCR_C;
		} else if (vfp_single_packed_sign(d ^ m)) {
			/*
			 * different signs
			 */
			if (vfp_single_packed_sign(d))
				/*
				 * d is negative, so d < m
				 */
				ret |= FPSCR_N;
			else
				/*
				 * d is positive, so d > m
				 */
				ret |= FPSCR_C;
		} else if ((vfp_single_packed_sign(d) != 0) ^ (d < m)) {
			/*
			 * d < m (integer compare of the packed bits,
			 * with the ordering inverted for negatives)
			 */
			ret |= FPSCR_N;
		} else if ((vfp_single_packed_sign(d) != 0) ^ (d > m)) {
			/*
			 * d > m
			 */
			ret |= FPSCR_C;
		}
	}
	return ret;
}
461*4882a593Smuzhiyun 
/* FCMP: compare sd with m; only signalling NaNs raise IOC. */
static u32 vfp_single_fcmp(int sd, int unused, s32 m, u32 fpscr)
{
	return vfp_compare(sd, 0, m, fpscr);
}
466*4882a593Smuzhiyun 
/* FCMPE: as FCMP, but quiet NaNs also raise IOC. */
static u32 vfp_single_fcmpe(int sd, int unused, s32 m, u32 fpscr)
{
	return vfp_compare(sd, 1, m, fpscr);
}
471*4882a593Smuzhiyun 
/* FCMPZ: compare sd with +0.0 (packed value 0). */
static u32 vfp_single_fcmpz(int sd, int unused, s32 m, u32 fpscr)
{
	return vfp_compare(sd, 0, 0, fpscr);
}
476*4882a593Smuzhiyun 
/* FCMPEZ: compare sd with +0.0; quiet NaNs also raise IOC. */
static u32 vfp_single_fcmpez(int sd, int unused, s32 m, u32 fpscr)
{
	return vfp_compare(sd, 1, 0, fpscr);
}
481*4882a593Smuzhiyun 
/*
 * FCVTDS: convert the single-precision value m to double precision
 * and write it to double register dd.  Signalling NaNs raise IOC;
 * denormals are normalised before the exponent is rebias'd from
 * single (127) to double (1023).
 */
static u32 vfp_single_fcvtd(int dd, int unused, s32 m, u32 fpscr)
{
	struct vfp_single vsm;
	struct vfp_double vdd;
	int tm;
	u32 exceptions = 0;

	vfp_single_unpack(&vsm, m);

	tm = vfp_single_type(&vsm);

	/*
	 * If we have a signalling NaN, signal invalid operation.
	 */
	if (tm == VFP_SNAN)
		exceptions = FPSCR_IOC;

	if (tm & VFP_DENORMAL)
		vfp_single_normalise_denormal(&vsm);

	vdd.sign = vsm.sign;
	/* Widen the 32-bit significand into the double's 64-bit field. */
	vdd.significand = (u64)vsm.significand << 32;

	/*
	 * If we have an infinity or NaN, the exponent must be 2047.
	 */
	if (tm & (VFP_INFINITY|VFP_NAN)) {
		vdd.exponent = 2047;
		if (tm == VFP_QNAN)
			vdd.significand |= VFP_DOUBLE_SIGNIFICAND_QNAN;
		goto pack_nan;
	} else if (tm & VFP_ZERO)
		vdd.exponent = 0;
	else
		/* Rebias the exponent from single to double precision. */
		vdd.exponent = vsm.exponent + (1023 - 127);

	return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fcvtd");

 pack_nan:
	/* NaN/infinity bypass rounding: pack and store directly. */
	vfp_put_double(vfp_double_pack(&vdd), dd);
	return exceptions;
}
524*4882a593Smuzhiyun 
/*
 * FUITO: convert the unsigned 32-bit integer m to single precision.
 * The raw integer is placed in the significand with an exponent of
 * 127 + 30, and normaliseround scales and rounds it into place.
 */
static u32 vfp_single_fuito(int sd, int unused, s32 m, u32 fpscr)
{
	struct vfp_single vs;

	vs.sign = 0;
	vs.exponent = 127 + 31 - 1;
	vs.significand = (u32)m;

	return vfp_single_normaliseround(sd, &vs, fpscr, 0, "fuito");
}
535*4882a593Smuzhiyun 
/*
 * FSITO: convert the signed 32-bit integer m to single precision.
 * The sign bit (bit 31 of m) is moved down to bit 15, the position
 * the vfp_single sign field uses, and the significand holds |m|.
 */
static u32 vfp_single_fsito(int sd, int unused, s32 m, u32 fpscr)
{
	vs.sign = (m & 0x80000000) >> 16;
	vs.exponent = 127 + 31 - 1;
	/* Store the magnitude; negate m when it was negative. */
	vs.significand = vs.sign ? -m : m;

	return vfp_single_normaliseround(sd, &vs, fpscr, 0, "fsito");
}
546*4882a593Smuzhiyun 
/*
 * FTOUI: convert the single-precision value m to an unsigned 32-bit
 * integer under the rounding mode in fpscr, writing the result to sd.
 * Returns exception flags: IOC for out-of-range/negative results,
 * IXC when the conversion is inexact, IDC for denormal input.
 */
static u32 vfp_single_ftoui(int sd, int unused, s32 m, u32 fpscr)
{
	struct vfp_single vsm;
	u32 d, exceptions = 0;
	int rmode = fpscr & FPSCR_RMODE_MASK;
	int tm;

	vfp_single_unpack(&vsm, m);
	vfp_single_dump("VSM", &vsm);

	/*
	 * Do we have a denormalised number?
	 */
	tm = vfp_single_type(&vsm);
	if (tm & VFP_DENORMAL)
		exceptions |= FPSCR_IDC;

	/* Treat NaN as positive so it saturates to 0xffffffff below. */
	if (tm & VFP_NAN)
		vsm.sign = 0;

	if (vsm.exponent >= 127 + 32) {
		/* Magnitude >= 2^32 (or NaN): saturate with invalid op. */
		d = vsm.sign ? 0 : 0xffffffff;
		exceptions = FPSCR_IOC;
	} else if (vsm.exponent >= 127 - 1) {
		int shift = 127 + 31 - vsm.exponent;
		u32 rem, incr = 0;

		/*
		 * 2^0 <= m < 2^32-2^8: integer part in d, discarded
		 * fraction bits in rem for the rounding decision.
		 */
		d = (vsm.significand << 1) >> shift;
		rem = vsm.significand << (33 - shift);

		if (rmode == FPSCR_ROUND_NEAREST) {
			/* Nearest-even: ties round to an even result. */
			incr = 0x80000000;
			if ((d & 1) == 0)
				incr -= 1;
		} else if (rmode == FPSCR_ROUND_TOZERO) {
			incr = 0;
		} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) {
			/* Directed rounding away from zero. */
			incr = ~0;
		}

		/* rem + incr wrapping means the fraction rounds up. */
		if ((rem + incr) < rem) {
			if (d < 0xffffffff)
				d += 1;
			else
				exceptions |= FPSCR_IOC;
		}

		if (d && vsm.sign) {
			/* Negative value with non-zero magnitude: invalid. */
			d = 0;
			exceptions |= FPSCR_IOC;
		} else if (rem)
			exceptions |= FPSCR_IXC;
	} else {
		/* |m| < 1.0: rounds to 0 or 1 depending on mode/sign. */
		d = 0;
		if (vsm.exponent | vsm.significand) {
			exceptions |= FPSCR_IXC;
			if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0)
				d = 1;
			else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign) {
				d = 0;
				exceptions |= FPSCR_IOC;
			}
		}
	}

	pr_debug("VFP: ftoui: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);

	vfp_put_float(d, sd);

	return exceptions;
}
621*4882a593Smuzhiyun 
/*
 * FTOUIZ: as FTOUI but always truncates toward zero.  Passing the
 * bare mode is safe because vfp_single_ftoui only reads the rounding
 * mode bits from its fpscr argument.
 */
static u32 vfp_single_ftouiz(int sd, int unused, s32 m, u32 fpscr)
{
	return vfp_single_ftoui(sd, unused, m, FPSCR_ROUND_TOZERO);
}
626*4882a593Smuzhiyun 
/*
 * FTOSI: convert the single-precision value m to a signed 32-bit
 * integer under the rounding mode in fpscr, writing the result to sd.
 * Returns exception flags: IOC for NaN or out-of-range input, IXC
 * when the conversion is inexact, IDC for denormal input.
 */
static u32 vfp_single_ftosi(int sd, int unused, s32 m, u32 fpscr)
{
	struct vfp_single vsm;
	u32 d, exceptions = 0;
	int rmode = fpscr & FPSCR_RMODE_MASK;
	int tm;

	vfp_single_unpack(&vsm, m);
	vfp_single_dump("VSM", &vsm);

	/*
	 * Do we have a denormalised number?  Use the cached type
	 * instead of re-classifying (was a redundant second call to
	 * vfp_single_type(); matches the ftoui path above).
	 */
	tm = vfp_single_type(&vsm);
	if (tm & VFP_DENORMAL)
		exceptions |= FPSCR_IDC;

	if (tm & VFP_NAN) {
		/* NaN converts to zero with invalid operation. */
		d = 0;
		exceptions |= FPSCR_IOC;
	} else if (vsm.exponent >= 127 + 32) {
		/*
		 * m >= 2^31-2^7: invalid; saturate to INT_MAX/INT_MIN.
		 */
		d = 0x7fffffff;
		if (vsm.sign)
			d = ~d;
		exceptions |= FPSCR_IOC;
	} else if (vsm.exponent >= 127 - 1) {
		int shift = 127 + 31 - vsm.exponent;
		u32 rem, incr = 0;

		/* 2^0 <= m <= 2^31-2^7: integer part in d, fraction in rem. */
		d = (vsm.significand << 1) >> shift;
		rem = vsm.significand << (33 - shift);

		if (rmode == FPSCR_ROUND_NEAREST) {
			/* Nearest-even: ties round to an even result. */
			incr = 0x80000000;
			if ((d & 1) == 0)
				incr -= 1;
		} else if (rmode == FPSCR_ROUND_TOZERO) {
			incr = 0;
		} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) {
			/* Directed rounding away from zero. */
			incr = ~0;
		}

		/* rem + incr wrapping means the fraction rounds up. */
		if ((rem + incr) < rem && d < 0xffffffff)
			d += 1;
		/* Negative range extends one further (|INT_MIN| = INT_MAX+1). */
		if (d > 0x7fffffff + (vsm.sign != 0)) {
			d = 0x7fffffff + (vsm.sign != 0);
			exceptions |= FPSCR_IOC;
		} else if (rem)
			exceptions |= FPSCR_IXC;

		if (vsm.sign)
			d = -d;
	} else {
		/* |m| < 1.0: rounds to 0, +1 or -1 depending on mode/sign. */
		d = 0;
		if (vsm.exponent | vsm.significand) {
			exceptions |= FPSCR_IXC;
			if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0)
				d = 1;
			else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign)
				d = -1;
		}
	}

	pr_debug("VFP: ftosi: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);

	vfp_put_float((s32)d, sd);

	return exceptions;
}
700*4882a593Smuzhiyun 
/*
 * ftosiz: convert a single-precision value to a signed 32-bit integer,
 * always rounding towards zero regardless of the FPSCR rounding mode.
 */
static u32 vfp_single_ftosiz(int sd, int unused, s32 m, u32 fpscr)
{
	u32 rmode = FPSCR_ROUND_TOZERO;

	return vfp_single_ftosi(sd, unused, m, rmode);
}
705*4882a593Smuzhiyun 
/*
 * Dispatch table for the "extension" opcode space (FEXT_*): copy, abs,
 * neg, sqrt, the four compare variants, conversion to double precision,
 * and the integer <-> float conversions.  Entries flagged OP_SCALAR
 * always operate on a single scalar even when FPSCR selects vector
 * mode; FEXT_FCVT additionally carries OP_DD because its destination
 * is a double (dN) register rather than an sN register.
 */
static struct op fops_ext[32] = {
	[FEXT_TO_IDX(FEXT_FCPY)]	= { vfp_single_fcpy,   0 },
	[FEXT_TO_IDX(FEXT_FABS)]	= { vfp_single_fabs,   0 },
	[FEXT_TO_IDX(FEXT_FNEG)]	= { vfp_single_fneg,   0 },
	[FEXT_TO_IDX(FEXT_FSQRT)]	= { vfp_single_fsqrt,  0 },
	[FEXT_TO_IDX(FEXT_FCMP)]	= { vfp_single_fcmp,   OP_SCALAR },
	[FEXT_TO_IDX(FEXT_FCMPE)]	= { vfp_single_fcmpe,  OP_SCALAR },
	[FEXT_TO_IDX(FEXT_FCMPZ)]	= { vfp_single_fcmpz,  OP_SCALAR },
	[FEXT_TO_IDX(FEXT_FCMPEZ)]	= { vfp_single_fcmpez, OP_SCALAR },
	[FEXT_TO_IDX(FEXT_FCVT)]	= { vfp_single_fcvtd,  OP_SCALAR|OP_DD },
	[FEXT_TO_IDX(FEXT_FUITO)]	= { vfp_single_fuito,  OP_SCALAR },
	[FEXT_TO_IDX(FEXT_FSITO)]	= { vfp_single_fsito,  OP_SCALAR },
	[FEXT_TO_IDX(FEXT_FTOUI)]	= { vfp_single_ftoui,  OP_SCALAR },
	[FEXT_TO_IDX(FEXT_FTOUIZ)]	= { vfp_single_ftouiz, OP_SCALAR },
	[FEXT_TO_IDX(FEXT_FTOSI)]	= { vfp_single_ftosi,  OP_SCALAR },
	[FEXT_TO_IDX(FEXT_FTOSIZ)]	= { vfp_single_ftosiz, OP_SCALAR },
};
723*4882a593Smuzhiyun 
724*4882a593Smuzhiyun 
725*4882a593Smuzhiyun 
726*4882a593Smuzhiyun 
727*4882a593Smuzhiyun 
728*4882a593Smuzhiyun static u32
vfp_single_fadd_nonnumber(struct vfp_single * vsd,struct vfp_single * vsn,struct vfp_single * vsm,u32 fpscr)729*4882a593Smuzhiyun vfp_single_fadd_nonnumber(struct vfp_single *vsd, struct vfp_single *vsn,
730*4882a593Smuzhiyun 			  struct vfp_single *vsm, u32 fpscr)
731*4882a593Smuzhiyun {
732*4882a593Smuzhiyun 	struct vfp_single *vsp;
733*4882a593Smuzhiyun 	u32 exceptions = 0;
734*4882a593Smuzhiyun 	int tn, tm;
735*4882a593Smuzhiyun 
736*4882a593Smuzhiyun 	tn = vfp_single_type(vsn);
737*4882a593Smuzhiyun 	tm = vfp_single_type(vsm);
738*4882a593Smuzhiyun 
739*4882a593Smuzhiyun 	if (tn & tm & VFP_INFINITY) {
740*4882a593Smuzhiyun 		/*
741*4882a593Smuzhiyun 		 * Two infinities.  Are they different signs?
742*4882a593Smuzhiyun 		 */
743*4882a593Smuzhiyun 		if (vsn->sign ^ vsm->sign) {
744*4882a593Smuzhiyun 			/*
745*4882a593Smuzhiyun 			 * different signs -> invalid
746*4882a593Smuzhiyun 			 */
747*4882a593Smuzhiyun 			exceptions = FPSCR_IOC;
748*4882a593Smuzhiyun 			vsp = &vfp_single_default_qnan;
749*4882a593Smuzhiyun 		} else {
750*4882a593Smuzhiyun 			/*
751*4882a593Smuzhiyun 			 * same signs -> valid
752*4882a593Smuzhiyun 			 */
753*4882a593Smuzhiyun 			vsp = vsn;
754*4882a593Smuzhiyun 		}
755*4882a593Smuzhiyun 	} else if (tn & VFP_INFINITY && tm & VFP_NUMBER) {
756*4882a593Smuzhiyun 		/*
757*4882a593Smuzhiyun 		 * One infinity and one number -> infinity
758*4882a593Smuzhiyun 		 */
759*4882a593Smuzhiyun 		vsp = vsn;
760*4882a593Smuzhiyun 	} else {
761*4882a593Smuzhiyun 		/*
762*4882a593Smuzhiyun 		 * 'n' is a NaN of some type
763*4882a593Smuzhiyun 		 */
764*4882a593Smuzhiyun 		return vfp_propagate_nan(vsd, vsn, vsm, fpscr);
765*4882a593Smuzhiyun 	}
766*4882a593Smuzhiyun 	*vsd = *vsp;
767*4882a593Smuzhiyun 	return exceptions;
768*4882a593Smuzhiyun }
769*4882a593Smuzhiyun 
/*
 * Core single-precision addition on unpacked operands.
 *
 * Both significands are expected in unpacked form with bit 31 clear;
 * a set bit 31 indicates a bad unpack, which is reported but not
 * treated as fatal.  The result is left unnormalised in *vsd; the
 * caller is expected to pass it through vfp_single_normaliseround().
 * Returns exception flags (0 for all finite-number paths here).
 */
static u32
vfp_single_add(struct vfp_single *vsd, struct vfp_single *vsn,
	       struct vfp_single *vsm, u32 fpscr)
{
	u32 exp_diff, m_sig;

	if (vsn->significand & 0x80000000 ||
	    vsm->significand & 0x80000000) {
		pr_info("VFP: bad FP values in %s\n", __func__);
		vfp_single_dump("VSN", vsn);
		vfp_single_dump("VSM", vsm);
	}

	/*
	 * Ensure that 'n' is the largest magnitude number.  Note that
	 * if 'n' and 'm' have equal exponents, we do not swap them.
	 * This ensures that NaN propagation works correctly.
	 */
	if (vsn->exponent < vsm->exponent) {
		struct vfp_single *t = vsn;
		vsn = vsm;
		vsm = t;
	}

	/*
	 * Is 'n' an infinity or a NaN?  Note that 'm' may be a number,
	 * infinity or a NaN here.
	 */
	if (vsn->exponent == 255)
		return vfp_single_fadd_nonnumber(vsd, vsn, vsm, fpscr);

	/*
	 * We have two proper numbers, where 'vsn' is the larger magnitude.
	 *
	 * Copy 'n' to 'd' before doing the arithmetic.
	 */
	*vsd = *vsn;

	/*
	 * Align both numbers.  The "jamming" right shift ORs any bits
	 * shifted out into the LSB so rounding still sees a sticky bit.
	 */
	exp_diff = vsn->exponent - vsm->exponent;
	m_sig = vfp_shiftright32jamming(vsm->significand, exp_diff);

	/*
	 * If the signs are different, we are really subtracting.
	 */
	if (vsn->sign ^ vsm->sign) {
		m_sig = vsn->significand - m_sig;
		if ((s32)m_sig < 0) {
			/* magnitude went negative: flip sign, take |result| */
			vsd->sign = vfp_sign_negate(vsd->sign);
			m_sig = -m_sig;
		} else if (m_sig == 0) {
			/*
			 * Exact zero result: IEEE 754 makes it -0 only in
			 * round-towards-minus-infinity mode (0x8000 is the
			 * unpacked negative sign value).
			 */
			vsd->sign = (fpscr & FPSCR_RMODE_MASK) ==
				      FPSCR_ROUND_MINUSINF ? 0x8000 : 0;
		}
	} else {
		m_sig = vsn->significand + m_sig;
	}
	vsd->significand = m_sig;

	return 0;
}
833*4882a593Smuzhiyun 
834*4882a593Smuzhiyun static u32
vfp_single_multiply(struct vfp_single * vsd,struct vfp_single * vsn,struct vfp_single * vsm,u32 fpscr)835*4882a593Smuzhiyun vfp_single_multiply(struct vfp_single *vsd, struct vfp_single *vsn, struct vfp_single *vsm, u32 fpscr)
836*4882a593Smuzhiyun {
837*4882a593Smuzhiyun 	vfp_single_dump("VSN", vsn);
838*4882a593Smuzhiyun 	vfp_single_dump("VSM", vsm);
839*4882a593Smuzhiyun 
840*4882a593Smuzhiyun 	/*
841*4882a593Smuzhiyun 	 * Ensure that 'n' is the largest magnitude number.  Note that
842*4882a593Smuzhiyun 	 * if 'n' and 'm' have equal exponents, we do not swap them.
843*4882a593Smuzhiyun 	 * This ensures that NaN propagation works correctly.
844*4882a593Smuzhiyun 	 */
845*4882a593Smuzhiyun 	if (vsn->exponent < vsm->exponent) {
846*4882a593Smuzhiyun 		struct vfp_single *t = vsn;
847*4882a593Smuzhiyun 		vsn = vsm;
848*4882a593Smuzhiyun 		vsm = t;
849*4882a593Smuzhiyun 		pr_debug("VFP: swapping M <-> N\n");
850*4882a593Smuzhiyun 	}
851*4882a593Smuzhiyun 
852*4882a593Smuzhiyun 	vsd->sign = vsn->sign ^ vsm->sign;
853*4882a593Smuzhiyun 
854*4882a593Smuzhiyun 	/*
855*4882a593Smuzhiyun 	 * If 'n' is an infinity or NaN, handle it.  'm' may be anything.
856*4882a593Smuzhiyun 	 */
857*4882a593Smuzhiyun 	if (vsn->exponent == 255) {
858*4882a593Smuzhiyun 		if (vsn->significand || (vsm->exponent == 255 && vsm->significand))
859*4882a593Smuzhiyun 			return vfp_propagate_nan(vsd, vsn, vsm, fpscr);
860*4882a593Smuzhiyun 		if ((vsm->exponent | vsm->significand) == 0) {
861*4882a593Smuzhiyun 			*vsd = vfp_single_default_qnan;
862*4882a593Smuzhiyun 			return FPSCR_IOC;
863*4882a593Smuzhiyun 		}
864*4882a593Smuzhiyun 		vsd->exponent = vsn->exponent;
865*4882a593Smuzhiyun 		vsd->significand = 0;
866*4882a593Smuzhiyun 		return 0;
867*4882a593Smuzhiyun 	}
868*4882a593Smuzhiyun 
869*4882a593Smuzhiyun 	/*
870*4882a593Smuzhiyun 	 * If 'm' is zero, the result is always zero.  In this case,
871*4882a593Smuzhiyun 	 * 'n' may be zero or a number, but it doesn't matter which.
872*4882a593Smuzhiyun 	 */
873*4882a593Smuzhiyun 	if ((vsm->exponent | vsm->significand) == 0) {
874*4882a593Smuzhiyun 		vsd->exponent = 0;
875*4882a593Smuzhiyun 		vsd->significand = 0;
876*4882a593Smuzhiyun 		return 0;
877*4882a593Smuzhiyun 	}
878*4882a593Smuzhiyun 
879*4882a593Smuzhiyun 	/*
880*4882a593Smuzhiyun 	 * We add 2 to the destination exponent for the same reason as
881*4882a593Smuzhiyun 	 * the addition case - though this time we have +1 from each
882*4882a593Smuzhiyun 	 * input operand.
883*4882a593Smuzhiyun 	 */
884*4882a593Smuzhiyun 	vsd->exponent = vsn->exponent + vsm->exponent - 127 + 2;
885*4882a593Smuzhiyun 	vsd->significand = vfp_hi64to32jamming((u64)vsn->significand * vsm->significand);
886*4882a593Smuzhiyun 
887*4882a593Smuzhiyun 	vfp_single_dump("VSD", vsd);
888*4882a593Smuzhiyun 	return 0;
889*4882a593Smuzhiyun }
890*4882a593Smuzhiyun 
/* Flag bits for vfp_single_multiply_accumulate()'s 'negate' argument. */
#define NEG_MULTIPLY	(1 << 0)	/* negate the product sn * sm */
#define NEG_SUBTRACT	(1 << 1)	/* negate the accumulator sd */
893*4882a593Smuzhiyun 
/*
 * Common implementation for fmac/fnmac/fmsc/fnmsc:
 * sd = (+/-)sd + (+/-)(sn * sm), with the two optional negations
 * selected by the NEG_SUBTRACT and NEG_MULTIPLY bits in 'negate'.
 * 'func' is only used for debug output by the rounding helper.
 * Note: the multiply and the add are rounded separately (this is a
 * chained, not fused, multiply-accumulate).
 */
static u32
vfp_single_multiply_accumulate(int sd, int sn, s32 m, u32 fpscr, u32 negate, char *func)
{
	struct vfp_single vsd, vsp, vsn, vsm;
	u32 exceptions;
	s32 v;

	/* unpack the multiplicand from sn, normalising any denormal */
	v = vfp_get_float(sn);
	pr_debug("VFP: s%u = %08x\n", sn, v);
	vfp_single_unpack(&vsn, v);
	if (vsn.exponent == 0 && vsn.significand)
		vfp_single_normalise_denormal(&vsn);

	vfp_single_unpack(&vsm, m);
	if (vsm.exponent == 0 && vsm.significand)
		vfp_single_normalise_denormal(&vsm);

	/* vsp = sn * sm, optionally negated (fnmac/fnmsc) */
	exceptions = vfp_single_multiply(&vsp, &vsn, &vsm, fpscr);
	if (negate & NEG_MULTIPLY)
		vsp.sign = vfp_sign_negate(vsp.sign);

	/* vsn is reused here for the accumulator fetched from sd */
	v = vfp_get_float(sd);
	pr_debug("VFP: s%u = %08x\n", sd, v);
	vfp_single_unpack(&vsn, v);
	if (vsn.exponent == 0 && vsn.significand)
		vfp_single_normalise_denormal(&vsn);
	/* optionally negate the accumulator (fmsc/fnmsc) */
	if (negate & NEG_SUBTRACT)
		vsn.sign = vfp_sign_negate(vsn.sign);

	exceptions |= vfp_single_add(&vsd, &vsn, &vsp, fpscr);

	return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, func);
}
927*4882a593Smuzhiyun 
928*4882a593Smuzhiyun /*
929*4882a593Smuzhiyun  * Standard operations
930*4882a593Smuzhiyun  */
931*4882a593Smuzhiyun 
932*4882a593Smuzhiyun /*
933*4882a593Smuzhiyun  * sd = sd + (sn * sm)
934*4882a593Smuzhiyun  */
vfp_single_fmac(int sd,int sn,s32 m,u32 fpscr)935*4882a593Smuzhiyun static u32 vfp_single_fmac(int sd, int sn, s32 m, u32 fpscr)
936*4882a593Smuzhiyun {
937*4882a593Smuzhiyun 	return vfp_single_multiply_accumulate(sd, sn, m, fpscr, 0, "fmac");
938*4882a593Smuzhiyun }
939*4882a593Smuzhiyun 
940*4882a593Smuzhiyun /*
941*4882a593Smuzhiyun  * sd = sd - (sn * sm)
942*4882a593Smuzhiyun  */
vfp_single_fnmac(int sd,int sn,s32 m,u32 fpscr)943*4882a593Smuzhiyun static u32 vfp_single_fnmac(int sd, int sn, s32 m, u32 fpscr)
944*4882a593Smuzhiyun {
945*4882a593Smuzhiyun 	return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_MULTIPLY, "fnmac");
946*4882a593Smuzhiyun }
947*4882a593Smuzhiyun 
948*4882a593Smuzhiyun /*
949*4882a593Smuzhiyun  * sd = -sd + (sn * sm)
950*4882a593Smuzhiyun  */
vfp_single_fmsc(int sd,int sn,s32 m,u32 fpscr)951*4882a593Smuzhiyun static u32 vfp_single_fmsc(int sd, int sn, s32 m, u32 fpscr)
952*4882a593Smuzhiyun {
953*4882a593Smuzhiyun 	return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_SUBTRACT, "fmsc");
954*4882a593Smuzhiyun }
955*4882a593Smuzhiyun 
956*4882a593Smuzhiyun /*
957*4882a593Smuzhiyun  * sd = -sd - (sn * sm)
958*4882a593Smuzhiyun  */
vfp_single_fnmsc(int sd,int sn,s32 m,u32 fpscr)959*4882a593Smuzhiyun static u32 vfp_single_fnmsc(int sd, int sn, s32 m, u32 fpscr)
960*4882a593Smuzhiyun {
961*4882a593Smuzhiyun 	return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_SUBTRACT | NEG_MULTIPLY, "fnmsc");
962*4882a593Smuzhiyun }
963*4882a593Smuzhiyun 
964*4882a593Smuzhiyun /*
965*4882a593Smuzhiyun  * sd = sn * sm
966*4882a593Smuzhiyun  */
vfp_single_fmul(int sd,int sn,s32 m,u32 fpscr)967*4882a593Smuzhiyun static u32 vfp_single_fmul(int sd, int sn, s32 m, u32 fpscr)
968*4882a593Smuzhiyun {
969*4882a593Smuzhiyun 	struct vfp_single vsd, vsn, vsm;
970*4882a593Smuzhiyun 	u32 exceptions;
971*4882a593Smuzhiyun 	s32 n = vfp_get_float(sn);
972*4882a593Smuzhiyun 
973*4882a593Smuzhiyun 	pr_debug("VFP: s%u = %08x\n", sn, n);
974*4882a593Smuzhiyun 
975*4882a593Smuzhiyun 	vfp_single_unpack(&vsn, n);
976*4882a593Smuzhiyun 	if (vsn.exponent == 0 && vsn.significand)
977*4882a593Smuzhiyun 		vfp_single_normalise_denormal(&vsn);
978*4882a593Smuzhiyun 
979*4882a593Smuzhiyun 	vfp_single_unpack(&vsm, m);
980*4882a593Smuzhiyun 	if (vsm.exponent == 0 && vsm.significand)
981*4882a593Smuzhiyun 		vfp_single_normalise_denormal(&vsm);
982*4882a593Smuzhiyun 
983*4882a593Smuzhiyun 	exceptions = vfp_single_multiply(&vsd, &vsn, &vsm, fpscr);
984*4882a593Smuzhiyun 	return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fmul");
985*4882a593Smuzhiyun }
986*4882a593Smuzhiyun 
987*4882a593Smuzhiyun /*
988*4882a593Smuzhiyun  * sd = -(sn * sm)
989*4882a593Smuzhiyun  */
vfp_single_fnmul(int sd,int sn,s32 m,u32 fpscr)990*4882a593Smuzhiyun static u32 vfp_single_fnmul(int sd, int sn, s32 m, u32 fpscr)
991*4882a593Smuzhiyun {
992*4882a593Smuzhiyun 	struct vfp_single vsd, vsn, vsm;
993*4882a593Smuzhiyun 	u32 exceptions;
994*4882a593Smuzhiyun 	s32 n = vfp_get_float(sn);
995*4882a593Smuzhiyun 
996*4882a593Smuzhiyun 	pr_debug("VFP: s%u = %08x\n", sn, n);
997*4882a593Smuzhiyun 
998*4882a593Smuzhiyun 	vfp_single_unpack(&vsn, n);
999*4882a593Smuzhiyun 	if (vsn.exponent == 0 && vsn.significand)
1000*4882a593Smuzhiyun 		vfp_single_normalise_denormal(&vsn);
1001*4882a593Smuzhiyun 
1002*4882a593Smuzhiyun 	vfp_single_unpack(&vsm, m);
1003*4882a593Smuzhiyun 	if (vsm.exponent == 0 && vsm.significand)
1004*4882a593Smuzhiyun 		vfp_single_normalise_denormal(&vsm);
1005*4882a593Smuzhiyun 
1006*4882a593Smuzhiyun 	exceptions = vfp_single_multiply(&vsd, &vsn, &vsm, fpscr);
1007*4882a593Smuzhiyun 	vsd.sign = vfp_sign_negate(vsd.sign);
1008*4882a593Smuzhiyun 	return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fnmul");
1009*4882a593Smuzhiyun }
1010*4882a593Smuzhiyun 
1011*4882a593Smuzhiyun /*
1012*4882a593Smuzhiyun  * sd = sn + sm
1013*4882a593Smuzhiyun  */
vfp_single_fadd(int sd,int sn,s32 m,u32 fpscr)1014*4882a593Smuzhiyun static u32 vfp_single_fadd(int sd, int sn, s32 m, u32 fpscr)
1015*4882a593Smuzhiyun {
1016*4882a593Smuzhiyun 	struct vfp_single vsd, vsn, vsm;
1017*4882a593Smuzhiyun 	u32 exceptions;
1018*4882a593Smuzhiyun 	s32 n = vfp_get_float(sn);
1019*4882a593Smuzhiyun 
1020*4882a593Smuzhiyun 	pr_debug("VFP: s%u = %08x\n", sn, n);
1021*4882a593Smuzhiyun 
1022*4882a593Smuzhiyun 	/*
1023*4882a593Smuzhiyun 	 * Unpack and normalise denormals.
1024*4882a593Smuzhiyun 	 */
1025*4882a593Smuzhiyun 	vfp_single_unpack(&vsn, n);
1026*4882a593Smuzhiyun 	if (vsn.exponent == 0 && vsn.significand)
1027*4882a593Smuzhiyun 		vfp_single_normalise_denormal(&vsn);
1028*4882a593Smuzhiyun 
1029*4882a593Smuzhiyun 	vfp_single_unpack(&vsm, m);
1030*4882a593Smuzhiyun 	if (vsm.exponent == 0 && vsm.significand)
1031*4882a593Smuzhiyun 		vfp_single_normalise_denormal(&vsm);
1032*4882a593Smuzhiyun 
1033*4882a593Smuzhiyun 	exceptions = vfp_single_add(&vsd, &vsn, &vsm, fpscr);
1034*4882a593Smuzhiyun 
1035*4882a593Smuzhiyun 	return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fadd");
1036*4882a593Smuzhiyun }
1037*4882a593Smuzhiyun 
1038*4882a593Smuzhiyun /*
1039*4882a593Smuzhiyun  * sd = sn - sm
1040*4882a593Smuzhiyun  */
vfp_single_fsub(int sd,int sn,s32 m,u32 fpscr)1041*4882a593Smuzhiyun static u32 vfp_single_fsub(int sd, int sn, s32 m, u32 fpscr)
1042*4882a593Smuzhiyun {
1043*4882a593Smuzhiyun 	/*
1044*4882a593Smuzhiyun 	 * Subtraction is addition with one sign inverted.
1045*4882a593Smuzhiyun 	 */
1046*4882a593Smuzhiyun 	return vfp_single_fadd(sd, sn, vfp_single_packed_negate(m), fpscr);
1047*4882a593Smuzhiyun }
1048*4882a593Smuzhiyun 
/*
 * fdiv: sd = sn / sm
 *
 * Unpacks both operands, filters out all the IEEE 754 special cases
 * (NaNs, infinities, zeros) via the goto ladder below, and for two
 * finite non-zero numbers performs a 64/32-bit division on the
 * significands.  A sticky bit is "jammed" into the quotient's LSB
 * when the division was inexact so final rounding is correct.
 */
static u32 vfp_single_fdiv(int sd, int sn, s32 m, u32 fpscr)
{
	struct vfp_single vsd, vsn, vsm;
	u32 exceptions = 0;
	s32 n = vfp_get_float(sn);
	int tm, tn;

	pr_debug("VFP: s%u = %08x\n", sn, n);

	vfp_single_unpack(&vsn, n);
	vfp_single_unpack(&vsm, m);

	vsd.sign = vsn.sign ^ vsm.sign;

	tn = vfp_single_type(&vsn);
	tm = vfp_single_type(&vsm);

	/*
	 * Is n a NAN?
	 */
	if (tn & VFP_NAN)
		goto vsn_nan;

	/*
	 * Is m a NAN?
	 */
	if (tm & VFP_NAN)
		goto vsm_nan;

	/*
	 * If n and m are infinity, the result is invalid
	 * If n and m are zero, the result is invalid
	 */
	if (tm & tn & (VFP_INFINITY|VFP_ZERO))
		goto invalid;

	/*
	 * If n is infinity, the result is infinity
	 */
	if (tn & VFP_INFINITY)
		goto infinity;

	/*
	 * If m is zero, raise div0 exception
	 */
	if (tm & VFP_ZERO)
		goto divzero;

	/*
	 * If m is infinity, or n is zero, the result is zero
	 */
	if (tm & VFP_INFINITY || tn & VFP_ZERO)
		goto zero;

	if (tn & VFP_DENORMAL)
		vfp_single_normalise_denormal(&vsn);
	if (tm & VFP_DENORMAL)
		vfp_single_normalise_denormal(&vsm);

	/*
	 * Ok, we have two numbers, we can perform division.
	 */
	vsd.exponent = vsn.exponent - vsm.exponent + 127 - 1;
	vsm.significand <<= 1;
	if (vsm.significand <= (2 * vsn.significand)) {
		/* keep the quotient below 1.0: halve n, bump the exponent */
		vsn.significand >>= 1;
		vsd.exponent++;
	}
	{
		u64 significand = (u64)vsn.significand << 32;
		do_div(significand, vsm.significand);
		vsd.significand = significand;
	}
	/*
	 * A quotient with all-zero low bits may still be inexact; set
	 * the sticky LSB when m * quotient does not reproduce n exactly.
	 */
	if ((vsd.significand & 0x3f) == 0)
		vsd.significand |= ((u64)vsm.significand * vsd.significand != (u64)vsn.significand << 32);

	return vfp_single_normaliseround(sd, &vsd, fpscr, 0, "fdiv");

 vsn_nan:
	exceptions = vfp_propagate_nan(&vsd, &vsn, &vsm, fpscr);
 pack:
	vfp_put_float(vfp_single_pack(&vsd), sd);
	return exceptions;

 vsm_nan:
	exceptions = vfp_propagate_nan(&vsd, &vsm, &vsn, fpscr);
	goto pack;

 zero:
	vsd.exponent = 0;
	vsd.significand = 0;
	goto pack;

 divzero:
	exceptions = FPSCR_DZC;
	/* fall through: a division by zero also delivers an infinity */
 infinity:
	vsd.exponent = 255;
	vsd.significand = 0;
	goto pack;

 invalid:
	vfp_put_float(vfp_single_pack(&vfp_single_default_qnan), sd);
	return FPSCR_IOC;
}
1156*4882a593Smuzhiyun 
/*
 * Dispatch table for the dyadic/triadic single-precision operations,
 * indexed by FOP_TO_IDX(opcode).  Unlisted entries are zero-filled
 * and rejected as undefined by vfp_single_cpdo().
 */
static struct op fops[16] = {
	[FOP_TO_IDX(FOP_FMAC)]	= { vfp_single_fmac,  0 },
	[FOP_TO_IDX(FOP_FNMAC)]	= { vfp_single_fnmac, 0 },
	[FOP_TO_IDX(FOP_FMSC)]	= { vfp_single_fmsc,  0 },
	[FOP_TO_IDX(FOP_FNMSC)]	= { vfp_single_fnmsc, 0 },
	[FOP_TO_IDX(FOP_FMUL)]	= { vfp_single_fmul,  0 },
	[FOP_TO_IDX(FOP_FNMUL)]	= { vfp_single_fnmul, 0 },
	[FOP_TO_IDX(FOP_FADD)]	= { vfp_single_fadd,  0 },
	[FOP_TO_IDX(FOP_FSUB)]	= { vfp_single_fsub,  0 },
	[FOP_TO_IDX(FOP_FDIV)]	= { vfp_single_fdiv,  0 },
};

/* Split a register number into its 8-register bank and index within it. */
#define FREG_BANK(x)	((x) & 0x18)
#define FREG_IDX(x)	((x) & 7)
1171*4882a593Smuzhiyun 
/*
 * Decode and execute one single-precision VFP data-processing
 * instruction, honouring the FPSCR vector length and stride settings.
 *
 * Returns the exception flags accumulated over all vector iterations,
 * or (u32)-1 when the opcode has no handler (undefined instruction).
 */
u32 vfp_single_cpdo(u32 inst, u32 fpscr)
{
	u32 op = inst & FOP_MASK;
	u32 exceptions = 0;
	unsigned int dest;
	unsigned int sn = vfp_get_sn(inst);
	unsigned int sm = vfp_get_sm(inst);
	unsigned int vecitr, veclen, vecstride;
	struct op *fop;

	/* stride is 1 or 2 registers per vector iteration */
	vecstride = 1 + ((fpscr & FPSCR_STRIDE_MASK) == FPSCR_STRIDE_MASK);

	fop = (op == FOP_EXT) ? &fops_ext[FEXT_TO_IDX(inst)] : &fops[FOP_TO_IDX(op)];

	/*
	 * fcvtsd takes a dN register number as destination, not sN.
	 * Technically, if bit 0 of dd is set, this is an invalid
	 * instruction.  However, we ignore this for efficiency.
	 * It also only operates on scalars.
	 */
	if (fop->flags & OP_DD)
		dest = vfp_get_dd(inst);
	else
		dest = vfp_get_sd(inst);

	/*
	 * If destination bank is zero, vector length is always '1'.
	 * ARM DDI0100F C5.1.3, C5.3.2.
	 */
	if ((fop->flags & OP_SCALAR) || FREG_BANK(dest) == 0)
		veclen = 0;
	else
		veclen = fpscr & FPSCR_LENGTH_MASK;

	pr_debug("VFP: vecstride=%u veclen=%u\n", vecstride,
		 (veclen >> FPSCR_LENGTH_BIT) + 1);

	if (!fop->fn)
		goto invalid;

	for (vecitr = 0; vecitr <= veclen; vecitr += 1 << FPSCR_LENGTH_BIT) {
		s32 m = vfp_get_float(sm);
		u32 except;
		char type;

		type = fop->flags & OP_DD ? 'd' : 's';
		if (op == FOP_EXT)
			pr_debug("VFP: itr%d (%c%u) = op[%u] (s%u=%08x)\n",
				 vecitr >> FPSCR_LENGTH_BIT, type, dest, sn,
				 sm, m);
		else
			pr_debug("VFP: itr%d (%c%u) = (s%u) op[%u] (s%u=%08x)\n",
				 vecitr >> FPSCR_LENGTH_BIT, type, dest, sn,
				 FOP_TO_IDX(op), sm, m);

		except = fop->fn(dest, sn, m, fpscr);
		pr_debug("VFP: itr%d: exceptions=%08x\n",
			 vecitr >> FPSCR_LENGTH_BIT, except);

		exceptions |= except;

		/*
		 * CHECK: It appears to be undefined whether we stop when
		 * we encounter an exception.  We continue.
		 */

		/*
		 * Step each operand to the next register, wrapping
		 * within its 8-register bank.  A scalar sm (bank 0) is
		 * reused unchanged on every iteration.
		 */
		dest = FREG_BANK(dest) + ((FREG_IDX(dest) + vecstride) & 7);
		sn = FREG_BANK(sn) + ((FREG_IDX(sn) + vecstride) & 7);
		if (FREG_BANK(sm) != 0)
			sm = FREG_BANK(sm) + ((FREG_IDX(sm) + vecstride) & 7);
	}
	return exceptions;

 invalid:
	return (u32)-1;
}
1247