xref: /OK3568_Linux_fs/kernel/arch/powerpc/math-emu/math_efp.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-or-later
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun  * arch/powerpc/math-emu/math_efp.c
4*4882a593Smuzhiyun  *
5*4882a593Smuzhiyun  * Copyright (C) 2006-2008, 2010 Freescale Semiconductor, Inc.
6*4882a593Smuzhiyun  *
7*4882a593Smuzhiyun  * Author: Ebony Zhu,	<ebony.zhu@freescale.com>
8*4882a593Smuzhiyun  *         Yu Liu,	<yu.liu@freescale.com>
9*4882a593Smuzhiyun  *
10*4882a593Smuzhiyun  * Derived from arch/alpha/math-emu/math.c
11*4882a593Smuzhiyun  *              arch/powerpc/math-emu/math.c
12*4882a593Smuzhiyun  *
13*4882a593Smuzhiyun  * Description:
14*4882a593Smuzhiyun  * This file is the exception handler to make E500 SPE instructions
15*4882a593Smuzhiyun  * fully comply with IEEE-754 floating point standard.
16*4882a593Smuzhiyun  */
17*4882a593Smuzhiyun 
18*4882a593Smuzhiyun #include <linux/types.h>
19*4882a593Smuzhiyun #include <linux/prctl.h>
20*4882a593Smuzhiyun #include <linux/module.h>
21*4882a593Smuzhiyun 
22*4882a593Smuzhiyun #include <linux/uaccess.h>
23*4882a593Smuzhiyun #include <asm/reg.h>
24*4882a593Smuzhiyun 
25*4882a593Smuzhiyun #define FP_EX_BOOKE_E500_SPE
26*4882a593Smuzhiyun #include <asm/sfp-machine.h>
27*4882a593Smuzhiyun 
28*4882a593Smuzhiyun #include <math-emu/soft-fp.h>
29*4882a593Smuzhiyun #include <math-emu/single.h>
30*4882a593Smuzhiyun #include <math-emu/double.h>
31*4882a593Smuzhiyun 
/* Primary opcode (instruction bits 26-31) shared by all emulated insns. */
#define EFAPU		0x4

/*
 * Operand class, taken from bits 5-7 of the instruction word
 * ((speinsn >> 5) & 0x7): vector single, scalar single, scalar double.
 */
#define VCT		0x4
#define SPFP		0x6
#define DPFP		0x7

/* Scalar single-precision opcodes (low 11 bits of the instruction). */
#define EFSADD		0x2c0
#define EFSSUB		0x2c1
#define EFSABS		0x2c4
#define EFSNABS		0x2c5
#define EFSNEG		0x2c6
#define EFSMUL		0x2c8
#define EFSDIV		0x2c9
#define EFSCMPGT	0x2cc
#define EFSCMPLT	0x2cd
#define EFSCMPEQ	0x2ce
#define EFSCFD		0x2cf
#define EFSCFSI		0x2d1
#define EFSCTUI		0x2d4
#define EFSCTSI		0x2d5
#define EFSCTUF		0x2d6
#define EFSCTSF		0x2d7
#define EFSCTUIZ	0x2d8
#define EFSCTSIZ	0x2da

/* Vector single-precision opcodes (low 11 bits of the instruction). */
#define EVFSADD		0x280
#define EVFSSUB		0x281
#define EVFSABS		0x284
#define EVFSNABS	0x285
#define EVFSNEG		0x286
#define EVFSMUL		0x288
#define EVFSDIV		0x289
#define EVFSCMPGT	0x28c
#define EVFSCMPLT	0x28d
#define EVFSCMPEQ	0x28e
#define EVFSCTUI	0x294
#define EVFSCTSI	0x295
#define EVFSCTUF	0x296
#define EVFSCTSF	0x297
#define EVFSCTUIZ	0x298
#define EVFSCTSIZ	0x29a

/* Scalar double-precision opcodes (low 11 bits of the instruction). */
#define EFDADD		0x2e0
#define EFDSUB		0x2e1
#define EFDABS		0x2e4
#define EFDNABS		0x2e5
#define EFDNEG		0x2e6
#define EFDMUL		0x2e8
#define EFDDIV		0x2e9
#define EFDCTUIDZ	0x2ea
#define EFDCTSIDZ	0x2eb
#define EFDCMPGT	0x2ec
#define EFDCMPLT	0x2ed
#define EFDCMPEQ	0x2ee
#define EFDCFS		0x2ef
#define EFDCTUI		0x2f4
#define EFDCTSI		0x2f5
#define EFDCTUF		0x2f6
#define EFDCTSF		0x2f7
#define EFDCTUIZ	0x2f8
#define EFDCTSIZ	0x2fa

/*
 * Operand-usage classes returned by insn_type():
 *   AB     - operands A and B are both unpacked, result goes to a register
 *   XA     - only operand A is used
 *   XB     - only operand B is used
 *   XCR    - operands A and B are compared, result goes to a CR field
 *   NOTYPE - opcode is not handled by this file
 */
#define AB	2
#define XA	3
#define XB	4
#define XCR	5
#define NOTYPE	0

/* Sign bit of a 32-bit single and of a 64-bit double. */
#define SIGN_BIT_S	(1UL << 31)
#define SIGN_BIT_D	(1ULL << 63)
/* Exception bits that are copied from the emulation result into SPEFSCR. */
#define FP_EX_MASK	(FP_EX_INEXACT | FP_EX_INVALID | FP_EX_DIVZERO | \
			FP_EX_UNDERFLOW | FP_EX_OVERFLOW)
104*4882a593Smuzhiyun 
105*4882a593Smuzhiyun static int have_e500_cpu_a005_erratum;
106*4882a593Smuzhiyun 
107*4882a593Smuzhiyun union dw_union {
108*4882a593Smuzhiyun 	u64 dp[1];
109*4882a593Smuzhiyun 	u32 wp[2];
110*4882a593Smuzhiyun };
111*4882a593Smuzhiyun 
insn_type(unsigned long speinsn)112*4882a593Smuzhiyun static unsigned long insn_type(unsigned long speinsn)
113*4882a593Smuzhiyun {
114*4882a593Smuzhiyun 	unsigned long ret = NOTYPE;
115*4882a593Smuzhiyun 
116*4882a593Smuzhiyun 	switch (speinsn & 0x7ff) {
117*4882a593Smuzhiyun 	case EFSABS:	ret = XA;	break;
118*4882a593Smuzhiyun 	case EFSADD:	ret = AB;	break;
119*4882a593Smuzhiyun 	case EFSCFD:	ret = XB;	break;
120*4882a593Smuzhiyun 	case EFSCMPEQ:	ret = XCR;	break;
121*4882a593Smuzhiyun 	case EFSCMPGT:	ret = XCR;	break;
122*4882a593Smuzhiyun 	case EFSCMPLT:	ret = XCR;	break;
123*4882a593Smuzhiyun 	case EFSCTSF:	ret = XB;	break;
124*4882a593Smuzhiyun 	case EFSCTSI:	ret = XB;	break;
125*4882a593Smuzhiyun 	case EFSCTSIZ:	ret = XB;	break;
126*4882a593Smuzhiyun 	case EFSCTUF:	ret = XB;	break;
127*4882a593Smuzhiyun 	case EFSCTUI:	ret = XB;	break;
128*4882a593Smuzhiyun 	case EFSCTUIZ:	ret = XB;	break;
129*4882a593Smuzhiyun 	case EFSDIV:	ret = AB;	break;
130*4882a593Smuzhiyun 	case EFSMUL:	ret = AB;	break;
131*4882a593Smuzhiyun 	case EFSNABS:	ret = XA;	break;
132*4882a593Smuzhiyun 	case EFSNEG:	ret = XA;	break;
133*4882a593Smuzhiyun 	case EFSSUB:	ret = AB;	break;
134*4882a593Smuzhiyun 	case EFSCFSI:	ret = XB;	break;
135*4882a593Smuzhiyun 
136*4882a593Smuzhiyun 	case EVFSABS:	ret = XA;	break;
137*4882a593Smuzhiyun 	case EVFSADD:	ret = AB;	break;
138*4882a593Smuzhiyun 	case EVFSCMPEQ:	ret = XCR;	break;
139*4882a593Smuzhiyun 	case EVFSCMPGT:	ret = XCR;	break;
140*4882a593Smuzhiyun 	case EVFSCMPLT:	ret = XCR;	break;
141*4882a593Smuzhiyun 	case EVFSCTSF:	ret = XB;	break;
142*4882a593Smuzhiyun 	case EVFSCTSI:	ret = XB;	break;
143*4882a593Smuzhiyun 	case EVFSCTSIZ:	ret = XB;	break;
144*4882a593Smuzhiyun 	case EVFSCTUF:	ret = XB;	break;
145*4882a593Smuzhiyun 	case EVFSCTUI:	ret = XB;	break;
146*4882a593Smuzhiyun 	case EVFSCTUIZ:	ret = XB;	break;
147*4882a593Smuzhiyun 	case EVFSDIV:	ret = AB;	break;
148*4882a593Smuzhiyun 	case EVFSMUL:	ret = AB;	break;
149*4882a593Smuzhiyun 	case EVFSNABS:	ret = XA;	break;
150*4882a593Smuzhiyun 	case EVFSNEG:	ret = XA;	break;
151*4882a593Smuzhiyun 	case EVFSSUB:	ret = AB;	break;
152*4882a593Smuzhiyun 
153*4882a593Smuzhiyun 	case EFDABS:	ret = XA;	break;
154*4882a593Smuzhiyun 	case EFDADD:	ret = AB;	break;
155*4882a593Smuzhiyun 	case EFDCFS:	ret = XB;	break;
156*4882a593Smuzhiyun 	case EFDCMPEQ:	ret = XCR;	break;
157*4882a593Smuzhiyun 	case EFDCMPGT:	ret = XCR;	break;
158*4882a593Smuzhiyun 	case EFDCMPLT:	ret = XCR;	break;
159*4882a593Smuzhiyun 	case EFDCTSF:	ret = XB;	break;
160*4882a593Smuzhiyun 	case EFDCTSI:	ret = XB;	break;
161*4882a593Smuzhiyun 	case EFDCTSIDZ:	ret = XB;	break;
162*4882a593Smuzhiyun 	case EFDCTSIZ:	ret = XB;	break;
163*4882a593Smuzhiyun 	case EFDCTUF:	ret = XB;	break;
164*4882a593Smuzhiyun 	case EFDCTUI:	ret = XB;	break;
165*4882a593Smuzhiyun 	case EFDCTUIDZ:	ret = XB;	break;
166*4882a593Smuzhiyun 	case EFDCTUIZ:	ret = XB;	break;
167*4882a593Smuzhiyun 	case EFDDIV:	ret = AB;	break;
168*4882a593Smuzhiyun 	case EFDMUL:	ret = AB;	break;
169*4882a593Smuzhiyun 	case EFDNABS:	ret = XA;	break;
170*4882a593Smuzhiyun 	case EFDNEG:	ret = XA;	break;
171*4882a593Smuzhiyun 	case EFDSUB:	ret = AB;	break;
172*4882a593Smuzhiyun 	}
173*4882a593Smuzhiyun 
174*4882a593Smuzhiyun 	return ret;
175*4882a593Smuzhiyun }
176*4882a593Smuzhiyun 
do_spe_mathemu(struct pt_regs * regs)177*4882a593Smuzhiyun int do_spe_mathemu(struct pt_regs *regs)
178*4882a593Smuzhiyun {
179*4882a593Smuzhiyun 	FP_DECL_EX;
180*4882a593Smuzhiyun 	int IR, cmp;
181*4882a593Smuzhiyun 
182*4882a593Smuzhiyun 	unsigned long type, func, fc, fa, fb, src, speinsn;
183*4882a593Smuzhiyun 	union dw_union vc, va, vb;
184*4882a593Smuzhiyun 
185*4882a593Smuzhiyun 	if (get_user(speinsn, (unsigned int __user *) regs->nip))
186*4882a593Smuzhiyun 		return -EFAULT;
187*4882a593Smuzhiyun 	if ((speinsn >> 26) != EFAPU)
188*4882a593Smuzhiyun 		return -EINVAL;         /* not an spe instruction */
189*4882a593Smuzhiyun 
190*4882a593Smuzhiyun 	type = insn_type(speinsn);
191*4882a593Smuzhiyun 	if (type == NOTYPE)
192*4882a593Smuzhiyun 		goto illegal;
193*4882a593Smuzhiyun 
194*4882a593Smuzhiyun 	func = speinsn & 0x7ff;
195*4882a593Smuzhiyun 	fc = (speinsn >> 21) & 0x1f;
196*4882a593Smuzhiyun 	fa = (speinsn >> 16) & 0x1f;
197*4882a593Smuzhiyun 	fb = (speinsn >> 11) & 0x1f;
198*4882a593Smuzhiyun 	src = (speinsn >> 5) & 0x7;
199*4882a593Smuzhiyun 
200*4882a593Smuzhiyun 	vc.wp[0] = current->thread.evr[fc];
201*4882a593Smuzhiyun 	vc.wp[1] = regs->gpr[fc];
202*4882a593Smuzhiyun 	va.wp[0] = current->thread.evr[fa];
203*4882a593Smuzhiyun 	va.wp[1] = regs->gpr[fa];
204*4882a593Smuzhiyun 	vb.wp[0] = current->thread.evr[fb];
205*4882a593Smuzhiyun 	vb.wp[1] = regs->gpr[fb];
206*4882a593Smuzhiyun 
207*4882a593Smuzhiyun 	__FPU_FPSCR = mfspr(SPRN_SPEFSCR);
208*4882a593Smuzhiyun 
209*4882a593Smuzhiyun 	pr_debug("speinsn:%08lx spefscr:%08lx\n", speinsn, __FPU_FPSCR);
210*4882a593Smuzhiyun 	pr_debug("vc: %08x  %08x\n", vc.wp[0], vc.wp[1]);
211*4882a593Smuzhiyun 	pr_debug("va: %08x  %08x\n", va.wp[0], va.wp[1]);
212*4882a593Smuzhiyun 	pr_debug("vb: %08x  %08x\n", vb.wp[0], vb.wp[1]);
213*4882a593Smuzhiyun 
214*4882a593Smuzhiyun 	switch (src) {
215*4882a593Smuzhiyun 	case SPFP: {
216*4882a593Smuzhiyun 		FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR);
217*4882a593Smuzhiyun 
218*4882a593Smuzhiyun 		switch (type) {
219*4882a593Smuzhiyun 		case AB:
220*4882a593Smuzhiyun 		case XCR:
221*4882a593Smuzhiyun 			FP_UNPACK_SP(SA, va.wp + 1);
222*4882a593Smuzhiyun 		case XB:
223*4882a593Smuzhiyun 			FP_UNPACK_SP(SB, vb.wp + 1);
224*4882a593Smuzhiyun 			break;
225*4882a593Smuzhiyun 		case XA:
226*4882a593Smuzhiyun 			FP_UNPACK_SP(SA, va.wp + 1);
227*4882a593Smuzhiyun 			break;
228*4882a593Smuzhiyun 		}
229*4882a593Smuzhiyun 
230*4882a593Smuzhiyun 		pr_debug("SA: %ld %08lx %ld (%ld)\n", SA_s, SA_f, SA_e, SA_c);
231*4882a593Smuzhiyun 		pr_debug("SB: %ld %08lx %ld (%ld)\n", SB_s, SB_f, SB_e, SB_c);
232*4882a593Smuzhiyun 
233*4882a593Smuzhiyun 		switch (func) {
234*4882a593Smuzhiyun 		case EFSABS:
235*4882a593Smuzhiyun 			vc.wp[1] = va.wp[1] & ~SIGN_BIT_S;
236*4882a593Smuzhiyun 			goto update_regs;
237*4882a593Smuzhiyun 
238*4882a593Smuzhiyun 		case EFSNABS:
239*4882a593Smuzhiyun 			vc.wp[1] = va.wp[1] | SIGN_BIT_S;
240*4882a593Smuzhiyun 			goto update_regs;
241*4882a593Smuzhiyun 
242*4882a593Smuzhiyun 		case EFSNEG:
243*4882a593Smuzhiyun 			vc.wp[1] = va.wp[1] ^ SIGN_BIT_S;
244*4882a593Smuzhiyun 			goto update_regs;
245*4882a593Smuzhiyun 
246*4882a593Smuzhiyun 		case EFSADD:
247*4882a593Smuzhiyun 			FP_ADD_S(SR, SA, SB);
248*4882a593Smuzhiyun 			goto pack_s;
249*4882a593Smuzhiyun 
250*4882a593Smuzhiyun 		case EFSSUB:
251*4882a593Smuzhiyun 			FP_SUB_S(SR, SA, SB);
252*4882a593Smuzhiyun 			goto pack_s;
253*4882a593Smuzhiyun 
254*4882a593Smuzhiyun 		case EFSMUL:
255*4882a593Smuzhiyun 			FP_MUL_S(SR, SA, SB);
256*4882a593Smuzhiyun 			goto pack_s;
257*4882a593Smuzhiyun 
258*4882a593Smuzhiyun 		case EFSDIV:
259*4882a593Smuzhiyun 			FP_DIV_S(SR, SA, SB);
260*4882a593Smuzhiyun 			goto pack_s;
261*4882a593Smuzhiyun 
262*4882a593Smuzhiyun 		case EFSCMPEQ:
263*4882a593Smuzhiyun 			cmp = 0;
264*4882a593Smuzhiyun 			goto cmp_s;
265*4882a593Smuzhiyun 
266*4882a593Smuzhiyun 		case EFSCMPGT:
267*4882a593Smuzhiyun 			cmp = 1;
268*4882a593Smuzhiyun 			goto cmp_s;
269*4882a593Smuzhiyun 
270*4882a593Smuzhiyun 		case EFSCMPLT:
271*4882a593Smuzhiyun 			cmp = -1;
272*4882a593Smuzhiyun 			goto cmp_s;
273*4882a593Smuzhiyun 
274*4882a593Smuzhiyun 		case EFSCTSF:
275*4882a593Smuzhiyun 		case EFSCTUF:
276*4882a593Smuzhiyun 			if (SB_c == FP_CLS_NAN) {
277*4882a593Smuzhiyun 				vc.wp[1] = 0;
278*4882a593Smuzhiyun 				FP_SET_EXCEPTION(FP_EX_INVALID);
279*4882a593Smuzhiyun 			} else {
280*4882a593Smuzhiyun 				SB_e += (func == EFSCTSF ? 31 : 32);
281*4882a593Smuzhiyun 				FP_TO_INT_ROUND_S(vc.wp[1], SB, 32,
282*4882a593Smuzhiyun 						(func == EFSCTSF));
283*4882a593Smuzhiyun 			}
284*4882a593Smuzhiyun 			goto update_regs;
285*4882a593Smuzhiyun 
286*4882a593Smuzhiyun 		case EFSCFD: {
287*4882a593Smuzhiyun 			FP_DECL_D(DB);
288*4882a593Smuzhiyun 			FP_CLEAR_EXCEPTIONS;
289*4882a593Smuzhiyun 			FP_UNPACK_DP(DB, vb.dp);
290*4882a593Smuzhiyun 
291*4882a593Smuzhiyun 			pr_debug("DB: %ld %08lx %08lx %ld (%ld)\n",
292*4882a593Smuzhiyun 					DB_s, DB_f1, DB_f0, DB_e, DB_c);
293*4882a593Smuzhiyun 
294*4882a593Smuzhiyun 			FP_CONV(S, D, 1, 2, SR, DB);
295*4882a593Smuzhiyun 			goto pack_s;
296*4882a593Smuzhiyun 		}
297*4882a593Smuzhiyun 
298*4882a593Smuzhiyun 		case EFSCTSI:
299*4882a593Smuzhiyun 		case EFSCTUI:
300*4882a593Smuzhiyun 			if (SB_c == FP_CLS_NAN) {
301*4882a593Smuzhiyun 				vc.wp[1] = 0;
302*4882a593Smuzhiyun 				FP_SET_EXCEPTION(FP_EX_INVALID);
303*4882a593Smuzhiyun 			} else {
304*4882a593Smuzhiyun 				FP_TO_INT_ROUND_S(vc.wp[1], SB, 32,
305*4882a593Smuzhiyun 						((func & 0x3) != 0));
306*4882a593Smuzhiyun 			}
307*4882a593Smuzhiyun 			goto update_regs;
308*4882a593Smuzhiyun 
309*4882a593Smuzhiyun 		case EFSCTSIZ:
310*4882a593Smuzhiyun 		case EFSCTUIZ:
311*4882a593Smuzhiyun 			if (SB_c == FP_CLS_NAN) {
312*4882a593Smuzhiyun 				vc.wp[1] = 0;
313*4882a593Smuzhiyun 				FP_SET_EXCEPTION(FP_EX_INVALID);
314*4882a593Smuzhiyun 			} else {
315*4882a593Smuzhiyun 				FP_TO_INT_S(vc.wp[1], SB, 32,
316*4882a593Smuzhiyun 						((func & 0x3) != 0));
317*4882a593Smuzhiyun 			}
318*4882a593Smuzhiyun 			goto update_regs;
319*4882a593Smuzhiyun 
320*4882a593Smuzhiyun 		default:
321*4882a593Smuzhiyun 			goto illegal;
322*4882a593Smuzhiyun 		}
323*4882a593Smuzhiyun 		break;
324*4882a593Smuzhiyun 
325*4882a593Smuzhiyun pack_s:
326*4882a593Smuzhiyun 		pr_debug("SR: %ld %08lx %ld (%ld)\n", SR_s, SR_f, SR_e, SR_c);
327*4882a593Smuzhiyun 
328*4882a593Smuzhiyun 		FP_PACK_SP(vc.wp + 1, SR);
329*4882a593Smuzhiyun 		goto update_regs;
330*4882a593Smuzhiyun 
331*4882a593Smuzhiyun cmp_s:
332*4882a593Smuzhiyun 		FP_CMP_S(IR, SA, SB, 3);
333*4882a593Smuzhiyun 		if (IR == 3 && (FP_ISSIGNAN_S(SA) || FP_ISSIGNAN_S(SB)))
334*4882a593Smuzhiyun 			FP_SET_EXCEPTION(FP_EX_INVALID);
335*4882a593Smuzhiyun 		if (IR == cmp) {
336*4882a593Smuzhiyun 			IR = 0x4;
337*4882a593Smuzhiyun 		} else {
338*4882a593Smuzhiyun 			IR = 0;
339*4882a593Smuzhiyun 		}
340*4882a593Smuzhiyun 		goto update_ccr;
341*4882a593Smuzhiyun 	}
342*4882a593Smuzhiyun 
343*4882a593Smuzhiyun 	case DPFP: {
344*4882a593Smuzhiyun 		FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DR);
345*4882a593Smuzhiyun 
346*4882a593Smuzhiyun 		switch (type) {
347*4882a593Smuzhiyun 		case AB:
348*4882a593Smuzhiyun 		case XCR:
349*4882a593Smuzhiyun 			FP_UNPACK_DP(DA, va.dp);
350*4882a593Smuzhiyun 		case XB:
351*4882a593Smuzhiyun 			FP_UNPACK_DP(DB, vb.dp);
352*4882a593Smuzhiyun 			break;
353*4882a593Smuzhiyun 		case XA:
354*4882a593Smuzhiyun 			FP_UNPACK_DP(DA, va.dp);
355*4882a593Smuzhiyun 			break;
356*4882a593Smuzhiyun 		}
357*4882a593Smuzhiyun 
358*4882a593Smuzhiyun 		pr_debug("DA: %ld %08lx %08lx %ld (%ld)\n",
359*4882a593Smuzhiyun 				DA_s, DA_f1, DA_f0, DA_e, DA_c);
360*4882a593Smuzhiyun 		pr_debug("DB: %ld %08lx %08lx %ld (%ld)\n",
361*4882a593Smuzhiyun 				DB_s, DB_f1, DB_f0, DB_e, DB_c);
362*4882a593Smuzhiyun 
363*4882a593Smuzhiyun 		switch (func) {
364*4882a593Smuzhiyun 		case EFDABS:
365*4882a593Smuzhiyun 			vc.dp[0] = va.dp[0] & ~SIGN_BIT_D;
366*4882a593Smuzhiyun 			goto update_regs;
367*4882a593Smuzhiyun 
368*4882a593Smuzhiyun 		case EFDNABS:
369*4882a593Smuzhiyun 			vc.dp[0] = va.dp[0] | SIGN_BIT_D;
370*4882a593Smuzhiyun 			goto update_regs;
371*4882a593Smuzhiyun 
372*4882a593Smuzhiyun 		case EFDNEG:
373*4882a593Smuzhiyun 			vc.dp[0] = va.dp[0] ^ SIGN_BIT_D;
374*4882a593Smuzhiyun 			goto update_regs;
375*4882a593Smuzhiyun 
376*4882a593Smuzhiyun 		case EFDADD:
377*4882a593Smuzhiyun 			FP_ADD_D(DR, DA, DB);
378*4882a593Smuzhiyun 			goto pack_d;
379*4882a593Smuzhiyun 
380*4882a593Smuzhiyun 		case EFDSUB:
381*4882a593Smuzhiyun 			FP_SUB_D(DR, DA, DB);
382*4882a593Smuzhiyun 			goto pack_d;
383*4882a593Smuzhiyun 
384*4882a593Smuzhiyun 		case EFDMUL:
385*4882a593Smuzhiyun 			FP_MUL_D(DR, DA, DB);
386*4882a593Smuzhiyun 			goto pack_d;
387*4882a593Smuzhiyun 
388*4882a593Smuzhiyun 		case EFDDIV:
389*4882a593Smuzhiyun 			FP_DIV_D(DR, DA, DB);
390*4882a593Smuzhiyun 			goto pack_d;
391*4882a593Smuzhiyun 
392*4882a593Smuzhiyun 		case EFDCMPEQ:
393*4882a593Smuzhiyun 			cmp = 0;
394*4882a593Smuzhiyun 			goto cmp_d;
395*4882a593Smuzhiyun 
396*4882a593Smuzhiyun 		case EFDCMPGT:
397*4882a593Smuzhiyun 			cmp = 1;
398*4882a593Smuzhiyun 			goto cmp_d;
399*4882a593Smuzhiyun 
400*4882a593Smuzhiyun 		case EFDCMPLT:
401*4882a593Smuzhiyun 			cmp = -1;
402*4882a593Smuzhiyun 			goto cmp_d;
403*4882a593Smuzhiyun 
404*4882a593Smuzhiyun 		case EFDCTSF:
405*4882a593Smuzhiyun 		case EFDCTUF:
406*4882a593Smuzhiyun 			if (DB_c == FP_CLS_NAN) {
407*4882a593Smuzhiyun 				vc.wp[1] = 0;
408*4882a593Smuzhiyun 				FP_SET_EXCEPTION(FP_EX_INVALID);
409*4882a593Smuzhiyun 			} else {
410*4882a593Smuzhiyun 				DB_e += (func == EFDCTSF ? 31 : 32);
411*4882a593Smuzhiyun 				FP_TO_INT_ROUND_D(vc.wp[1], DB, 32,
412*4882a593Smuzhiyun 						(func == EFDCTSF));
413*4882a593Smuzhiyun 			}
414*4882a593Smuzhiyun 			goto update_regs;
415*4882a593Smuzhiyun 
416*4882a593Smuzhiyun 		case EFDCFS: {
417*4882a593Smuzhiyun 			FP_DECL_S(SB);
418*4882a593Smuzhiyun 			FP_CLEAR_EXCEPTIONS;
419*4882a593Smuzhiyun 			FP_UNPACK_SP(SB, vb.wp + 1);
420*4882a593Smuzhiyun 
421*4882a593Smuzhiyun 			pr_debug("SB: %ld %08lx %ld (%ld)\n",
422*4882a593Smuzhiyun 					SB_s, SB_f, SB_e, SB_c);
423*4882a593Smuzhiyun 
424*4882a593Smuzhiyun 			FP_CONV(D, S, 2, 1, DR, SB);
425*4882a593Smuzhiyun 			goto pack_d;
426*4882a593Smuzhiyun 		}
427*4882a593Smuzhiyun 
428*4882a593Smuzhiyun 		case EFDCTUIDZ:
429*4882a593Smuzhiyun 		case EFDCTSIDZ:
430*4882a593Smuzhiyun 			if (DB_c == FP_CLS_NAN) {
431*4882a593Smuzhiyun 				vc.dp[0] = 0;
432*4882a593Smuzhiyun 				FP_SET_EXCEPTION(FP_EX_INVALID);
433*4882a593Smuzhiyun 			} else {
434*4882a593Smuzhiyun 				FP_TO_INT_D(vc.dp[0], DB, 64,
435*4882a593Smuzhiyun 						((func & 0x1) == 0));
436*4882a593Smuzhiyun 			}
437*4882a593Smuzhiyun 			goto update_regs;
438*4882a593Smuzhiyun 
439*4882a593Smuzhiyun 		case EFDCTUI:
440*4882a593Smuzhiyun 		case EFDCTSI:
441*4882a593Smuzhiyun 			if (DB_c == FP_CLS_NAN) {
442*4882a593Smuzhiyun 				vc.wp[1] = 0;
443*4882a593Smuzhiyun 				FP_SET_EXCEPTION(FP_EX_INVALID);
444*4882a593Smuzhiyun 			} else {
445*4882a593Smuzhiyun 				FP_TO_INT_ROUND_D(vc.wp[1], DB, 32,
446*4882a593Smuzhiyun 						((func & 0x3) != 0));
447*4882a593Smuzhiyun 			}
448*4882a593Smuzhiyun 			goto update_regs;
449*4882a593Smuzhiyun 
450*4882a593Smuzhiyun 		case EFDCTUIZ:
451*4882a593Smuzhiyun 		case EFDCTSIZ:
452*4882a593Smuzhiyun 			if (DB_c == FP_CLS_NAN) {
453*4882a593Smuzhiyun 				vc.wp[1] = 0;
454*4882a593Smuzhiyun 				FP_SET_EXCEPTION(FP_EX_INVALID);
455*4882a593Smuzhiyun 			} else {
456*4882a593Smuzhiyun 				FP_TO_INT_D(vc.wp[1], DB, 32,
457*4882a593Smuzhiyun 						((func & 0x3) != 0));
458*4882a593Smuzhiyun 			}
459*4882a593Smuzhiyun 			goto update_regs;
460*4882a593Smuzhiyun 
461*4882a593Smuzhiyun 		default:
462*4882a593Smuzhiyun 			goto illegal;
463*4882a593Smuzhiyun 		}
464*4882a593Smuzhiyun 		break;
465*4882a593Smuzhiyun 
466*4882a593Smuzhiyun pack_d:
467*4882a593Smuzhiyun 		pr_debug("DR: %ld %08lx %08lx %ld (%ld)\n",
468*4882a593Smuzhiyun 				DR_s, DR_f1, DR_f0, DR_e, DR_c);
469*4882a593Smuzhiyun 
470*4882a593Smuzhiyun 		FP_PACK_DP(vc.dp, DR);
471*4882a593Smuzhiyun 		goto update_regs;
472*4882a593Smuzhiyun 
473*4882a593Smuzhiyun cmp_d:
474*4882a593Smuzhiyun 		FP_CMP_D(IR, DA, DB, 3);
475*4882a593Smuzhiyun 		if (IR == 3 && (FP_ISSIGNAN_D(DA) || FP_ISSIGNAN_D(DB)))
476*4882a593Smuzhiyun 			FP_SET_EXCEPTION(FP_EX_INVALID);
477*4882a593Smuzhiyun 		if (IR == cmp) {
478*4882a593Smuzhiyun 			IR = 0x4;
479*4882a593Smuzhiyun 		} else {
480*4882a593Smuzhiyun 			IR = 0;
481*4882a593Smuzhiyun 		}
482*4882a593Smuzhiyun 		goto update_ccr;
483*4882a593Smuzhiyun 
484*4882a593Smuzhiyun 	}
485*4882a593Smuzhiyun 
486*4882a593Smuzhiyun 	case VCT: {
487*4882a593Smuzhiyun 		FP_DECL_S(SA0); FP_DECL_S(SB0); FP_DECL_S(SR0);
488*4882a593Smuzhiyun 		FP_DECL_S(SA1); FP_DECL_S(SB1); FP_DECL_S(SR1);
489*4882a593Smuzhiyun 		int IR0, IR1;
490*4882a593Smuzhiyun 
491*4882a593Smuzhiyun 		switch (type) {
492*4882a593Smuzhiyun 		case AB:
493*4882a593Smuzhiyun 		case XCR:
494*4882a593Smuzhiyun 			FP_UNPACK_SP(SA0, va.wp);
495*4882a593Smuzhiyun 			FP_UNPACK_SP(SA1, va.wp + 1);
496*4882a593Smuzhiyun 		case XB:
497*4882a593Smuzhiyun 			FP_UNPACK_SP(SB0, vb.wp);
498*4882a593Smuzhiyun 			FP_UNPACK_SP(SB1, vb.wp + 1);
499*4882a593Smuzhiyun 			break;
500*4882a593Smuzhiyun 		case XA:
501*4882a593Smuzhiyun 			FP_UNPACK_SP(SA0, va.wp);
502*4882a593Smuzhiyun 			FP_UNPACK_SP(SA1, va.wp + 1);
503*4882a593Smuzhiyun 			break;
504*4882a593Smuzhiyun 		}
505*4882a593Smuzhiyun 
506*4882a593Smuzhiyun 		pr_debug("SA0: %ld %08lx %ld (%ld)\n",
507*4882a593Smuzhiyun 				SA0_s, SA0_f, SA0_e, SA0_c);
508*4882a593Smuzhiyun 		pr_debug("SA1: %ld %08lx %ld (%ld)\n",
509*4882a593Smuzhiyun 				SA1_s, SA1_f, SA1_e, SA1_c);
510*4882a593Smuzhiyun 		pr_debug("SB0: %ld %08lx %ld (%ld)\n",
511*4882a593Smuzhiyun 				SB0_s, SB0_f, SB0_e, SB0_c);
512*4882a593Smuzhiyun 		pr_debug("SB1: %ld %08lx %ld (%ld)\n",
513*4882a593Smuzhiyun 				SB1_s, SB1_f, SB1_e, SB1_c);
514*4882a593Smuzhiyun 
515*4882a593Smuzhiyun 		switch (func) {
516*4882a593Smuzhiyun 		case EVFSABS:
517*4882a593Smuzhiyun 			vc.wp[0] = va.wp[0] & ~SIGN_BIT_S;
518*4882a593Smuzhiyun 			vc.wp[1] = va.wp[1] & ~SIGN_BIT_S;
519*4882a593Smuzhiyun 			goto update_regs;
520*4882a593Smuzhiyun 
521*4882a593Smuzhiyun 		case EVFSNABS:
522*4882a593Smuzhiyun 			vc.wp[0] = va.wp[0] | SIGN_BIT_S;
523*4882a593Smuzhiyun 			vc.wp[1] = va.wp[1] | SIGN_BIT_S;
524*4882a593Smuzhiyun 			goto update_regs;
525*4882a593Smuzhiyun 
526*4882a593Smuzhiyun 		case EVFSNEG:
527*4882a593Smuzhiyun 			vc.wp[0] = va.wp[0] ^ SIGN_BIT_S;
528*4882a593Smuzhiyun 			vc.wp[1] = va.wp[1] ^ SIGN_BIT_S;
529*4882a593Smuzhiyun 			goto update_regs;
530*4882a593Smuzhiyun 
531*4882a593Smuzhiyun 		case EVFSADD:
532*4882a593Smuzhiyun 			FP_ADD_S(SR0, SA0, SB0);
533*4882a593Smuzhiyun 			FP_ADD_S(SR1, SA1, SB1);
534*4882a593Smuzhiyun 			goto pack_vs;
535*4882a593Smuzhiyun 
536*4882a593Smuzhiyun 		case EVFSSUB:
537*4882a593Smuzhiyun 			FP_SUB_S(SR0, SA0, SB0);
538*4882a593Smuzhiyun 			FP_SUB_S(SR1, SA1, SB1);
539*4882a593Smuzhiyun 			goto pack_vs;
540*4882a593Smuzhiyun 
541*4882a593Smuzhiyun 		case EVFSMUL:
542*4882a593Smuzhiyun 			FP_MUL_S(SR0, SA0, SB0);
543*4882a593Smuzhiyun 			FP_MUL_S(SR1, SA1, SB1);
544*4882a593Smuzhiyun 			goto pack_vs;
545*4882a593Smuzhiyun 
546*4882a593Smuzhiyun 		case EVFSDIV:
547*4882a593Smuzhiyun 			FP_DIV_S(SR0, SA0, SB0);
548*4882a593Smuzhiyun 			FP_DIV_S(SR1, SA1, SB1);
549*4882a593Smuzhiyun 			goto pack_vs;
550*4882a593Smuzhiyun 
551*4882a593Smuzhiyun 		case EVFSCMPEQ:
552*4882a593Smuzhiyun 			cmp = 0;
553*4882a593Smuzhiyun 			goto cmp_vs;
554*4882a593Smuzhiyun 
555*4882a593Smuzhiyun 		case EVFSCMPGT:
556*4882a593Smuzhiyun 			cmp = 1;
557*4882a593Smuzhiyun 			goto cmp_vs;
558*4882a593Smuzhiyun 
559*4882a593Smuzhiyun 		case EVFSCMPLT:
560*4882a593Smuzhiyun 			cmp = -1;
561*4882a593Smuzhiyun 			goto cmp_vs;
562*4882a593Smuzhiyun 
563*4882a593Smuzhiyun 		case EVFSCTUF:
564*4882a593Smuzhiyun 		case EVFSCTSF:
565*4882a593Smuzhiyun 			if (SB0_c == FP_CLS_NAN) {
566*4882a593Smuzhiyun 				vc.wp[0] = 0;
567*4882a593Smuzhiyun 				FP_SET_EXCEPTION(FP_EX_INVALID);
568*4882a593Smuzhiyun 			} else {
569*4882a593Smuzhiyun 				SB0_e += (func == EVFSCTSF ? 31 : 32);
570*4882a593Smuzhiyun 				FP_TO_INT_ROUND_S(vc.wp[0], SB0, 32,
571*4882a593Smuzhiyun 						(func == EVFSCTSF));
572*4882a593Smuzhiyun 			}
573*4882a593Smuzhiyun 			if (SB1_c == FP_CLS_NAN) {
574*4882a593Smuzhiyun 				vc.wp[1] = 0;
575*4882a593Smuzhiyun 				FP_SET_EXCEPTION(FP_EX_INVALID);
576*4882a593Smuzhiyun 			} else {
577*4882a593Smuzhiyun 				SB1_e += (func == EVFSCTSF ? 31 : 32);
578*4882a593Smuzhiyun 				FP_TO_INT_ROUND_S(vc.wp[1], SB1, 32,
579*4882a593Smuzhiyun 						(func == EVFSCTSF));
580*4882a593Smuzhiyun 			}
581*4882a593Smuzhiyun 			goto update_regs;
582*4882a593Smuzhiyun 
583*4882a593Smuzhiyun 		case EVFSCTUI:
584*4882a593Smuzhiyun 		case EVFSCTSI:
585*4882a593Smuzhiyun 			if (SB0_c == FP_CLS_NAN) {
586*4882a593Smuzhiyun 				vc.wp[0] = 0;
587*4882a593Smuzhiyun 				FP_SET_EXCEPTION(FP_EX_INVALID);
588*4882a593Smuzhiyun 			} else {
589*4882a593Smuzhiyun 				FP_TO_INT_ROUND_S(vc.wp[0], SB0, 32,
590*4882a593Smuzhiyun 						((func & 0x3) != 0));
591*4882a593Smuzhiyun 			}
592*4882a593Smuzhiyun 			if (SB1_c == FP_CLS_NAN) {
593*4882a593Smuzhiyun 				vc.wp[1] = 0;
594*4882a593Smuzhiyun 				FP_SET_EXCEPTION(FP_EX_INVALID);
595*4882a593Smuzhiyun 			} else {
596*4882a593Smuzhiyun 				FP_TO_INT_ROUND_S(vc.wp[1], SB1, 32,
597*4882a593Smuzhiyun 						((func & 0x3) != 0));
598*4882a593Smuzhiyun 			}
599*4882a593Smuzhiyun 			goto update_regs;
600*4882a593Smuzhiyun 
601*4882a593Smuzhiyun 		case EVFSCTUIZ:
602*4882a593Smuzhiyun 		case EVFSCTSIZ:
603*4882a593Smuzhiyun 			if (SB0_c == FP_CLS_NAN) {
604*4882a593Smuzhiyun 				vc.wp[0] = 0;
605*4882a593Smuzhiyun 				FP_SET_EXCEPTION(FP_EX_INVALID);
606*4882a593Smuzhiyun 			} else {
607*4882a593Smuzhiyun 				FP_TO_INT_S(vc.wp[0], SB0, 32,
608*4882a593Smuzhiyun 						((func & 0x3) != 0));
609*4882a593Smuzhiyun 			}
610*4882a593Smuzhiyun 			if (SB1_c == FP_CLS_NAN) {
611*4882a593Smuzhiyun 				vc.wp[1] = 0;
612*4882a593Smuzhiyun 				FP_SET_EXCEPTION(FP_EX_INVALID);
613*4882a593Smuzhiyun 			} else {
614*4882a593Smuzhiyun 				FP_TO_INT_S(vc.wp[1], SB1, 32,
615*4882a593Smuzhiyun 						((func & 0x3) != 0));
616*4882a593Smuzhiyun 			}
617*4882a593Smuzhiyun 			goto update_regs;
618*4882a593Smuzhiyun 
619*4882a593Smuzhiyun 		default:
620*4882a593Smuzhiyun 			goto illegal;
621*4882a593Smuzhiyun 		}
622*4882a593Smuzhiyun 		break;
623*4882a593Smuzhiyun 
624*4882a593Smuzhiyun pack_vs:
625*4882a593Smuzhiyun 		pr_debug("SR0: %ld %08lx %ld (%ld)\n",
626*4882a593Smuzhiyun 				SR0_s, SR0_f, SR0_e, SR0_c);
627*4882a593Smuzhiyun 		pr_debug("SR1: %ld %08lx %ld (%ld)\n",
628*4882a593Smuzhiyun 				SR1_s, SR1_f, SR1_e, SR1_c);
629*4882a593Smuzhiyun 
630*4882a593Smuzhiyun 		FP_PACK_SP(vc.wp, SR0);
631*4882a593Smuzhiyun 		FP_PACK_SP(vc.wp + 1, SR1);
632*4882a593Smuzhiyun 		goto update_regs;
633*4882a593Smuzhiyun 
634*4882a593Smuzhiyun cmp_vs:
635*4882a593Smuzhiyun 		{
636*4882a593Smuzhiyun 			int ch, cl;
637*4882a593Smuzhiyun 
638*4882a593Smuzhiyun 			FP_CMP_S(IR0, SA0, SB0, 3);
639*4882a593Smuzhiyun 			FP_CMP_S(IR1, SA1, SB1, 3);
640*4882a593Smuzhiyun 			if (IR0 == 3 && (FP_ISSIGNAN_S(SA0) || FP_ISSIGNAN_S(SB0)))
641*4882a593Smuzhiyun 				FP_SET_EXCEPTION(FP_EX_INVALID);
642*4882a593Smuzhiyun 			if (IR1 == 3 && (FP_ISSIGNAN_S(SA1) || FP_ISSIGNAN_S(SB1)))
643*4882a593Smuzhiyun 				FP_SET_EXCEPTION(FP_EX_INVALID);
644*4882a593Smuzhiyun 			ch = (IR0 == cmp) ? 1 : 0;
645*4882a593Smuzhiyun 			cl = (IR1 == cmp) ? 1 : 0;
646*4882a593Smuzhiyun 			IR = (ch << 3) | (cl << 2) | ((ch | cl) << 1) |
647*4882a593Smuzhiyun 				((ch & cl) << 0);
648*4882a593Smuzhiyun 			goto update_ccr;
649*4882a593Smuzhiyun 		}
650*4882a593Smuzhiyun 	}
651*4882a593Smuzhiyun 	default:
652*4882a593Smuzhiyun 		return -EINVAL;
653*4882a593Smuzhiyun 	}
654*4882a593Smuzhiyun 
655*4882a593Smuzhiyun update_ccr:
656*4882a593Smuzhiyun 	regs->ccr &= ~(15 << ((7 - ((speinsn >> 23) & 0x7)) << 2));
657*4882a593Smuzhiyun 	regs->ccr |= (IR << ((7 - ((speinsn >> 23) & 0x7)) << 2));
658*4882a593Smuzhiyun 
659*4882a593Smuzhiyun update_regs:
660*4882a593Smuzhiyun 	/*
661*4882a593Smuzhiyun 	 * If the "invalid" exception sticky bit was set by the
662*4882a593Smuzhiyun 	 * processor for non-finite input, but was not set before the
663*4882a593Smuzhiyun 	 * instruction being emulated, clear it.  Likewise for the
664*4882a593Smuzhiyun 	 * "underflow" bit, which may have been set by the processor
665*4882a593Smuzhiyun 	 * for exact underflow, not just inexact underflow when the
666*4882a593Smuzhiyun 	 * flag should be set for IEEE 754 semantics.  Other sticky
667*4882a593Smuzhiyun 	 * exceptions will only be set by the processor when they are
668*4882a593Smuzhiyun 	 * correct according to IEEE 754 semantics, and we must not
669*4882a593Smuzhiyun 	 * clear sticky bits that were already set before the emulated
670*4882a593Smuzhiyun 	 * instruction as they represent the user-visible sticky
671*4882a593Smuzhiyun 	 * exception status.  "inexact" traps to kernel are not
672*4882a593Smuzhiyun 	 * required for IEEE semantics and are not enabled by default,
673*4882a593Smuzhiyun 	 * so the "inexact" sticky bit may have been set by a previous
674*4882a593Smuzhiyun 	 * instruction without the kernel being aware of it.
675*4882a593Smuzhiyun 	 */
676*4882a593Smuzhiyun 	__FPU_FPSCR
677*4882a593Smuzhiyun 	  &= ~(FP_EX_INVALID | FP_EX_UNDERFLOW) | current->thread.spefscr_last;
678*4882a593Smuzhiyun 	__FPU_FPSCR |= (FP_CUR_EXCEPTIONS & FP_EX_MASK);
679*4882a593Smuzhiyun 	mtspr(SPRN_SPEFSCR, __FPU_FPSCR);
680*4882a593Smuzhiyun 	current->thread.spefscr_last = __FPU_FPSCR;
681*4882a593Smuzhiyun 
682*4882a593Smuzhiyun 	current->thread.evr[fc] = vc.wp[0];
683*4882a593Smuzhiyun 	regs->gpr[fc] = vc.wp[1];
684*4882a593Smuzhiyun 
685*4882a593Smuzhiyun 	pr_debug("ccr = %08lx\n", regs->ccr);
686*4882a593Smuzhiyun 	pr_debug("cur exceptions = %08x spefscr = %08lx\n",
687*4882a593Smuzhiyun 			FP_CUR_EXCEPTIONS, __FPU_FPSCR);
688*4882a593Smuzhiyun 	pr_debug("vc: %08x  %08x\n", vc.wp[0], vc.wp[1]);
689*4882a593Smuzhiyun 	pr_debug("va: %08x  %08x\n", va.wp[0], va.wp[1]);
690*4882a593Smuzhiyun 	pr_debug("vb: %08x  %08x\n", vb.wp[0], vb.wp[1]);
691*4882a593Smuzhiyun 
692*4882a593Smuzhiyun 	if (current->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE) {
693*4882a593Smuzhiyun 		if ((FP_CUR_EXCEPTIONS & FP_EX_DIVZERO)
694*4882a593Smuzhiyun 		    && (current->thread.fpexc_mode & PR_FP_EXC_DIV))
695*4882a593Smuzhiyun 			return 1;
696*4882a593Smuzhiyun 		if ((FP_CUR_EXCEPTIONS & FP_EX_OVERFLOW)
697*4882a593Smuzhiyun 		    && (current->thread.fpexc_mode & PR_FP_EXC_OVF))
698*4882a593Smuzhiyun 			return 1;
699*4882a593Smuzhiyun 		if ((FP_CUR_EXCEPTIONS & FP_EX_UNDERFLOW)
700*4882a593Smuzhiyun 		    && (current->thread.fpexc_mode & PR_FP_EXC_UND))
701*4882a593Smuzhiyun 			return 1;
702*4882a593Smuzhiyun 		if ((FP_CUR_EXCEPTIONS & FP_EX_INEXACT)
703*4882a593Smuzhiyun 		    && (current->thread.fpexc_mode & PR_FP_EXC_RES))
704*4882a593Smuzhiyun 			return 1;
705*4882a593Smuzhiyun 		if ((FP_CUR_EXCEPTIONS & FP_EX_INVALID)
706*4882a593Smuzhiyun 		    && (current->thread.fpexc_mode & PR_FP_EXC_INV))
707*4882a593Smuzhiyun 			return 1;
708*4882a593Smuzhiyun 	}
709*4882a593Smuzhiyun 	return 0;
710*4882a593Smuzhiyun 
711*4882a593Smuzhiyun illegal:
712*4882a593Smuzhiyun 	if (have_e500_cpu_a005_erratum) {
713*4882a593Smuzhiyun 		/* according to e500 cpu a005 erratum, reissue efp inst */
714*4882a593Smuzhiyun 		regs->nip -= 4;
715*4882a593Smuzhiyun 		pr_debug("re-issue efp inst: %08lx\n", speinsn);
716*4882a593Smuzhiyun 		return 0;
717*4882a593Smuzhiyun 	}
718*4882a593Smuzhiyun 
719*4882a593Smuzhiyun 	printk(KERN_ERR "\nOoops! IEEE-754 compliance handler encountered un-supported instruction.\ninst code: %08lx\n", speinsn);
720*4882a593Smuzhiyun 	return -ENOSYS;
721*4882a593Smuzhiyun }
722*4882a593Smuzhiyun 
speround_handler(struct pt_regs * regs)723*4882a593Smuzhiyun int speround_handler(struct pt_regs *regs)
724*4882a593Smuzhiyun {
725*4882a593Smuzhiyun 	union dw_union fgpr;
726*4882a593Smuzhiyun 	int s_lo, s_hi;
727*4882a593Smuzhiyun 	int lo_inexact, hi_inexact;
728*4882a593Smuzhiyun 	int fp_result;
729*4882a593Smuzhiyun 	unsigned long speinsn, type, fb, fc, fptype, func;
730*4882a593Smuzhiyun 
731*4882a593Smuzhiyun 	if (get_user(speinsn, (unsigned int __user *) regs->nip))
732*4882a593Smuzhiyun 		return -EFAULT;
733*4882a593Smuzhiyun 	if ((speinsn >> 26) != 4)
734*4882a593Smuzhiyun 		return -EINVAL;         /* not an spe instruction */
735*4882a593Smuzhiyun 
736*4882a593Smuzhiyun 	func = speinsn & 0x7ff;
737*4882a593Smuzhiyun 	type = insn_type(func);
738*4882a593Smuzhiyun 	if (type == XCR) return -ENOSYS;
739*4882a593Smuzhiyun 
740*4882a593Smuzhiyun 	__FPU_FPSCR = mfspr(SPRN_SPEFSCR);
741*4882a593Smuzhiyun 	pr_debug("speinsn:%08lx spefscr:%08lx\n", speinsn, __FPU_FPSCR);
742*4882a593Smuzhiyun 
743*4882a593Smuzhiyun 	fptype = (speinsn >> 5) & 0x7;
744*4882a593Smuzhiyun 
745*4882a593Smuzhiyun 	/* No need to round if the result is exact */
746*4882a593Smuzhiyun 	lo_inexact = __FPU_FPSCR & (SPEFSCR_FG | SPEFSCR_FX);
747*4882a593Smuzhiyun 	hi_inexact = __FPU_FPSCR & (SPEFSCR_FGH | SPEFSCR_FXH);
748*4882a593Smuzhiyun 	if (!(lo_inexact || (hi_inexact && fptype == VCT)))
749*4882a593Smuzhiyun 		return 0;
750*4882a593Smuzhiyun 
751*4882a593Smuzhiyun 	fc = (speinsn >> 21) & 0x1f;
752*4882a593Smuzhiyun 	s_lo = regs->gpr[fc] & SIGN_BIT_S;
753*4882a593Smuzhiyun 	s_hi = current->thread.evr[fc] & SIGN_BIT_S;
754*4882a593Smuzhiyun 	fgpr.wp[0] = current->thread.evr[fc];
755*4882a593Smuzhiyun 	fgpr.wp[1] = regs->gpr[fc];
756*4882a593Smuzhiyun 
757*4882a593Smuzhiyun 	fb = (speinsn >> 11) & 0x1f;
758*4882a593Smuzhiyun 	switch (func) {
759*4882a593Smuzhiyun 	case EFSCTUIZ:
760*4882a593Smuzhiyun 	case EFSCTSIZ:
761*4882a593Smuzhiyun 	case EVFSCTUIZ:
762*4882a593Smuzhiyun 	case EVFSCTSIZ:
763*4882a593Smuzhiyun 	case EFDCTUIDZ:
764*4882a593Smuzhiyun 	case EFDCTSIDZ:
765*4882a593Smuzhiyun 	case EFDCTUIZ:
766*4882a593Smuzhiyun 	case EFDCTSIZ:
767*4882a593Smuzhiyun 		/*
768*4882a593Smuzhiyun 		 * These instructions always round to zero,
769*4882a593Smuzhiyun 		 * independent of the rounding mode.
770*4882a593Smuzhiyun 		 */
771*4882a593Smuzhiyun 		return 0;
772*4882a593Smuzhiyun 
773*4882a593Smuzhiyun 	case EFSCTUI:
774*4882a593Smuzhiyun 	case EFSCTUF:
775*4882a593Smuzhiyun 	case EVFSCTUI:
776*4882a593Smuzhiyun 	case EVFSCTUF:
777*4882a593Smuzhiyun 	case EFDCTUI:
778*4882a593Smuzhiyun 	case EFDCTUF:
779*4882a593Smuzhiyun 		fp_result = 0;
780*4882a593Smuzhiyun 		s_lo = 0;
781*4882a593Smuzhiyun 		s_hi = 0;
782*4882a593Smuzhiyun 		break;
783*4882a593Smuzhiyun 
784*4882a593Smuzhiyun 	case EFSCTSI:
785*4882a593Smuzhiyun 	case EFSCTSF:
786*4882a593Smuzhiyun 		fp_result = 0;
787*4882a593Smuzhiyun 		/* Recover the sign of a zero result if possible.  */
788*4882a593Smuzhiyun 		if (fgpr.wp[1] == 0)
789*4882a593Smuzhiyun 			s_lo = regs->gpr[fb] & SIGN_BIT_S;
790*4882a593Smuzhiyun 		break;
791*4882a593Smuzhiyun 
792*4882a593Smuzhiyun 	case EVFSCTSI:
793*4882a593Smuzhiyun 	case EVFSCTSF:
794*4882a593Smuzhiyun 		fp_result = 0;
795*4882a593Smuzhiyun 		/* Recover the sign of a zero result if possible.  */
796*4882a593Smuzhiyun 		if (fgpr.wp[1] == 0)
797*4882a593Smuzhiyun 			s_lo = regs->gpr[fb] & SIGN_BIT_S;
798*4882a593Smuzhiyun 		if (fgpr.wp[0] == 0)
799*4882a593Smuzhiyun 			s_hi = current->thread.evr[fb] & SIGN_BIT_S;
800*4882a593Smuzhiyun 		break;
801*4882a593Smuzhiyun 
802*4882a593Smuzhiyun 	case EFDCTSI:
803*4882a593Smuzhiyun 	case EFDCTSF:
804*4882a593Smuzhiyun 		fp_result = 0;
805*4882a593Smuzhiyun 		s_hi = s_lo;
806*4882a593Smuzhiyun 		/* Recover the sign of a zero result if possible.  */
807*4882a593Smuzhiyun 		if (fgpr.wp[1] == 0)
808*4882a593Smuzhiyun 			s_hi = current->thread.evr[fb] & SIGN_BIT_S;
809*4882a593Smuzhiyun 		break;
810*4882a593Smuzhiyun 
811*4882a593Smuzhiyun 	default:
812*4882a593Smuzhiyun 		fp_result = 1;
813*4882a593Smuzhiyun 		break;
814*4882a593Smuzhiyun 	}
815*4882a593Smuzhiyun 
816*4882a593Smuzhiyun 	pr_debug("round fgpr: %08x  %08x\n", fgpr.wp[0], fgpr.wp[1]);
817*4882a593Smuzhiyun 
818*4882a593Smuzhiyun 	switch (fptype) {
819*4882a593Smuzhiyun 	/* Since SPE instructions on E500 core can handle round to nearest
820*4882a593Smuzhiyun 	 * and round toward zero with IEEE-754 complied, we just need
821*4882a593Smuzhiyun 	 * to handle round toward +Inf and round toward -Inf by software.
822*4882a593Smuzhiyun 	 */
823*4882a593Smuzhiyun 	case SPFP:
824*4882a593Smuzhiyun 		if ((FP_ROUNDMODE) == FP_RND_PINF) {
825*4882a593Smuzhiyun 			if (!s_lo) fgpr.wp[1]++; /* Z > 0, choose Z1 */
826*4882a593Smuzhiyun 		} else { /* round to -Inf */
827*4882a593Smuzhiyun 			if (s_lo) {
828*4882a593Smuzhiyun 				if (fp_result)
829*4882a593Smuzhiyun 					fgpr.wp[1]++; /* Z < 0, choose Z2 */
830*4882a593Smuzhiyun 				else
831*4882a593Smuzhiyun 					fgpr.wp[1]--; /* Z < 0, choose Z2 */
832*4882a593Smuzhiyun 			}
833*4882a593Smuzhiyun 		}
834*4882a593Smuzhiyun 		break;
835*4882a593Smuzhiyun 
836*4882a593Smuzhiyun 	case DPFP:
837*4882a593Smuzhiyun 		if (FP_ROUNDMODE == FP_RND_PINF) {
838*4882a593Smuzhiyun 			if (!s_hi) {
839*4882a593Smuzhiyun 				if (fp_result)
840*4882a593Smuzhiyun 					fgpr.dp[0]++; /* Z > 0, choose Z1 */
841*4882a593Smuzhiyun 				else
842*4882a593Smuzhiyun 					fgpr.wp[1]++; /* Z > 0, choose Z1 */
843*4882a593Smuzhiyun 			}
844*4882a593Smuzhiyun 		} else { /* round to -Inf */
845*4882a593Smuzhiyun 			if (s_hi) {
846*4882a593Smuzhiyun 				if (fp_result)
847*4882a593Smuzhiyun 					fgpr.dp[0]++; /* Z < 0, choose Z2 */
848*4882a593Smuzhiyun 				else
849*4882a593Smuzhiyun 					fgpr.wp[1]--; /* Z < 0, choose Z2 */
850*4882a593Smuzhiyun 			}
851*4882a593Smuzhiyun 		}
852*4882a593Smuzhiyun 		break;
853*4882a593Smuzhiyun 
854*4882a593Smuzhiyun 	case VCT:
855*4882a593Smuzhiyun 		if (FP_ROUNDMODE == FP_RND_PINF) {
856*4882a593Smuzhiyun 			if (lo_inexact && !s_lo)
857*4882a593Smuzhiyun 				fgpr.wp[1]++; /* Z_low > 0, choose Z1 */
858*4882a593Smuzhiyun 			if (hi_inexact && !s_hi)
859*4882a593Smuzhiyun 				fgpr.wp[0]++; /* Z_high word > 0, choose Z1 */
860*4882a593Smuzhiyun 		} else { /* round to -Inf */
861*4882a593Smuzhiyun 			if (lo_inexact && s_lo) {
862*4882a593Smuzhiyun 				if (fp_result)
863*4882a593Smuzhiyun 					fgpr.wp[1]++; /* Z_low < 0, choose Z2 */
864*4882a593Smuzhiyun 				else
865*4882a593Smuzhiyun 					fgpr.wp[1]--; /* Z_low < 0, choose Z2 */
866*4882a593Smuzhiyun 			}
867*4882a593Smuzhiyun 			if (hi_inexact && s_hi) {
868*4882a593Smuzhiyun 				if (fp_result)
869*4882a593Smuzhiyun 					fgpr.wp[0]++; /* Z_high < 0, choose Z2 */
870*4882a593Smuzhiyun 				else
871*4882a593Smuzhiyun 					fgpr.wp[0]--; /* Z_high < 0, choose Z2 */
872*4882a593Smuzhiyun 			}
873*4882a593Smuzhiyun 		}
874*4882a593Smuzhiyun 		break;
875*4882a593Smuzhiyun 
876*4882a593Smuzhiyun 	default:
877*4882a593Smuzhiyun 		return -EINVAL;
878*4882a593Smuzhiyun 	}
879*4882a593Smuzhiyun 
880*4882a593Smuzhiyun 	current->thread.evr[fc] = fgpr.wp[0];
881*4882a593Smuzhiyun 	regs->gpr[fc] = fgpr.wp[1];
882*4882a593Smuzhiyun 
883*4882a593Smuzhiyun 	pr_debug("  to fgpr: %08x  %08x\n", fgpr.wp[0], fgpr.wp[1]);
884*4882a593Smuzhiyun 
885*4882a593Smuzhiyun 	if (current->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE)
886*4882a593Smuzhiyun 		return (current->thread.fpexc_mode & PR_FP_EXC_RES) ? 1 : 0;
887*4882a593Smuzhiyun 	return 0;
888*4882a593Smuzhiyun }
889*4882a593Smuzhiyun 
spe_mathemu_init(void)890*4882a593Smuzhiyun int __init spe_mathemu_init(void)
891*4882a593Smuzhiyun {
892*4882a593Smuzhiyun 	u32 pvr, maj, min;
893*4882a593Smuzhiyun 
894*4882a593Smuzhiyun 	pvr = mfspr(SPRN_PVR);
895*4882a593Smuzhiyun 
896*4882a593Smuzhiyun 	if ((PVR_VER(pvr) == PVR_VER_E500V1) ||
897*4882a593Smuzhiyun 	    (PVR_VER(pvr) == PVR_VER_E500V2)) {
898*4882a593Smuzhiyun 		maj = PVR_MAJ(pvr);
899*4882a593Smuzhiyun 		min = PVR_MIN(pvr);
900*4882a593Smuzhiyun 
901*4882a593Smuzhiyun 		/*
902*4882a593Smuzhiyun 		 * E500 revision below 1.1, 2.3, 3.1, 4.1, 5.1
903*4882a593Smuzhiyun 		 * need cpu a005 errata workaround
904*4882a593Smuzhiyun 		 */
905*4882a593Smuzhiyun 		switch (maj) {
906*4882a593Smuzhiyun 		case 1:
907*4882a593Smuzhiyun 			if (min < 1)
908*4882a593Smuzhiyun 				have_e500_cpu_a005_erratum = 1;
909*4882a593Smuzhiyun 			break;
910*4882a593Smuzhiyun 		case 2:
911*4882a593Smuzhiyun 			if (min < 3)
912*4882a593Smuzhiyun 				have_e500_cpu_a005_erratum = 1;
913*4882a593Smuzhiyun 			break;
914*4882a593Smuzhiyun 		case 3:
915*4882a593Smuzhiyun 		case 4:
916*4882a593Smuzhiyun 		case 5:
917*4882a593Smuzhiyun 			if (min < 1)
918*4882a593Smuzhiyun 				have_e500_cpu_a005_erratum = 1;
919*4882a593Smuzhiyun 			break;
920*4882a593Smuzhiyun 		default:
921*4882a593Smuzhiyun 			break;
922*4882a593Smuzhiyun 		}
923*4882a593Smuzhiyun 	}
924*4882a593Smuzhiyun 
925*4882a593Smuzhiyun 	return 0;
926*4882a593Smuzhiyun }
927*4882a593Smuzhiyun 
928*4882a593Smuzhiyun module_init(spe_mathemu_init);
929