1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-or-later
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun * arch/powerpc/math-emu/math_efp.c
4*4882a593Smuzhiyun *
5*4882a593Smuzhiyun * Copyright (C) 2006-2008, 2010 Freescale Semiconductor, Inc.
6*4882a593Smuzhiyun *
7*4882a593Smuzhiyun * Author: Ebony Zhu, <ebony.zhu@freescale.com>
8*4882a593Smuzhiyun * Yu Liu, <yu.liu@freescale.com>
9*4882a593Smuzhiyun *
10*4882a593Smuzhiyun * Derived from arch/alpha/math-emu/math.c
11*4882a593Smuzhiyun * arch/powerpc/math-emu/math.c
12*4882a593Smuzhiyun *
13*4882a593Smuzhiyun * Description:
14*4882a593Smuzhiyun * This file is the exception handler to make E500 SPE instructions
15*4882a593Smuzhiyun * fully comply with IEEE-754 floating point standard.
16*4882a593Smuzhiyun */
17*4882a593Smuzhiyun
18*4882a593Smuzhiyun #include <linux/types.h>
19*4882a593Smuzhiyun #include <linux/prctl.h>
20*4882a593Smuzhiyun #include <linux/module.h>
21*4882a593Smuzhiyun
22*4882a593Smuzhiyun #include <linux/uaccess.h>
23*4882a593Smuzhiyun #include <asm/reg.h>
24*4882a593Smuzhiyun
25*4882a593Smuzhiyun #define FP_EX_BOOKE_E500_SPE
26*4882a593Smuzhiyun #include <asm/sfp-machine.h>
27*4882a593Smuzhiyun
28*4882a593Smuzhiyun #include <math-emu/soft-fp.h>
29*4882a593Smuzhiyun #include <math-emu/single.h>
30*4882a593Smuzhiyun #include <math-emu/double.h>
31*4882a593Smuzhiyun
/* Primary opcode (instruction bits 31..26) shared by the SPE instructions. */
#define EFAPU		0x4

/* Operand format selector, taken from bits 7..5 of the instruction word. */
#define VCT		0x4	/* vector of two single-precision values */
#define SPFP		0x6	/* scalar single precision */
#define DPFP		0x7	/* scalar double precision */

/*
 * Extended opcodes, matched against the low 11 bits of the instruction
 * word (speinsn & 0x7ff).
 */

/* Scalar single precision. */
#define EFSADD		0x2c0
#define EFSSUB		0x2c1
#define EFSABS		0x2c4
#define EFSNABS		0x2c5
#define EFSNEG		0x2c6
#define EFSMUL		0x2c8
#define EFSDIV		0x2c9
#define EFSCMPGT	0x2cc
#define EFSCMPLT	0x2cd
#define EFSCMPEQ	0x2ce
#define EFSCFD		0x2cf
#define EFSCFSI		0x2d1
#define EFSCTUI		0x2d4
#define EFSCTSI		0x2d5
#define EFSCTUF		0x2d6
#define EFSCTSF		0x2d7
#define EFSCTUIZ	0x2d8
#define EFSCTSIZ	0x2da

/* Vector single precision. */
#define EVFSADD		0x280
#define EVFSSUB		0x281
#define EVFSABS		0x284
#define EVFSNABS	0x285
#define EVFSNEG		0x286
#define EVFSMUL		0x288
#define EVFSDIV		0x289
#define EVFSCMPGT	0x28c
#define EVFSCMPLT	0x28d
#define EVFSCMPEQ	0x28e
#define EVFSCTUI	0x294
#define EVFSCTSI	0x295
#define EVFSCTUF	0x296
#define EVFSCTSF	0x297
#define EVFSCTUIZ	0x298
#define EVFSCTSIZ	0x29a

/* Scalar double precision. */
#define EFDADD		0x2e0
#define EFDSUB		0x2e1
#define EFDABS		0x2e4
#define EFDNABS		0x2e5
#define EFDNEG		0x2e6
#define EFDMUL		0x2e8
#define EFDDIV		0x2e9
#define EFDCTUIDZ	0x2ea
#define EFDCTSIDZ	0x2eb
#define EFDCMPGT	0x2ec
#define EFDCMPLT	0x2ed
#define EFDCMPEQ	0x2ee
#define EFDCFS		0x2ef
#define EFDCTUI		0x2f4
#define EFDCTSI		0x2f5
#define EFDCTUF		0x2f6
#define EFDCTSF		0x2f7
#define EFDCTUIZ	0x2f8
#define EFDCTSIZ	0x2fa

/*
 * Operand forms returned by insn_type().  The emulator unpacks operand A
 * and operand B for AB and XCR, only A for XA and only B for XB; NOTYPE
 * marks an opcode that is not recognised.
 */
#define AB		2
#define XA		3
#define XB		4
#define XCR		5
#define NOTYPE		0

/* Sign-bit masks for a 32-bit single and a 64-bit double. */
#define SIGN_BIT_S	(1UL << 31)
#define SIGN_BIT_D	(1ULL << 63)

/* Exception flags that the emulator folds back into SPEFSCR. */
#define FP_EX_MASK	(FP_EX_INEXACT | FP_EX_INVALID | FP_EX_DIVZERO | \
			FP_EX_UNDERFLOW | FP_EX_OVERFLOW)

/*
 * When non-zero, an unrecognised instruction is re-issued (nip is moved
 * back by 4) instead of being reported as unsupported.  The flag is not
 * assigned in this part of the file.
 */
static int have_e500_cpu_a005_erratum;
106*4882a593Smuzhiyun
107*4882a593Smuzhiyun union dw_union {
108*4882a593Smuzhiyun u64 dp[1];
109*4882a593Smuzhiyun u32 wp[2];
110*4882a593Smuzhiyun };
111*4882a593Smuzhiyun
insn_type(unsigned long speinsn)112*4882a593Smuzhiyun static unsigned long insn_type(unsigned long speinsn)
113*4882a593Smuzhiyun {
114*4882a593Smuzhiyun unsigned long ret = NOTYPE;
115*4882a593Smuzhiyun
116*4882a593Smuzhiyun switch (speinsn & 0x7ff) {
117*4882a593Smuzhiyun case EFSABS: ret = XA; break;
118*4882a593Smuzhiyun case EFSADD: ret = AB; break;
119*4882a593Smuzhiyun case EFSCFD: ret = XB; break;
120*4882a593Smuzhiyun case EFSCMPEQ: ret = XCR; break;
121*4882a593Smuzhiyun case EFSCMPGT: ret = XCR; break;
122*4882a593Smuzhiyun case EFSCMPLT: ret = XCR; break;
123*4882a593Smuzhiyun case EFSCTSF: ret = XB; break;
124*4882a593Smuzhiyun case EFSCTSI: ret = XB; break;
125*4882a593Smuzhiyun case EFSCTSIZ: ret = XB; break;
126*4882a593Smuzhiyun case EFSCTUF: ret = XB; break;
127*4882a593Smuzhiyun case EFSCTUI: ret = XB; break;
128*4882a593Smuzhiyun case EFSCTUIZ: ret = XB; break;
129*4882a593Smuzhiyun case EFSDIV: ret = AB; break;
130*4882a593Smuzhiyun case EFSMUL: ret = AB; break;
131*4882a593Smuzhiyun case EFSNABS: ret = XA; break;
132*4882a593Smuzhiyun case EFSNEG: ret = XA; break;
133*4882a593Smuzhiyun case EFSSUB: ret = AB; break;
134*4882a593Smuzhiyun case EFSCFSI: ret = XB; break;
135*4882a593Smuzhiyun
136*4882a593Smuzhiyun case EVFSABS: ret = XA; break;
137*4882a593Smuzhiyun case EVFSADD: ret = AB; break;
138*4882a593Smuzhiyun case EVFSCMPEQ: ret = XCR; break;
139*4882a593Smuzhiyun case EVFSCMPGT: ret = XCR; break;
140*4882a593Smuzhiyun case EVFSCMPLT: ret = XCR; break;
141*4882a593Smuzhiyun case EVFSCTSF: ret = XB; break;
142*4882a593Smuzhiyun case EVFSCTSI: ret = XB; break;
143*4882a593Smuzhiyun case EVFSCTSIZ: ret = XB; break;
144*4882a593Smuzhiyun case EVFSCTUF: ret = XB; break;
145*4882a593Smuzhiyun case EVFSCTUI: ret = XB; break;
146*4882a593Smuzhiyun case EVFSCTUIZ: ret = XB; break;
147*4882a593Smuzhiyun case EVFSDIV: ret = AB; break;
148*4882a593Smuzhiyun case EVFSMUL: ret = AB; break;
149*4882a593Smuzhiyun case EVFSNABS: ret = XA; break;
150*4882a593Smuzhiyun case EVFSNEG: ret = XA; break;
151*4882a593Smuzhiyun case EVFSSUB: ret = AB; break;
152*4882a593Smuzhiyun
153*4882a593Smuzhiyun case EFDABS: ret = XA; break;
154*4882a593Smuzhiyun case EFDADD: ret = AB; break;
155*4882a593Smuzhiyun case EFDCFS: ret = XB; break;
156*4882a593Smuzhiyun case EFDCMPEQ: ret = XCR; break;
157*4882a593Smuzhiyun case EFDCMPGT: ret = XCR; break;
158*4882a593Smuzhiyun case EFDCMPLT: ret = XCR; break;
159*4882a593Smuzhiyun case EFDCTSF: ret = XB; break;
160*4882a593Smuzhiyun case EFDCTSI: ret = XB; break;
161*4882a593Smuzhiyun case EFDCTSIDZ: ret = XB; break;
162*4882a593Smuzhiyun case EFDCTSIZ: ret = XB; break;
163*4882a593Smuzhiyun case EFDCTUF: ret = XB; break;
164*4882a593Smuzhiyun case EFDCTUI: ret = XB; break;
165*4882a593Smuzhiyun case EFDCTUIDZ: ret = XB; break;
166*4882a593Smuzhiyun case EFDCTUIZ: ret = XB; break;
167*4882a593Smuzhiyun case EFDDIV: ret = AB; break;
168*4882a593Smuzhiyun case EFDMUL: ret = AB; break;
169*4882a593Smuzhiyun case EFDNABS: ret = XA; break;
170*4882a593Smuzhiyun case EFDNEG: ret = XA; break;
171*4882a593Smuzhiyun case EFDSUB: ret = AB; break;
172*4882a593Smuzhiyun }
173*4882a593Smuzhiyun
174*4882a593Smuzhiyun return ret;
175*4882a593Smuzhiyun }
176*4882a593Smuzhiyun
do_spe_mathemu(struct pt_regs * regs)177*4882a593Smuzhiyun int do_spe_mathemu(struct pt_regs *regs)
178*4882a593Smuzhiyun {
179*4882a593Smuzhiyun FP_DECL_EX;
180*4882a593Smuzhiyun int IR, cmp;
181*4882a593Smuzhiyun
182*4882a593Smuzhiyun unsigned long type, func, fc, fa, fb, src, speinsn;
183*4882a593Smuzhiyun union dw_union vc, va, vb;
184*4882a593Smuzhiyun
185*4882a593Smuzhiyun if (get_user(speinsn, (unsigned int __user *) regs->nip))
186*4882a593Smuzhiyun return -EFAULT;
187*4882a593Smuzhiyun if ((speinsn >> 26) != EFAPU)
188*4882a593Smuzhiyun return -EINVAL; /* not an spe instruction */
189*4882a593Smuzhiyun
190*4882a593Smuzhiyun type = insn_type(speinsn);
191*4882a593Smuzhiyun if (type == NOTYPE)
192*4882a593Smuzhiyun goto illegal;
193*4882a593Smuzhiyun
194*4882a593Smuzhiyun func = speinsn & 0x7ff;
195*4882a593Smuzhiyun fc = (speinsn >> 21) & 0x1f;
196*4882a593Smuzhiyun fa = (speinsn >> 16) & 0x1f;
197*4882a593Smuzhiyun fb = (speinsn >> 11) & 0x1f;
198*4882a593Smuzhiyun src = (speinsn >> 5) & 0x7;
199*4882a593Smuzhiyun
200*4882a593Smuzhiyun vc.wp[0] = current->thread.evr[fc];
201*4882a593Smuzhiyun vc.wp[1] = regs->gpr[fc];
202*4882a593Smuzhiyun va.wp[0] = current->thread.evr[fa];
203*4882a593Smuzhiyun va.wp[1] = regs->gpr[fa];
204*4882a593Smuzhiyun vb.wp[0] = current->thread.evr[fb];
205*4882a593Smuzhiyun vb.wp[1] = regs->gpr[fb];
206*4882a593Smuzhiyun
207*4882a593Smuzhiyun __FPU_FPSCR = mfspr(SPRN_SPEFSCR);
208*4882a593Smuzhiyun
209*4882a593Smuzhiyun pr_debug("speinsn:%08lx spefscr:%08lx\n", speinsn, __FPU_FPSCR);
210*4882a593Smuzhiyun pr_debug("vc: %08x %08x\n", vc.wp[0], vc.wp[1]);
211*4882a593Smuzhiyun pr_debug("va: %08x %08x\n", va.wp[0], va.wp[1]);
212*4882a593Smuzhiyun pr_debug("vb: %08x %08x\n", vb.wp[0], vb.wp[1]);
213*4882a593Smuzhiyun
214*4882a593Smuzhiyun switch (src) {
215*4882a593Smuzhiyun case SPFP: {
216*4882a593Smuzhiyun FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR);
217*4882a593Smuzhiyun
218*4882a593Smuzhiyun switch (type) {
219*4882a593Smuzhiyun case AB:
220*4882a593Smuzhiyun case XCR:
221*4882a593Smuzhiyun FP_UNPACK_SP(SA, va.wp + 1);
222*4882a593Smuzhiyun case XB:
223*4882a593Smuzhiyun FP_UNPACK_SP(SB, vb.wp + 1);
224*4882a593Smuzhiyun break;
225*4882a593Smuzhiyun case XA:
226*4882a593Smuzhiyun FP_UNPACK_SP(SA, va.wp + 1);
227*4882a593Smuzhiyun break;
228*4882a593Smuzhiyun }
229*4882a593Smuzhiyun
230*4882a593Smuzhiyun pr_debug("SA: %ld %08lx %ld (%ld)\n", SA_s, SA_f, SA_e, SA_c);
231*4882a593Smuzhiyun pr_debug("SB: %ld %08lx %ld (%ld)\n", SB_s, SB_f, SB_e, SB_c);
232*4882a593Smuzhiyun
233*4882a593Smuzhiyun switch (func) {
234*4882a593Smuzhiyun case EFSABS:
235*4882a593Smuzhiyun vc.wp[1] = va.wp[1] & ~SIGN_BIT_S;
236*4882a593Smuzhiyun goto update_regs;
237*4882a593Smuzhiyun
238*4882a593Smuzhiyun case EFSNABS:
239*4882a593Smuzhiyun vc.wp[1] = va.wp[1] | SIGN_BIT_S;
240*4882a593Smuzhiyun goto update_regs;
241*4882a593Smuzhiyun
242*4882a593Smuzhiyun case EFSNEG:
243*4882a593Smuzhiyun vc.wp[1] = va.wp[1] ^ SIGN_BIT_S;
244*4882a593Smuzhiyun goto update_regs;
245*4882a593Smuzhiyun
246*4882a593Smuzhiyun case EFSADD:
247*4882a593Smuzhiyun FP_ADD_S(SR, SA, SB);
248*4882a593Smuzhiyun goto pack_s;
249*4882a593Smuzhiyun
250*4882a593Smuzhiyun case EFSSUB:
251*4882a593Smuzhiyun FP_SUB_S(SR, SA, SB);
252*4882a593Smuzhiyun goto pack_s;
253*4882a593Smuzhiyun
254*4882a593Smuzhiyun case EFSMUL:
255*4882a593Smuzhiyun FP_MUL_S(SR, SA, SB);
256*4882a593Smuzhiyun goto pack_s;
257*4882a593Smuzhiyun
258*4882a593Smuzhiyun case EFSDIV:
259*4882a593Smuzhiyun FP_DIV_S(SR, SA, SB);
260*4882a593Smuzhiyun goto pack_s;
261*4882a593Smuzhiyun
262*4882a593Smuzhiyun case EFSCMPEQ:
263*4882a593Smuzhiyun cmp = 0;
264*4882a593Smuzhiyun goto cmp_s;
265*4882a593Smuzhiyun
266*4882a593Smuzhiyun case EFSCMPGT:
267*4882a593Smuzhiyun cmp = 1;
268*4882a593Smuzhiyun goto cmp_s;
269*4882a593Smuzhiyun
270*4882a593Smuzhiyun case EFSCMPLT:
271*4882a593Smuzhiyun cmp = -1;
272*4882a593Smuzhiyun goto cmp_s;
273*4882a593Smuzhiyun
274*4882a593Smuzhiyun case EFSCTSF:
275*4882a593Smuzhiyun case EFSCTUF:
276*4882a593Smuzhiyun if (SB_c == FP_CLS_NAN) {
277*4882a593Smuzhiyun vc.wp[1] = 0;
278*4882a593Smuzhiyun FP_SET_EXCEPTION(FP_EX_INVALID);
279*4882a593Smuzhiyun } else {
280*4882a593Smuzhiyun SB_e += (func == EFSCTSF ? 31 : 32);
281*4882a593Smuzhiyun FP_TO_INT_ROUND_S(vc.wp[1], SB, 32,
282*4882a593Smuzhiyun (func == EFSCTSF));
283*4882a593Smuzhiyun }
284*4882a593Smuzhiyun goto update_regs;
285*4882a593Smuzhiyun
286*4882a593Smuzhiyun case EFSCFD: {
287*4882a593Smuzhiyun FP_DECL_D(DB);
288*4882a593Smuzhiyun FP_CLEAR_EXCEPTIONS;
289*4882a593Smuzhiyun FP_UNPACK_DP(DB, vb.dp);
290*4882a593Smuzhiyun
291*4882a593Smuzhiyun pr_debug("DB: %ld %08lx %08lx %ld (%ld)\n",
292*4882a593Smuzhiyun DB_s, DB_f1, DB_f0, DB_e, DB_c);
293*4882a593Smuzhiyun
294*4882a593Smuzhiyun FP_CONV(S, D, 1, 2, SR, DB);
295*4882a593Smuzhiyun goto pack_s;
296*4882a593Smuzhiyun }
297*4882a593Smuzhiyun
298*4882a593Smuzhiyun case EFSCTSI:
299*4882a593Smuzhiyun case EFSCTUI:
300*4882a593Smuzhiyun if (SB_c == FP_CLS_NAN) {
301*4882a593Smuzhiyun vc.wp[1] = 0;
302*4882a593Smuzhiyun FP_SET_EXCEPTION(FP_EX_INVALID);
303*4882a593Smuzhiyun } else {
304*4882a593Smuzhiyun FP_TO_INT_ROUND_S(vc.wp[1], SB, 32,
305*4882a593Smuzhiyun ((func & 0x3) != 0));
306*4882a593Smuzhiyun }
307*4882a593Smuzhiyun goto update_regs;
308*4882a593Smuzhiyun
309*4882a593Smuzhiyun case EFSCTSIZ:
310*4882a593Smuzhiyun case EFSCTUIZ:
311*4882a593Smuzhiyun if (SB_c == FP_CLS_NAN) {
312*4882a593Smuzhiyun vc.wp[1] = 0;
313*4882a593Smuzhiyun FP_SET_EXCEPTION(FP_EX_INVALID);
314*4882a593Smuzhiyun } else {
315*4882a593Smuzhiyun FP_TO_INT_S(vc.wp[1], SB, 32,
316*4882a593Smuzhiyun ((func & 0x3) != 0));
317*4882a593Smuzhiyun }
318*4882a593Smuzhiyun goto update_regs;
319*4882a593Smuzhiyun
320*4882a593Smuzhiyun default:
321*4882a593Smuzhiyun goto illegal;
322*4882a593Smuzhiyun }
323*4882a593Smuzhiyun break;
324*4882a593Smuzhiyun
325*4882a593Smuzhiyun pack_s:
326*4882a593Smuzhiyun pr_debug("SR: %ld %08lx %ld (%ld)\n", SR_s, SR_f, SR_e, SR_c);
327*4882a593Smuzhiyun
328*4882a593Smuzhiyun FP_PACK_SP(vc.wp + 1, SR);
329*4882a593Smuzhiyun goto update_regs;
330*4882a593Smuzhiyun
331*4882a593Smuzhiyun cmp_s:
332*4882a593Smuzhiyun FP_CMP_S(IR, SA, SB, 3);
333*4882a593Smuzhiyun if (IR == 3 && (FP_ISSIGNAN_S(SA) || FP_ISSIGNAN_S(SB)))
334*4882a593Smuzhiyun FP_SET_EXCEPTION(FP_EX_INVALID);
335*4882a593Smuzhiyun if (IR == cmp) {
336*4882a593Smuzhiyun IR = 0x4;
337*4882a593Smuzhiyun } else {
338*4882a593Smuzhiyun IR = 0;
339*4882a593Smuzhiyun }
340*4882a593Smuzhiyun goto update_ccr;
341*4882a593Smuzhiyun }
342*4882a593Smuzhiyun
343*4882a593Smuzhiyun case DPFP: {
344*4882a593Smuzhiyun FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DR);
345*4882a593Smuzhiyun
346*4882a593Smuzhiyun switch (type) {
347*4882a593Smuzhiyun case AB:
348*4882a593Smuzhiyun case XCR:
349*4882a593Smuzhiyun FP_UNPACK_DP(DA, va.dp);
350*4882a593Smuzhiyun case XB:
351*4882a593Smuzhiyun FP_UNPACK_DP(DB, vb.dp);
352*4882a593Smuzhiyun break;
353*4882a593Smuzhiyun case XA:
354*4882a593Smuzhiyun FP_UNPACK_DP(DA, va.dp);
355*4882a593Smuzhiyun break;
356*4882a593Smuzhiyun }
357*4882a593Smuzhiyun
358*4882a593Smuzhiyun pr_debug("DA: %ld %08lx %08lx %ld (%ld)\n",
359*4882a593Smuzhiyun DA_s, DA_f1, DA_f0, DA_e, DA_c);
360*4882a593Smuzhiyun pr_debug("DB: %ld %08lx %08lx %ld (%ld)\n",
361*4882a593Smuzhiyun DB_s, DB_f1, DB_f0, DB_e, DB_c);
362*4882a593Smuzhiyun
363*4882a593Smuzhiyun switch (func) {
364*4882a593Smuzhiyun case EFDABS:
365*4882a593Smuzhiyun vc.dp[0] = va.dp[0] & ~SIGN_BIT_D;
366*4882a593Smuzhiyun goto update_regs;
367*4882a593Smuzhiyun
368*4882a593Smuzhiyun case EFDNABS:
369*4882a593Smuzhiyun vc.dp[0] = va.dp[0] | SIGN_BIT_D;
370*4882a593Smuzhiyun goto update_regs;
371*4882a593Smuzhiyun
372*4882a593Smuzhiyun case EFDNEG:
373*4882a593Smuzhiyun vc.dp[0] = va.dp[0] ^ SIGN_BIT_D;
374*4882a593Smuzhiyun goto update_regs;
375*4882a593Smuzhiyun
376*4882a593Smuzhiyun case EFDADD:
377*4882a593Smuzhiyun FP_ADD_D(DR, DA, DB);
378*4882a593Smuzhiyun goto pack_d;
379*4882a593Smuzhiyun
380*4882a593Smuzhiyun case EFDSUB:
381*4882a593Smuzhiyun FP_SUB_D(DR, DA, DB);
382*4882a593Smuzhiyun goto pack_d;
383*4882a593Smuzhiyun
384*4882a593Smuzhiyun case EFDMUL:
385*4882a593Smuzhiyun FP_MUL_D(DR, DA, DB);
386*4882a593Smuzhiyun goto pack_d;
387*4882a593Smuzhiyun
388*4882a593Smuzhiyun case EFDDIV:
389*4882a593Smuzhiyun FP_DIV_D(DR, DA, DB);
390*4882a593Smuzhiyun goto pack_d;
391*4882a593Smuzhiyun
392*4882a593Smuzhiyun case EFDCMPEQ:
393*4882a593Smuzhiyun cmp = 0;
394*4882a593Smuzhiyun goto cmp_d;
395*4882a593Smuzhiyun
396*4882a593Smuzhiyun case EFDCMPGT:
397*4882a593Smuzhiyun cmp = 1;
398*4882a593Smuzhiyun goto cmp_d;
399*4882a593Smuzhiyun
400*4882a593Smuzhiyun case EFDCMPLT:
401*4882a593Smuzhiyun cmp = -1;
402*4882a593Smuzhiyun goto cmp_d;
403*4882a593Smuzhiyun
404*4882a593Smuzhiyun case EFDCTSF:
405*4882a593Smuzhiyun case EFDCTUF:
406*4882a593Smuzhiyun if (DB_c == FP_CLS_NAN) {
407*4882a593Smuzhiyun vc.wp[1] = 0;
408*4882a593Smuzhiyun FP_SET_EXCEPTION(FP_EX_INVALID);
409*4882a593Smuzhiyun } else {
410*4882a593Smuzhiyun DB_e += (func == EFDCTSF ? 31 : 32);
411*4882a593Smuzhiyun FP_TO_INT_ROUND_D(vc.wp[1], DB, 32,
412*4882a593Smuzhiyun (func == EFDCTSF));
413*4882a593Smuzhiyun }
414*4882a593Smuzhiyun goto update_regs;
415*4882a593Smuzhiyun
416*4882a593Smuzhiyun case EFDCFS: {
417*4882a593Smuzhiyun FP_DECL_S(SB);
418*4882a593Smuzhiyun FP_CLEAR_EXCEPTIONS;
419*4882a593Smuzhiyun FP_UNPACK_SP(SB, vb.wp + 1);
420*4882a593Smuzhiyun
421*4882a593Smuzhiyun pr_debug("SB: %ld %08lx %ld (%ld)\n",
422*4882a593Smuzhiyun SB_s, SB_f, SB_e, SB_c);
423*4882a593Smuzhiyun
424*4882a593Smuzhiyun FP_CONV(D, S, 2, 1, DR, SB);
425*4882a593Smuzhiyun goto pack_d;
426*4882a593Smuzhiyun }
427*4882a593Smuzhiyun
428*4882a593Smuzhiyun case EFDCTUIDZ:
429*4882a593Smuzhiyun case EFDCTSIDZ:
430*4882a593Smuzhiyun if (DB_c == FP_CLS_NAN) {
431*4882a593Smuzhiyun vc.dp[0] = 0;
432*4882a593Smuzhiyun FP_SET_EXCEPTION(FP_EX_INVALID);
433*4882a593Smuzhiyun } else {
434*4882a593Smuzhiyun FP_TO_INT_D(vc.dp[0], DB, 64,
435*4882a593Smuzhiyun ((func & 0x1) == 0));
436*4882a593Smuzhiyun }
437*4882a593Smuzhiyun goto update_regs;
438*4882a593Smuzhiyun
439*4882a593Smuzhiyun case EFDCTUI:
440*4882a593Smuzhiyun case EFDCTSI:
441*4882a593Smuzhiyun if (DB_c == FP_CLS_NAN) {
442*4882a593Smuzhiyun vc.wp[1] = 0;
443*4882a593Smuzhiyun FP_SET_EXCEPTION(FP_EX_INVALID);
444*4882a593Smuzhiyun } else {
445*4882a593Smuzhiyun FP_TO_INT_ROUND_D(vc.wp[1], DB, 32,
446*4882a593Smuzhiyun ((func & 0x3) != 0));
447*4882a593Smuzhiyun }
448*4882a593Smuzhiyun goto update_regs;
449*4882a593Smuzhiyun
450*4882a593Smuzhiyun case EFDCTUIZ:
451*4882a593Smuzhiyun case EFDCTSIZ:
452*4882a593Smuzhiyun if (DB_c == FP_CLS_NAN) {
453*4882a593Smuzhiyun vc.wp[1] = 0;
454*4882a593Smuzhiyun FP_SET_EXCEPTION(FP_EX_INVALID);
455*4882a593Smuzhiyun } else {
456*4882a593Smuzhiyun FP_TO_INT_D(vc.wp[1], DB, 32,
457*4882a593Smuzhiyun ((func & 0x3) != 0));
458*4882a593Smuzhiyun }
459*4882a593Smuzhiyun goto update_regs;
460*4882a593Smuzhiyun
461*4882a593Smuzhiyun default:
462*4882a593Smuzhiyun goto illegal;
463*4882a593Smuzhiyun }
464*4882a593Smuzhiyun break;
465*4882a593Smuzhiyun
466*4882a593Smuzhiyun pack_d:
467*4882a593Smuzhiyun pr_debug("DR: %ld %08lx %08lx %ld (%ld)\n",
468*4882a593Smuzhiyun DR_s, DR_f1, DR_f0, DR_e, DR_c);
469*4882a593Smuzhiyun
470*4882a593Smuzhiyun FP_PACK_DP(vc.dp, DR);
471*4882a593Smuzhiyun goto update_regs;
472*4882a593Smuzhiyun
473*4882a593Smuzhiyun cmp_d:
474*4882a593Smuzhiyun FP_CMP_D(IR, DA, DB, 3);
475*4882a593Smuzhiyun if (IR == 3 && (FP_ISSIGNAN_D(DA) || FP_ISSIGNAN_D(DB)))
476*4882a593Smuzhiyun FP_SET_EXCEPTION(FP_EX_INVALID);
477*4882a593Smuzhiyun if (IR == cmp) {
478*4882a593Smuzhiyun IR = 0x4;
479*4882a593Smuzhiyun } else {
480*4882a593Smuzhiyun IR = 0;
481*4882a593Smuzhiyun }
482*4882a593Smuzhiyun goto update_ccr;
483*4882a593Smuzhiyun
484*4882a593Smuzhiyun }
485*4882a593Smuzhiyun
486*4882a593Smuzhiyun case VCT: {
487*4882a593Smuzhiyun FP_DECL_S(SA0); FP_DECL_S(SB0); FP_DECL_S(SR0);
488*4882a593Smuzhiyun FP_DECL_S(SA1); FP_DECL_S(SB1); FP_DECL_S(SR1);
489*4882a593Smuzhiyun int IR0, IR1;
490*4882a593Smuzhiyun
491*4882a593Smuzhiyun switch (type) {
492*4882a593Smuzhiyun case AB:
493*4882a593Smuzhiyun case XCR:
494*4882a593Smuzhiyun FP_UNPACK_SP(SA0, va.wp);
495*4882a593Smuzhiyun FP_UNPACK_SP(SA1, va.wp + 1);
496*4882a593Smuzhiyun case XB:
497*4882a593Smuzhiyun FP_UNPACK_SP(SB0, vb.wp);
498*4882a593Smuzhiyun FP_UNPACK_SP(SB1, vb.wp + 1);
499*4882a593Smuzhiyun break;
500*4882a593Smuzhiyun case XA:
501*4882a593Smuzhiyun FP_UNPACK_SP(SA0, va.wp);
502*4882a593Smuzhiyun FP_UNPACK_SP(SA1, va.wp + 1);
503*4882a593Smuzhiyun break;
504*4882a593Smuzhiyun }
505*4882a593Smuzhiyun
506*4882a593Smuzhiyun pr_debug("SA0: %ld %08lx %ld (%ld)\n",
507*4882a593Smuzhiyun SA0_s, SA0_f, SA0_e, SA0_c);
508*4882a593Smuzhiyun pr_debug("SA1: %ld %08lx %ld (%ld)\n",
509*4882a593Smuzhiyun SA1_s, SA1_f, SA1_e, SA1_c);
510*4882a593Smuzhiyun pr_debug("SB0: %ld %08lx %ld (%ld)\n",
511*4882a593Smuzhiyun SB0_s, SB0_f, SB0_e, SB0_c);
512*4882a593Smuzhiyun pr_debug("SB1: %ld %08lx %ld (%ld)\n",
513*4882a593Smuzhiyun SB1_s, SB1_f, SB1_e, SB1_c);
514*4882a593Smuzhiyun
515*4882a593Smuzhiyun switch (func) {
516*4882a593Smuzhiyun case EVFSABS:
517*4882a593Smuzhiyun vc.wp[0] = va.wp[0] & ~SIGN_BIT_S;
518*4882a593Smuzhiyun vc.wp[1] = va.wp[1] & ~SIGN_BIT_S;
519*4882a593Smuzhiyun goto update_regs;
520*4882a593Smuzhiyun
521*4882a593Smuzhiyun case EVFSNABS:
522*4882a593Smuzhiyun vc.wp[0] = va.wp[0] | SIGN_BIT_S;
523*4882a593Smuzhiyun vc.wp[1] = va.wp[1] | SIGN_BIT_S;
524*4882a593Smuzhiyun goto update_regs;
525*4882a593Smuzhiyun
526*4882a593Smuzhiyun case EVFSNEG:
527*4882a593Smuzhiyun vc.wp[0] = va.wp[0] ^ SIGN_BIT_S;
528*4882a593Smuzhiyun vc.wp[1] = va.wp[1] ^ SIGN_BIT_S;
529*4882a593Smuzhiyun goto update_regs;
530*4882a593Smuzhiyun
531*4882a593Smuzhiyun case EVFSADD:
532*4882a593Smuzhiyun FP_ADD_S(SR0, SA0, SB0);
533*4882a593Smuzhiyun FP_ADD_S(SR1, SA1, SB1);
534*4882a593Smuzhiyun goto pack_vs;
535*4882a593Smuzhiyun
536*4882a593Smuzhiyun case EVFSSUB:
537*4882a593Smuzhiyun FP_SUB_S(SR0, SA0, SB0);
538*4882a593Smuzhiyun FP_SUB_S(SR1, SA1, SB1);
539*4882a593Smuzhiyun goto pack_vs;
540*4882a593Smuzhiyun
541*4882a593Smuzhiyun case EVFSMUL:
542*4882a593Smuzhiyun FP_MUL_S(SR0, SA0, SB0);
543*4882a593Smuzhiyun FP_MUL_S(SR1, SA1, SB1);
544*4882a593Smuzhiyun goto pack_vs;
545*4882a593Smuzhiyun
546*4882a593Smuzhiyun case EVFSDIV:
547*4882a593Smuzhiyun FP_DIV_S(SR0, SA0, SB0);
548*4882a593Smuzhiyun FP_DIV_S(SR1, SA1, SB1);
549*4882a593Smuzhiyun goto pack_vs;
550*4882a593Smuzhiyun
551*4882a593Smuzhiyun case EVFSCMPEQ:
552*4882a593Smuzhiyun cmp = 0;
553*4882a593Smuzhiyun goto cmp_vs;
554*4882a593Smuzhiyun
555*4882a593Smuzhiyun case EVFSCMPGT:
556*4882a593Smuzhiyun cmp = 1;
557*4882a593Smuzhiyun goto cmp_vs;
558*4882a593Smuzhiyun
559*4882a593Smuzhiyun case EVFSCMPLT:
560*4882a593Smuzhiyun cmp = -1;
561*4882a593Smuzhiyun goto cmp_vs;
562*4882a593Smuzhiyun
563*4882a593Smuzhiyun case EVFSCTUF:
564*4882a593Smuzhiyun case EVFSCTSF:
565*4882a593Smuzhiyun if (SB0_c == FP_CLS_NAN) {
566*4882a593Smuzhiyun vc.wp[0] = 0;
567*4882a593Smuzhiyun FP_SET_EXCEPTION(FP_EX_INVALID);
568*4882a593Smuzhiyun } else {
569*4882a593Smuzhiyun SB0_e += (func == EVFSCTSF ? 31 : 32);
570*4882a593Smuzhiyun FP_TO_INT_ROUND_S(vc.wp[0], SB0, 32,
571*4882a593Smuzhiyun (func == EVFSCTSF));
572*4882a593Smuzhiyun }
573*4882a593Smuzhiyun if (SB1_c == FP_CLS_NAN) {
574*4882a593Smuzhiyun vc.wp[1] = 0;
575*4882a593Smuzhiyun FP_SET_EXCEPTION(FP_EX_INVALID);
576*4882a593Smuzhiyun } else {
577*4882a593Smuzhiyun SB1_e += (func == EVFSCTSF ? 31 : 32);
578*4882a593Smuzhiyun FP_TO_INT_ROUND_S(vc.wp[1], SB1, 32,
579*4882a593Smuzhiyun (func == EVFSCTSF));
580*4882a593Smuzhiyun }
581*4882a593Smuzhiyun goto update_regs;
582*4882a593Smuzhiyun
583*4882a593Smuzhiyun case EVFSCTUI:
584*4882a593Smuzhiyun case EVFSCTSI:
585*4882a593Smuzhiyun if (SB0_c == FP_CLS_NAN) {
586*4882a593Smuzhiyun vc.wp[0] = 0;
587*4882a593Smuzhiyun FP_SET_EXCEPTION(FP_EX_INVALID);
588*4882a593Smuzhiyun } else {
589*4882a593Smuzhiyun FP_TO_INT_ROUND_S(vc.wp[0], SB0, 32,
590*4882a593Smuzhiyun ((func & 0x3) != 0));
591*4882a593Smuzhiyun }
592*4882a593Smuzhiyun if (SB1_c == FP_CLS_NAN) {
593*4882a593Smuzhiyun vc.wp[1] = 0;
594*4882a593Smuzhiyun FP_SET_EXCEPTION(FP_EX_INVALID);
595*4882a593Smuzhiyun } else {
596*4882a593Smuzhiyun FP_TO_INT_ROUND_S(vc.wp[1], SB1, 32,
597*4882a593Smuzhiyun ((func & 0x3) != 0));
598*4882a593Smuzhiyun }
599*4882a593Smuzhiyun goto update_regs;
600*4882a593Smuzhiyun
601*4882a593Smuzhiyun case EVFSCTUIZ:
602*4882a593Smuzhiyun case EVFSCTSIZ:
603*4882a593Smuzhiyun if (SB0_c == FP_CLS_NAN) {
604*4882a593Smuzhiyun vc.wp[0] = 0;
605*4882a593Smuzhiyun FP_SET_EXCEPTION(FP_EX_INVALID);
606*4882a593Smuzhiyun } else {
607*4882a593Smuzhiyun FP_TO_INT_S(vc.wp[0], SB0, 32,
608*4882a593Smuzhiyun ((func & 0x3) != 0));
609*4882a593Smuzhiyun }
610*4882a593Smuzhiyun if (SB1_c == FP_CLS_NAN) {
611*4882a593Smuzhiyun vc.wp[1] = 0;
612*4882a593Smuzhiyun FP_SET_EXCEPTION(FP_EX_INVALID);
613*4882a593Smuzhiyun } else {
614*4882a593Smuzhiyun FP_TO_INT_S(vc.wp[1], SB1, 32,
615*4882a593Smuzhiyun ((func & 0x3) != 0));
616*4882a593Smuzhiyun }
617*4882a593Smuzhiyun goto update_regs;
618*4882a593Smuzhiyun
619*4882a593Smuzhiyun default:
620*4882a593Smuzhiyun goto illegal;
621*4882a593Smuzhiyun }
622*4882a593Smuzhiyun break;
623*4882a593Smuzhiyun
624*4882a593Smuzhiyun pack_vs:
625*4882a593Smuzhiyun pr_debug("SR0: %ld %08lx %ld (%ld)\n",
626*4882a593Smuzhiyun SR0_s, SR0_f, SR0_e, SR0_c);
627*4882a593Smuzhiyun pr_debug("SR1: %ld %08lx %ld (%ld)\n",
628*4882a593Smuzhiyun SR1_s, SR1_f, SR1_e, SR1_c);
629*4882a593Smuzhiyun
630*4882a593Smuzhiyun FP_PACK_SP(vc.wp, SR0);
631*4882a593Smuzhiyun FP_PACK_SP(vc.wp + 1, SR1);
632*4882a593Smuzhiyun goto update_regs;
633*4882a593Smuzhiyun
634*4882a593Smuzhiyun cmp_vs:
635*4882a593Smuzhiyun {
636*4882a593Smuzhiyun int ch, cl;
637*4882a593Smuzhiyun
638*4882a593Smuzhiyun FP_CMP_S(IR0, SA0, SB0, 3);
639*4882a593Smuzhiyun FP_CMP_S(IR1, SA1, SB1, 3);
640*4882a593Smuzhiyun if (IR0 == 3 && (FP_ISSIGNAN_S(SA0) || FP_ISSIGNAN_S(SB0)))
641*4882a593Smuzhiyun FP_SET_EXCEPTION(FP_EX_INVALID);
642*4882a593Smuzhiyun if (IR1 == 3 && (FP_ISSIGNAN_S(SA1) || FP_ISSIGNAN_S(SB1)))
643*4882a593Smuzhiyun FP_SET_EXCEPTION(FP_EX_INVALID);
644*4882a593Smuzhiyun ch = (IR0 == cmp) ? 1 : 0;
645*4882a593Smuzhiyun cl = (IR1 == cmp) ? 1 : 0;
646*4882a593Smuzhiyun IR = (ch << 3) | (cl << 2) | ((ch | cl) << 1) |
647*4882a593Smuzhiyun ((ch & cl) << 0);
648*4882a593Smuzhiyun goto update_ccr;
649*4882a593Smuzhiyun }
650*4882a593Smuzhiyun }
651*4882a593Smuzhiyun default:
652*4882a593Smuzhiyun return -EINVAL;
653*4882a593Smuzhiyun }
654*4882a593Smuzhiyun
655*4882a593Smuzhiyun update_ccr:
656*4882a593Smuzhiyun regs->ccr &= ~(15 << ((7 - ((speinsn >> 23) & 0x7)) << 2));
657*4882a593Smuzhiyun regs->ccr |= (IR << ((7 - ((speinsn >> 23) & 0x7)) << 2));
658*4882a593Smuzhiyun
659*4882a593Smuzhiyun update_regs:
660*4882a593Smuzhiyun /*
661*4882a593Smuzhiyun * If the "invalid" exception sticky bit was set by the
662*4882a593Smuzhiyun * processor for non-finite input, but was not set before the
663*4882a593Smuzhiyun * instruction being emulated, clear it. Likewise for the
664*4882a593Smuzhiyun * "underflow" bit, which may have been set by the processor
665*4882a593Smuzhiyun * for exact underflow, not just inexact underflow when the
666*4882a593Smuzhiyun * flag should be set for IEEE 754 semantics. Other sticky
667*4882a593Smuzhiyun * exceptions will only be set by the processor when they are
668*4882a593Smuzhiyun * correct according to IEEE 754 semantics, and we must not
669*4882a593Smuzhiyun * clear sticky bits that were already set before the emulated
670*4882a593Smuzhiyun * instruction as they represent the user-visible sticky
671*4882a593Smuzhiyun * exception status. "inexact" traps to kernel are not
672*4882a593Smuzhiyun * required for IEEE semantics and are not enabled by default,
673*4882a593Smuzhiyun * so the "inexact" sticky bit may have been set by a previous
674*4882a593Smuzhiyun * instruction without the kernel being aware of it.
675*4882a593Smuzhiyun */
676*4882a593Smuzhiyun __FPU_FPSCR
677*4882a593Smuzhiyun &= ~(FP_EX_INVALID | FP_EX_UNDERFLOW) | current->thread.spefscr_last;
678*4882a593Smuzhiyun __FPU_FPSCR |= (FP_CUR_EXCEPTIONS & FP_EX_MASK);
679*4882a593Smuzhiyun mtspr(SPRN_SPEFSCR, __FPU_FPSCR);
680*4882a593Smuzhiyun current->thread.spefscr_last = __FPU_FPSCR;
681*4882a593Smuzhiyun
682*4882a593Smuzhiyun current->thread.evr[fc] = vc.wp[0];
683*4882a593Smuzhiyun regs->gpr[fc] = vc.wp[1];
684*4882a593Smuzhiyun
685*4882a593Smuzhiyun pr_debug("ccr = %08lx\n", regs->ccr);
686*4882a593Smuzhiyun pr_debug("cur exceptions = %08x spefscr = %08lx\n",
687*4882a593Smuzhiyun FP_CUR_EXCEPTIONS, __FPU_FPSCR);
688*4882a593Smuzhiyun pr_debug("vc: %08x %08x\n", vc.wp[0], vc.wp[1]);
689*4882a593Smuzhiyun pr_debug("va: %08x %08x\n", va.wp[0], va.wp[1]);
690*4882a593Smuzhiyun pr_debug("vb: %08x %08x\n", vb.wp[0], vb.wp[1]);
691*4882a593Smuzhiyun
692*4882a593Smuzhiyun if (current->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE) {
693*4882a593Smuzhiyun if ((FP_CUR_EXCEPTIONS & FP_EX_DIVZERO)
694*4882a593Smuzhiyun && (current->thread.fpexc_mode & PR_FP_EXC_DIV))
695*4882a593Smuzhiyun return 1;
696*4882a593Smuzhiyun if ((FP_CUR_EXCEPTIONS & FP_EX_OVERFLOW)
697*4882a593Smuzhiyun && (current->thread.fpexc_mode & PR_FP_EXC_OVF))
698*4882a593Smuzhiyun return 1;
699*4882a593Smuzhiyun if ((FP_CUR_EXCEPTIONS & FP_EX_UNDERFLOW)
700*4882a593Smuzhiyun && (current->thread.fpexc_mode & PR_FP_EXC_UND))
701*4882a593Smuzhiyun return 1;
702*4882a593Smuzhiyun if ((FP_CUR_EXCEPTIONS & FP_EX_INEXACT)
703*4882a593Smuzhiyun && (current->thread.fpexc_mode & PR_FP_EXC_RES))
704*4882a593Smuzhiyun return 1;
705*4882a593Smuzhiyun if ((FP_CUR_EXCEPTIONS & FP_EX_INVALID)
706*4882a593Smuzhiyun && (current->thread.fpexc_mode & PR_FP_EXC_INV))
707*4882a593Smuzhiyun return 1;
708*4882a593Smuzhiyun }
709*4882a593Smuzhiyun return 0;
710*4882a593Smuzhiyun
711*4882a593Smuzhiyun illegal:
712*4882a593Smuzhiyun if (have_e500_cpu_a005_erratum) {
713*4882a593Smuzhiyun /* according to e500 cpu a005 erratum, reissue efp inst */
714*4882a593Smuzhiyun regs->nip -= 4;
715*4882a593Smuzhiyun pr_debug("re-issue efp inst: %08lx\n", speinsn);
716*4882a593Smuzhiyun return 0;
717*4882a593Smuzhiyun }
718*4882a593Smuzhiyun
719*4882a593Smuzhiyun printk(KERN_ERR "\nOoops! IEEE-754 compliance handler encountered un-supported instruction.\ninst code: %08lx\n", speinsn);
720*4882a593Smuzhiyun return -ENOSYS;
721*4882a593Smuzhiyun }
722*4882a593Smuzhiyun
speround_handler(struct pt_regs * regs)723*4882a593Smuzhiyun int speround_handler(struct pt_regs *regs)
724*4882a593Smuzhiyun {
725*4882a593Smuzhiyun union dw_union fgpr;
726*4882a593Smuzhiyun int s_lo, s_hi;
727*4882a593Smuzhiyun int lo_inexact, hi_inexact;
728*4882a593Smuzhiyun int fp_result;
729*4882a593Smuzhiyun unsigned long speinsn, type, fb, fc, fptype, func;
730*4882a593Smuzhiyun
731*4882a593Smuzhiyun if (get_user(speinsn, (unsigned int __user *) regs->nip))
732*4882a593Smuzhiyun return -EFAULT;
733*4882a593Smuzhiyun if ((speinsn >> 26) != 4)
734*4882a593Smuzhiyun return -EINVAL; /* not an spe instruction */
735*4882a593Smuzhiyun
736*4882a593Smuzhiyun func = speinsn & 0x7ff;
737*4882a593Smuzhiyun type = insn_type(func);
738*4882a593Smuzhiyun if (type == XCR) return -ENOSYS;
739*4882a593Smuzhiyun
740*4882a593Smuzhiyun __FPU_FPSCR = mfspr(SPRN_SPEFSCR);
741*4882a593Smuzhiyun pr_debug("speinsn:%08lx spefscr:%08lx\n", speinsn, __FPU_FPSCR);
742*4882a593Smuzhiyun
743*4882a593Smuzhiyun fptype = (speinsn >> 5) & 0x7;
744*4882a593Smuzhiyun
745*4882a593Smuzhiyun /* No need to round if the result is exact */
746*4882a593Smuzhiyun lo_inexact = __FPU_FPSCR & (SPEFSCR_FG | SPEFSCR_FX);
747*4882a593Smuzhiyun hi_inexact = __FPU_FPSCR & (SPEFSCR_FGH | SPEFSCR_FXH);
748*4882a593Smuzhiyun if (!(lo_inexact || (hi_inexact && fptype == VCT)))
749*4882a593Smuzhiyun return 0;
750*4882a593Smuzhiyun
751*4882a593Smuzhiyun fc = (speinsn >> 21) & 0x1f;
752*4882a593Smuzhiyun s_lo = regs->gpr[fc] & SIGN_BIT_S;
753*4882a593Smuzhiyun s_hi = current->thread.evr[fc] & SIGN_BIT_S;
754*4882a593Smuzhiyun fgpr.wp[0] = current->thread.evr[fc];
755*4882a593Smuzhiyun fgpr.wp[1] = regs->gpr[fc];
756*4882a593Smuzhiyun
757*4882a593Smuzhiyun fb = (speinsn >> 11) & 0x1f;
758*4882a593Smuzhiyun switch (func) {
759*4882a593Smuzhiyun case EFSCTUIZ:
760*4882a593Smuzhiyun case EFSCTSIZ:
761*4882a593Smuzhiyun case EVFSCTUIZ:
762*4882a593Smuzhiyun case EVFSCTSIZ:
763*4882a593Smuzhiyun case EFDCTUIDZ:
764*4882a593Smuzhiyun case EFDCTSIDZ:
765*4882a593Smuzhiyun case EFDCTUIZ:
766*4882a593Smuzhiyun case EFDCTSIZ:
767*4882a593Smuzhiyun /*
768*4882a593Smuzhiyun * These instructions always round to zero,
769*4882a593Smuzhiyun * independent of the rounding mode.
770*4882a593Smuzhiyun */
771*4882a593Smuzhiyun return 0;
772*4882a593Smuzhiyun
773*4882a593Smuzhiyun case EFSCTUI:
774*4882a593Smuzhiyun case EFSCTUF:
775*4882a593Smuzhiyun case EVFSCTUI:
776*4882a593Smuzhiyun case EVFSCTUF:
777*4882a593Smuzhiyun case EFDCTUI:
778*4882a593Smuzhiyun case EFDCTUF:
779*4882a593Smuzhiyun fp_result = 0;
780*4882a593Smuzhiyun s_lo = 0;
781*4882a593Smuzhiyun s_hi = 0;
782*4882a593Smuzhiyun break;
783*4882a593Smuzhiyun
784*4882a593Smuzhiyun case EFSCTSI:
785*4882a593Smuzhiyun case EFSCTSF:
786*4882a593Smuzhiyun fp_result = 0;
787*4882a593Smuzhiyun /* Recover the sign of a zero result if possible. */
788*4882a593Smuzhiyun if (fgpr.wp[1] == 0)
789*4882a593Smuzhiyun s_lo = regs->gpr[fb] & SIGN_BIT_S;
790*4882a593Smuzhiyun break;
791*4882a593Smuzhiyun
792*4882a593Smuzhiyun case EVFSCTSI:
793*4882a593Smuzhiyun case EVFSCTSF:
794*4882a593Smuzhiyun fp_result = 0;
795*4882a593Smuzhiyun /* Recover the sign of a zero result if possible. */
796*4882a593Smuzhiyun if (fgpr.wp[1] == 0)
797*4882a593Smuzhiyun s_lo = regs->gpr[fb] & SIGN_BIT_S;
798*4882a593Smuzhiyun if (fgpr.wp[0] == 0)
799*4882a593Smuzhiyun s_hi = current->thread.evr[fb] & SIGN_BIT_S;
800*4882a593Smuzhiyun break;
801*4882a593Smuzhiyun
802*4882a593Smuzhiyun case EFDCTSI:
803*4882a593Smuzhiyun case EFDCTSF:
804*4882a593Smuzhiyun fp_result = 0;
805*4882a593Smuzhiyun s_hi = s_lo;
806*4882a593Smuzhiyun /* Recover the sign of a zero result if possible. */
807*4882a593Smuzhiyun if (fgpr.wp[1] == 0)
808*4882a593Smuzhiyun s_hi = current->thread.evr[fb] & SIGN_BIT_S;
809*4882a593Smuzhiyun break;
810*4882a593Smuzhiyun
811*4882a593Smuzhiyun default:
812*4882a593Smuzhiyun fp_result = 1;
813*4882a593Smuzhiyun break;
814*4882a593Smuzhiyun }
815*4882a593Smuzhiyun
816*4882a593Smuzhiyun pr_debug("round fgpr: %08x %08x\n", fgpr.wp[0], fgpr.wp[1]);
817*4882a593Smuzhiyun
818*4882a593Smuzhiyun switch (fptype) {
819*4882a593Smuzhiyun /* Since SPE instructions on E500 core can handle round to nearest
820*4882a593Smuzhiyun * and round toward zero with IEEE-754 complied, we just need
821*4882a593Smuzhiyun * to handle round toward +Inf and round toward -Inf by software.
822*4882a593Smuzhiyun */
823*4882a593Smuzhiyun case SPFP:
824*4882a593Smuzhiyun if ((FP_ROUNDMODE) == FP_RND_PINF) {
825*4882a593Smuzhiyun if (!s_lo) fgpr.wp[1]++; /* Z > 0, choose Z1 */
826*4882a593Smuzhiyun } else { /* round to -Inf */
827*4882a593Smuzhiyun if (s_lo) {
828*4882a593Smuzhiyun if (fp_result)
829*4882a593Smuzhiyun fgpr.wp[1]++; /* Z < 0, choose Z2 */
830*4882a593Smuzhiyun else
831*4882a593Smuzhiyun fgpr.wp[1]--; /* Z < 0, choose Z2 */
832*4882a593Smuzhiyun }
833*4882a593Smuzhiyun }
834*4882a593Smuzhiyun break;
835*4882a593Smuzhiyun
836*4882a593Smuzhiyun case DPFP:
837*4882a593Smuzhiyun if (FP_ROUNDMODE == FP_RND_PINF) {
838*4882a593Smuzhiyun if (!s_hi) {
839*4882a593Smuzhiyun if (fp_result)
840*4882a593Smuzhiyun fgpr.dp[0]++; /* Z > 0, choose Z1 */
841*4882a593Smuzhiyun else
842*4882a593Smuzhiyun fgpr.wp[1]++; /* Z > 0, choose Z1 */
843*4882a593Smuzhiyun }
844*4882a593Smuzhiyun } else { /* round to -Inf */
845*4882a593Smuzhiyun if (s_hi) {
846*4882a593Smuzhiyun if (fp_result)
847*4882a593Smuzhiyun fgpr.dp[0]++; /* Z < 0, choose Z2 */
848*4882a593Smuzhiyun else
849*4882a593Smuzhiyun fgpr.wp[1]--; /* Z < 0, choose Z2 */
850*4882a593Smuzhiyun }
851*4882a593Smuzhiyun }
852*4882a593Smuzhiyun break;
853*4882a593Smuzhiyun
854*4882a593Smuzhiyun case VCT:
855*4882a593Smuzhiyun if (FP_ROUNDMODE == FP_RND_PINF) {
856*4882a593Smuzhiyun if (lo_inexact && !s_lo)
857*4882a593Smuzhiyun fgpr.wp[1]++; /* Z_low > 0, choose Z1 */
858*4882a593Smuzhiyun if (hi_inexact && !s_hi)
859*4882a593Smuzhiyun fgpr.wp[0]++; /* Z_high word > 0, choose Z1 */
860*4882a593Smuzhiyun } else { /* round to -Inf */
861*4882a593Smuzhiyun if (lo_inexact && s_lo) {
862*4882a593Smuzhiyun if (fp_result)
863*4882a593Smuzhiyun fgpr.wp[1]++; /* Z_low < 0, choose Z2 */
864*4882a593Smuzhiyun else
865*4882a593Smuzhiyun fgpr.wp[1]--; /* Z_low < 0, choose Z2 */
866*4882a593Smuzhiyun }
867*4882a593Smuzhiyun if (hi_inexact && s_hi) {
868*4882a593Smuzhiyun if (fp_result)
869*4882a593Smuzhiyun fgpr.wp[0]++; /* Z_high < 0, choose Z2 */
870*4882a593Smuzhiyun else
871*4882a593Smuzhiyun fgpr.wp[0]--; /* Z_high < 0, choose Z2 */
872*4882a593Smuzhiyun }
873*4882a593Smuzhiyun }
874*4882a593Smuzhiyun break;
875*4882a593Smuzhiyun
876*4882a593Smuzhiyun default:
877*4882a593Smuzhiyun return -EINVAL;
878*4882a593Smuzhiyun }
879*4882a593Smuzhiyun
880*4882a593Smuzhiyun current->thread.evr[fc] = fgpr.wp[0];
881*4882a593Smuzhiyun regs->gpr[fc] = fgpr.wp[1];
882*4882a593Smuzhiyun
883*4882a593Smuzhiyun pr_debug(" to fgpr: %08x %08x\n", fgpr.wp[0], fgpr.wp[1]);
884*4882a593Smuzhiyun
885*4882a593Smuzhiyun if (current->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE)
886*4882a593Smuzhiyun return (current->thread.fpexc_mode & PR_FP_EXC_RES) ? 1 : 0;
887*4882a593Smuzhiyun return 0;
888*4882a593Smuzhiyun }
889*4882a593Smuzhiyun
spe_mathemu_init(void)890*4882a593Smuzhiyun int __init spe_mathemu_init(void)
891*4882a593Smuzhiyun {
892*4882a593Smuzhiyun u32 pvr, maj, min;
893*4882a593Smuzhiyun
894*4882a593Smuzhiyun pvr = mfspr(SPRN_PVR);
895*4882a593Smuzhiyun
896*4882a593Smuzhiyun if ((PVR_VER(pvr) == PVR_VER_E500V1) ||
897*4882a593Smuzhiyun (PVR_VER(pvr) == PVR_VER_E500V2)) {
898*4882a593Smuzhiyun maj = PVR_MAJ(pvr);
899*4882a593Smuzhiyun min = PVR_MIN(pvr);
900*4882a593Smuzhiyun
901*4882a593Smuzhiyun /*
902*4882a593Smuzhiyun * E500 revision below 1.1, 2.3, 3.1, 4.1, 5.1
903*4882a593Smuzhiyun * need cpu a005 errata workaround
904*4882a593Smuzhiyun */
905*4882a593Smuzhiyun switch (maj) {
906*4882a593Smuzhiyun case 1:
907*4882a593Smuzhiyun if (min < 1)
908*4882a593Smuzhiyun have_e500_cpu_a005_erratum = 1;
909*4882a593Smuzhiyun break;
910*4882a593Smuzhiyun case 2:
911*4882a593Smuzhiyun if (min < 3)
912*4882a593Smuzhiyun have_e500_cpu_a005_erratum = 1;
913*4882a593Smuzhiyun break;
914*4882a593Smuzhiyun case 3:
915*4882a593Smuzhiyun case 4:
916*4882a593Smuzhiyun case 5:
917*4882a593Smuzhiyun if (min < 1)
918*4882a593Smuzhiyun have_e500_cpu_a005_erratum = 1;
919*4882a593Smuzhiyun break;
920*4882a593Smuzhiyun default:
921*4882a593Smuzhiyun break;
922*4882a593Smuzhiyun }
923*4882a593Smuzhiyun }
924*4882a593Smuzhiyun
925*4882a593Smuzhiyun return 0;
926*4882a593Smuzhiyun }
927*4882a593Smuzhiyun
928*4882a593Smuzhiyun module_init(spe_mathemu_init);
929