xref: /OK3568_Linux_fs/kernel/arch/m68k/ifpsp060/src/pfpsp.S (revision 4882a59341e53eb6f0b4789bf948001014eff981)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
M68000 Hi-Performance Microprocessor Division
M68060 Software Package
Production Release P1.00 -- October 10, 1994

M68060 Software Package Copyright © 1993, 1994 Motorola Inc.  All rights reserved.

THE SOFTWARE is provided on an "AS IS" basis and without warranty.
To the maximum extent permitted by applicable law,
MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE
and any warranty against infringement with regard to the SOFTWARE
(INCLUDING ANY MODIFIED VERSIONS THEREOF) and any accompanying written materials.

To the maximum extent permitted by applicable law,
IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)
ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
Motorola assumes no responsibility for the maintenance and support of the SOFTWARE.

You are hereby granted a copyright license to use, modify, and distribute the SOFTWARE
so long as this entire notice is retained without alteration in any modified and/or
redistributed versions, and that such modified versions are clearly identified as such.
No licenses are granted by implication, estoppel or otherwise under any patents
or trademarks of Motorola, Inc.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# freal.s:
#	This file is appended to the top of the 060FPSP package
# and contains the entry points into the package. The user, in
# effect, branches to one of the branch table entries located
# after _060FPSP_TABLE.
#	Also, subroutine stubs exist in this file (_fpsp_done for
# example) that are referenced by the FPSP package itself in order
# to call a given routine. The stub routine actually performs the
# callout. The FPSP code does a "bsr" to the stub routine. This
# extra layer of hierarchy adds a slight performance penalty but
# it makes the FPSP code easier to read and more maintainable.
#
41*4882a593Smuzhiyun
#########################################################################
# Byte offsets of the operating-system "callout" displacement table.	#
# The OS places a table of longword displacements in the 0x80 bytes	#
# directly below _060FPSP_TABLE; each stub in this file fetches the	#
# longword at (_060FPSP_TABLE-0x80+_off_xxx) and transfers control to	#
# (_060FPSP_TABLE-0x80) plus that displacement.				#
#########################################################################

# exception handler callouts
set	_off_bsun,	0x00		# _real_bsun callout
set	_off_snan,	0x04		# _real_snan callout
set	_off_operr,	0x08		# _real_operr callout
set	_off_ovfl,	0x0c		# _real_ovfl callout
set	_off_unfl,	0x10		# _real_unfl callout
set	_off_dz,	0x14		# _real_dz callout
set	_off_inex,	0x18		# _real_inex callout
set	_off_fline,	0x1c		# _real_fline callout
set	_off_fpu_dis,	0x20		# _real_fpu_disabled callout
set	_off_trap,	0x24		# _real_trap callout
set	_off_trace,	0x28		# _real_trace callout
set	_off_access,	0x2c		# _real_access callout
set	_off_done,	0x30		# _fpsp_done callout

# memory access callouts
set	_off_imr,	0x40		# _imem_read callout
set	_off_dmr,	0x44		# _dmem_read callout
set	_off_dmw,	0x48		# _dmem_write callout
set	_off_irw,	0x4c		# _imem_read_word callout
set	_off_irl,	0x50		# _imem_read_long callout
set	_off_drb,	0x54		# _dmem_read_byte callout
set	_off_drw,	0x58		# _dmem_read_word callout
set	_off_drl,	0x5c		# _dmem_read_long callout
set	_off_dwb,	0x60		# _dmem_write_byte callout
set	_off_dww,	0x64		# _dmem_write_word callout
set	_off_dwl,	0x68		# _dmem_write_long callout
67*4882a593Smuzhiyun
_060FPSP_TABLE:

###############################################################

# Here's the table of ENTRY POINTS for those linking the package.
# Each entry occupies 8 bytes: a 6-byte "bra.l" followed by a
# 2-byte pad word, so entry point n sits at _060FPSP_TABLE + n*8.
# The trailing "space" pads the entry area out to a fixed 0x80
# bytes (9*8 + 56 = 0x80), which is why every callout stub below
# addresses the OS table at (_060FPSP_TABLE - 0x80).
	bra.l		_fpsp_snan
	short		0x0000
	bra.l		_fpsp_operr
	short		0x0000
	bra.l		_fpsp_ovfl
	short		0x0000
	bra.l		_fpsp_unfl
	short		0x0000
	bra.l		_fpsp_dz
	short		0x0000
	bra.l		_fpsp_inex
	short		0x0000
	bra.l		_fpsp_fline
	short		0x0000
	bra.l		_fpsp_unsupp
	short		0x0000
	bra.l		_fpsp_effadd
	short		0x0000

	space		56
93*4882a593Smuzhiyun
###############################################################
# _fpsp_done(): callout stub used by the FPSP package to exit
# back to the operating system when all work is done.
# Trampoline trick (shared by every stub in this file):
#   1. save d0 so it can serve as scratch,
#   2. d0 = OS routine displacement from the table below
#      _060FPSP_TABLE,
#   3. push absolute target = (_060FPSP_TABLE-0x80) + d0,
#   4. restore the caller's d0 from the stack,
#   5. rtd &0x4 pops the target into the PC AND discards the
#      stacked d0 copy, so the OS routine runs with all
#      registers intact and the original return address on top
#      of the stack.
	global		_fpsp_done
_fpsp_done:
	mov.l		%d0,-(%sp)		# save d0; use as scratch
	mov.l		(_060FPSP_TABLE-0x80+_off_done,%pc),%d0 # d0 = OS displacement
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0) # push target address
	mov.l		0x4(%sp),%d0		# restore caller's d0
	rtd		&0x4			# jump to target; pop saved d0 copy
102*4882a593Smuzhiyun
# Operating-system callout stubs for the FP exception handlers.
# Each stub saves d0, fetches the OS routine's longword
# displacement from the table below _060FPSP_TABLE, pushes the
# absolute target address, restores d0, then rtd's into the OS
# routine with every register preserved (the rtd displacement
# discards the stacked d0 copy).

# OS callout: enabled overflow exception
	global		_real_ovfl
_real_ovfl:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_ovfl,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

# OS callout: enabled underflow exception
	global		_real_unfl
_real_unfl:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_unfl,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

# OS callout: enabled inexact exception
	global		_real_inex
_real_inex:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_inex,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

# OS callout: bsun exception
	global		_real_bsun
_real_bsun:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_bsun,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

# OS callout: operand error exception
	global		_real_operr
_real_operr:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_operr,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

# OS callout: signalling NAN exception
	global		_real_snan
_real_snan:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_snan,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

# OS callout: divide-by-zero exception
	global		_real_dz
_real_dz:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_dz,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

# OS callout: F-line exception
	global		_real_fline
_real_fline:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_fline,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

# OS callout: FPU-disabled exception
	global		_real_fpu_disabled
_real_fpu_disabled:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_fpu_dis,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

# OS callout: trap exception
	global		_real_trap
_real_trap:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_trap,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

# OS callout: trace exception
	global		_real_trace
_real_trace:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_trace,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

# OS callout: access error
	global		_real_access
_real_access:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_access,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4
198*4882a593Smuzhiyun
#######################################

# Operating-system callout stubs for instruction/data memory
# access performed on behalf of the package. Same trampoline
# shape as the exception callouts above: preserve d0, compute
# the OS routine address from its table displacement, and rtd
# into it.

	global		_imem_read
_imem_read:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_imr,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_read
_dmem_read:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_dmr,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_write
_dmem_write:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_dmw,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_imem_read_word
_imem_read_word:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_irw,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_imem_read_long
_imem_read_long:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_irl,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_read_byte
_dmem_read_byte:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_drb,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_read_word
_dmem_read_word:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_drw,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_read_long
_dmem_read_long:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_drl,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_write_byte
_dmem_write_byte:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_dwb,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_write_word
_dmem_write_word:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_dww,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_write_long
_dmem_write_long:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_dwl,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4
288*4882a593Smuzhiyun
#
# This file contains a set of define statements for constants
# in order to promote readability within the corecode itself.
#

set LOCAL_SIZE,		192			# stack frame size(bytes)
set LV,			-LOCAL_SIZE		# stack offset

# offsets below are used as displacements off %a6 after the
# handler's "link.w %a6,&-LOCAL_SIZE"
set EXC_SR,		0x4			# stack status register
set EXC_PC,		0x6			# stack pc
set EXC_VOFF,		0xa			# stacked vector offset
set EXC_EA,		0xc			# stacked <ea>

set EXC_FP,		0x0			# frame pointer

set EXC_AREGS,		-68			# offset of all address regs
set EXC_DREGS,		-100			# offset of all data regs
set EXC_FPREGS,		-36			# offset of all fp regs

set EXC_A7,		EXC_AREGS+(7*4)		# offset of saved a7
set OLD_A7,		EXC_AREGS+(6*4)		# extra copy of saved a7
set EXC_A6,		EXC_AREGS+(6*4)		# offset of saved a6
set EXC_A5,		EXC_AREGS+(5*4)
set EXC_A4,		EXC_AREGS+(4*4)
set EXC_A3,		EXC_AREGS+(3*4)
set EXC_A2,		EXC_AREGS+(2*4)
set EXC_A1,		EXC_AREGS+(1*4)
set EXC_A0,		EXC_AREGS+(0*4)
set EXC_D7,		EXC_DREGS+(7*4)
set EXC_D6,		EXC_DREGS+(6*4)
set EXC_D5,		EXC_DREGS+(5*4)
set EXC_D4,		EXC_DREGS+(4*4)
set EXC_D3,		EXC_DREGS+(3*4)
set EXC_D2,		EXC_DREGS+(2*4)
set EXC_D1,		EXC_DREGS+(1*4)
set EXC_D0,		EXC_DREGS+(0*4)

set EXC_FP0,		EXC_FPREGS+(0*12)	# offset of saved fp0
set EXC_FP1,		EXC_FPREGS+(1*12)	# offset of saved fp1
set EXC_FP2,		EXC_FPREGS+(2*12)	# offset of saved fp2 (not used)

set FP_SCR1,		LV+80			# fp scratch 1
set FP_SCR1_EX,		FP_SCR1+0
set FP_SCR1_SGN,	FP_SCR1+2
set FP_SCR1_HI,		FP_SCR1+4
set FP_SCR1_LO,		FP_SCR1+8

set FP_SCR0,		LV+68			# fp scratch 0
set FP_SCR0_EX,		FP_SCR0+0
set FP_SCR0_SGN,	FP_SCR0+2
set FP_SCR0_HI,		FP_SCR0+4
set FP_SCR0_LO,		FP_SCR0+8

set FP_DST,		LV+56			# fp destination operand
set FP_DST_EX,		FP_DST+0
set FP_DST_SGN,		FP_DST+2
set FP_DST_HI,		FP_DST+4
set FP_DST_LO,		FP_DST+8

set FP_SRC,		LV+44			# fp source operand
set FP_SRC_EX,		FP_SRC+0
set FP_SRC_SGN,		FP_SRC+2
set FP_SRC_HI,		FP_SRC+4
set FP_SRC_LO,		FP_SRC+8

set USER_FPIAR,		LV+40			# FP instr address register

set USER_FPSR,		LV+36			# FP status register
set FPSR_CC,		USER_FPSR+0		# FPSR condition codes
set FPSR_QBYTE,		USER_FPSR+1		# FPSR quotient byte
set FPSR_EXCEPT,	USER_FPSR+2		# FPSR exception status byte
set FPSR_AEXCEPT,	USER_FPSR+3		# FPSR accrued exception byte

set USER_FPCR,		LV+32			# FP control register
set FPCR_ENABLE,	USER_FPCR+2		# FPCR exception enable
set FPCR_MODE,		USER_FPCR+3		# FPCR rounding mode control

set L_SCR3,		LV+28			# integer scratch 3
set L_SCR2,		LV+24			# integer scratch 2
set L_SCR1,		LV+20			# integer scratch 1

set STORE_FLG,		LV+19			# flag: operand store (ie. not fcmp/ftst)

set EXC_TEMP2,		LV+24			# temporary space (overlays L_SCR2)
set EXC_TEMP,		LV+16			# temporary space

set DTAG,		LV+15			# destination operand type
set STAG,		LV+14			# source operand type

set SPCOND_FLG,		LV+10			# flag: special case (see below)

set EXC_CC,		LV+8			# saved condition codes
set EXC_EXTWPTR,	LV+4			# saved current PC (active)
set EXC_EXTWORD,	LV+2			# saved extension word
set EXC_CMDREG,		LV+2			# saved extension word (alias of EXC_EXTWORD)
set EXC_OPWORD,		LV+0			# saved operation word
385*4882a593Smuzhiyun
################################

# Helpful macros

set FTEMP,		0			# offsets within an
set FTEMP_EX,		0			# extended precision
set FTEMP_SGN,		2			# value saved in memory.
set FTEMP_HI,		4
set FTEMP_LO,		8
set FTEMP_GRS,		12

set LOCAL,		0			# offsets within an
set LOCAL_EX,		0			# extended precision
set LOCAL_SGN,		2			# value saved in memory.
set LOCAL_HI,		4
set LOCAL_LO,		8
set LOCAL_GRS,		12

set DST,		0			# offsets within an
set DST_EX,		0			# extended precision
set DST_HI,		4			# value saved in memory.
set DST_LO,		8

set SRC,		0			# offsets within an
set SRC_EX,		0			# extended precision
set SRC_HI,		4			# value saved in memory.
set SRC_LO,		8

set SGL_LO,		0x3f81			# min sgl prec exponent
set SGL_HI,		0x407e			# max sgl prec exponent
set DBL_LO,		0x3c01			# min dbl prec exponent
set DBL_HI,		0x43fe			# max dbl prec exponent
set EXT_LO,		0x0			# min ext prec exponent
set EXT_HI,		0x7ffe			# max ext prec exponent

set EXT_BIAS,		0x3fff			# extended precision bias
set SGL_BIAS,		0x007f			# single precision bias
set DBL_BIAS,		0x03ff			# double precision bias

set NORM,		0x00			# operand type for STAG/DTAG
set ZERO,		0x01			# operand type for STAG/DTAG
set INF,		0x02			# operand type for STAG/DTAG
set QNAN,		0x03			# operand type for STAG/DTAG
set DENORM,		0x04			# operand type for STAG/DTAG
set SNAN,		0x05			# operand type for STAG/DTAG
set UNNORM,		0x06			# operand type for STAG/DTAG
432*4882a593Smuzhiyun
##################
# FPSR/FPCR bits #
##################
set neg_bit,		0x3			# negative result
set z_bit,		0x2			# zero result
set inf_bit,		0x1			# infinite result
set nan_bit,		0x0			# NAN result

set q_sn_bit,		0x7			# sign bit of quotient byte

set bsun_bit,		7			# branch on unordered
set snan_bit,		6			# signalling NAN
set operr_bit,		5			# operand error
set ovfl_bit,		4			# overflow
set unfl_bit,		3			# underflow
set dz_bit,		2			# divide by zero
set inex2_bit,		1			# inexact result 2
set inex1_bit,		0			# inexact result 1

set aiop_bit,		7			# accrued invalid operation bit
set aovfl_bit,		6			# accrued overflow bit
set aunfl_bit,		5			# accrued underflow bit
set adz_bit,		4			# accrued dz bit
set ainex_bit,		3			# accrued inexact bit

#############################
# FPSR individual bit masks #
#############################
set neg_mask,		0x08000000		# negative bit mask (lw)
set inf_mask,		0x02000000		# infinity bit mask (lw)
set z_mask,		0x04000000		# zero bit mask (lw)
set nan_mask,		0x01000000		# nan bit mask (lw)

set neg_bmask,		0x08			# negative bit mask (byte)
set inf_bmask,		0x02			# infinity bit mask (byte)
set z_bmask,		0x04			# zero bit mask (byte)
set nan_bmask,		0x01			# nan bit mask (byte)

set bsun_mask,		0x00008000		# bsun exception mask
set snan_mask,		0x00004000		# snan exception mask
set operr_mask,		0x00002000		# operr exception mask
set ovfl_mask,		0x00001000		# overflow exception mask
set unfl_mask,		0x00000800		# underflow exception mask
set dz_mask,		0x00000400		# dz exception mask
set inex2_mask,		0x00000200		# inex2 exception mask
set inex1_mask,		0x00000100		# inex1 exception mask

set aiop_mask,		0x00000080		# accrued invalid operation
set aovfl_mask,		0x00000040		# accrued overflow
set aunfl_mask,		0x00000020		# accrued underflow
set adz_mask,		0x00000010		# accrued divide by zero
set ainex_mask,		0x00000008		# accrued inexact

######################################
# FPSR combinations used in the FPSP #
######################################
set dzinf_mask,		inf_mask+dz_mask+adz_mask
set opnan_mask,		nan_mask+operr_mask+aiop_mask
set nzi_mask,		0x01ffffff		# clears N, Z, and I
set unfinx_mask,	unfl_mask+inex2_mask+aunfl_mask+ainex_mask
set unf2inx_mask,	unfl_mask+inex2_mask+ainex_mask
set ovfinx_mask,	ovfl_mask+inex2_mask+aovfl_mask+ainex_mask
set inx1a_mask,		inex1_mask+ainex_mask
set inx2a_mask,		inex2_mask+ainex_mask
set snaniop_mask,	nan_mask+snan_mask+aiop_mask
set snaniop2_mask,	snan_mask+aiop_mask
set naniop_mask,	nan_mask+aiop_mask
set neginf_mask,	neg_mask+inf_mask
set infaiop_mask,	inf_mask+aiop_mask
set negz_mask,		neg_mask+z_mask
set opaop_mask,		operr_mask+aiop_mask
set unfl_inx_mask,	unfl_mask+aunfl_mask+ainex_mask
set ovfl_inx_mask,	ovfl_mask+aovfl_mask+ainex_mask
506*4882a593Smuzhiyun
#########
# misc. #
#########
set rnd_stky_bit,	29			# stky bit pos in longword

set sign_bit,		0x7			# sign bit
set signan_bit,		0x6			# signalling nan bit

set sgl_thresh,		0x3f81			# minimum sgl exponent
set dbl_thresh,		0x3c01			# minimum dbl exponent

set x_mode,		0x0			# extended precision
set s_mode,		0x4			# single precision
set d_mode,		0x8			# double precision

set rn_mode,		0x0			# round-to-nearest
set rz_mode,		0x1			# round-to-zero
set rm_mode,		0x2			# round-to-minus-infinity
set rp_mode,		0x3			# round-to-plus-infinity

set mantissalen,	64			# length of mantissa in bits

set BYTE,		1			# len(byte) == 1 byte
set WORD,		2			# len(word) == 2 bytes
set LONG,		4			# len(longword) == 4 bytes

set BSUN_VEC,		0xc0			# bsun    vector offset
set INEX_VEC,		0xc4			# inexact vector offset
set DZ_VEC,		0xc8			# dz      vector offset
set UNFL_VEC,		0xcc			# unfl    vector offset
set OPERR_VEC,		0xd0			# operr   vector offset
set OVFL_VEC,		0xd4			# ovfl    vector offset
set SNAN_VEC,		0xd8			# snan    vector offset

###########################
# SPecial CONDition FLaGs #
###########################
set ftrapcc_flg,	0x01			# flag bit: ftrapcc exception
set fbsun_flg,		0x02			# flag bit: bsun exception
set mia7_flg,		0x04			# flag bit: (a7)+ <ea>
set mda7_flg,		0x08			# flag bit: -(a7) <ea>
set fmovm_flg,		0x40			# flag bit: fmovm instruction
set immed_flg,		0x80			# flag bit: &<data> <ea>

set ftrapcc_bit,	0x0			# bit positions of the flags above
set fbsun_bit,		0x1
set mia7_bit,		0x2
set mda7_bit,		0x3
set immed_bit,		0x7
556*4882a593Smuzhiyun
##################################
# TRANSCENDENTAL "LAST-OP" FLAGS #
##################################
set FMUL_OP,		0x0			# fmul instr performed last
set FDIV_OP,		0x1			# fdiv performed last
set FADD_OP,		0x2			# fadd performed last
set FMOV_OP,		0x3			# fmov performed last

#############
# CONSTANTS #
#############
# extended-precision images; T1/T2 split 16381*log(2) into
# lead and trail parts for accurate argument reduction
T1:	long		0x40C62D38,0xD3D64634	# 16381 LOG2 LEAD
T2:	long		0x3D6F90AE,0xB1E75CC7	# 16381 LOG2 TRAIL

PI:	long		0x40000000,0xC90FDAA2,0x2168C235,0x00000000
PIBY2:	long		0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000

TWOBYPI:
	long		0x3FE45F30,0x6DC9C883	# 2/pi (double precision)
576*4882a593Smuzhiyun
577*4882a593Smuzhiyun#########################################################################
578*4882a593Smuzhiyun# XDEF ****************************************************************	#
579*4882a593Smuzhiyun#	_fpsp_ovfl(): 060FPSP entry point for FP Overflow exception.	#
580*4882a593Smuzhiyun#									#
581*4882a593Smuzhiyun#	This handler should be the first code executed upon taking the	#
582*4882a593Smuzhiyun#	FP Overflow exception in an operating system.			#
583*4882a593Smuzhiyun#									#
584*4882a593Smuzhiyun# XREF ****************************************************************	#
585*4882a593Smuzhiyun#	_imem_read_long() - read instruction longword			#
586*4882a593Smuzhiyun#	fix_skewed_ops() - adjust src operand in fsave frame		#
587*4882a593Smuzhiyun#	set_tag_x() - determine optype of src/dst operands		#
588*4882a593Smuzhiyun#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
589*4882a593Smuzhiyun#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
590*4882a593Smuzhiyun#	load_fpn2() - load dst operand from FP regfile			#
591*4882a593Smuzhiyun#	fout() - emulate an opclass 3 instruction			#
592*4882a593Smuzhiyun#	tbl_unsupp - add of table of emulation routines for opclass 0,2	#
593*4882a593Smuzhiyun#	_fpsp_done() - "callout" for 060FPSP exit (all work done!)	#
594*4882a593Smuzhiyun#	_real_ovfl() - "callout" for Overflow exception enabled code	#
595*4882a593Smuzhiyun#	_real_inex() - "callout" for Inexact exception enabled code	#
596*4882a593Smuzhiyun#	_real_trace() - "callout" for Trace exception code		#
597*4882a593Smuzhiyun#									#
598*4882a593Smuzhiyun# INPUT ***************************************************************	#
599*4882a593Smuzhiyun#	- The system stack contains the FP Ovfl exception stack frame	#
600*4882a593Smuzhiyun#	- The fsave frame contains the source operand			#
601*4882a593Smuzhiyun#									#
602*4882a593Smuzhiyun# OUTPUT **************************************************************	#
603*4882a593Smuzhiyun#	Overflow Exception enabled:					#
604*4882a593Smuzhiyun#	- The system stack is unchanged					#
605*4882a593Smuzhiyun#	- The fsave frame contains the adjusted src op for opclass 0,2	#
606*4882a593Smuzhiyun#	Overflow Exception disabled:					#
607*4882a593Smuzhiyun#	- The system stack is unchanged					#
608*4882a593Smuzhiyun#	- The "exception present" flag in the fsave frame is cleared	#
609*4882a593Smuzhiyun#									#
610*4882a593Smuzhiyun# ALGORITHM ***********************************************************	#
611*4882a593Smuzhiyun#	On the 060, if an FP overflow is present as the result of any	#
612*4882a593Smuzhiyun# instruction, the 060 will take an overflow exception whether the	#
613*4882a593Smuzhiyun# exception is enabled or disabled in the FPCR. For the disabled case,	#
614*4882a593Smuzhiyun# This handler emulates the instruction to determine what the correct	#
615*4882a593Smuzhiyun# default result should be for the operation. This default result is	#
616*4882a593Smuzhiyun# then stored in either the FP regfile, data regfile, or memory.	#
617*4882a593Smuzhiyun# Finally, the handler exits through the "callout" _fpsp_done()		#
618*4882a593Smuzhiyun# denoting that no exceptional conditions exist within the machine.	#
619*4882a593Smuzhiyun#	If the exception is enabled, then this handler must create the	#
620*4882a593Smuzhiyun# exceptional operand and plave it in the fsave state frame, and store	#
621*4882a593Smuzhiyun# the default result (only if the instruction is opclass 3). For	#
622*4882a593Smuzhiyun# exceptions enabled, this handler must exit through the "callout"	#
623*4882a593Smuzhiyun# _real_ovfl() so that the operating system enabled overflow handler	#
624*4882a593Smuzhiyun# can handle this case.							#
625*4882a593Smuzhiyun#	Two other conditions exist. First, if overflow was disabled	#
626*4882a593Smuzhiyun# but the inexact exception was enabled, this handler must exit		#
627*4882a593Smuzhiyun# through the "callout" _real_inex() regardless of whether the result	#
628*4882a593Smuzhiyun# was inexact.								#
629*4882a593Smuzhiyun#	Also, in the case of an opclass three instruction where		#
630*4882a593Smuzhiyun# overflow was disabled and the trace exception was enabled, this	#
631*4882a593Smuzhiyun# handler must exit through the "callout" _real_trace().		#
632*4882a593Smuzhiyun#									#
633*4882a593Smuzhiyun#########################################################################
634*4882a593Smuzhiyun
635*4882a593Smuzhiyun	global		_fpsp_ovfl
636*4882a593Smuzhiyun_fpsp_ovfl:
637*4882a593Smuzhiyun
638*4882a593Smuzhiyun#$#	sub.l		&24,%sp			# make room for src/dst
639*4882a593Smuzhiyun
640*4882a593Smuzhiyun	link.w		%a6,&-LOCAL_SIZE	# init stack frame
641*4882a593Smuzhiyun
642*4882a593Smuzhiyun	fsave		FP_SRC(%a6)		# grab the "busy" frame
643*4882a593Smuzhiyun
644*4882a593Smuzhiyun	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
645*4882a593Smuzhiyun	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
646*4882a593Smuzhiyun	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
647*4882a593Smuzhiyun
648*4882a593Smuzhiyun# the FPIAR holds the "current PC" of the faulting instruction
649*4882a593Smuzhiyun	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
650*4882a593Smuzhiyun	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
651*4882a593Smuzhiyun	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
652*4882a593Smuzhiyun	bsr.l		_imem_read_long		# fetch the instruction words
653*4882a593Smuzhiyun	mov.l		%d0,EXC_OPWORD(%a6)
654*4882a593Smuzhiyun
655*4882a593Smuzhiyun##############################################################################
656*4882a593Smuzhiyun
657*4882a593Smuzhiyun	btst		&0x5,EXC_CMDREG(%a6)	# is instr an fmove out?
658*4882a593Smuzhiyun	bne.w		fovfl_out
659*4882a593Smuzhiyun
660*4882a593Smuzhiyun
661*4882a593Smuzhiyun	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
662*4882a593Smuzhiyun	bsr.l		fix_skewed_ops		# fix src op
663*4882a593Smuzhiyun
664*4882a593Smuzhiyun# since, I believe, only NORMs and DENORMs can come through here,
665*4882a593Smuzhiyun# maybe we can avoid the subroutine call.
666*4882a593Smuzhiyun	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
667*4882a593Smuzhiyun	bsr.l		set_tag_x		# tag the operand type
668*4882a593Smuzhiyun	mov.b		%d0,STAG(%a6)		# maybe NORM,DENORM
669*4882a593Smuzhiyun
670*4882a593Smuzhiyun# bit five of the fp extension word separates the monadic and dyadic operations
671*4882a593Smuzhiyun# that can pass through fpsp_ovfl(). remember that fcmp, ftst, and fsincos
672*4882a593Smuzhiyun# will never take this exception.
673*4882a593Smuzhiyun	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
674*4882a593Smuzhiyun	beq.b		fovfl_extract		# monadic
675*4882a593Smuzhiyun
676*4882a593Smuzhiyun	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
677*4882a593Smuzhiyun	bsr.l		load_fpn2		# load dst into FP_DST
678*4882a593Smuzhiyun
679*4882a593Smuzhiyun	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
680*4882a593Smuzhiyun	bsr.l		set_tag_x		# tag the operand type
681*4882a593Smuzhiyun	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
682*4882a593Smuzhiyun	bne.b		fovfl_op2_done		# no
683*4882a593Smuzhiyun	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
684*4882a593Smuzhiyunfovfl_op2_done:
685*4882a593Smuzhiyun	mov.b		%d0,DTAG(%a6)		# save dst optype tag
686*4882a593Smuzhiyun
687*4882a593Smuzhiyunfovfl_extract:
688*4882a593Smuzhiyun
689*4882a593Smuzhiyun#$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
690*4882a593Smuzhiyun#$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
691*4882a593Smuzhiyun#$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
692*4882a593Smuzhiyun#$#	mov.l		FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
693*4882a593Smuzhiyun#$#	mov.l		FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
694*4882a593Smuzhiyun#$#	mov.l		FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
695*4882a593Smuzhiyun
696*4882a593Smuzhiyun	clr.l		%d0
697*4882a593Smuzhiyun	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
698*4882a593Smuzhiyun
699*4882a593Smuzhiyun	mov.b		1+EXC_CMDREG(%a6),%d1
700*4882a593Smuzhiyun	andi.w		&0x007f,%d1		# extract extension
701*4882a593Smuzhiyun
702*4882a593Smuzhiyun	andi.l		&0x00ff01ff,USER_FPSR(%a6) # zero all but accured field
703*4882a593Smuzhiyun
704*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# zero current control regs
705*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr
706*4882a593Smuzhiyun
707*4882a593Smuzhiyun	lea		FP_SRC(%a6),%a0
708*4882a593Smuzhiyun	lea		FP_DST(%a6),%a1
709*4882a593Smuzhiyun
710*4882a593Smuzhiyun# maybe we can make these entry points ONLY the OVFL entry points of each routine.
711*4882a593Smuzhiyun	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
712*4882a593Smuzhiyun	jsr		(tbl_unsupp.l,%pc,%d1.l*1)
713*4882a593Smuzhiyun
714*4882a593Smuzhiyun# the operation has been emulated. the result is in fp0.
715*4882a593Smuzhiyun# the EXOP, if an exception occurred, is in fp1.
716*4882a593Smuzhiyun# we must save the default result regardless of whether
717*4882a593Smuzhiyun# traps are enabled or disabled.
718*4882a593Smuzhiyun	bfextu		EXC_CMDREG(%a6){&6:&3},%d0
719*4882a593Smuzhiyun	bsr.l		store_fpreg
720*4882a593Smuzhiyun
721*4882a593Smuzhiyun# the exceptional possibilities we have left ourselves with are ONLY overflow
722*4882a593Smuzhiyun# and inexact. and, the inexact is such that overflow occurred and was disabled
723*4882a593Smuzhiyun# but inexact was enabled.
724*4882a593Smuzhiyun	btst		&ovfl_bit,FPCR_ENABLE(%a6)
725*4882a593Smuzhiyun	bne.b		fovfl_ovfl_on
726*4882a593Smuzhiyun
727*4882a593Smuzhiyun	btst		&inex2_bit,FPCR_ENABLE(%a6)
728*4882a593Smuzhiyun	bne.b		fovfl_inex_on
729*4882a593Smuzhiyun
730*4882a593Smuzhiyun	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
731*4882a593Smuzhiyun	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
732*4882a593Smuzhiyun	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
733*4882a593Smuzhiyun
734*4882a593Smuzhiyun	unlk		%a6
735*4882a593Smuzhiyun#$#	add.l		&24,%sp
736*4882a593Smuzhiyun	bra.l		_fpsp_done
737*4882a593Smuzhiyun
738*4882a593Smuzhiyun# overflow is enabled AND overflow, of course, occurred. so, we have the EXOP
739*4882a593Smuzhiyun# in fp1. now, simply jump to _real_ovfl()!
740*4882a593Smuzhiyunfovfl_ovfl_on:
741*4882a593Smuzhiyun	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP (fp1) to stack
742*4882a593Smuzhiyun
743*4882a593Smuzhiyun	mov.w		&0xe005,2+FP_SRC(%a6)	# save exc status
744*4882a593Smuzhiyun
745*4882a593Smuzhiyun	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
746*4882a593Smuzhiyun	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
747*4882a593Smuzhiyun	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
748*4882a593Smuzhiyun
749*4882a593Smuzhiyun	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!
750*4882a593Smuzhiyun
751*4882a593Smuzhiyun	unlk		%a6
752*4882a593Smuzhiyun
753*4882a593Smuzhiyun	bra.l		_real_ovfl
754*4882a593Smuzhiyun
755*4882a593Smuzhiyun# overflow occurred but is disabled. meanwhile, inexact is enabled. Therefore,
756*4882a593Smuzhiyun# we must jump to real_inex().
757*4882a593Smuzhiyunfovfl_inex_on:
758*4882a593Smuzhiyun
759*4882a593Smuzhiyun	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP (fp1) to stack
760*4882a593Smuzhiyun
761*4882a593Smuzhiyun	mov.b		&0xc4,1+EXC_VOFF(%a6)	# vector offset = 0xc4
762*4882a593Smuzhiyun	mov.w		&0xe001,2+FP_SRC(%a6)	# save exc status
763*4882a593Smuzhiyun
764*4882a593Smuzhiyun	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
765*4882a593Smuzhiyun	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
766*4882a593Smuzhiyun	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
767*4882a593Smuzhiyun
768*4882a593Smuzhiyun	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!
769*4882a593Smuzhiyun
770*4882a593Smuzhiyun	unlk		%a6
771*4882a593Smuzhiyun
772*4882a593Smuzhiyun	bra.l		_real_inex
773*4882a593Smuzhiyun
774*4882a593Smuzhiyun########################################################################
775*4882a593Smuzhiyunfovfl_out:
776*4882a593Smuzhiyun
777*4882a593Smuzhiyun
778*4882a593Smuzhiyun#$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
779*4882a593Smuzhiyun#$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
780*4882a593Smuzhiyun#$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
781*4882a593Smuzhiyun
782*4882a593Smuzhiyun# the src operand is definitely a NORM(!), so tag it as such
783*4882a593Smuzhiyun	mov.b		&NORM,STAG(%a6)		# set src optype tag
784*4882a593Smuzhiyun
785*4882a593Smuzhiyun	clr.l		%d0
786*4882a593Smuzhiyun	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
787*4882a593Smuzhiyun
788*4882a593Smuzhiyun	and.l		&0xffff00ff,USER_FPSR(%a6) # zero all but accured field
789*4882a593Smuzhiyun
790*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# zero current control regs
791*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr
792*4882a593Smuzhiyun
793*4882a593Smuzhiyun	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
794*4882a593Smuzhiyun
795*4882a593Smuzhiyun	bsr.l		fout
796*4882a593Smuzhiyun
797*4882a593Smuzhiyun	btst		&ovfl_bit,FPCR_ENABLE(%a6)
798*4882a593Smuzhiyun	bne.w		fovfl_ovfl_on
799*4882a593Smuzhiyun
800*4882a593Smuzhiyun	btst		&inex2_bit,FPCR_ENABLE(%a6)
801*4882a593Smuzhiyun	bne.w		fovfl_inex_on
802*4882a593Smuzhiyun
803*4882a593Smuzhiyun	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
804*4882a593Smuzhiyun	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
805*4882a593Smuzhiyun	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
806*4882a593Smuzhiyun
807*4882a593Smuzhiyun	unlk		%a6
808*4882a593Smuzhiyun#$#	add.l		&24,%sp
809*4882a593Smuzhiyun
810*4882a593Smuzhiyun	btst		&0x7,(%sp)		# is trace on?
811*4882a593Smuzhiyun	beq.l		_fpsp_done		# no
812*4882a593Smuzhiyun
813*4882a593Smuzhiyun	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR
814*4882a593Smuzhiyun	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
815*4882a593Smuzhiyun	bra.l		_real_trace
816*4882a593Smuzhiyun
817*4882a593Smuzhiyun#########################################################################
818*4882a593Smuzhiyun# XDEF ****************************************************************	#
819*4882a593Smuzhiyun#	_fpsp_unfl(): 060FPSP entry point for FP Underflow exception.	#
820*4882a593Smuzhiyun#									#
821*4882a593Smuzhiyun#	This handler should be the first code executed upon taking the	#
822*4882a593Smuzhiyun#	FP Underflow exception in an operating system.			#
823*4882a593Smuzhiyun#									#
824*4882a593Smuzhiyun# XREF ****************************************************************	#
825*4882a593Smuzhiyun#	_imem_read_long() - read instruction longword			#
826*4882a593Smuzhiyun#	fix_skewed_ops() - adjust src operand in fsave frame		#
827*4882a593Smuzhiyun#	set_tag_x() - determine optype of src/dst operands		#
828*4882a593Smuzhiyun#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
829*4882a593Smuzhiyun#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
830*4882a593Smuzhiyun#	load_fpn2() - load dst operand from FP regfile			#
831*4882a593Smuzhiyun#	fout() - emulate an opclass 3 instruction			#
832*4882a593Smuzhiyun#	tbl_unsupp - add of table of emulation routines for opclass 0,2	#
833*4882a593Smuzhiyun#	_fpsp_done() - "callout" for 060FPSP exit (all work done!)	#
834*4882a593Smuzhiyun#	_real_ovfl() - "callout" for Overflow exception enabled code	#
835*4882a593Smuzhiyun#	_real_inex() - "callout" for Inexact exception enabled code	#
836*4882a593Smuzhiyun#	_real_trace() - "callout" for Trace exception code		#
837*4882a593Smuzhiyun#									#
838*4882a593Smuzhiyun# INPUT ***************************************************************	#
839*4882a593Smuzhiyun#	- The system stack contains the FP Unfl exception stack frame	#
840*4882a593Smuzhiyun#	- The fsave frame contains the source operand			#
841*4882a593Smuzhiyun#									#
842*4882a593Smuzhiyun# OUTPUT **************************************************************	#
843*4882a593Smuzhiyun#	Underflow Exception enabled:					#
844*4882a593Smuzhiyun#	- The system stack is unchanged					#
845*4882a593Smuzhiyun#	- The fsave frame contains the adjusted src op for opclass 0,2	#
846*4882a593Smuzhiyun#	Underflow Exception disabled:					#
847*4882a593Smuzhiyun#	- The system stack is unchanged					#
848*4882a593Smuzhiyun#	- The "exception present" flag in the fsave frame is cleared	#
849*4882a593Smuzhiyun#									#
850*4882a593Smuzhiyun# ALGORITHM ***********************************************************	#
851*4882a593Smuzhiyun#	On the 060, if an FP underflow is present as the result of any	#
852*4882a593Smuzhiyun# instruction, the 060 will take an underflow exception whether the	#
853*4882a593Smuzhiyun# exception is enabled or disabled in the FPCR. For the disabled case,	#
854*4882a593Smuzhiyun# This handler emulates the instruction to determine what the correct	#
855*4882a593Smuzhiyun# default result should be for the operation. This default result is	#
856*4882a593Smuzhiyun# then stored in either the FP regfile, data regfile, or memory.	#
857*4882a593Smuzhiyun# Finally, the handler exits through the "callout" _fpsp_done()		#
858*4882a593Smuzhiyun# denoting that no exceptional conditions exist within the machine.	#
859*4882a593Smuzhiyun#	If the exception is enabled, then this handler must create the	#
860*4882a593Smuzhiyun# exceptional operand and plave it in the fsave state frame, and store	#
861*4882a593Smuzhiyun# the default result (only if the instruction is opclass 3). For	#
862*4882a593Smuzhiyun# exceptions enabled, this handler must exit through the "callout"	#
863*4882a593Smuzhiyun# _real_unfl() so that the operating system enabled overflow handler	#
864*4882a593Smuzhiyun# can handle this case.							#
865*4882a593Smuzhiyun#	Two other conditions exist. First, if underflow was disabled	#
866*4882a593Smuzhiyun# but the inexact exception was enabled and the result was inexact,	#
867*4882a593Smuzhiyun# this handler must exit through the "callout" _real_inex().		#
868*4882a593Smuzhiyun# was inexact.								#
869*4882a593Smuzhiyun#	Also, in the case of an opclass three instruction where		#
870*4882a593Smuzhiyun# underflow was disabled and the trace exception was enabled, this	#
871*4882a593Smuzhiyun# handler must exit through the "callout" _real_trace().		#
872*4882a593Smuzhiyun#									#
873*4882a593Smuzhiyun#########################################################################
874*4882a593Smuzhiyun
875*4882a593Smuzhiyun	global		_fpsp_unfl
876*4882a593Smuzhiyun_fpsp_unfl:
877*4882a593Smuzhiyun
878*4882a593Smuzhiyun#$#	sub.l		&24,%sp			# make room for src/dst
879*4882a593Smuzhiyun
880*4882a593Smuzhiyun	link.w		%a6,&-LOCAL_SIZE	# init stack frame
881*4882a593Smuzhiyun
882*4882a593Smuzhiyun	fsave		FP_SRC(%a6)		# grab the "busy" frame
883*4882a593Smuzhiyun
884*4882a593Smuzhiyun	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
885*4882a593Smuzhiyun	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
886*4882a593Smuzhiyun	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
887*4882a593Smuzhiyun
888*4882a593Smuzhiyun# the FPIAR holds the "current PC" of the faulting instruction
889*4882a593Smuzhiyun	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
890*4882a593Smuzhiyun	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
891*4882a593Smuzhiyun	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
892*4882a593Smuzhiyun	bsr.l		_imem_read_long		# fetch the instruction words
893*4882a593Smuzhiyun	mov.l		%d0,EXC_OPWORD(%a6)
894*4882a593Smuzhiyun
895*4882a593Smuzhiyun##############################################################################
896*4882a593Smuzhiyun
897*4882a593Smuzhiyun	btst		&0x5,EXC_CMDREG(%a6)	# is instr an fmove out?
898*4882a593Smuzhiyun	bne.w		funfl_out
899*4882a593Smuzhiyun
900*4882a593Smuzhiyun
901*4882a593Smuzhiyun	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
902*4882a593Smuzhiyun	bsr.l		fix_skewed_ops		# fix src op
903*4882a593Smuzhiyun
904*4882a593Smuzhiyun	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
905*4882a593Smuzhiyun	bsr.l		set_tag_x		# tag the operand type
906*4882a593Smuzhiyun	mov.b		%d0,STAG(%a6)		# maybe NORM,DENORM
907*4882a593Smuzhiyun
908*4882a593Smuzhiyun# bit five of the fp ext word separates the monadic and dyadic operations
909*4882a593Smuzhiyun# that can pass through fpsp_unfl(). remember that fcmp, and ftst
910*4882a593Smuzhiyun# will never take this exception.
911*4882a593Smuzhiyun	btst		&0x5,1+EXC_CMDREG(%a6)	# is op monadic or dyadic?
912*4882a593Smuzhiyun	beq.b		funfl_extract		# monadic
913*4882a593Smuzhiyun
914*4882a593Smuzhiyun# now, what's left that's not dyadic is fsincos. we can distinguish it
915*4882a593Smuzhiyun# from all dyadics by the '0110xxx pattern
916*4882a593Smuzhiyun	btst		&0x4,1+EXC_CMDREG(%a6)	# is op an fsincos?
917*4882a593Smuzhiyun	bne.b		funfl_extract		# yes
918*4882a593Smuzhiyun
919*4882a593Smuzhiyun	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
920*4882a593Smuzhiyun	bsr.l		load_fpn2		# load dst into FP_DST
921*4882a593Smuzhiyun
922*4882a593Smuzhiyun	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
923*4882a593Smuzhiyun	bsr.l		set_tag_x		# tag the operand type
924*4882a593Smuzhiyun	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
925*4882a593Smuzhiyun	bne.b		funfl_op2_done		# no
926*4882a593Smuzhiyun	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
927*4882a593Smuzhiyunfunfl_op2_done:
928*4882a593Smuzhiyun	mov.b		%d0,DTAG(%a6)		# save dst optype tag
929*4882a593Smuzhiyun
930*4882a593Smuzhiyunfunfl_extract:
931*4882a593Smuzhiyun
932*4882a593Smuzhiyun#$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
933*4882a593Smuzhiyun#$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
934*4882a593Smuzhiyun#$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
935*4882a593Smuzhiyun#$#	mov.l		FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
936*4882a593Smuzhiyun#$#	mov.l		FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
937*4882a593Smuzhiyun#$#	mov.l		FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
938*4882a593Smuzhiyun
939*4882a593Smuzhiyun	clr.l		%d0
940*4882a593Smuzhiyun	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
941*4882a593Smuzhiyun
942*4882a593Smuzhiyun	mov.b		1+EXC_CMDREG(%a6),%d1
943*4882a593Smuzhiyun	andi.w		&0x007f,%d1		# extract extension
944*4882a593Smuzhiyun
945*4882a593Smuzhiyun	andi.l		&0x00ff01ff,USER_FPSR(%a6)
946*4882a593Smuzhiyun
947*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# zero current control regs
948*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr
949*4882a593Smuzhiyun
950*4882a593Smuzhiyun	lea		FP_SRC(%a6),%a0
951*4882a593Smuzhiyun	lea		FP_DST(%a6),%a1
952*4882a593Smuzhiyun
953*4882a593Smuzhiyun# maybe we can make these entry points ONLY the OVFL entry points of each routine.
954*4882a593Smuzhiyun	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
955*4882a593Smuzhiyun	jsr		(tbl_unsupp.l,%pc,%d1.l*1)
956*4882a593Smuzhiyun
957*4882a593Smuzhiyun	bfextu		EXC_CMDREG(%a6){&6:&3},%d0
958*4882a593Smuzhiyun	bsr.l		store_fpreg
959*4882a593Smuzhiyun
960*4882a593Smuzhiyun# The `060 FPU multiplier hardware is such that if the result of a
961*4882a593Smuzhiyun# multiply operation is the smallest possible normalized number
962*4882a593Smuzhiyun# (0x00000000_80000000_00000000), then the machine will take an
963*4882a593Smuzhiyun# underflow exception. Since this is incorrect, we need to check
964*4882a593Smuzhiyun# if our emulation, after re-doing the operation, decided that
965*4882a593Smuzhiyun# no underflow was called for. We do these checks only in
966*4882a593Smuzhiyun# funfl_{unfl,inex}_on() because w/ both exceptions disabled, this
967*4882a593Smuzhiyun# special case will simply exit gracefully with the correct result.
968*4882a593Smuzhiyun
969*4882a593Smuzhiyun# the exceptional possibilities we have left ourselves with are ONLY overflow
970*4882a593Smuzhiyun# and inexact. and, the inexact is such that overflow occurred and was disabled
971*4882a593Smuzhiyun# but inexact was enabled.
972*4882a593Smuzhiyun	btst		&unfl_bit,FPCR_ENABLE(%a6)
973*4882a593Smuzhiyun	bne.b		funfl_unfl_on
974*4882a593Smuzhiyun
975*4882a593Smuzhiyunfunfl_chkinex:
976*4882a593Smuzhiyun	btst		&inex2_bit,FPCR_ENABLE(%a6)
977*4882a593Smuzhiyun	bne.b		funfl_inex_on
978*4882a593Smuzhiyun
979*4882a593Smuzhiyunfunfl_exit:
980*4882a593Smuzhiyun	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
981*4882a593Smuzhiyun	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
982*4882a593Smuzhiyun	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
983*4882a593Smuzhiyun
984*4882a593Smuzhiyun	unlk		%a6
985*4882a593Smuzhiyun#$#	add.l		&24,%sp
986*4882a593Smuzhiyun	bra.l		_fpsp_done
987*4882a593Smuzhiyun
988*4882a593Smuzhiyun# overflow is enabled AND overflow, of course, occurred. so, we have the EXOP
989*4882a593Smuzhiyun# in fp1 (don't forget to save fp0). what to do now?
990*4882a593Smuzhiyun# well, we simply have to get to go to _real_unfl()!
991*4882a593Smuzhiyunfunfl_unfl_on:
992*4882a593Smuzhiyun
993*4882a593Smuzhiyun# The `060 FPU multiplier hardware is such that if the result of a
994*4882a593Smuzhiyun# multiply operation is the smallest possible normalized number
995*4882a593Smuzhiyun# (0x00000000_80000000_00000000), then the machine will take an
996*4882a593Smuzhiyun# underflow exception. Since this is incorrect, we check here to see
997*4882a593Smuzhiyun# if our emulation, after re-doing the operation, decided that
998*4882a593Smuzhiyun# no underflow was called for.
999*4882a593Smuzhiyun	btst		&unfl_bit,FPSR_EXCEPT(%a6)
1000*4882a593Smuzhiyun	beq.w		funfl_chkinex
1001*4882a593Smuzhiyun
1002*4882a593Smuzhiyunfunfl_unfl_on2:
1003*4882a593Smuzhiyun	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP (fp1) to stack
1004*4882a593Smuzhiyun
1005*4882a593Smuzhiyun	mov.w		&0xe003,2+FP_SRC(%a6)	# save exc status
1006*4882a593Smuzhiyun
1007*4882a593Smuzhiyun	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
1008*4882a593Smuzhiyun	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1009*4882a593Smuzhiyun	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1010*4882a593Smuzhiyun
1011*4882a593Smuzhiyun	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!
1012*4882a593Smuzhiyun
1013*4882a593Smuzhiyun	unlk		%a6
1014*4882a593Smuzhiyun
1015*4882a593Smuzhiyun	bra.l		_real_unfl
1016*4882a593Smuzhiyun
1017*4882a593Smuzhiyun# underflow occurred but is disabled. meanwhile, inexact is enabled. Therefore,
1018*4882a593Smuzhiyun# we must jump to real_inex().
1019*4882a593Smuzhiyunfunfl_inex_on:
1020*4882a593Smuzhiyun
1021*4882a593Smuzhiyun# The `060 FPU multiplier hardware is such that if the result of a
1022*4882a593Smuzhiyun# multiply operation is the smallest possible normalized number
1023*4882a593Smuzhiyun# (0x00000000_80000000_00000000), then the machine will take an
1024*4882a593Smuzhiyun# underflow exception.
1025*4882a593Smuzhiyun# But, whether bogus or not, if inexact is enabled AND it occurred,
1026*4882a593Smuzhiyun# then we have to branch to real_inex.
1027*4882a593Smuzhiyun
1028*4882a593Smuzhiyun	btst		&inex2_bit,FPSR_EXCEPT(%a6)
1029*4882a593Smuzhiyun	beq.w		funfl_exit
1030*4882a593Smuzhiyun
1031*4882a593Smuzhiyunfunfl_inex_on2:
1032*4882a593Smuzhiyun
1033*4882a593Smuzhiyun	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to stack
1034*4882a593Smuzhiyun
1035*4882a593Smuzhiyun	mov.b		&0xc4,1+EXC_VOFF(%a6)	# vector offset = 0xc4
1036*4882a593Smuzhiyun	mov.w		&0xe001,2+FP_SRC(%a6)	# save exc status
1037*4882a593Smuzhiyun
1038*4882a593Smuzhiyun	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
1039*4882a593Smuzhiyun	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1040*4882a593Smuzhiyun	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1041*4882a593Smuzhiyun
1042*4882a593Smuzhiyun	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!
1043*4882a593Smuzhiyun
1044*4882a593Smuzhiyun	unlk		%a6
1045*4882a593Smuzhiyun
1046*4882a593Smuzhiyun	bra.l		_real_inex
1047*4882a593Smuzhiyun
1048*4882a593Smuzhiyun#######################################################################
1049*4882a593Smuzhiyunfunfl_out:
1050*4882a593Smuzhiyun
1051*4882a593Smuzhiyun
1052*4882a593Smuzhiyun#$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
1053*4882a593Smuzhiyun#$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
1054*4882a593Smuzhiyun#$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
1055*4882a593Smuzhiyun
1056*4882a593Smuzhiyun# the src operand is definitely a NORM(!), so tag it as such
1057*4882a593Smuzhiyun	mov.b		&NORM,STAG(%a6)		# set src optype tag
1058*4882a593Smuzhiyun
1059*4882a593Smuzhiyun	clr.l		%d0
1060*4882a593Smuzhiyun	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
1061*4882a593Smuzhiyun
1062*4882a593Smuzhiyun	and.l		&0xffff00ff,USER_FPSR(%a6) # zero all but accured field
1063*4882a593Smuzhiyun
1064*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# zero current control regs
1065*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr
1066*4882a593Smuzhiyun
1067*4882a593Smuzhiyun	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
1068*4882a593Smuzhiyun
1069*4882a593Smuzhiyun	bsr.l		fout
1070*4882a593Smuzhiyun
1071*4882a593Smuzhiyun	btst		&unfl_bit,FPCR_ENABLE(%a6)
1072*4882a593Smuzhiyun	bne.w		funfl_unfl_on2
1073*4882a593Smuzhiyun
1074*4882a593Smuzhiyun	btst		&inex2_bit,FPCR_ENABLE(%a6)
1075*4882a593Smuzhiyun	bne.w		funfl_inex_on2
1076*4882a593Smuzhiyun
1077*4882a593Smuzhiyun	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
1078*4882a593Smuzhiyun	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1079*4882a593Smuzhiyun	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1080*4882a593Smuzhiyun
1081*4882a593Smuzhiyun	unlk		%a6
1082*4882a593Smuzhiyun#$#	add.l		&24,%sp
1083*4882a593Smuzhiyun
1084*4882a593Smuzhiyun	btst		&0x7,(%sp)		# is trace on?
1085*4882a593Smuzhiyun	beq.l		_fpsp_done		# no
1086*4882a593Smuzhiyun
1087*4882a593Smuzhiyun	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR
1088*4882a593Smuzhiyun	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
1089*4882a593Smuzhiyun	bra.l		_real_trace
1090*4882a593Smuzhiyun
1091*4882a593Smuzhiyun#########################################################################
1092*4882a593Smuzhiyun# XDEF ****************************************************************	#
1093*4882a593Smuzhiyun#	_fpsp_unsupp(): 060FPSP entry point for FP "Unimplemented	#
1094*4882a593Smuzhiyun#		        Data Type" exception.				#
1095*4882a593Smuzhiyun#									#
1096*4882a593Smuzhiyun#	This handler should be the first code executed upon taking the	#
1097*4882a593Smuzhiyun#	FP Unimplemented Data Type exception in an operating system.	#
1098*4882a593Smuzhiyun#									#
1099*4882a593Smuzhiyun# XREF ****************************************************************	#
1100*4882a593Smuzhiyun#	_imem_read_{word,long}() - read instruction word/longword	#
1101*4882a593Smuzhiyun#	fix_skewed_ops() - adjust src operand in fsave frame		#
1102*4882a593Smuzhiyun#	set_tag_x() - determine optype of src/dst operands		#
1103*4882a593Smuzhiyun#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
1104*4882a593Smuzhiyun#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
1105*4882a593Smuzhiyun#	load_fpn2() - load dst operand from FP regfile			#
1106*4882a593Smuzhiyun#	load_fpn1() - load src operand from FP regfile			#
1107*4882a593Smuzhiyun#	fout() - emulate an opclass 3 instruction			#
1108*4882a593Smuzhiyun#	tbl_unsupp - add of table of emulation routines for opclass 0,2	#
1109*4882a593Smuzhiyun#	_real_inex() - "callout" to operating system inexact handler	#
1110*4882a593Smuzhiyun#	_fpsp_done() - "callout" for exit; work all done		#
1111*4882a593Smuzhiyun#	_real_trace() - "callout" for Trace enabled exception		#
1112*4882a593Smuzhiyun#	funimp_skew() - adjust fsave src ops to "incorrect" value	#
1113*4882a593Smuzhiyun#	_real_snan() - "callout" for SNAN exception			#
1114*4882a593Smuzhiyun#	_real_operr() - "callout" for OPERR exception			#
1115*4882a593Smuzhiyun#	_real_ovfl() - "callout" for OVFL exception			#
1116*4882a593Smuzhiyun#	_real_unfl() - "callout" for UNFL exception			#
1117*4882a593Smuzhiyun#	get_packed() - fetch packed operand from memory			#
1118*4882a593Smuzhiyun#									#
1119*4882a593Smuzhiyun# INPUT ***************************************************************	#
1120*4882a593Smuzhiyun#	- The system stack contains the "Unimp Data Type" stk frame	#
1121*4882a593Smuzhiyun#	- The fsave frame contains the ssrc op (for UNNORM/DENORM)	#
1122*4882a593Smuzhiyun#									#
1123*4882a593Smuzhiyun# OUTPUT **************************************************************	#
1124*4882a593Smuzhiyun#	If Inexact exception (opclass 3):				#
1125*4882a593Smuzhiyun#	- The system stack is changed to an Inexact exception stk frame	#
1126*4882a593Smuzhiyun#	If SNAN exception (opclass 3):					#
1127*4882a593Smuzhiyun#	- The system stack is changed to an SNAN exception stk frame	#
1128*4882a593Smuzhiyun#	If OPERR exception (opclass 3):					#
1129*4882a593Smuzhiyun#	- The system stack is changed to an OPERR exception stk frame	#
1130*4882a593Smuzhiyun#	If OVFL exception (opclass 3):					#
1131*4882a593Smuzhiyun#	- The system stack is changed to an OVFL exception stk frame	#
1132*4882a593Smuzhiyun#	If UNFL exception (opclass 3):					#
1133*4882a593Smuzhiyun#	- The system stack is changed to an UNFL exception stack frame	#
1134*4882a593Smuzhiyun#	If Trace exception enabled:					#
1135*4882a593Smuzhiyun#	- The system stack is changed to a Trace exception stack frame	#
1136*4882a593Smuzhiyun#	Else: (normal case)						#
1137*4882a593Smuzhiyun#	- Correct result has been stored as appropriate			#
1138*4882a593Smuzhiyun#									#
1139*4882a593Smuzhiyun# ALGORITHM ***********************************************************	#
1140*4882a593Smuzhiyun#	Two main instruction types can enter here: (1) DENORM or UNNORM	#
1141*4882a593Smuzhiyun# unimplemented data types. These can be either opclass 0,2 or 3	#
1142*4882a593Smuzhiyun# instructions, and (2) PACKED unimplemented data format instructions	#
1143*4882a593Smuzhiyun# also of opclasses 0,2, or 3.						#
1144*4882a593Smuzhiyun#	For UNNORM/DENORM opclass 0 and 2, the handler fetches the src	#
1145*4882a593Smuzhiyun# operand from the fsave state frame and the dst operand (if dyadic)	#
1146*4882a593Smuzhiyun# from the FP register file. The instruction is then emulated by	#
1147*4882a593Smuzhiyun# choosing an emulation routine from a table of routines indexed by	#
1148*4882a593Smuzhiyun# instruction type. Once the instruction has been emulated and result	#
1149*4882a593Smuzhiyun# saved, then we check to see if any enabled exceptions resulted from	#
1150*4882a593Smuzhiyun# instruction emulation. If none, then we exit through the "callout"	#
1151*4882a593Smuzhiyun# _fpsp_done(). If there is an enabled FP exception, then we insert	#
1152*4882a593Smuzhiyun# this exception into the FPU in the fsave state frame and then exit	#
1153*4882a593Smuzhiyun# through _fpsp_done().							#
1154*4882a593Smuzhiyun#	PACKED opclass 0 and 2 is similar in how the instruction is	#
1155*4882a593Smuzhiyun# emulated and exceptions handled. The differences occur in how the	#
1156*4882a593Smuzhiyun# handler loads the packed op (by calling get_packed() routine) and	#
1157*4882a593Smuzhiyun# by the fact that a Trace exception could be pending for PACKED ops.	#
1158*4882a593Smuzhiyun# If a Trace exception is pending, then the current exception stack	#
1159*4882a593Smuzhiyun# frame is changed to a Trace exception stack frame and an exit is	#
1160*4882a593Smuzhiyun# made through _real_trace().						#
1161*4882a593Smuzhiyun#	For UNNORM/DENORM opclass 3, the actual move out to memory is	#
1162*4882a593Smuzhiyun# performed by calling the routine fout(). If no exception should occur	#
1163*4882a593Smuzhiyun# as the result of emulation, then an exit either occurs through	#
1164*4882a593Smuzhiyun# _fpsp_done() or through _real_trace() if a Trace exception is pending	#
1165*4882a593Smuzhiyun# (a Trace stack frame must be created here, too). If an FP exception	#
1166*4882a593Smuzhiyun# should occur, then we must create an exception stack frame of that	#
1167*4882a593Smuzhiyun# type and jump to either _real_snan(), _real_operr(), _real_inex(),	#
1168*4882a593Smuzhiyun# _real_unfl(), or _real_ovfl() as appropriate. PACKED opclass 3	#
1169*4882a593Smuzhiyun# emulation is performed in a similar manner.				#
1170*4882a593Smuzhiyun#									#
1171*4882a593Smuzhiyun#########################################################################
1172*4882a593Smuzhiyun
1173*4882a593Smuzhiyun#
1174*4882a593Smuzhiyun# (1) DENORM and UNNORM (unimplemented) data types:
1175*4882a593Smuzhiyun#
1176*4882a593Smuzhiyun#				post-instruction
1177*4882a593Smuzhiyun#				*****************
1178*4882a593Smuzhiyun#				*      EA	*
1179*4882a593Smuzhiyun#	 pre-instruction	*		*
1180*4882a593Smuzhiyun#	*****************	*****************
1181*4882a593Smuzhiyun#	* 0x0 *  0x0dc  *	* 0x3 *  0x0dc  *
1182*4882a593Smuzhiyun#	*****************	*****************
1183*4882a593Smuzhiyun#	*     Next	*	*     Next	*
1184*4882a593Smuzhiyun#	*      PC	*	*      PC	*
1185*4882a593Smuzhiyun#	*****************	*****************
1186*4882a593Smuzhiyun#	*      SR	*	*      SR	*
1187*4882a593Smuzhiyun#	*****************	*****************
1188*4882a593Smuzhiyun#
1189*4882a593Smuzhiyun# (2) PACKED format (unsupported) opclasses two and three:
1190*4882a593Smuzhiyun#	*****************
1191*4882a593Smuzhiyun#	*      EA	*
1192*4882a593Smuzhiyun#	*		*
1193*4882a593Smuzhiyun#	*****************
1194*4882a593Smuzhiyun#	* 0x2 *  0x0dc	*
1195*4882a593Smuzhiyun#	*****************
1196*4882a593Smuzhiyun#	*     Next	*
1197*4882a593Smuzhiyun#	*      PC	*
1198*4882a593Smuzhiyun#	*****************
1199*4882a593Smuzhiyun#	*      SR	*
1200*4882a593Smuzhiyun#	*****************
1201*4882a593Smuzhiyun#
#########################################################################
# _fpsp_unsupp(): "Unimplemented Data Type" exception handler entry.	#
# Saves machine context, fetches the faulting instruction via FPIAR,	#
# then dispatches: opclass 3 -> fu_out; packed opclass 2 -> fu_in_pack;	#
# otherwise falls through to the opclass 0/2 UNNORM/DENORM emulation	#
# path below.								#
#########################################################################
	global		_fpsp_unsupp
_fpsp_unsupp:

	link.w		%a6,&-LOCAL_SIZE	# init stack frame

	fsave		FP_SRC(%a6)		# save fp state

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode?
	bne.b		fu_s
fu_u:
	mov.l		%usp,%a0		# fetch user stack pointer
	mov.l		%a0,EXC_A7(%a6)		# save on stack
	bra.b		fu_cont
# if the exception is an opclass zero or two unimplemented data type
# exception, then the a7' calculated here is wrong since it doesn't
# stack an ea. however, we don't need an a7' for this case anyways.
fu_s:
	lea		0x4+EXC_EA(%a6),%a0	# load old a7'
	mov.l		%a0,EXC_A7(%a6)		# save on stack

fu_cont:

# the FPIAR holds the "current PC" of the faulting instruction
# the FPIAR should be set correctly for ALL exceptions passing through
# this point.
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD

############################

	clr.b		SPCOND_FLG(%a6)		# clear special condition flag

# Separate opclass three (fpn-to-mem) ops since they have a different
# stack frame and protocol.
	btst		&0x5,EXC_CMDREG(%a6)	# is it an fmove out?
	bne.w		fu_out			# yes

# Separate packed opclass two instructions.
	bfextu		EXC_CMDREG(%a6){&0:&6},%d0 # extract opclass/src fmt
	cmpi.b		%d0,&0x13		# class = 2 & fmt = packed?
	beq.w		fu_in_pack		# yes; packed memory-to-fpn


# I'm not sure at this point what FPSR bits are valid for this instruction.
# so, since the emulation routines re-create them anyways, zero exception field
	andi.l		&0x00ff00ff,USER_FPSR(%a6) # zero exception field

	fmov.l		&0x0,%fpcr		# zero current control regs
	fmov.l		&0x0,%fpsr

# Opclass two w/ memory-to-fpn operation will have an incorrect extended
# precision format if the src format was single or double and the
# source data type was an INF, NAN, DENORM, or UNNORM
	lea		FP_SRC(%a6),%a0		# pass ptr to input
	bsr.l		fix_skewed_ops

# we don't know whether the src operand or the dst operand (or both) is the
# UNNORM or DENORM. call the function that tags the operand type. if the
# input is an UNNORM, then convert it to a NORM, DENORM, or ZERO.
	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		set_tag_x		# tag the operand type
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
	bne.b		fu_op2			# no
	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO

fu_op2:
	mov.b		%d0,STAG(%a6)		# save src optype tag

	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg

# bit five of the fp extension word separates the monadic and dyadic operations
# at this point
	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
	beq.b		fu_extract		# monadic
	cmpi.b		1+EXC_CMDREG(%a6),&0x3a	# is operation an ftst?
	beq.b		fu_extract		# yes, so it's monadic, too

	bsr.l		load_fpn2		# load dst into FP_DST

	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
	bsr.l		set_tag_x		# tag the operand type
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
	bne.b		fu_op2_done		# no
	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
fu_op2_done:
	mov.b		%d0,DTAG(%a6)		# save dst optype tag

# dispatch to the emulation routine for this instruction type.
# in: d0 = rnd mode/prec, d1 = extension index, a0 = src, a1 = dst
fu_extract:
	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec

	bfextu		1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension

	lea		FP_SRC(%a6),%a0
	lea		FP_DST(%a6),%a1

	mov.l		(tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
	jsr		(tbl_unsupp.l,%pc,%d1.l*1)

#
# Exceptions in order of precedence:
#	BSUN	: none
#	SNAN	: all dyadic ops
#	OPERR	: fsqrt(-NORM)
#	OVFL	: all except ftst,fcmp
#	UNFL	: all except ftst,fcmp
#	DZ	: fdiv
#	INEX2	: all except ftst,fcmp
#	INEX1	: none (packed doesn't go through here)
#

# we determine the highest priority exception(if any) set by the
# emulation routine that has also been enabled by the user.
	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions set
	bne.b		fu_in_ena		# some are enabled

fu_in_cont:
# fcmp and ftst do not store any result.
	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch extension
	andi.b		&0x38,%d0		# extract bits 3-5
	cmpi.b		%d0,&0x38		# is instr fcmp or ftst?
	beq.b		fu_in_exit		# yes

	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
	bsr.l		store_fpreg		# store the result

# no enabled exception occurred: restore context and return to the OS.
fu_in_exit:

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6

	bra.l		_fpsp_done

fu_in_ena:
	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled
	bfffo		%d0{&24:&8},%d0		# find highest priority exception
	bne.b		fu_in_exc		# there is at least one set

#
# No exceptions occurred that were also enabled. Now:
#
#	if (OVFL && ovfl_disabled && inexact_enabled) {
#	    branch to _real_inex() (even if the result was exact!);
#	} else {
#	    save the result in the proper fp reg (unless the op is fcmp or ftst);
#	    return;
#	}
#
	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
	beq.b		fu_in_cont		# no

fu_in_ovflchk:
	btst		&inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
	beq.b		fu_in_cont		# no
	bra.w		fu_in_exc_ovfl		# go insert overflow frame

#
# An exception occurred and that exception was enabled:
#
#	shift enabled exception field into lo byte of d0;
#	if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
#	    ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
#		/*
#		 * this is the case where we must call _real_inex() now or else
#		 * there will be no other way to pass it the exceptional operand
#		 */
#		call _real_inex();
#	} else {
#		restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
#	}
#
fu_in_exc:
	subi.l		&24,%d0			# map bfffo result (24-31) to index 0-7
	cmpi.b		%d0,&0x6		# is exception INEX? (6)
	bne.b		fu_in_exc_exit		# no

# the enabled exception was inexact
	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
	bne.w		fu_in_exc_unfl		# yes
	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
	bne.w		fu_in_exc_ovfl		# yes

# here, we insert the correct fsave status value into the fsave frame for the
# corresponding exception. the operand in the fsave frame should be the original
# src operand.
fu_in_exc_exit:
	mov.l		%d0,-(%sp)		# save d0
	bsr.l		funimp_skew		# skew sgl or dbl inputs
	mov.l		(%sp)+,%d0		# restore d0

	mov.w		(tbl_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) # create exc status

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)		# restore src op

	unlk		%a6

	bra.l		_fpsp_done

# fsave exception status words, indexed by exception priority
# (BSUN,SNAN,OPERR,OVFL,UNFL,DZ,INEX2,INEX1)
tbl_except:
	short		0xe000,0xe006,0xe004,0xe005
	short		0xe003,0xe002,0xe001,0xe001

fu_in_exc_unfl:
	mov.w		&0x4,%d0		# force UNFL table index
	bra.b		fu_in_exc_exit
fu_in_exc_ovfl:
	mov.w		&0x03,%d0		# force OVFL table index
	bra.b		fu_in_exc_exit
1424*4882a593Smuzhiyun
# If the input operand to this operation was opclass two and a single
# or double precision denorm, inf, or nan, the operand needs to be
# "corrected" in order to have the proper equivalent extended precision
# number.
#
# In:	 a0 = ptr to extended-precision operand (from the fsave frame)
#	 a6 = exception handler frame pointer (EXC_CMDREG read)
# Out:	 operand at (a0) corrected in place
# Uses:	 d0; calls norm (which is counted on not to alter a0)
	global		fix_skewed_ops
fix_skewed_ops:
	bfextu		EXC_CMDREG(%a6){&0:&6},%d0 # extract opclass,src fmt
	cmpi.b		%d0,&0x11		# is class = 2 & fmt = sgl?
	beq.b		fso_sgl			# yes
	cmpi.b		%d0,&0x15		# is class = 2 & fmt = dbl?
	beq.b		fso_dbl			# yes
	rts					# no

# single precision source: check for the skewed exponent values that
# flag a denorm/zero ($3f80) or an inf/nan ($407f).
fso_sgl:
	mov.w		LOCAL_EX(%a0),%d0	# fetch src exponent
	andi.w		&0x7fff,%d0		# strip sign
	cmpi.w		%d0,&0x3f80		# is |exp| == $3f80?
	beq.b		fso_sgl_dnrm_zero	# yes
	cmpi.w		%d0,&0x407f		# no; is |exp| == $407f?
	beq.b		fso_infnan		# yes
	rts					# no

fso_sgl_dnrm_zero:
	andi.l		&0x7fffffff,LOCAL_HI(%a0) # clear j-bit
	beq.b		fso_zero		# it's a skewed zero
fso_sgl_dnrm:
# here, we count on norm not to alter a0...
	bsr.l		norm			# normalize mantissa
	neg.w		%d0			# -shft amt
	addi.w		&0x3f81,%d0		# adjust new exponent
	andi.w		&0x8000,LOCAL_EX(%a0)	# clear old exponent
	or.w		%d0,LOCAL_EX(%a0)	# insert new exponent
	rts

fso_zero:
	andi.w		&0x8000,LOCAL_EX(%a0)	# clear bogus exponent
	rts

fso_infnan:
	andi.b		&0x7f,LOCAL_HI(%a0)	# clear j-bit
	ori.w		&0x7fff,LOCAL_EX(%a0)	# make exponent = $7fff
	rts

# double precision source: same idea with dbl skew values
# ($3c00 denorm/zero, $43ff inf/nan).
fso_dbl:
	mov.w		LOCAL_EX(%a0),%d0	# fetch src exponent
	andi.w		&0x7fff,%d0		# strip sign
	cmpi.w		%d0,&0x3c00		# is |exp| == $3c00?
	beq.b		fso_dbl_dnrm_zero	# yes
	cmpi.w		%d0,&0x43ff		# no; is |exp| == $43ff?
	beq.b		fso_infnan		# yes
	rts					# no

fso_dbl_dnrm_zero:
	andi.l		&0x7fffffff,LOCAL_HI(%a0) # clear j-bit
	bne.b		fso_dbl_dnrm		# it's a skewed denorm
	tst.l		LOCAL_LO(%a0)		# is it a zero?
	beq.b		fso_zero		# yes
fso_dbl_dnrm:
# here, we count on norm not to alter a0...
	bsr.l		norm			# normalize mantissa
	neg.w		%d0			# -shft amt
	addi.w		&0x3c01,%d0		# adjust new exponent
	andi.w		&0x8000,LOCAL_EX(%a0)	# clear old exponent
	or.w		%d0,LOCAL_EX(%a0)	# insert new exponent
	rts
1490*4882a593Smuzhiyun
1491*4882a593Smuzhiyun#################################################################
1492*4882a593Smuzhiyun
1493*4882a593Smuzhiyun# fmove out took an unimplemented data type exception.
1494*4882a593Smuzhiyun# the src operand is in FP_SRC. Call _fout() to write out the result and
1495*4882a593Smuzhiyun# to determine which exceptions, if any, to take.
1496*4882a593Smuzhiyunfu_out:
1497*4882a593Smuzhiyun
1498*4882a593Smuzhiyun# Separate packed move outs from the UNNORM and DENORM move outs.
1499*4882a593Smuzhiyun	bfextu		EXC_CMDREG(%a6){&3:&3},%d0
1500*4882a593Smuzhiyun	cmpi.b		%d0,&0x3
1501*4882a593Smuzhiyun	beq.w		fu_out_pack
1502*4882a593Smuzhiyun	cmpi.b		%d0,&0x7
1503*4882a593Smuzhiyun	beq.w		fu_out_pack
1504*4882a593Smuzhiyun
1505*4882a593Smuzhiyun
1506*4882a593Smuzhiyun# I'm not sure at this point what FPSR bits are valid for this instruction.
1507*4882a593Smuzhiyun# so, since the emulation routines re-create them anyways, zero exception field.
1508*4882a593Smuzhiyun# fmove out doesn't affect ccodes.
1509*4882a593Smuzhiyun	and.l		&0xffff00ff,USER_FPSR(%a6) # zero exception field
1510*4882a593Smuzhiyun
1511*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# zero current control regs
1512*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr
1513*4882a593Smuzhiyun
1514*4882a593Smuzhiyun# the src can ONLY be a DENORM or an UNNORM! so, don't make any big subroutine
1515*4882a593Smuzhiyun# call here. just figure out what it is...
1516*4882a593Smuzhiyun	mov.w		FP_SRC_EX(%a6),%d0	# get exponent
1517*4882a593Smuzhiyun	andi.w		&0x7fff,%d0		# strip sign
1518*4882a593Smuzhiyun	beq.b		fu_out_denorm		# it's a DENORM
1519*4882a593Smuzhiyun
1520*4882a593Smuzhiyun	lea		FP_SRC(%a6),%a0
1521*4882a593Smuzhiyun	bsr.l		unnorm_fix		# yes; fix it
1522*4882a593Smuzhiyun
1523*4882a593Smuzhiyun	mov.b		%d0,STAG(%a6)
1524*4882a593Smuzhiyun
1525*4882a593Smuzhiyun	bra.b		fu_out_cont
1526*4882a593Smuzhiyunfu_out_denorm:
1527*4882a593Smuzhiyun	mov.b		&DENORM,STAG(%a6)
1528*4882a593Smuzhiyunfu_out_cont:
1529*4882a593Smuzhiyun
1530*4882a593Smuzhiyun	clr.l		%d0
1531*4882a593Smuzhiyun	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec
1532*4882a593Smuzhiyun
1533*4882a593Smuzhiyun	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
1534*4882a593Smuzhiyun
1535*4882a593Smuzhiyun	mov.l		(%a6),EXC_A6(%a6)	# in case a6 changes
1536*4882a593Smuzhiyun	bsr.l		fout			# call fmove out routine
1537*4882a593Smuzhiyun
1538*4882a593Smuzhiyun# Exceptions in order of precedence:
1539*4882a593Smuzhiyun#	BSUN	: none
1540*4882a593Smuzhiyun#	SNAN	: none
1541*4882a593Smuzhiyun#	OPERR	: fmove.{b,w,l} out of large UNNORM
1542*4882a593Smuzhiyun#	OVFL	: fmove.{s,d}
1543*4882a593Smuzhiyun#	UNFL	: fmove.{s,d,x}
1544*4882a593Smuzhiyun#	DZ	: none
1545*4882a593Smuzhiyun#	INEX2	: all
1546*4882a593Smuzhiyun#	INEX1	: none (packed doesn't travel through here)
1547*4882a593Smuzhiyun
1548*4882a593Smuzhiyun# determine the highest priority exception(if any) set by the
1549*4882a593Smuzhiyun# emulation routine that has also been enabled by the user.
1550*4882a593Smuzhiyun	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
1551*4882a593Smuzhiyun	bne.w		fu_out_ena		# some are enabled
1552*4882a593Smuzhiyun
1553*4882a593Smuzhiyunfu_out_done:
1554*4882a593Smuzhiyun
1555*4882a593Smuzhiyun	mov.l		EXC_A6(%a6),(%a6)	# in case a6 changed
1556*4882a593Smuzhiyun
1557*4882a593Smuzhiyun# on extended precision opclass three instructions using pre-decrement or
1558*4882a593Smuzhiyun# post-increment addressing mode, the address register is not updated. is the
1559*4882a593Smuzhiyun# address register was the stack pointer used from user mode, then let's update
1560*4882a593Smuzhiyun# it here. if it was used from supervisor mode, then we have to handle this
1561*4882a593Smuzhiyun# as a special case.
1562*4882a593Smuzhiyun	btst		&0x5,EXC_SR(%a6)
1563*4882a593Smuzhiyun	bne.b		fu_out_done_s
1564*4882a593Smuzhiyun
1565*4882a593Smuzhiyun	mov.l		EXC_A7(%a6),%a0		# restore a7
1566*4882a593Smuzhiyun	mov.l		%a0,%usp
1567*4882a593Smuzhiyun
1568*4882a593Smuzhiyunfu_out_done_cont:
1569*4882a593Smuzhiyun	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1570*4882a593Smuzhiyun	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1571*4882a593Smuzhiyun	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1572*4882a593Smuzhiyun
1573*4882a593Smuzhiyun	unlk		%a6
1574*4882a593Smuzhiyun
1575*4882a593Smuzhiyun	btst		&0x7,(%sp)		# is trace on?
1576*4882a593Smuzhiyun	bne.b		fu_out_trace		# yes
1577*4882a593Smuzhiyun
1578*4882a593Smuzhiyun	bra.l		_fpsp_done
1579*4882a593Smuzhiyun
1580*4882a593Smuzhiyun# is the ea mode pre-decrement of the stack pointer from supervisor mode?
1581*4882a593Smuzhiyun# ("fmov.x fpm,-(a7)") if so,
1582*4882a593Smuzhiyunfu_out_done_s:
1583*4882a593Smuzhiyun	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
1584*4882a593Smuzhiyun	bne.b		fu_out_done_cont
1585*4882a593Smuzhiyun
1586*4882a593Smuzhiyun# the extended precision result is still in fp0. but, we need to save it
1587*4882a593Smuzhiyun# somewhere on the stack until we can copy it to its final resting place.
1588*4882a593Smuzhiyun# here, we're counting on the top of the stack to be the old place-holders
1589*4882a593Smuzhiyun# for fp0/fp1 which have already been restored. that way, we can write
1590*4882a593Smuzhiyun# over those destinations with the shifted stack frame.
1591*4882a593Smuzhiyun	fmovm.x		&0x80,FP_SRC(%a6)	# put answer on stack
1592*4882a593Smuzhiyun
1593*4882a593Smuzhiyun	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1594*4882a593Smuzhiyun	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1595*4882a593Smuzhiyun	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1596*4882a593Smuzhiyun
1597*4882a593Smuzhiyun	mov.l		(%a6),%a6		# restore frame pointer
1598*4882a593Smuzhiyun
1599*4882a593Smuzhiyun	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
1600*4882a593Smuzhiyun	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
1601*4882a593Smuzhiyun
1602*4882a593Smuzhiyun# now, copy the result to the proper place on the stack
1603*4882a593Smuzhiyun	mov.l		LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
1604*4882a593Smuzhiyun	mov.l		LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
1605*4882a593Smuzhiyun	mov.l		LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
1606*4882a593Smuzhiyun
1607*4882a593Smuzhiyun	add.l		&LOCAL_SIZE-0x8,%sp
1608*4882a593Smuzhiyun
1609*4882a593Smuzhiyun	btst		&0x7,(%sp)
1610*4882a593Smuzhiyun	bne.b		fu_out_trace
1611*4882a593Smuzhiyun
1612*4882a593Smuzhiyun	bra.l		_fpsp_done
1613*4882a593Smuzhiyun
1614*4882a593Smuzhiyunfu_out_ena:
1615*4882a593Smuzhiyun	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled
1616*4882a593Smuzhiyun	bfffo		%d0{&24:&8},%d0		# find highest priority exception
1617*4882a593Smuzhiyun	bne.b		fu_out_exc		# there is at least one set
1618*4882a593Smuzhiyun
1619*4882a593Smuzhiyun# no exceptions were set.
1620*4882a593Smuzhiyun# if a disabled overflow occurred and inexact was enabled but the result
1621*4882a593Smuzhiyun# was exact, then a branch to _real_inex() is made.
1622*4882a593Smuzhiyun	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1623*4882a593Smuzhiyun	beq.w		fu_out_done		# no
1624*4882a593Smuzhiyun
1625*4882a593Smuzhiyunfu_out_ovflchk:
1626*4882a593Smuzhiyun	btst		&inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1627*4882a593Smuzhiyun	beq.w		fu_out_done		# no
1628*4882a593Smuzhiyun	bra.w		fu_inex			# yes
1629*4882a593Smuzhiyun
1630*4882a593Smuzhiyun#
1631*4882a593Smuzhiyun# The fp move out that took the "Unimplemented Data Type" exception was
1632*4882a593Smuzhiyun# being traced. Since the stack frames are similar, get the "current" PC
1633*4882a593Smuzhiyun# from FPIAR and put it in the trace stack frame then jump to _real_trace().
1634*4882a593Smuzhiyun#
1635*4882a593Smuzhiyun#		  UNSUPP FRAME		   TRACE FRAME
1636*4882a593Smuzhiyun#		*****************	*****************
1637*4882a593Smuzhiyun#		*      EA	*	*    Current	*
1638*4882a593Smuzhiyun#		*		*	*      PC	*
1639*4882a593Smuzhiyun#		*****************	*****************
1640*4882a593Smuzhiyun#		* 0x3 *  0x0dc	*	* 0x2 *  0x024	*
1641*4882a593Smuzhiyun#		*****************	*****************
1642*4882a593Smuzhiyun#		*     Next	*	*     Next	*
1643*4882a593Smuzhiyun#		*      PC	*	*      PC	*
1644*4882a593Smuzhiyun#		*****************	*****************
1645*4882a593Smuzhiyun#		*      SR	*	*      SR	*
1646*4882a593Smuzhiyun#		*****************	*****************
1647*4882a593Smuzhiyun#
1648*4882a593Smuzhiyunfu_out_trace:
1649*4882a593Smuzhiyun	mov.w		&0x2024,0x6(%sp)
1650*4882a593Smuzhiyun	fmov.l		%fpiar,0x8(%sp)
1651*4882a593Smuzhiyun	bra.l		_real_trace
1652*4882a593Smuzhiyun
1653*4882a593Smuzhiyun# an exception occurred and that exception was enabled.
1654*4882a593Smuzhiyunfu_out_exc:
1655*4882a593Smuzhiyun	subi.l		&24,%d0			# fix offset to be 0-8
1656*4882a593Smuzhiyun
1657*4882a593Smuzhiyun# we don't mess with the existing fsave frame. just re-insert it and
1658*4882a593Smuzhiyun# jump to the "_real_{}()" handler...
1659*4882a593Smuzhiyun	mov.w		(tbl_fu_out.b,%pc,%d0.w*2),%d0
1660*4882a593Smuzhiyun	jmp		(tbl_fu_out.b,%pc,%d0.w*1)
1661*4882a593Smuzhiyun
1662*4882a593Smuzhiyun	swbeg		&0x8
1663*4882a593Smuzhiyuntbl_fu_out:
1664*4882a593Smuzhiyun	short		tbl_fu_out	- tbl_fu_out	# BSUN can't happen
1665*4882a593Smuzhiyun	short		tbl_fu_out	- tbl_fu_out	# SNAN can't happen
1666*4882a593Smuzhiyun	short		fu_operr	- tbl_fu_out	# OPERR
1667*4882a593Smuzhiyun	short		fu_ovfl		- tbl_fu_out	# OVFL
1668*4882a593Smuzhiyun	short		fu_unfl		- tbl_fu_out	# UNFL
1669*4882a593Smuzhiyun	short		tbl_fu_out	- tbl_fu_out	# DZ can't happen
1670*4882a593Smuzhiyun	short		fu_inex		- tbl_fu_out	# INEX2
1671*4882a593Smuzhiyun	short		tbl_fu_out	- tbl_fu_out	# INEX1 won't make it here
1672*4882a593Smuzhiyun
1673*4882a593Smuzhiyun# for snan,operr,ovfl,unfl, src op is still in FP_SRC so just
1674*4882a593Smuzhiyun# frestore it.
1675*4882a593Smuzhiyunfu_snan:
1676*4882a593Smuzhiyun	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1677*4882a593Smuzhiyun	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1678*4882a593Smuzhiyun	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1679*4882a593Smuzhiyun
1680*4882a593Smuzhiyun	mov.w		&0x30d8,EXC_VOFF(%a6)	# vector offset = 0xd8
1681*4882a593Smuzhiyun	mov.w		&0xe006,2+FP_SRC(%a6)
1682*4882a593Smuzhiyun
1683*4882a593Smuzhiyun	frestore	FP_SRC(%a6)
1684*4882a593Smuzhiyun
1685*4882a593Smuzhiyun	unlk		%a6
1686*4882a593Smuzhiyun
1687*4882a593Smuzhiyun
1688*4882a593Smuzhiyun	bra.l		_real_snan
1689*4882a593Smuzhiyun
1690*4882a593Smuzhiyunfu_operr:
1691*4882a593Smuzhiyun	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1692*4882a593Smuzhiyun	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1693*4882a593Smuzhiyun	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1694*4882a593Smuzhiyun
1695*4882a593Smuzhiyun	mov.w		&0x30d0,EXC_VOFF(%a6)	# vector offset = 0xd0
1696*4882a593Smuzhiyun	mov.w		&0xe004,2+FP_SRC(%a6)
1697*4882a593Smuzhiyun
1698*4882a593Smuzhiyun	frestore	FP_SRC(%a6)
1699*4882a593Smuzhiyun
1700*4882a593Smuzhiyun	unlk		%a6
1701*4882a593Smuzhiyun
1702*4882a593Smuzhiyun
1703*4882a593Smuzhiyun	bra.l		_real_operr
1704*4882a593Smuzhiyun
1705*4882a593Smuzhiyunfu_ovfl:
1706*4882a593Smuzhiyun	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to the stack
1707*4882a593Smuzhiyun
1708*4882a593Smuzhiyun	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1709*4882a593Smuzhiyun	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1710*4882a593Smuzhiyun	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1711*4882a593Smuzhiyun
1712*4882a593Smuzhiyun	mov.w		&0x30d4,EXC_VOFF(%a6)	# vector offset = 0xd4
1713*4882a593Smuzhiyun	mov.w		&0xe005,2+FP_SRC(%a6)
1714*4882a593Smuzhiyun
1715*4882a593Smuzhiyun	frestore	FP_SRC(%a6)		# restore EXOP
1716*4882a593Smuzhiyun
1717*4882a593Smuzhiyun	unlk		%a6
1718*4882a593Smuzhiyun
1719*4882a593Smuzhiyun	bra.l		_real_ovfl
1720*4882a593Smuzhiyun
1721*4882a593Smuzhiyun# underflow can happen for extended precision. extended precision opclass
1722*4882a593Smuzhiyun# three instruction exceptions don't update the stack pointer. so, if the
1723*4882a593Smuzhiyun# exception occurred from user mode, then simply update a7 and exit normally.
1724*4882a593Smuzhiyun# if the exception occurred from supervisor mode, check if
1725*4882a593Smuzhiyunfu_unfl:
1726*4882a593Smuzhiyun	mov.l		EXC_A6(%a6),(%a6)	# restore a6
1727*4882a593Smuzhiyun
1728*4882a593Smuzhiyun	btst		&0x5,EXC_SR(%a6)
1729*4882a593Smuzhiyun	bne.w		fu_unfl_s
1730*4882a593Smuzhiyun
1731*4882a593Smuzhiyun	mov.l		EXC_A7(%a6),%a0		# restore a7 whether we need
1732*4882a593Smuzhiyun	mov.l		%a0,%usp		# to or not...
1733*4882a593Smuzhiyun
1734*4882a593Smuzhiyunfu_unfl_cont:
1735*4882a593Smuzhiyun	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to the stack
1736*4882a593Smuzhiyun
1737*4882a593Smuzhiyun	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1738*4882a593Smuzhiyun	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1739*4882a593Smuzhiyun	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1740*4882a593Smuzhiyun
1741*4882a593Smuzhiyun	mov.w		&0x30cc,EXC_VOFF(%a6)	# vector offset = 0xcc
1742*4882a593Smuzhiyun	mov.w		&0xe003,2+FP_SRC(%a6)
1743*4882a593Smuzhiyun
1744*4882a593Smuzhiyun	frestore	FP_SRC(%a6)		# restore EXOP
1745*4882a593Smuzhiyun
1746*4882a593Smuzhiyun	unlk		%a6
1747*4882a593Smuzhiyun
1748*4882a593Smuzhiyun	bra.l		_real_unfl
1749*4882a593Smuzhiyun
# fu_unfl_s:
# Enabled-underflow exit taken when the exception occurred in supervisor mode.
# If the <ea> mode was -(a7), the default result must be written where the
# exception stack frame currently sits, so the frame is shifted "down" 12
# bytes first and the result is copied into the vacated space.
1750*4882a593Smuzhiyunfu_unfl_s:
1751*4882a593Smuzhiyun	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(sp)?
1752*4882a593Smuzhiyun	bne.b		fu_unfl_cont
1753*4882a593Smuzhiyun
1754*4882a593Smuzhiyun# the extended precision result is still in fp0. but, we need to save it
1755*4882a593Smuzhiyun# somewhere on the stack until we can copy it to its final resting place
1756*4882a593Smuzhiyun# (where the exc frame is currently). make sure it's not at the top of the
1757*4882a593Smuzhiyun# frame or it will get overwritten when the exc stack frame is shifted "down".
1758*4882a593Smuzhiyun	fmovm.x		&0x80,FP_SRC(%a6)	# put answer on stack
1759*4882a593Smuzhiyun	fmovm.x		&0x40,FP_DST(%a6)	# put EXOP on stack
1760*4882a593Smuzhiyun
1761*4882a593Smuzhiyun	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1762*4882a593Smuzhiyun	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1763*4882a593Smuzhiyun	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1764*4882a593Smuzhiyun
# insert the UNFL vector offset into the frame and mark the fsave status
# word as UNFL (0xe003, cf. tbl_except_p) before handing the EXOP back.
1765*4882a593Smuzhiyun	mov.w		&0x30cc,EXC_VOFF(%a6)	# vector offset = 0xcc
1766*4882a593Smuzhiyun	mov.w		&0xe003,2+FP_DST(%a6)	# set fsave status = UNFL
1767*4882a593Smuzhiyun
1768*4882a593Smuzhiyun	frestore	FP_DST(%a6)		# restore EXOP
1769*4882a593Smuzhiyun
1770*4882a593Smuzhiyun	mov.l		(%a6),%a6		# restore frame pointer
1771*4882a593Smuzhiyun
# shift the SR/PC/EA portion of the exception frame "down" 12 bytes so the
# default result can live where the frame used to be.
1772*4882a593Smuzhiyun	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
1773*4882a593Smuzhiyun	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
1774*4882a593Smuzhiyun	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
1775*4882a593Smuzhiyun
1776*4882a593Smuzhiyun# now, copy the result to the proper place on the stack
1777*4882a593Smuzhiyun	mov.l		LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
1778*4882a593Smuzhiyun	mov.l		LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
1779*4882a593Smuzhiyun	mov.l		LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
1780*4882a593Smuzhiyun
# pop the locals; 8 fewer bytes since the frame was shifted down 12 and the
# 12-byte result now occupies the old frame location.
1781*4882a593Smuzhiyun	add.l		&LOCAL_SIZE-0x8,%sp
1782*4882a593Smuzhiyun
1783*4882a593Smuzhiyun	bra.l		_real_unfl		# exit through the os "callout"
1784*4882a593Smuzhiyun
1785*4882a593Smuzhiyun# fmove in and out enter here.
# fu_inex:
# Enabled-inexact exit: save the EXOP, restore the user's register state,
# stuff the INEX vector offset (0xc4) into the frame and mark the fsave
# status word as INEX (0xe001), then leave through _real_inex().
1786*4882a593Smuzhiyunfu_inex:
1787*4882a593Smuzhiyun	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to the stack
1788*4882a593Smuzhiyun
1789*4882a593Smuzhiyun	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1790*4882a593Smuzhiyun	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1791*4882a593Smuzhiyun	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1792*4882a593Smuzhiyun
1793*4882a593Smuzhiyun	mov.w		&0x30c4,EXC_VOFF(%a6)	# vector offset = 0xc4
1794*4882a593Smuzhiyun	mov.w		&0xe001,2+FP_SRC(%a6)	# set fsave status = INEX
1795*4882a593Smuzhiyun
1796*4882a593Smuzhiyun	frestore	FP_SRC(%a6)		# restore EXOP
1797*4882a593Smuzhiyun
1798*4882a593Smuzhiyun	unlk		%a6
1799*4882a593Smuzhiyun
1800*4882a593Smuzhiyun
1801*4882a593Smuzhiyun	bra.l		_real_inex
1802*4882a593Smuzhiyun
1803*4882a593Smuzhiyun#########################################################################
1804*4882a593Smuzhiyun#########################################################################
# fu_in_pack:
# Opclass 0/2 instruction with a PACKED source operand. Fetch and convert the
# packed datum, tag the source (and destination, for dyadic ops), then
# dispatch to the matching emulation routine through tbl_unsupp.
1805*4882a593Smuzhiyunfu_in_pack:
1806*4882a593Smuzhiyun
1807*4882a593Smuzhiyun
1808*4882a593Smuzhiyun# I'm not sure at this point what FPSR bits are valid for this instruction.
1809*4882a593Smuzhiyun# so, since the emulation routines re-create them anyways, zero exception field
1810*4882a593Smuzhiyun	andi.l		&0x0ff00ff,USER_FPSR(%a6) # zero exception field
1811*4882a593Smuzhiyun
1812*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# zero current control regs
1813*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr
1814*4882a593Smuzhiyun
1815*4882a593Smuzhiyun	bsr.l		get_packed		# fetch packed src operand
1816*4882a593Smuzhiyun
1817*4882a593Smuzhiyun	lea		FP_SRC(%a6),%a0		# pass ptr to src
1818*4882a593Smuzhiyun	bsr.l		set_tag_x		# set src optype tag
1819*4882a593Smuzhiyun
1820*4882a593Smuzhiyun	mov.b		%d0,STAG(%a6)		# save src optype tag
1821*4882a593Smuzhiyun
1822*4882a593Smuzhiyun	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1823*4882a593Smuzhiyun
1824*4882a593Smuzhiyun# bit five of the fp extension word separates the monadic and dyadic operations
1825*4882a593Smuzhiyun# at this point
1826*4882a593Smuzhiyun	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
1827*4882a593Smuzhiyun	beq.b		fu_extract_p		# monadic
1828*4882a593Smuzhiyun	cmpi.b		1+EXC_CMDREG(%a6),&0x3a	# is operation an ftst?
1829*4882a593Smuzhiyun	beq.b		fu_extract_p		# yes, so it's monadic, too
1830*4882a593Smuzhiyun
1831*4882a593Smuzhiyun	bsr.l		load_fpn2		# load dst into FP_DST
1832*4882a593Smuzhiyun
1833*4882a593Smuzhiyun	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
1834*4882a593Smuzhiyun	bsr.l		set_tag_x		# tag the operand type
1835*4882a593Smuzhiyun	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
1836*4882a593Smuzhiyun	bne.b		fu_op2_done_p		# no
1837*4882a593Smuzhiyun	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
1838*4882a593Smuzhiyunfu_op2_done_p:
1839*4882a593Smuzhiyun	mov.b		%d0,DTAG(%a6)		# save dst optype tag
1840*4882a593Smuzhiyun
# common dispatch: d0 = rounding mode/precision, d1 = opcode extension,
# a0 -> src operand, a1 -> dst operand, as expected by the tbl_unsupp routines.
1841*4882a593Smuzhiyunfu_extract_p:
1842*4882a593Smuzhiyun	clr.l		%d0
1843*4882a593Smuzhiyun	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec
1844*4882a593Smuzhiyun
1845*4882a593Smuzhiyun	bfextu		1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension
1846*4882a593Smuzhiyun
1847*4882a593Smuzhiyun	lea		FP_SRC(%a6),%a0
1848*4882a593Smuzhiyun	lea		FP_DST(%a6),%a1
1849*4882a593Smuzhiyun
# tbl_unsupp holds table-relative offsets, so the jsr adds the fetched
# offset back onto the table base address.
1850*4882a593Smuzhiyun	mov.l		(tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
1851*4882a593Smuzhiyun	jsr		(tbl_unsupp.l,%pc,%d1.l*1)
1852*4882a593Smuzhiyun
1853*4882a593Smuzhiyun#
1854*4882a593Smuzhiyun# Exceptions in order of precedence:
1855*4882a593Smuzhiyun#	BSUN	: none
1856*4882a593Smuzhiyun#	SNAN	: all dyadic ops
1857*4882a593Smuzhiyun#	OPERR	: fsqrt(-NORM)
1858*4882a593Smuzhiyun#	OVFL	: all except ftst,fcmp
1859*4882a593Smuzhiyun#	UNFL	: all except ftst,fcmp
1860*4882a593Smuzhiyun#	DZ	: fdiv
1861*4882a593Smuzhiyun#	INEX2	: all except ftst,fcmp
1862*4882a593Smuzhiyun#	INEX1	: all
1863*4882a593Smuzhiyun#
1864*4882a593Smuzhiyun
1865*4882a593Smuzhiyun# we determine the highest priority exception(if any) set by the
1866*4882a593Smuzhiyun# emulation routine that has also been enabled by the user.
1867*4882a593Smuzhiyun	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
1868*4882a593Smuzhiyun	bne.w		fu_in_ena_p		# some are enabled
1869*4882a593Smuzhiyun
# no exceptions are enabled: store the result (unless fcmp/ftst) and return.
1870*4882a593Smuzhiyunfu_in_cont_p:
1871*4882a593Smuzhiyun# fcmp and ftst do not store any result.
1872*4882a593Smuzhiyun	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch extension
1873*4882a593Smuzhiyun	andi.b		&0x38,%d0		# extract bits 3-5
1874*4882a593Smuzhiyun	cmpi.b		%d0,&0x38		# is instr fcmp or ftst?
1875*4882a593Smuzhiyun	beq.b		fu_in_exit_p		# yes
1876*4882a593Smuzhiyun
1877*4882a593Smuzhiyun	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1878*4882a593Smuzhiyun	bsr.l		store_fpreg		# store the result
1879*4882a593Smuzhiyun
# normal exit: restore user state and rte (via _fpsp_done) or _real_trace.
1880*4882a593Smuzhiyunfu_in_exit_p:
1881*4882a593Smuzhiyun
1882*4882a593Smuzhiyun	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
1883*4882a593Smuzhiyun	bne.w		fu_in_exit_s_p		# supervisor
1884*4882a593Smuzhiyun
1885*4882a593Smuzhiyun	mov.l		EXC_A7(%a6),%a0		# update user a7
1886*4882a593Smuzhiyun	mov.l		%a0,%usp
1887*4882a593Smuzhiyun
1888*4882a593Smuzhiyunfu_in_exit_cont_p:
1889*4882a593Smuzhiyun	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1890*4882a593Smuzhiyun	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1891*4882a593Smuzhiyun	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1892*4882a593Smuzhiyun
1893*4882a593Smuzhiyun	unlk		%a6			# unravel stack frame
1894*4882a593Smuzhiyun
1895*4882a593Smuzhiyun	btst		&0x7,(%sp)		# is trace on?
1896*4882a593Smuzhiyun	bne.w		fu_trace_p		# yes
1897*4882a593Smuzhiyun
1898*4882a593Smuzhiyun	bra.l		_fpsp_done		# exit to os
1899*4882a593Smuzhiyun
1900*4882a593Smuzhiyun# the exception occurred in supervisor mode. check to see if the
1901*4882a593Smuzhiyun# addressing mode was (a7)+. if so, we'll need to shift the
1902*4882a593Smuzhiyun# stack frame "up".
1903*4882a593Smuzhiyunfu_in_exit_s_p:
1904*4882a593Smuzhiyun	btst		&mia7_bit,SPCOND_FLG(%a6) # was ea mode (a7)+
1905*4882a593Smuzhiyun	beq.b		fu_in_exit_cont_p	# no
1906*4882a593Smuzhiyun
1907*4882a593Smuzhiyun	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1908*4882a593Smuzhiyun	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1909*4882a593Smuzhiyun	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1910*4882a593Smuzhiyun
1911*4882a593Smuzhiyun	unlk		%a6			# unravel stack frame
1912*4882a593Smuzhiyun
1913*4882a593Smuzhiyun# shift the stack frame "up". we don't really care about the <ea> field.
1914*4882a593Smuzhiyun	mov.l		0x4(%sp),0x10(%sp)
1915*4882a593Smuzhiyun	mov.l		0x0(%sp),0xc(%sp)
1916*4882a593Smuzhiyun	add.l		&0xc,%sp		# 12 bytes consumed by (a7)+ read
1917*4882a593Smuzhiyun
1918*4882a593Smuzhiyun	btst		&0x7,(%sp)		# is trace on?
1919*4882a593Smuzhiyun	bne.w		fu_trace_p		# yes
1920*4882a593Smuzhiyun
1921*4882a593Smuzhiyun	bra.l		_fpsp_done		# exit to os
1922*4882a593Smuzhiyun
# fu_in_ena_p:
# At least one exception is enabled in the FPCR. Find the highest-priority
# exception that actually occurred AND is enabled; otherwise fall through to
# the disabled-overflow / enabled-inexact special case.
1923*4882a593Smuzhiyunfu_in_ena_p:
1924*4882a593Smuzhiyun	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled & set
1925*4882a593Smuzhiyun	bfffo		%d0{&24:&8},%d0		# find highest priority exception
1926*4882a593Smuzhiyun	bne.b		fu_in_exc_p		# at least one was set
1927*4882a593Smuzhiyun
1928*4882a593Smuzhiyun#
1929*4882a593Smuzhiyun# No exceptions occurred that were also enabled. Now:
1930*4882a593Smuzhiyun#
1931*4882a593Smuzhiyun#	if (OVFL && ovfl_disabled && inexact_enabled) {
1932*4882a593Smuzhiyun#	    branch to _real_inex() (even if the result was exact!);
1933*4882a593Smuzhiyun#	} else {
1934*4882a593Smuzhiyun#	    save the result in the proper fp reg (unless the op is fcmp or ftst);
1935*4882a593Smuzhiyun#	    return;
1936*4882a593Smuzhiyun#	}
1937*4882a593Smuzhiyun#
1938*4882a593Smuzhiyun	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1939*4882a593Smuzhiyun	beq.w		fu_in_cont_p		# no
1940*4882a593Smuzhiyun
1941*4882a593Smuzhiyunfu_in_ovflchk_p:
1942*4882a593Smuzhiyun	btst		&inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1943*4882a593Smuzhiyun	beq.w		fu_in_cont_p		# no
1944*4882a593Smuzhiyun	bra.w		fu_in_exc_ovfl_p	# do _real_inex() now
1945*4882a593Smuzhiyun
1946*4882a593Smuzhiyun#
1947*4882a593Smuzhiyun# An exception occurred and that exception was enabled:
1948*4882a593Smuzhiyun#
1949*4882a593Smuzhiyun#	shift enabled exception field into lo byte of d0;
1950*4882a593Smuzhiyun#	if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
1951*4882a593Smuzhiyun#	    ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
1952*4882a593Smuzhiyun#		/*
1953*4882a593Smuzhiyun#		 * this is the case where we must call _real_inex() now or else
1954*4882a593Smuzhiyun#		 * there will be no other way to pass it the exceptional operand
1955*4882a593Smuzhiyun#		 */
1956*4882a593Smuzhiyun#		call _real_inex();
1957*4882a593Smuzhiyun#	} else {
1958*4882a593Smuzhiyun#		restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
1959*4882a593Smuzhiyun#	}
1960*4882a593Smuzhiyun#
1961*4882a593Smuzhiyunfu_in_exc_p:
1962*4882a593Smuzhiyun	subi.l		&24,%d0			# fix bfffo offset to be 0-7
1963*4882a593Smuzhiyun	cmpi.b		%d0,&0x6		# is exception INEX? (6 or 7)
1964*4882a593Smuzhiyun	blt.b		fu_in_exc_exit_p	# no
1965*4882a593Smuzhiyun
1966*4882a593Smuzhiyun# the enabled exception was inexact
1967*4882a593Smuzhiyun	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
1968*4882a593Smuzhiyun	bne.w		fu_in_exc_unfl_p	# yes
1969*4882a593Smuzhiyun	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
1970*4882a593Smuzhiyun	bne.w		fu_in_exc_ovfl_p	# yes
1971*4882a593Smuzhiyun
1972*4882a593Smuzhiyun# here, we insert the correct fsave status value into the fsave frame for the
1973*4882a593Smuzhiyun# corresponding exception. the operand in the fsave frame should be the original
1974*4882a593Smuzhiyun# src operand.
1975*4882a593Smuzhiyun# as a reminder for future predicted pain and agony, we are passing in fsave the
1976*4882a593Smuzhiyun# "non-skewed" operand for cases of sgl and dbl src INFs,NANs, and DENORMs.
1977*4882a593Smuzhiyun# this is INCORRECT for enabled SNAN which would give to the user the skewed SNAN!!!
1978*4882a593Smuzhiyunfu_in_exc_exit_p:
1979*4882a593Smuzhiyun	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
1980*4882a593Smuzhiyun	bne.w		fu_in_exc_exit_s_p	# supervisor
1981*4882a593Smuzhiyun
1982*4882a593Smuzhiyun	mov.l		EXC_A7(%a6),%a0		# update user a7
1983*4882a593Smuzhiyun	mov.l		%a0,%usp
1984*4882a593Smuzhiyun
1985*4882a593Smuzhiyunfu_in_exc_exit_cont_p:
# d0 (0-7) indexes tbl_except_p; write the matching fsave status word.
1986*4882a593Smuzhiyun	mov.w		(tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6)
1987*4882a593Smuzhiyun
1988*4882a593Smuzhiyun	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1989*4882a593Smuzhiyun	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1990*4882a593Smuzhiyun	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1991*4882a593Smuzhiyun
1992*4882a593Smuzhiyun	frestore	FP_SRC(%a6)		# restore src op
1993*4882a593Smuzhiyun
1994*4882a593Smuzhiyun	unlk		%a6
1995*4882a593Smuzhiyun
1996*4882a593Smuzhiyun	btst		&0x7,(%sp)		# is trace enabled?
1997*4882a593Smuzhiyun	bne.w		fu_trace_p		# yes
1998*4882a593Smuzhiyun
1999*4882a593Smuzhiyun	bra.l		_fpsp_done
2000*4882a593Smuzhiyun
# fsave status words indexed by exception priority:
# 0:BSUN 1:SNAN(0xe006) 2:OPERR(0xe004) 3:OVFL(0xe005)
# 4:UNFL(0xe003) 5:DZ(0xe002) 6,7:INEX(0xe001)
2001*4882a593Smuzhiyuntbl_except_p:
2002*4882a593Smuzhiyun	short		0xe000,0xe006,0xe004,0xe005
2003*4882a593Smuzhiyun	short		0xe003,0xe002,0xe001,0xe001
2004*4882a593Smuzhiyun
2005*4882a593Smuzhiyunfu_in_exc_ovfl_p:
2006*4882a593Smuzhiyun	mov.w		&0x3,%d0		# tbl_except_p index 3 = OVFL
2007*4882a593Smuzhiyun	bra.w		fu_in_exc_exit_p
2008*4882a593Smuzhiyun
2009*4882a593Smuzhiyunfu_in_exc_unfl_p:
2010*4882a593Smuzhiyun	mov.w		&0x4,%d0		# tbl_except_p index 4 = UNFL
2011*4882a593Smuzhiyun	bra.w		fu_in_exc_exit_p
2012*4882a593Smuzhiyun
# supervisor-mode exit; if the <ea> mode was (a7)+, the 12-byte immediate
# was read off the supervisor stack, so shift the frame "up" by 12 bytes.
2013*4882a593Smuzhiyunfu_in_exc_exit_s_p:
2014*4882a593Smuzhiyun	btst		&mia7_bit,SPCOND_FLG(%a6)
2015*4882a593Smuzhiyun	beq.b		fu_in_exc_exit_cont_p
2016*4882a593Smuzhiyun
2017*4882a593Smuzhiyun	mov.w		(tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6)
2018*4882a593Smuzhiyun
2019*4882a593Smuzhiyun	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
2020*4882a593Smuzhiyun	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2021*4882a593Smuzhiyun	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2022*4882a593Smuzhiyun
2023*4882a593Smuzhiyun	frestore	FP_SRC(%a6)		# restore src op
2024*4882a593Smuzhiyun
2025*4882a593Smuzhiyun	unlk		%a6			# unravel stack frame
2026*4882a593Smuzhiyun
2027*4882a593Smuzhiyun# shift stack frame "up". who cares about <ea> field.
2028*4882a593Smuzhiyun	mov.l		0x4(%sp),0x10(%sp)
2029*4882a593Smuzhiyun	mov.l		0x0(%sp),0xc(%sp)
2030*4882a593Smuzhiyun	add.l		&0xc,%sp
2031*4882a593Smuzhiyun
2032*4882a593Smuzhiyun	btst		&0x7,(%sp)		# is trace on?
2033*4882a593Smuzhiyun	bne.b		fu_trace_p		# yes
2034*4882a593Smuzhiyun
2035*4882a593Smuzhiyun	bra.l		_fpsp_done		# exit to os
2036*4882a593Smuzhiyun
2037*4882a593Smuzhiyun#
2038*4882a593Smuzhiyun# The opclass two PACKED instruction that took an "Unimplemented Data Type"
2039*4882a593Smuzhiyun# exception was being traced. Make the "current" PC the FPIAR and put it in the
2040*4882a593Smuzhiyun# trace stack frame then jump to _real_trace().
2041*4882a593Smuzhiyun#
2042*4882a593Smuzhiyun#		  UNSUPP FRAME		   TRACE FRAME
2043*4882a593Smuzhiyun#		*****************	*****************
2044*4882a593Smuzhiyun#		*      EA	*	*    Current	*
2045*4882a593Smuzhiyun#		*		*	*      PC	*
2046*4882a593Smuzhiyun#		*****************	*****************
2047*4882a593Smuzhiyun#		* 0x2 *	0x0dc	*	* 0x2 *  0x024	*
2048*4882a593Smuzhiyun#		*****************	*****************
2049*4882a593Smuzhiyun#		*     Next	*	*     Next	*
2050*4882a593Smuzhiyun#		*      PC	*	*      PC	*
2051*4882a593Smuzhiyun#		*****************	*****************
2052*4882a593Smuzhiyun#		*      SR	*	*      SR	*
2053*4882a593Smuzhiyun#		*****************	*****************
2054*4882a593Smuzhiyunfu_trace_p:
2055*4882a593Smuzhiyun	mov.w		&0x2024,0x6(%sp)	# format 0x2; vector offset 0x024
2056*4882a593Smuzhiyun	fmov.l		%fpiar,0x8(%sp)		# "current" PC = FPIAR
2057*4882a593Smuzhiyun
2058*4882a593Smuzhiyun	bra.l		_real_trace
2059*4882a593Smuzhiyun
2060*4882a593Smuzhiyun#########################################################
2061*4882a593Smuzhiyun#########################################################
# fu_out_pack:
# Opclass 3 "fmove out" with a PACKED destination format. Load the source fp
# register, tag it, and call fout to perform the packed conversion/store;
# then decide which, if any, enabled exception must be reported.
2062*4882a593Smuzhiyunfu_out_pack:
2063*4882a593Smuzhiyun
2064*4882a593Smuzhiyun
2065*4882a593Smuzhiyun# I'm not sure at this point what FPSR bits are valid for this instruction.
2066*4882a593Smuzhiyun# so, since the emulation routines re-create them anyways, zero exception field.
2067*4882a593Smuzhiyun# fmove out doesn't affect ccodes.
2068*4882a593Smuzhiyun	and.l		&0xffff00ff,USER_FPSR(%a6) # zero exception field
2069*4882a593Smuzhiyun
2070*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# zero current control regs
2071*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr
2072*4882a593Smuzhiyun
2073*4882a593Smuzhiyun	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # extract src fp register no.
2074*4882a593Smuzhiyun	bsr.l		load_fpn1		# load src into FP_SRC
2075*4882a593Smuzhiyun
2076*4882a593Smuzhiyun# unlike other opclass 3, unimplemented data type exceptions, packed must be
2077*4882a593Smuzhiyun# able to detect all operand types.
2078*4882a593Smuzhiyun	lea		FP_SRC(%a6),%a0
2079*4882a593Smuzhiyun	bsr.l		set_tag_x		# tag the operand type
2080*4882a593Smuzhiyun	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
2081*4882a593Smuzhiyun	bne.b		fu_op2_p		# no
2082*4882a593Smuzhiyun	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
2083*4882a593Smuzhiyun
2084*4882a593Smuzhiyunfu_op2_p:
2085*4882a593Smuzhiyun	mov.b		%d0,STAG(%a6)		# save src optype tag
2086*4882a593Smuzhiyun
2087*4882a593Smuzhiyun	clr.l		%d0
2088*4882a593Smuzhiyun	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec
2089*4882a593Smuzhiyun
2090*4882a593Smuzhiyun	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
2091*4882a593Smuzhiyun
2092*4882a593Smuzhiyun	mov.l		(%a6),EXC_A6(%a6)	# in case a6 changes
2093*4882a593Smuzhiyun	bsr.l		fout			# call fmove out routine
2094*4882a593Smuzhiyun
2095*4882a593Smuzhiyun# Exceptions in order of precedence:
2096*4882a593Smuzhiyun#	BSUN	: no
2097*4882a593Smuzhiyun#	SNAN	: yes
2098*4882a593Smuzhiyun#	OPERR	: if ((k_factor > +17) || (dec. exp exceeds 3 digits))
2099*4882a593Smuzhiyun#	OVFL	: no
2100*4882a593Smuzhiyun#	UNFL	: no
2101*4882a593Smuzhiyun#	DZ	: no
2102*4882a593Smuzhiyun#	INEX2	: yes
2103*4882a593Smuzhiyun#	INEX1	: no
2104*4882a593Smuzhiyun
2105*4882a593Smuzhiyun# determine the highest priority exception(if any) set by the
2106*4882a593Smuzhiyun# emulation routine that has also been enabled by the user.
2107*4882a593Smuzhiyun	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
2108*4882a593Smuzhiyun	bne.w		fu_out_ena_p		# some are enabled
2109*4882a593Smuzhiyun
# fu_out_exit_p:
# Normal exit for the packed fmove-out path (no enabled exception pending).
# Handles the supervisor -(a7) destination case by shifting the exception
# frame "down" 12 bytes and storing the result where the frame used to be.
2110*4882a593Smuzhiyunfu_out_exit_p:
2111*4882a593Smuzhiyun	mov.l		EXC_A6(%a6),(%a6)	# restore a6
2112*4882a593Smuzhiyun
2113*4882a593Smuzhiyun	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
2114*4882a593Smuzhiyun	bne.b		fu_out_exit_s_p		# supervisor
2115*4882a593Smuzhiyun
2116*4882a593Smuzhiyun	mov.l		EXC_A7(%a6),%a0		# update user a7
2117*4882a593Smuzhiyun	mov.l		%a0,%usp
2118*4882a593Smuzhiyun
2119*4882a593Smuzhiyunfu_out_exit_cont_p:
2120*4882a593Smuzhiyun	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
2121*4882a593Smuzhiyun	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2122*4882a593Smuzhiyun	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2123*4882a593Smuzhiyun
2124*4882a593Smuzhiyun	unlk		%a6			# unravel stack frame
2125*4882a593Smuzhiyun
2126*4882a593Smuzhiyun	btst		&0x7,(%sp)		# is trace on?
2127*4882a593Smuzhiyun	bne.w		fu_trace_p		# yes
2128*4882a593Smuzhiyun
2129*4882a593Smuzhiyun	bra.l		_fpsp_done		# exit to os
2130*4882a593Smuzhiyun
2131*4882a593Smuzhiyun# the exception occurred in supervisor mode. check to see if the
2132*4882a593Smuzhiyun# addressing mode was -(a7). if so, we'll need to shift the
2133*4882a593Smuzhiyun# stack frame "down".
2134*4882a593Smuzhiyunfu_out_exit_s_p:
2135*4882a593Smuzhiyun	btst		&mda7_bit,SPCOND_FLG(%a6) # was ea mode -(a7)
2136*4882a593Smuzhiyun	beq.b		fu_out_exit_cont_p	# no
2137*4882a593Smuzhiyun
2138*4882a593Smuzhiyun	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
2139*4882a593Smuzhiyun	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2140*4882a593Smuzhiyun	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2141*4882a593Smuzhiyun
2142*4882a593Smuzhiyun	mov.l		(%a6),%a6		# restore frame pointer
2143*4882a593Smuzhiyun
# shift SR/PC "down" 12 bytes, then store the 12-byte result where the
# exception frame used to sit (this IS the -(a7) destination).
2144*4882a593Smuzhiyun	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2145*4882a593Smuzhiyun	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2146*4882a593Smuzhiyun
2147*4882a593Smuzhiyun# now, copy the result to the proper place on the stack
2148*4882a593Smuzhiyun	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
2149*4882a593Smuzhiyun	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
2150*4882a593Smuzhiyun	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
2151*4882a593Smuzhiyun
2152*4882a593Smuzhiyun	add.l		&LOCAL_SIZE-0x8,%sp
2153*4882a593Smuzhiyun
2154*4882a593Smuzhiyun	btst		&0x7,(%sp)		# is trace on?
2155*4882a593Smuzhiyun	bne.w		fu_trace_p		# yes
2156*4882a593Smuzhiyun
2157*4882a593Smuzhiyun	bra.l		_fpsp_done		# exit to os
2158*4882a593Smuzhiyun
# fu_out_ena_p:
# Some exceptions are enabled; see if one actually occurred, then dispatch
# on the bfffo bit offset: 0x19 = SNAN (fall through), 0x1a = OPERR,
# > 0x1a = INEX.
2159*4882a593Smuzhiyunfu_out_ena_p:
2160*4882a593Smuzhiyun	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled
2161*4882a593Smuzhiyun	bfffo		%d0{&24:&8},%d0		# find highest priority exception
2162*4882a593Smuzhiyun	beq.w		fu_out_exit_p		# none set; normal exit
2163*4882a593Smuzhiyun
2164*4882a593Smuzhiyun	mov.l		EXC_A6(%a6),(%a6)	# restore a6
2165*4882a593Smuzhiyun
2166*4882a593Smuzhiyun# an exception occurred and that exception was enabled.
2167*4882a593Smuzhiyun# the only exception possible on packed move out are INEX, OPERR, and SNAN.
2168*4882a593Smuzhiyunfu_out_exc_p:
2169*4882a593Smuzhiyun	cmpi.b		%d0,&0x1a		# offset of the OPERR bit
2170*4882a593Smuzhiyun	bgt.w		fu_inex_p2		# > 0x1a: INEX
2171*4882a593Smuzhiyun	beq.w		fu_operr_p		# = 0x1a: OPERR; else fall into SNAN
2172*4882a593Smuzhiyun
# fu_snan_p:
# Enabled SNAN on a packed fmove out. User mode or supervisor without a
# -(a7) destination re-uses the common fu_snan path; supervisor -(a7) must
# shift the frame "down" and store the default result in its place.
2173*4882a593Smuzhiyunfu_snan_p:
2174*4882a593Smuzhiyun	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
2175*4882a593Smuzhiyun	bne.b		fu_snan_s_p		# supervisor
2176*4882a593Smuzhiyun
2177*4882a593Smuzhiyun	mov.l		EXC_A7(%a6),%a0		# update user a7
2178*4882a593Smuzhiyun	mov.l		%a0,%usp
2179*4882a593Smuzhiyun	bra.w		fu_snan
2180*4882a593Smuzhiyun
2181*4882a593Smuzhiyunfu_snan_s_p:
2182*4882a593Smuzhiyun	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(a7)?
2183*4882a593Smuzhiyun	bne.w		fu_snan
2184*4882a593Smuzhiyun
2185*4882a593Smuzhiyun# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2186*4882a593Smuzhiyun# the strategy is to move the exception frame "down" 12 bytes. then, we
2187*4882a593Smuzhiyun# can store the default result where the exception frame was.
2188*4882a593Smuzhiyun	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
2189*4882a593Smuzhiyun	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2190*4882a593Smuzhiyun	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2191*4882a593Smuzhiyun
2192*4882a593Smuzhiyun	mov.w		&0x30d8,EXC_VOFF(%a6)	# vector offset = 0xd8 (SNAN)
2193*4882a593Smuzhiyun	mov.w		&0xe006,2+FP_SRC(%a6)	# set fsave status
2194*4882a593Smuzhiyun
2195*4882a593Smuzhiyun	frestore	FP_SRC(%a6)		# restore src operand
2196*4882a593Smuzhiyun
2197*4882a593Smuzhiyun	mov.l		(%a6),%a6		# restore frame pointer
2198*4882a593Smuzhiyun
2199*4882a593Smuzhiyun	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2200*4882a593Smuzhiyun	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2201*4882a593Smuzhiyun	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2202*4882a593Smuzhiyun
2203*4882a593Smuzhiyun# now, we copy the default result to its proper location
2204*4882a593Smuzhiyun	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2205*4882a593Smuzhiyun	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2206*4882a593Smuzhiyun	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2207*4882a593Smuzhiyun
2208*4882a593Smuzhiyun	add.l		&LOCAL_SIZE-0x8,%sp
2209*4882a593Smuzhiyun
2210*4882a593Smuzhiyun
2211*4882a593Smuzhiyun	bra.l		_real_snan
2212*4882a593Smuzhiyun
# fu_operr_p:
# Enabled OPERR on a packed fmove out. Mirrors fu_snan_p but with the OPERR
# vector offset (0xd0) and fsave status (0xe004).
2213*4882a593Smuzhiyunfu_operr_p:
2214*4882a593Smuzhiyun	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
2215*4882a593Smuzhiyun	bne.w		fu_operr_p_s		# supervisor
2216*4882a593Smuzhiyun
2217*4882a593Smuzhiyun	mov.l		EXC_A7(%a6),%a0		# update user a7
2218*4882a593Smuzhiyun	mov.l		%a0,%usp
2219*4882a593Smuzhiyun	bra.w		fu_operr
2220*4882a593Smuzhiyun
2221*4882a593Smuzhiyunfu_operr_p_s:
2222*4882a593Smuzhiyun	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(a7)?
2223*4882a593Smuzhiyun	bne.w		fu_operr
2224*4882a593Smuzhiyun
2225*4882a593Smuzhiyun# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2226*4882a593Smuzhiyun# the strategy is to move the exception frame "down" 12 bytes. then, we
2227*4882a593Smuzhiyun# can store the default result where the exception frame was.
2228*4882a593Smuzhiyun	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
2229*4882a593Smuzhiyun	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2230*4882a593Smuzhiyun	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2231*4882a593Smuzhiyun
2232*4882a593Smuzhiyun	mov.w		&0x30d0,EXC_VOFF(%a6)	# vector offset = 0xd0
2233*4882a593Smuzhiyun	mov.w		&0xe004,2+FP_SRC(%a6)	# set fsave status
2234*4882a593Smuzhiyun
2235*4882a593Smuzhiyun	frestore	FP_SRC(%a6)		# restore src operand
2236*4882a593Smuzhiyun
2237*4882a593Smuzhiyun	mov.l		(%a6),%a6		# restore frame pointer
2238*4882a593Smuzhiyun
2239*4882a593Smuzhiyun	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2240*4882a593Smuzhiyun	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2241*4882a593Smuzhiyun	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2242*4882a593Smuzhiyun
2243*4882a593Smuzhiyun# now, we copy the default result to its proper location
2244*4882a593Smuzhiyun	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2245*4882a593Smuzhiyun	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2246*4882a593Smuzhiyun	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2247*4882a593Smuzhiyun
2248*4882a593Smuzhiyun	add.l		&LOCAL_SIZE-0x8,%sp
2249*4882a593Smuzhiyun
2250*4882a593Smuzhiyun
2251*4882a593Smuzhiyun	bra.l		_real_operr
2252*4882a593Smuzhiyun
# fu_inex_p2:
# Enabled INEX on a packed fmove out. Mirrors fu_snan_p/fu_operr_p but with
# the INEX vector offset (0xc4) and fsave status (0xe001).
2253*4882a593Smuzhiyunfu_inex_p2:
2254*4882a593Smuzhiyun	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
2255*4882a593Smuzhiyun	bne.w		fu_inex_s_p2		# supervisor
2256*4882a593Smuzhiyun
2257*4882a593Smuzhiyun	mov.l		EXC_A7(%a6),%a0		# update user a7
2258*4882a593Smuzhiyun	mov.l		%a0,%usp
2259*4882a593Smuzhiyun	bra.w		fu_inex
2260*4882a593Smuzhiyun
2261*4882a593Smuzhiyunfu_inex_s_p2:
2262*4882a593Smuzhiyun	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(a7)?
2263*4882a593Smuzhiyun	bne.w		fu_inex
2264*4882a593Smuzhiyun
2265*4882a593Smuzhiyun# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2266*4882a593Smuzhiyun# the strategy is to move the exception frame "down" 12 bytes. then, we
2267*4882a593Smuzhiyun# can store the default result where the exception frame was.
2268*4882a593Smuzhiyun	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
2269*4882a593Smuzhiyun	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2270*4882a593Smuzhiyun	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2271*4882a593Smuzhiyun
2272*4882a593Smuzhiyun	mov.w		&0x30c4,EXC_VOFF(%a6)	# vector offset = 0xc4
2273*4882a593Smuzhiyun	mov.w		&0xe001,2+FP_SRC(%a6)	# set fsave status
2274*4882a593Smuzhiyun
2275*4882a593Smuzhiyun	frestore	FP_SRC(%a6)		# restore src operand
2276*4882a593Smuzhiyun
2277*4882a593Smuzhiyun	mov.l		(%a6),%a6		# restore frame pointer
2278*4882a593Smuzhiyun
2279*4882a593Smuzhiyun	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2280*4882a593Smuzhiyun	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2281*4882a593Smuzhiyun	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2282*4882a593Smuzhiyun
2283*4882a593Smuzhiyun# now, we copy the default result to its proper location
2284*4882a593Smuzhiyun	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2285*4882a593Smuzhiyun	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2286*4882a593Smuzhiyun	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2287*4882a593Smuzhiyun
2288*4882a593Smuzhiyun	add.l		&LOCAL_SIZE-0x8,%sp
2289*4882a593Smuzhiyun
2290*4882a593Smuzhiyun
2291*4882a593Smuzhiyun	bra.l		_real_inex
2292*4882a593Smuzhiyun
2293*4882a593Smuzhiyun#########################################################################
2294*4882a593Smuzhiyun
2295*4882a593Smuzhiyun#
2296*4882a593Smuzhiyun# if we're stuffing a source operand back into an fsave frame then we
2297*4882a593Smuzhiyun# have to make sure that for single or double source operands that the
2298*4882a593Smuzhiyun# format stuffed is as weird as the hardware usually makes it.
2299*4882a593Smuzhiyun#
# funimp_skew:
# In:  FP_SRC(%a6) holds the unpacked extended-precision source operand;
#      EXC_EXTWORD(%a6) holds the instruction extension word.
# Out: FP_SRC rewritten in the hardware's "skewed" form when the source
#      format was single (0x1) or double (0x5) and the value is denormal
#      for that format; otherwise FP_SRC is left untouched.
2300*4882a593Smuzhiyun	global		funimp_skew
2301*4882a593Smuzhiyunfunimp_skew:
2302*4882a593Smuzhiyun	bfextu		EXC_EXTWORD(%a6){&3:&3},%d0 # extract src specifier
2303*4882a593Smuzhiyun	cmpi.b		%d0,&0x1		# was src sgl?
2304*4882a593Smuzhiyun	beq.b		funimp_skew_sgl		# yes
2305*4882a593Smuzhiyun	cmpi.b		%d0,&0x5		# was src dbl?
2306*4882a593Smuzhiyun	beq.b		funimp_skew_dbl		# yes
2307*4882a593Smuzhiyun	rts
2308*4882a593Smuzhiyun
# single precision: for an exponent in (0, 0x3f80], shift the mantissa right
# by the denormalization amount, set the j-bit, and force the skewed biased
# exponent 0x3f80.
2309*4882a593Smuzhiyunfunimp_skew_sgl:
2310*4882a593Smuzhiyun	mov.w		FP_SRC_EX(%a6),%d0	# fetch DENORM exponent
2311*4882a593Smuzhiyun	andi.w		&0x7fff,%d0		# strip sign
2312*4882a593Smuzhiyun	beq.b		funimp_skew_sgl_not
2313*4882a593Smuzhiyun	cmpi.w		%d0,&0x3f80
2314*4882a593Smuzhiyun	bgt.b		funimp_skew_sgl_not
2315*4882a593Smuzhiyun	neg.w		%d0			# make exponent negative
2316*4882a593Smuzhiyun	addi.w		&0x3f81,%d0		# find amt to shift
2317*4882a593Smuzhiyun	mov.l		FP_SRC_HI(%a6),%d1	# fetch DENORM hi(man)
2318*4882a593Smuzhiyun	lsr.l		%d0,%d1			# shift it
2319*4882a593Smuzhiyun	bset		&31,%d1			# set j-bit
2320*4882a593Smuzhiyun	mov.l		%d1,FP_SRC_HI(%a6)	# insert new hi(man)
2321*4882a593Smuzhiyun	andi.w		&0x8000,FP_SRC_EX(%a6)	# clear old exponent
2322*4882a593Smuzhiyun	ori.w		&0x3f80,FP_SRC_EX(%a6)	# insert new "skewed" exponent
2323*4882a593Smuzhiyunfunimp_skew_sgl_not:
2324*4882a593Smuzhiyun	rts
2325*4882a593Smuzhiyun
# double precision: for an exponent in (0, 0x3c00], denormalize via dnrm_lp
# to the dbl threshold, set the j-bit, and force the skewed exponent 0x3c00
# (sign re-inserted from the saved sign byte).
2326*4882a593Smuzhiyunfunimp_skew_dbl:
2327*4882a593Smuzhiyun	mov.w		FP_SRC_EX(%a6),%d0	# fetch DENORM exponent
2328*4882a593Smuzhiyun	andi.w		&0x7fff,%d0		# strip sign
2329*4882a593Smuzhiyun	beq.b		funimp_skew_dbl_not
2330*4882a593Smuzhiyun	cmpi.w		%d0,&0x3c00
2331*4882a593Smuzhiyun	bgt.b		funimp_skew_dbl_not
2332*4882a593Smuzhiyun
2333*4882a593Smuzhiyun	tst.b		FP_SRC_EX(%a6)		# make "internal format"
2334*4882a593Smuzhiyun	smi.b		0x2+FP_SRC(%a6)		# save sign byte (0xff if neg)
2335*4882a593Smuzhiyun	mov.w		%d0,FP_SRC_EX(%a6)	# insert exponent with cleared sign
2336*4882a593Smuzhiyun	clr.l		%d0			# clear g,r,s
2337*4882a593Smuzhiyun	lea		FP_SRC(%a6),%a0		# pass ptr to src op
2338*4882a593Smuzhiyun	mov.w		&0x3c01,%d1		# pass denorm threshold
2339*4882a593Smuzhiyun	bsr.l		dnrm_lp			# denorm it
2340*4882a593Smuzhiyun	mov.w		&0x3c00,%d0		# new exponent
2341*4882a593Smuzhiyun	tst.b		0x2+FP_SRC(%a6)		# is sign set?
2342*4882a593Smuzhiyun	beq.b		fss_dbl_denorm_done	# no
2343*4882a593Smuzhiyun	bset		&15,%d0			# set sign
2344*4882a593Smuzhiyunfss_dbl_denorm_done:
2345*4882a593Smuzhiyun	bset		&0x7,FP_SRC_HI(%a6)	# set j-bit
2346*4882a593Smuzhiyun	mov.w		%d0,FP_SRC_EX(%a6)	# insert new exponent
2347*4882a593Smuzhiyunfunimp_skew_dbl_not:
2348*4882a593Smuzhiyun	rts
2349*4882a593Smuzhiyun
2350*4882a593Smuzhiyun#########################################################################
# _mem_write2:
# Write the 12 bytes at (%a0) to the destination. User-mode exceptions go
# through the _dmem_write "callout"; for supervisor mode the data is stashed
# in FP_DST(%a6) instead (the exit code stores it over the stack frame) and
# d1 is cleared to report success.
2351*4882a593Smuzhiyun	global		_mem_write2
2352*4882a593Smuzhiyun_mem_write2:
2353*4882a593Smuzhiyun	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
2354*4882a593Smuzhiyun	beq.l		_dmem_write		# user; do a real write
2355*4882a593Smuzhiyun	mov.l		0x0(%a0),FP_DST_EX(%a6)
2356*4882a593Smuzhiyun	mov.l		0x4(%a0),FP_DST_HI(%a6)
2357*4882a593Smuzhiyun	mov.l		0x8(%a0),FP_DST_LO(%a6)
2358*4882a593Smuzhiyun	clr.l		%d1			# return success
2359*4882a593Smuzhiyun	rts
2360*4882a593Smuzhiyun
2361*4882a593Smuzhiyun#########################################################################
2362*4882a593Smuzhiyun# XDEF ****************************************************************	#
2363*4882a593Smuzhiyun#	_fpsp_effadd(): 060FPSP entry point for FP "Unimplemented	#
2364*4882a593Smuzhiyun#			effective address" exception.			#
2365*4882a593Smuzhiyun#									#
2366*4882a593Smuzhiyun#	This handler should be the first code executed upon taking the	#
2367*4882a593Smuzhiyun#	FP Unimplemented Effective Address exception in an operating	#
2368*4882a593Smuzhiyun#	system.								#
2369*4882a593Smuzhiyun#									#
2370*4882a593Smuzhiyun# XREF ****************************************************************	#
2371*4882a593Smuzhiyun#	_imem_read_long() - read instruction longword			#
2372*4882a593Smuzhiyun#	fix_skewed_ops() - adjust src operand in fsave frame		#
2373*4882a593Smuzhiyun#	set_tag_x() - determine optype of src/dst operands		#
2374*4882a593Smuzhiyun#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
2375*4882a593Smuzhiyun#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
2376*4882a593Smuzhiyun#	load_fpn2() - load dst operand from FP regfile			#
2377*4882a593Smuzhiyun#	tbl_unsupp - addr of table of emulation routines for opclass 0,2	#
2378*4882a593Smuzhiyun#	decbin() - convert packed data to FP binary data		#
2379*4882a593Smuzhiyun#	_real_fpu_disabled() - "callout" for "FPU disabled" exception	#
2380*4882a593Smuzhiyun#	_real_access() - "callout" for access error exception		#
2381*4882a593Smuzhiyun#	_mem_read() - read extended immediate operand from memory	#
2382*4882a593Smuzhiyun#	_fpsp_done() - "callout" for exit; work all done		#
2383*4882a593Smuzhiyun#	_real_trace() - "callout" for Trace enabled exception		#
2384*4882a593Smuzhiyun#	fmovm_dynamic() - emulate dynamic fmovm instruction		#
2385*4882a593Smuzhiyun#	fmovm_ctrl() - emulate fmovm control instruction		#
2386*4882a593Smuzhiyun#									#
2387*4882a593Smuzhiyun# INPUT ***************************************************************	#
2388*4882a593Smuzhiyun#	- The system stack contains the "Unimplemented <ea>" stk frame	#
2389*4882a593Smuzhiyun#									#
2390*4882a593Smuzhiyun# OUTPUT **************************************************************	#
2391*4882a593Smuzhiyun#	If access error:						#
2392*4882a593Smuzhiyun#	- The system stack is changed to an access error stack frame	#
2393*4882a593Smuzhiyun#	If FPU disabled:						#
2394*4882a593Smuzhiyun#	- The system stack is changed to an FPU disabled stack frame	#
2395*4882a593Smuzhiyun#	If Trace exception enabled:					#
2396*4882a593Smuzhiyun#	- The system stack is changed to a Trace exception stack frame	#
2397*4882a593Smuzhiyun#	Else: (normal case)						#
2398*4882a593Smuzhiyun#	- None (correct result has been stored as appropriate)		#
2399*4882a593Smuzhiyun#									#
2400*4882a593Smuzhiyun# ALGORITHM ***********************************************************	#
2401*4882a593Smuzhiyun#	This exception handles 3 types of operations:			#
2402*4882a593Smuzhiyun# (1) FP Instructions using extended precision or packed immediate	#
2403*4882a593Smuzhiyun#     addressing mode.							#
2404*4882a593Smuzhiyun# (2) The "fmovm.x" instruction w/ dynamic register specification.	#
2405*4882a593Smuzhiyun# (3) The "fmovm.l" instruction w/ 2 or 3 control registers.		#
2406*4882a593Smuzhiyun#									#
2407*4882a593Smuzhiyun#	For immediate data operations, the data is read in w/ a		#
2408*4882a593Smuzhiyun# _mem_read() "callout", converted to FP binary (if packed), and used	#
2409*4882a593Smuzhiyun# as the source operand to the instruction specified by the instruction	#
# word. If no FP exception should be reported as a result of the	#
2411*4882a593Smuzhiyun# emulation, then the result is stored to the destination register and	#
2412*4882a593Smuzhiyun# the handler exits through _fpsp_done(). If an enabled exc has been	#
2413*4882a593Smuzhiyun# signalled as a result of emulation, then an fsave state frame		#
2414*4882a593Smuzhiyun# corresponding to the FP exception type must be entered into the 060	#
2415*4882a593Smuzhiyun# FPU before exiting. In either the enabled or disabled cases, we	#
2416*4882a593Smuzhiyun# must also check if a Trace exception is pending, in which case, we	#
2417*4882a593Smuzhiyun# must create a Trace exception stack frame from the current exception	#
2418*4882a593Smuzhiyun# stack frame. If no Trace is pending, we simply exit through		#
2419*4882a593Smuzhiyun# _fpsp_done().								#
2420*4882a593Smuzhiyun#	For "fmovm.x", call the routine fmovm_dynamic() which will	#
2421*4882a593Smuzhiyun# decode and emulate the instruction. No FP exceptions can be pending	#
2422*4882a593Smuzhiyun# as a result of this operation emulation. A Trace exception can be	#
2423*4882a593Smuzhiyun# pending, though, which means the current stack frame must be changed	#
2424*4882a593Smuzhiyun# to a Trace stack frame and an exit made through _real_trace().	#
2425*4882a593Smuzhiyun# For the case of "fmovm.x Dn,-(a7)", where the offending instruction	#
2426*4882a593Smuzhiyun# was executed from supervisor mode, this handler must store the FP	#
2427*4882a593Smuzhiyun# register file values to the system stack by itself since		#
2428*4882a593Smuzhiyun# fmovm_dynamic() can't handle this. A normal exit is made through	#
# _fpsp_done().								#
2430*4882a593Smuzhiyun#	For "fmovm.l", fmovm_ctrl() is used to emulate the instruction.	#
2431*4882a593Smuzhiyun# Again, a Trace exception may be pending and an exit made through	#
2432*4882a593Smuzhiyun# _real_trace(). Else, a normal exit is made through _fpsp_done().	#
2433*4882a593Smuzhiyun#									#
2434*4882a593Smuzhiyun#	Before any of the above is attempted, it must be checked to	#
2435*4882a593Smuzhiyun# see if the FPU is disabled. Since the "Unimp <ea>" exception is taken	#
2436*4882a593Smuzhiyun# before the "FPU disabled" exception, but the "FPU disabled" exception	#
2437*4882a593Smuzhiyun# has higher priority, we check the disabled bit in the PCR. If set,	#
2438*4882a593Smuzhiyun# then we must create an 8 word "FPU disabled" exception stack frame	#
2439*4882a593Smuzhiyun# from the current 4 word exception stack frame. This includes		#
2440*4882a593Smuzhiyun# reproducing the effective address of the instruction to put on the	#
2441*4882a593Smuzhiyun# new stack frame.							#
2442*4882a593Smuzhiyun#									#
2443*4882a593Smuzhiyun#	In the process of all emulation work, if a _mem_read()		#
2444*4882a593Smuzhiyun# "callout" returns a failing result indicating an access error, then	#
2445*4882a593Smuzhiyun# we must create an access error stack frame from the current stack	#
2446*4882a593Smuzhiyun# frame. This information includes a faulting address and a fault-	#
2447*4882a593Smuzhiyun# status-longword. These are created within this handler.		#
2448*4882a593Smuzhiyun#									#
2449*4882a593Smuzhiyun#########################################################################
2450*4882a593Smuzhiyun
2451*4882a593Smuzhiyun	global		_fpsp_effadd
2452*4882a593Smuzhiyun_fpsp_effadd:
2453*4882a593Smuzhiyun
2454*4882a593Smuzhiyun# This exception type takes priority over the "Line F Emulator"
2455*4882a593Smuzhiyun# exception. Therefore, the FPU could be disabled when entering here.
2456*4882a593Smuzhiyun# So, we must check to see if it's disabled and handle that case separately.
2457*4882a593Smuzhiyun	mov.l		%d0,-(%sp)		# save d0
2458*4882a593Smuzhiyun	movc		%pcr,%d0		# load proc cr
2459*4882a593Smuzhiyun	btst		&0x1,%d0		# is FPU disabled?
2460*4882a593Smuzhiyun	bne.w		iea_disabled		# yes
2461*4882a593Smuzhiyun	mov.l		(%sp)+,%d0		# restore d0
2462*4882a593Smuzhiyun
2463*4882a593Smuzhiyun	link		%a6,&-LOCAL_SIZE	# init stack frame
2464*4882a593Smuzhiyun
2465*4882a593Smuzhiyun	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
2466*4882a593Smuzhiyun	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
2467*4882a593Smuzhiyun	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
2468*4882a593Smuzhiyun
2469*4882a593Smuzhiyun# PC of instruction that took the exception is the PC in the frame
2470*4882a593Smuzhiyun	mov.l		EXC_PC(%a6),EXC_EXTWPTR(%a6)
2471*4882a593Smuzhiyun
2472*4882a593Smuzhiyun	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
2473*4882a593Smuzhiyun	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
2474*4882a593Smuzhiyun	bsr.l		_imem_read_long		# fetch the instruction words
2475*4882a593Smuzhiyun	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD
2476*4882a593Smuzhiyun
2477*4882a593Smuzhiyun#########################################################################
2478*4882a593Smuzhiyun
2479*4882a593Smuzhiyun	tst.w		%d0			# is operation fmovem?
2480*4882a593Smuzhiyun	bmi.w		iea_fmovm		# yes
2481*4882a593Smuzhiyun
2482*4882a593Smuzhiyun#
2483*4882a593Smuzhiyun# here, we will have:
2484*4882a593Smuzhiyun#	fabs	fdabs	fsabs		facos		fmod
2485*4882a593Smuzhiyun#	fadd	fdadd	fsadd		fasin		frem
2486*4882a593Smuzhiyun#	fcmp				fatan		fscale
2487*4882a593Smuzhiyun#	fdiv	fddiv	fsdiv		fatanh		fsin
2488*4882a593Smuzhiyun#	fint				fcos		fsincos
2489*4882a593Smuzhiyun#	fintrz				fcosh		fsinh
2490*4882a593Smuzhiyun#	fmove	fdmove	fsmove		fetox		ftan
2491*4882a593Smuzhiyun#	fmul	fdmul	fsmul		fetoxm1		ftanh
2492*4882a593Smuzhiyun#	fneg	fdneg	fsneg		fgetexp		ftentox
2493*4882a593Smuzhiyun#	fsgldiv				fgetman		ftwotox
2494*4882a593Smuzhiyun#	fsglmul				flog10
2495*4882a593Smuzhiyun#	fsqrt				flog2
2496*4882a593Smuzhiyun#	fsub	fdsub	fssub		flogn
2497*4882a593Smuzhiyun#	ftst				flognp1
2498*4882a593Smuzhiyun# which can all use f<op>.{x,p}
2499*4882a593Smuzhiyun# so, now it's immediate data extended precision AND PACKED FORMAT!
2500*4882a593Smuzhiyun#
2501*4882a593Smuzhiyuniea_op:
2502*4882a593Smuzhiyun	andi.l		&0x00ff00ff,USER_FPSR(%a6)
2503*4882a593Smuzhiyun
2504*4882a593Smuzhiyun	btst		&0xa,%d0		# is src fmt x or p?
2505*4882a593Smuzhiyun	bne.b		iea_op_pack		# packed
2506*4882a593Smuzhiyun
2507*4882a593Smuzhiyun
2508*4882a593Smuzhiyun	mov.l		EXC_EXTWPTR(%a6),%a0	# pass: ptr to #<data>
2509*4882a593Smuzhiyun	lea		FP_SRC(%a6),%a1		# pass: ptr to super addr
2510*4882a593Smuzhiyun	mov.l		&0xc,%d0		# pass: 12 bytes
2511*4882a593Smuzhiyun	bsr.l		_imem_read		# read extended immediate
2512*4882a593Smuzhiyun
2513*4882a593Smuzhiyun	tst.l		%d1			# did ifetch fail?
2514*4882a593Smuzhiyun	bne.w		iea_iacc		# yes
2515*4882a593Smuzhiyun
2516*4882a593Smuzhiyun	bra.b		iea_op_setsrc
2517*4882a593Smuzhiyun
2518*4882a593Smuzhiyuniea_op_pack:
2519*4882a593Smuzhiyun
2520*4882a593Smuzhiyun	mov.l		EXC_EXTWPTR(%a6),%a0	# pass: ptr to #<data>
2521*4882a593Smuzhiyun	lea		FP_SRC(%a6),%a1		# pass: ptr to super dst
2522*4882a593Smuzhiyun	mov.l		&0xc,%d0		# pass: 12 bytes
2523*4882a593Smuzhiyun	bsr.l		_imem_read		# read packed operand
2524*4882a593Smuzhiyun
2525*4882a593Smuzhiyun	tst.l		%d1			# did ifetch fail?
2526*4882a593Smuzhiyun	bne.w		iea_iacc		# yes
2527*4882a593Smuzhiyun
2528*4882a593Smuzhiyun# The packed operand is an INF or a NAN if the exponent field is all ones.
2529*4882a593Smuzhiyun	bfextu		FP_SRC(%a6){&1:&15},%d0	# get exp
2530*4882a593Smuzhiyun	cmpi.w		%d0,&0x7fff		# INF or NAN?
2531*4882a593Smuzhiyun	beq.b		iea_op_setsrc		# operand is an INF or NAN
2532*4882a593Smuzhiyun
2533*4882a593Smuzhiyun# The packed operand is a zero if the mantissa is all zero, else it's
2534*4882a593Smuzhiyun# a normal packed op.
2535*4882a593Smuzhiyun	mov.b		3+FP_SRC(%a6),%d0	# get byte 4
2536*4882a593Smuzhiyun	andi.b		&0x0f,%d0		# clear all but last nybble
2537*4882a593Smuzhiyun	bne.b		iea_op_gp_not_spec	# not a zero
2538*4882a593Smuzhiyun	tst.l		FP_SRC_HI(%a6)		# is lw 2 zero?
2539*4882a593Smuzhiyun	bne.b		iea_op_gp_not_spec	# not a zero
2540*4882a593Smuzhiyun	tst.l		FP_SRC_LO(%a6)		# is lw 3 zero?
2541*4882a593Smuzhiyun	beq.b		iea_op_setsrc		# operand is a ZERO
2542*4882a593Smuzhiyuniea_op_gp_not_spec:
2543*4882a593Smuzhiyun	lea		FP_SRC(%a6),%a0		# pass: ptr to packed op
2544*4882a593Smuzhiyun	bsr.l		decbin			# convert to extended
2545*4882a593Smuzhiyun	fmovm.x		&0x80,FP_SRC(%a6)	# make this the srcop
2546*4882a593Smuzhiyun
2547*4882a593Smuzhiyuniea_op_setsrc:
2548*4882a593Smuzhiyun	addi.l		&0xc,EXC_EXTWPTR(%a6)	# update extension word pointer
2549*4882a593Smuzhiyun
2550*4882a593Smuzhiyun# FP_SRC now holds the src operand.
2551*4882a593Smuzhiyun	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
2552*4882a593Smuzhiyun	bsr.l		set_tag_x		# tag the operand type
2553*4882a593Smuzhiyun	mov.b		%d0,STAG(%a6)		# could be ANYTHING!!!
2554*4882a593Smuzhiyun	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
2555*4882a593Smuzhiyun	bne.b		iea_op_getdst		# no
2556*4882a593Smuzhiyun	bsr.l		unnorm_fix		# yes; convert to NORM/DENORM/ZERO
2557*4882a593Smuzhiyun	mov.b		%d0,STAG(%a6)		# set new optype tag
2558*4882a593Smuzhiyuniea_op_getdst:
2559*4882a593Smuzhiyun	clr.b		STORE_FLG(%a6)		# clear "store result" boolean
2560*4882a593Smuzhiyun
2561*4882a593Smuzhiyun	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
2562*4882a593Smuzhiyun	beq.b		iea_op_extract		# monadic
2563*4882a593Smuzhiyun	btst		&0x4,1+EXC_CMDREG(%a6)	# is operation fsincos,ftst,fcmp?
2564*4882a593Smuzhiyun	bne.b		iea_op_spec		# yes
2565*4882a593Smuzhiyun
2566*4882a593Smuzhiyuniea_op_loaddst:
2567*4882a593Smuzhiyun	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
2568*4882a593Smuzhiyun	bsr.l		load_fpn2		# load dst operand
2569*4882a593Smuzhiyun
2570*4882a593Smuzhiyun	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
2571*4882a593Smuzhiyun	bsr.l		set_tag_x		# tag the operand type
2572*4882a593Smuzhiyun	mov.b		%d0,DTAG(%a6)		# could be ANYTHING!!!
2573*4882a593Smuzhiyun	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
2574*4882a593Smuzhiyun	bne.b		iea_op_extract		# no
2575*4882a593Smuzhiyun	bsr.l		unnorm_fix		# yes; convert to NORM/DENORM/ZERO
2576*4882a593Smuzhiyun	mov.b		%d0,DTAG(%a6)		# set new optype tag
2577*4882a593Smuzhiyun	bra.b		iea_op_extract
2578*4882a593Smuzhiyun
2579*4882a593Smuzhiyun# the operation is fsincos, ftst, or fcmp. only fcmp is dyadic
2580*4882a593Smuzhiyuniea_op_spec:
2581*4882a593Smuzhiyun	btst		&0x3,1+EXC_CMDREG(%a6)	# is operation fsincos?
2582*4882a593Smuzhiyun	beq.b		iea_op_extract		# yes
2583*4882a593Smuzhiyun# now, we're left with ftst and fcmp. so, first let's tag them so that they don't
2584*4882a593Smuzhiyun# store a result. then, only fcmp will branch back and pick up a dst operand.
2585*4882a593Smuzhiyun	st		STORE_FLG(%a6)		# don't store a final result
2586*4882a593Smuzhiyun	btst		&0x1,1+EXC_CMDREG(%a6)	# is operation fcmp?
2587*4882a593Smuzhiyun	beq.b		iea_op_loaddst		# yes
2588*4882a593Smuzhiyun
2589*4882a593Smuzhiyuniea_op_extract:
2590*4882a593Smuzhiyun	clr.l		%d0
2591*4882a593Smuzhiyun	mov.b		FPCR_MODE(%a6),%d0	# pass: rnd mode,prec
2592*4882a593Smuzhiyun
2593*4882a593Smuzhiyun	mov.b		1+EXC_CMDREG(%a6),%d1
2594*4882a593Smuzhiyun	andi.w		&0x007f,%d1		# extract extension
2595*4882a593Smuzhiyun
2596*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr
2597*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr
2598*4882a593Smuzhiyun
2599*4882a593Smuzhiyun	lea		FP_SRC(%a6),%a0
2600*4882a593Smuzhiyun	lea		FP_DST(%a6),%a1
2601*4882a593Smuzhiyun
2602*4882a593Smuzhiyun	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
2603*4882a593Smuzhiyun	jsr		(tbl_unsupp.l,%pc,%d1.l*1)
2604*4882a593Smuzhiyun
2605*4882a593Smuzhiyun#
2606*4882a593Smuzhiyun# Exceptions in order of precedence:
2607*4882a593Smuzhiyun#	BSUN	: none
2608*4882a593Smuzhiyun#	SNAN	: all operations
2609*4882a593Smuzhiyun#	OPERR	: all reg-reg or mem-reg operations that can normally operr
2610*4882a593Smuzhiyun#	OVFL	: same as OPERR
2611*4882a593Smuzhiyun#	UNFL	: same as OPERR
2612*4882a593Smuzhiyun#	DZ	: same as OPERR
2613*4882a593Smuzhiyun#	INEX2	: same as OPERR
2614*4882a593Smuzhiyun#	INEX1	: all packed immediate operations
2615*4882a593Smuzhiyun#
2616*4882a593Smuzhiyun
2617*4882a593Smuzhiyun# we determine the highest priority exception(if any) set by the
2618*4882a593Smuzhiyun# emulation routine that has also been enabled by the user.
2619*4882a593Smuzhiyun	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
2620*4882a593Smuzhiyun	bne.b		iea_op_ena		# some are enabled
2621*4882a593Smuzhiyun
2622*4882a593Smuzhiyun# now, we save the result, unless, of course, the operation was ftst or fcmp.
2623*4882a593Smuzhiyun# these don't save results.
2624*4882a593Smuzhiyuniea_op_save:
2625*4882a593Smuzhiyun	tst.b		STORE_FLG(%a6)		# does this op store a result?
2626*4882a593Smuzhiyun	bne.b		iea_op_exit1		# exit with no frestore
2627*4882a593Smuzhiyun
2628*4882a593Smuzhiyuniea_op_store:
2629*4882a593Smuzhiyun	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
2630*4882a593Smuzhiyun	bsr.l		store_fpreg		# store the result
2631*4882a593Smuzhiyun
2632*4882a593Smuzhiyuniea_op_exit1:
2633*4882a593Smuzhiyun	mov.l		EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
2634*4882a593Smuzhiyun	mov.l		EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame
2635*4882a593Smuzhiyun
2636*4882a593Smuzhiyun	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
2637*4882a593Smuzhiyun	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2638*4882a593Smuzhiyun	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2639*4882a593Smuzhiyun
2640*4882a593Smuzhiyun	unlk		%a6			# unravel the frame
2641*4882a593Smuzhiyun
2642*4882a593Smuzhiyun	btst		&0x7,(%sp)		# is trace on?
2643*4882a593Smuzhiyun	bne.w		iea_op_trace		# yes
2644*4882a593Smuzhiyun
2645*4882a593Smuzhiyun	bra.l		_fpsp_done		# exit to os
2646*4882a593Smuzhiyun
2647*4882a593Smuzhiyuniea_op_ena:
2648*4882a593Smuzhiyun	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enable and set
2649*4882a593Smuzhiyun	bfffo		%d0{&24:&8},%d0		# find highest priority exception
2650*4882a593Smuzhiyun	bne.b		iea_op_exc		# at least one was set
2651*4882a593Smuzhiyun
2652*4882a593Smuzhiyun# no exception occurred. now, did a disabled, exact overflow occur with inexact
2653*4882a593Smuzhiyun# enabled? if so, then we have to stuff an overflow frame into the FPU.
2654*4882a593Smuzhiyun	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
2655*4882a593Smuzhiyun	beq.b		iea_op_save
2656*4882a593Smuzhiyun
2657*4882a593Smuzhiyuniea_op_ovfl:
2658*4882a593Smuzhiyun	btst		&inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
2659*4882a593Smuzhiyun	beq.b		iea_op_store		# no
2660*4882a593Smuzhiyun	bra.b		iea_op_exc_ovfl		# yes
2661*4882a593Smuzhiyun
2662*4882a593Smuzhiyun# an enabled exception occurred. we have to insert the exception type back into
2663*4882a593Smuzhiyun# the machine.
2664*4882a593Smuzhiyuniea_op_exc:
2665*4882a593Smuzhiyun	subi.l		&24,%d0			# fix offset to be 0-8
2666*4882a593Smuzhiyun	cmpi.b		%d0,&0x6		# is exception INEX?
2667*4882a593Smuzhiyun	bne.b		iea_op_exc_force	# no
2668*4882a593Smuzhiyun
2669*4882a593Smuzhiyun# the enabled exception was inexact. so, if it occurs with an overflow
2670*4882a593Smuzhiyun# or underflow that was disabled, then we have to force an overflow or
2671*4882a593Smuzhiyun# underflow frame.
2672*4882a593Smuzhiyun	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
2673*4882a593Smuzhiyun	bne.b		iea_op_exc_ovfl		# yes
2674*4882a593Smuzhiyun	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did underflow occur?
2675*4882a593Smuzhiyun	bne.b		iea_op_exc_unfl		# yes
2676*4882a593Smuzhiyun
2677*4882a593Smuzhiyuniea_op_exc_force:
2678*4882a593Smuzhiyun	mov.w		(tbl_iea_except.b,%pc,%d0.w*2),2+FP_SRC(%a6)
2679*4882a593Smuzhiyun	bra.b		iea_op_exit2		# exit with frestore
2680*4882a593Smuzhiyun
2681*4882a593Smuzhiyuntbl_iea_except:
2682*4882a593Smuzhiyun	short		0xe002, 0xe006, 0xe004, 0xe005
2683*4882a593Smuzhiyun	short		0xe003, 0xe002, 0xe001, 0xe001
2684*4882a593Smuzhiyun
2685*4882a593Smuzhiyuniea_op_exc_ovfl:
2686*4882a593Smuzhiyun	mov.w		&0xe005,2+FP_SRC(%a6)
2687*4882a593Smuzhiyun	bra.b		iea_op_exit2
2688*4882a593Smuzhiyun
2689*4882a593Smuzhiyuniea_op_exc_unfl:
2690*4882a593Smuzhiyun	mov.w		&0xe003,2+FP_SRC(%a6)
2691*4882a593Smuzhiyun
2692*4882a593Smuzhiyuniea_op_exit2:
2693*4882a593Smuzhiyun	mov.l		EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
2694*4882a593Smuzhiyun	mov.l		EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame
2695*4882a593Smuzhiyun
2696*4882a593Smuzhiyun	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
2697*4882a593Smuzhiyun	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2698*4882a593Smuzhiyun	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2699*4882a593Smuzhiyun
2700*4882a593Smuzhiyun	frestore	FP_SRC(%a6)		# restore exceptional state
2701*4882a593Smuzhiyun
2702*4882a593Smuzhiyun	unlk		%a6			# unravel the frame
2703*4882a593Smuzhiyun
2704*4882a593Smuzhiyun	btst		&0x7,(%sp)		# is trace on?
2705*4882a593Smuzhiyun	bne.b		iea_op_trace		# yes
2706*4882a593Smuzhiyun
2707*4882a593Smuzhiyun	bra.l		_fpsp_done		# exit to os
2708*4882a593Smuzhiyun
2709*4882a593Smuzhiyun#
2710*4882a593Smuzhiyun# The opclass two instruction that took an "Unimplemented Effective Address"
2711*4882a593Smuzhiyun# exception was being traced. Make the "current" PC the FPIAR and put it in
2712*4882a593Smuzhiyun# the trace stack frame then jump to _real_trace().
2713*4882a593Smuzhiyun#
2714*4882a593Smuzhiyun#		 UNIMP EA FRAME		   TRACE FRAME
2715*4882a593Smuzhiyun#		*****************	*****************
2716*4882a593Smuzhiyun#		* 0x0 *  0x0f0	*	*    Current	*
2717*4882a593Smuzhiyun#		*****************	*      PC	*
2718*4882a593Smuzhiyun#		*    Current	*	*****************
2719*4882a593Smuzhiyun#		*      PC	*	* 0x2 *  0x024	*
2720*4882a593Smuzhiyun#		*****************	*****************
2721*4882a593Smuzhiyun#		*      SR	*	*     Next	*
2722*4882a593Smuzhiyun#		*****************	*      PC	*
2723*4882a593Smuzhiyun#					*****************
2724*4882a593Smuzhiyun#					*      SR	*
2725*4882a593Smuzhiyun#					*****************
2726*4882a593Smuzhiyuniea_op_trace:
2727*4882a593Smuzhiyun	mov.l		(%sp),-(%sp)		# shift stack frame "down"
2728*4882a593Smuzhiyun	mov.w		0x8(%sp),0x4(%sp)
2729*4882a593Smuzhiyun	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
2730*4882a593Smuzhiyun	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR
2731*4882a593Smuzhiyun
2732*4882a593Smuzhiyun	bra.l		_real_trace
2733*4882a593Smuzhiyun
2734*4882a593Smuzhiyun#########################################################################
2735*4882a593Smuzhiyuniea_fmovm:
2736*4882a593Smuzhiyun	btst		&14,%d0			# ctrl or data reg
2737*4882a593Smuzhiyun	beq.w		iea_fmovm_ctrl
2738*4882a593Smuzhiyun
2739*4882a593Smuzhiyuniea_fmovm_data:
2740*4882a593Smuzhiyun
2741*4882a593Smuzhiyun	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode
2742*4882a593Smuzhiyun	bne.b		iea_fmovm_data_s
2743*4882a593Smuzhiyun
2744*4882a593Smuzhiyuniea_fmovm_data_u:
2745*4882a593Smuzhiyun	mov.l		%usp,%a0
2746*4882a593Smuzhiyun	mov.l		%a0,EXC_A7(%a6)		# store current a7
2747*4882a593Smuzhiyun	bsr.l		fmovm_dynamic		# do dynamic fmovm
2748*4882a593Smuzhiyun	mov.l		EXC_A7(%a6),%a0		# load possibly new a7
2749*4882a593Smuzhiyun	mov.l		%a0,%usp		# update usp
2750*4882a593Smuzhiyun	bra.w		iea_fmovm_exit
2751*4882a593Smuzhiyun
2752*4882a593Smuzhiyuniea_fmovm_data_s:
2753*4882a593Smuzhiyun	clr.b		SPCOND_FLG(%a6)
2754*4882a593Smuzhiyun	lea		0x2+EXC_VOFF(%a6),%a0
2755*4882a593Smuzhiyun	mov.l		%a0,EXC_A7(%a6)
2756*4882a593Smuzhiyun	bsr.l		fmovm_dynamic		# do dynamic fmovm
2757*4882a593Smuzhiyun
2758*4882a593Smuzhiyun	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
2759*4882a593Smuzhiyun	beq.w		iea_fmovm_data_predec
2760*4882a593Smuzhiyun	cmpi.b		SPCOND_FLG(%a6),&mia7_flg
2761*4882a593Smuzhiyun	bne.w		iea_fmovm_exit
2762*4882a593Smuzhiyun
2763*4882a593Smuzhiyun# right now, d0 = the size.
2764*4882a593Smuzhiyun# the data has been fetched from the supervisor stack, but we have not
2765*4882a593Smuzhiyun# incremented the stack pointer by the appropriate number of bytes.
2766*4882a593Smuzhiyun# do it here.
2767*4882a593Smuzhiyuniea_fmovm_data_postinc:
2768*4882a593Smuzhiyun	btst		&0x7,EXC_SR(%a6)
2769*4882a593Smuzhiyun	bne.b		iea_fmovm_data_pi_trace
2770*4882a593Smuzhiyun
2771*4882a593Smuzhiyun	mov.w		EXC_SR(%a6),(EXC_SR,%a6,%d0)
2772*4882a593Smuzhiyun	mov.l		EXC_EXTWPTR(%a6),(EXC_PC,%a6,%d0)
2773*4882a593Smuzhiyun	mov.w		&0x00f0,(EXC_VOFF,%a6,%d0)
2774*4882a593Smuzhiyun
2775*4882a593Smuzhiyun	lea		(EXC_SR,%a6,%d0),%a0
2776*4882a593Smuzhiyun	mov.l		%a0,EXC_SR(%a6)
2777*4882a593Smuzhiyun
2778*4882a593Smuzhiyun	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
2779*4882a593Smuzhiyun	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2780*4882a593Smuzhiyun	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2781*4882a593Smuzhiyun
2782*4882a593Smuzhiyun	unlk		%a6
2783*4882a593Smuzhiyun	mov.l		(%sp)+,%sp
2784*4882a593Smuzhiyun	bra.l		_fpsp_done
2785*4882a593Smuzhiyun
2786*4882a593Smuzhiyuniea_fmovm_data_pi_trace:
2787*4882a593Smuzhiyun	mov.w		EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0)
2788*4882a593Smuzhiyun	mov.l		EXC_EXTWPTR(%a6),(EXC_PC-0x4,%a6,%d0)
2789*4882a593Smuzhiyun	mov.w		&0x2024,(EXC_VOFF-0x4,%a6,%d0)
2790*4882a593Smuzhiyun	mov.l		EXC_PC(%a6),(EXC_VOFF+0x2-0x4,%a6,%d0)
2791*4882a593Smuzhiyun
2792*4882a593Smuzhiyun	lea		(EXC_SR-0x4,%a6,%d0),%a0
2793*4882a593Smuzhiyun	mov.l		%a0,EXC_SR(%a6)
2794*4882a593Smuzhiyun
2795*4882a593Smuzhiyun	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
2796*4882a593Smuzhiyun	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2797*4882a593Smuzhiyun	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2798*4882a593Smuzhiyun
2799*4882a593Smuzhiyun	unlk		%a6
2800*4882a593Smuzhiyun	mov.l		(%sp)+,%sp
2801*4882a593Smuzhiyun	bra.l		_real_trace
2802*4882a593Smuzhiyun
2803*4882a593Smuzhiyun# right now, d1 = size and d0 = the strg.
2804*4882a593Smuzhiyuniea_fmovm_data_predec:
2805*4882a593Smuzhiyun	mov.b		%d1,EXC_VOFF(%a6)	# store strg
2806*4882a593Smuzhiyun	mov.b		%d0,0x1+EXC_VOFF(%a6)	# store size
2807*4882a593Smuzhiyun
2808*4882a593Smuzhiyun	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
2809*4882a593Smuzhiyun	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2810*4882a593Smuzhiyun	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2811*4882a593Smuzhiyun
2812*4882a593Smuzhiyun	mov.l		(%a6),-(%sp)		# make a copy of a6
2813*4882a593Smuzhiyun	mov.l		%d0,-(%sp)		# save d0
2814*4882a593Smuzhiyun	mov.l		%d1,-(%sp)		# save d1
2815*4882a593Smuzhiyun	mov.l		EXC_EXTWPTR(%a6),-(%sp)	# make a copy of Next PC
2816*4882a593Smuzhiyun
2817*4882a593Smuzhiyun	clr.l		%d0
2818*4882a593Smuzhiyun	mov.b		0x1+EXC_VOFF(%a6),%d0	# fetch size
2819*4882a593Smuzhiyun	neg.l		%d0			# get negative of size
2820*4882a593Smuzhiyun
2821*4882a593Smuzhiyun	btst		&0x7,EXC_SR(%a6)	# is trace enabled?
2822*4882a593Smuzhiyun	beq.b		iea_fmovm_data_p2
2823*4882a593Smuzhiyun
2824*4882a593Smuzhiyun	mov.w		EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0)
2825*4882a593Smuzhiyun	mov.l		EXC_PC(%a6),(EXC_VOFF-0x2,%a6,%d0)
2826*4882a593Smuzhiyun	mov.l		(%sp)+,(EXC_PC-0x4,%a6,%d0)
2827*4882a593Smuzhiyun	mov.w		&0x2024,(EXC_VOFF-0x4,%a6,%d0)
2828*4882a593Smuzhiyun
2829*4882a593Smuzhiyun	pea		(%a6,%d0)		# create final sp
2830*4882a593Smuzhiyun	bra.b		iea_fmovm_data_p3
2831*4882a593Smuzhiyun
2832*4882a593Smuzhiyuniea_fmovm_data_p2:
2833*4882a593Smuzhiyun	mov.w		EXC_SR(%a6),(EXC_SR,%a6,%d0)
2834*4882a593Smuzhiyun	mov.l		(%sp)+,(EXC_PC,%a6,%d0)
2835*4882a593Smuzhiyun	mov.w		&0x00f0,(EXC_VOFF,%a6,%d0)
2836*4882a593Smuzhiyun
2837*4882a593Smuzhiyun	pea		(0x4,%a6,%d0)		# create final sp
2838*4882a593Smuzhiyun
2839*4882a593Smuzhiyuniea_fmovm_data_p3:
2840*4882a593Smuzhiyun	clr.l		%d1
2841*4882a593Smuzhiyun	mov.b		EXC_VOFF(%a6),%d1	# fetch strg
2842*4882a593Smuzhiyun
2843*4882a593Smuzhiyun	tst.b		%d1
2844*4882a593Smuzhiyun	bpl.b		fm_1
2845*4882a593Smuzhiyun	fmovm.x		&0x80,(0x4+0x8,%a6,%d0)
2846*4882a593Smuzhiyun	addi.l		&0xc,%d0
2847*4882a593Smuzhiyunfm_1:
2848*4882a593Smuzhiyun	lsl.b		&0x1,%d1
2849*4882a593Smuzhiyun	bpl.b		fm_2
2850*4882a593Smuzhiyun	fmovm.x		&0x40,(0x4+0x8,%a6,%d0)
2851*4882a593Smuzhiyun	addi.l		&0xc,%d0
2852*4882a593Smuzhiyunfm_2:
2853*4882a593Smuzhiyun	lsl.b		&0x1,%d1
2854*4882a593Smuzhiyun	bpl.b		fm_3
2855*4882a593Smuzhiyun	fmovm.x		&0x20,(0x4+0x8,%a6,%d0)
2856*4882a593Smuzhiyun	addi.l		&0xc,%d0
2857*4882a593Smuzhiyunfm_3:
2858*4882a593Smuzhiyun	lsl.b		&0x1,%d1
2859*4882a593Smuzhiyun	bpl.b		fm_4
2860*4882a593Smuzhiyun	fmovm.x		&0x10,(0x4+0x8,%a6,%d0)
2861*4882a593Smuzhiyun	addi.l		&0xc,%d0
2862*4882a593Smuzhiyunfm_4:
2863*4882a593Smuzhiyun	lsl.b		&0x1,%d1
2864*4882a593Smuzhiyun	bpl.b		fm_5
2865*4882a593Smuzhiyun	fmovm.x		&0x08,(0x4+0x8,%a6,%d0)
2866*4882a593Smuzhiyun	addi.l		&0xc,%d0
2867*4882a593Smuzhiyunfm_5:
2868*4882a593Smuzhiyun	lsl.b		&0x1,%d1
2869*4882a593Smuzhiyun	bpl.b		fm_6
2870*4882a593Smuzhiyun	fmovm.x		&0x04,(0x4+0x8,%a6,%d0)
2871*4882a593Smuzhiyun	addi.l		&0xc,%d0
2872*4882a593Smuzhiyunfm_6:
2873*4882a593Smuzhiyun	lsl.b		&0x1,%d1
2874*4882a593Smuzhiyun	bpl.b		fm_7
2875*4882a593Smuzhiyun	fmovm.x		&0x02,(0x4+0x8,%a6,%d0)
2876*4882a593Smuzhiyun	addi.l		&0xc,%d0
2877*4882a593Smuzhiyunfm_7:
2878*4882a593Smuzhiyun	lsl.b		&0x1,%d1
2879*4882a593Smuzhiyun	bpl.b		fm_end
2880*4882a593Smuzhiyun	fmovm.x		&0x01,(0x4+0x8,%a6,%d0)
2881*4882a593Smuzhiyunfm_end:
2882*4882a593Smuzhiyun	mov.l		0x4(%sp),%d1
2883*4882a593Smuzhiyun	mov.l		0x8(%sp),%d0
2884*4882a593Smuzhiyun	mov.l		0xc(%sp),%a6
2885*4882a593Smuzhiyun	mov.l		(%sp)+,%sp
2886*4882a593Smuzhiyun
2887*4882a593Smuzhiyun	btst		&0x7,(%sp)		# is trace enabled?
2888*4882a593Smuzhiyun	beq.l		_fpsp_done
2889*4882a593Smuzhiyun	bra.l		_real_trace
2890*4882a593Smuzhiyun
2891*4882a593Smuzhiyun#########################################################################
2892*4882a593Smuzhiyuniea_fmovm_ctrl:
2893*4882a593Smuzhiyun
2894*4882a593Smuzhiyun	bsr.l		fmovm_ctrl		# load ctrl regs
2895*4882a593Smuzhiyun
2896*4882a593Smuzhiyuniea_fmovm_exit:
2897*4882a593Smuzhiyun	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
2898*4882a593Smuzhiyun	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2899*4882a593Smuzhiyun	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2900*4882a593Smuzhiyun
2901*4882a593Smuzhiyun	btst		&0x7,EXC_SR(%a6)	# is trace on?
2902*4882a593Smuzhiyun	bne.b		iea_fmovm_trace		# yes
2903*4882a593Smuzhiyun
2904*4882a593Smuzhiyun	mov.l		EXC_EXTWPTR(%a6),EXC_PC(%a6) # set Next PC
2905*4882a593Smuzhiyun
2906*4882a593Smuzhiyun	unlk		%a6			# unravel the frame
2907*4882a593Smuzhiyun
2908*4882a593Smuzhiyun	bra.l		_fpsp_done		# exit to os
2909*4882a593Smuzhiyun
2910*4882a593Smuzhiyun#
2911*4882a593Smuzhiyun# The control reg instruction that took an "Unimplemented Effective Address"
2912*4882a593Smuzhiyun# exception was being traced. The "Current PC" for the trace frame is the
2913*4882a593Smuzhiyun# PC stacked for Unimp EA. The "Next PC" is in EXC_EXTWPTR.
2914*4882a593Smuzhiyun# After fixing the stack frame, jump to _real_trace().
2915*4882a593Smuzhiyun#
2916*4882a593Smuzhiyun#		 UNIMP EA FRAME		   TRACE FRAME
2917*4882a593Smuzhiyun#		*****************	*****************
2918*4882a593Smuzhiyun#		* 0x0 *  0x0f0	*	*    Current	*
2919*4882a593Smuzhiyun#		*****************	*      PC	*
2920*4882a593Smuzhiyun#		*    Current	*	*****************
2921*4882a593Smuzhiyun#		*      PC	*	* 0x2 *  0x024	*
2922*4882a593Smuzhiyun#		*****************	*****************
2923*4882a593Smuzhiyun#		*      SR	*	*     Next	*
2924*4882a593Smuzhiyun#		*****************	*      PC	*
2925*4882a593Smuzhiyun#					*****************
2926*4882a593Smuzhiyun#					*      SR	*
2927*4882a593Smuzhiyun#					*****************
2928*4882a593Smuzhiyun# this ain't a pretty solution, but it works:
2929*4882a593Smuzhiyun# -restore a6 (not with unlk)
2930*4882a593Smuzhiyun# -shift stack frame down over where old a6 used to be
2931*4882a593Smuzhiyun# -add LOCAL_SIZE to stack pointer
2932*4882a593Smuzhiyuniea_fmovm_trace:
2933*4882a593Smuzhiyun	mov.l		(%a6),%a6		# restore frame pointer
2934*4882a593Smuzhiyun	mov.w		EXC_SR+LOCAL_SIZE(%sp),0x0+LOCAL_SIZE(%sp)
2935*4882a593Smuzhiyun	mov.l		EXC_PC+LOCAL_SIZE(%sp),0x8+LOCAL_SIZE(%sp)
2936*4882a593Smuzhiyun	mov.l		EXC_EXTWPTR+LOCAL_SIZE(%sp),0x2+LOCAL_SIZE(%sp)
2937*4882a593Smuzhiyun	mov.w		&0x2024,0x6+LOCAL_SIZE(%sp) # stk fmt = 0x2; voff = 0x024
2938*4882a593Smuzhiyun	add.l		&LOCAL_SIZE,%sp		# clear stack frame
2939*4882a593Smuzhiyun
2940*4882a593Smuzhiyun	bra.l		_real_trace
2941*4882a593Smuzhiyun
2942*4882a593Smuzhiyun#########################################################################
2943*4882a593Smuzhiyun# The FPU is disabled and so we should really have taken the "Line
2944*4882a593Smuzhiyun# F Emulator" exception. So, here we create an 8-word stack frame
2945*4882a593Smuzhiyun# from our 4-word stack frame. This means we must calculate the length
2946*4882a593Smuzhiyun# the faulting instruction to get the "next PC". This is trivial for
2947*4882a593Smuzhiyun# immediate operands but requires some extra work for fmovm dynamic
2948*4882a593Smuzhiyun# which can use most addressing modes.
2949*4882a593Smuzhiyuniea_disabled:
2950*4882a593Smuzhiyun	mov.l		(%sp)+,%d0		# restore d0
2951*4882a593Smuzhiyun
2952*4882a593Smuzhiyun	link		%a6,&-LOCAL_SIZE	# init stack frame
2953*4882a593Smuzhiyun
2954*4882a593Smuzhiyun	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
2955*4882a593Smuzhiyun
2956*4882a593Smuzhiyun# PC of instruction that took the exception is the PC in the frame
2957*4882a593Smuzhiyun	mov.l		EXC_PC(%a6),EXC_EXTWPTR(%a6)
2958*4882a593Smuzhiyun	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
2959*4882a593Smuzhiyun	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
2960*4882a593Smuzhiyun	bsr.l		_imem_read_long		# fetch the instruction words
2961*4882a593Smuzhiyun	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD
2962*4882a593Smuzhiyun
2963*4882a593Smuzhiyun	tst.w		%d0			# is instr fmovm?
2964*4882a593Smuzhiyun	bmi.b		iea_dis_fmovm		# yes
2965*4882a593Smuzhiyun# instruction is using an extended precision immediate operand. Therefore,
2966*4882a593Smuzhiyun# the total instruction length is 16 bytes.
2967*4882a593Smuzhiyuniea_dis_immed:
2968*4882a593Smuzhiyun	mov.l		&0x10,%d0		# 16 bytes of instruction
2969*4882a593Smuzhiyun	bra.b		iea_dis_cont
2970*4882a593Smuzhiyuniea_dis_fmovm:
2971*4882a593Smuzhiyun	btst		&0xe,%d0		# is instr fmovm ctrl
2972*4882a593Smuzhiyun	bne.b		iea_dis_fmovm_data	# no
2973*4882a593Smuzhiyun# the instruction is a fmovm.l with 2 or 3 registers.
2974*4882a593Smuzhiyun	bfextu		%d0{&19:&3},%d1
2975*4882a593Smuzhiyun	mov.l		&0xc,%d0
2976*4882a593Smuzhiyun	cmpi.b		%d1,&0x7		# move all regs?
2977*4882a593Smuzhiyun	bne.b		iea_dis_cont
2978*4882a593Smuzhiyun	addq.l		&0x4,%d0
2979*4882a593Smuzhiyun	bra.b		iea_dis_cont
2980*4882a593Smuzhiyun# the instruction is an fmovm.x dynamic which can use many addressing
2981*4882a593Smuzhiyun# modes and thus can have several different total instruction lengths.
2982*4882a593Smuzhiyun# call fmovm_calc_ea which will go through the ea calc process and,
2983*4882a593Smuzhiyun# as a by-product, will tell us how long the instruction is.
2984*4882a593Smuzhiyuniea_dis_fmovm_data:
2985*4882a593Smuzhiyun	clr.l		%d0
2986*4882a593Smuzhiyun	bsr.l		fmovm_calc_ea
2987*4882a593Smuzhiyun	mov.l		EXC_EXTWPTR(%a6),%d0
2988*4882a593Smuzhiyun	sub.l		EXC_PC(%a6),%d0
2989*4882a593Smuzhiyuniea_dis_cont:
2990*4882a593Smuzhiyun	mov.w		%d0,EXC_VOFF(%a6)	# store stack shift value
2991*4882a593Smuzhiyun
2992*4882a593Smuzhiyun	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2993*4882a593Smuzhiyun
2994*4882a593Smuzhiyun	unlk		%a6
2995*4882a593Smuzhiyun
2996*4882a593Smuzhiyun# here, we actually create the 8-word frame from the 4-word frame,
2997*4882a593Smuzhiyun# with the "next PC" as additional info.
2998*4882a593Smuzhiyun# the <ea> field is let as undefined.
2999*4882a593Smuzhiyun	subq.l		&0x8,%sp		# make room for new stack
3000*4882a593Smuzhiyun	mov.l		%d0,-(%sp)		# save d0
3001*4882a593Smuzhiyun	mov.w		0xc(%sp),0x4(%sp)	# move SR
3002*4882a593Smuzhiyun	mov.l		0xe(%sp),0x6(%sp)	# move Current PC
3003*4882a593Smuzhiyun	clr.l		%d0
3004*4882a593Smuzhiyun	mov.w		0x12(%sp),%d0
3005*4882a593Smuzhiyun	mov.l		0x6(%sp),0x10(%sp)	# move Current PC
3006*4882a593Smuzhiyun	add.l		%d0,0x6(%sp)		# make Next PC
3007*4882a593Smuzhiyun	mov.w		&0x402c,0xa(%sp)	# insert offset,frame format
3008*4882a593Smuzhiyun	mov.l		(%sp)+,%d0		# restore d0
3009*4882a593Smuzhiyun
3010*4882a593Smuzhiyun	bra.l		_real_fpu_disabled
3011*4882a593Smuzhiyun
3012*4882a593Smuzhiyun##########
3013*4882a593Smuzhiyun
3014*4882a593Smuzhiyuniea_iacc:
3015*4882a593Smuzhiyun	movc		%pcr,%d0
3016*4882a593Smuzhiyun	btst		&0x1,%d0
3017*4882a593Smuzhiyun	bne.b		iea_iacc_cont
3018*4882a593Smuzhiyun	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3019*4882a593Smuzhiyun	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1 on stack
3020*4882a593Smuzhiyuniea_iacc_cont:
3021*4882a593Smuzhiyun	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
3022*4882a593Smuzhiyun
3023*4882a593Smuzhiyun	unlk		%a6
3024*4882a593Smuzhiyun
3025*4882a593Smuzhiyun	subq.w		&0x8,%sp		# make stack frame bigger
3026*4882a593Smuzhiyun	mov.l		0x8(%sp),(%sp)		# store SR,hi(PC)
3027*4882a593Smuzhiyun	mov.w		0xc(%sp),0x4(%sp)	# store lo(PC)
3028*4882a593Smuzhiyun	mov.w		&0x4008,0x6(%sp)	# store voff
3029*4882a593Smuzhiyun	mov.l		0x2(%sp),0x8(%sp)	# store ea
3030*4882a593Smuzhiyun	mov.l		&0x09428001,0xc(%sp)	# store fslw
3031*4882a593Smuzhiyun
3032*4882a593Smuzhiyuniea_acc_done:
3033*4882a593Smuzhiyun	btst		&0x5,(%sp)		# user or supervisor mode?
3034*4882a593Smuzhiyun	beq.b		iea_acc_done2		# user
3035*4882a593Smuzhiyun	bset		&0x2,0xd(%sp)		# set supervisor TM bit
3036*4882a593Smuzhiyun
3037*4882a593Smuzhiyuniea_acc_done2:
3038*4882a593Smuzhiyun	bra.l		_real_access
3039*4882a593Smuzhiyun
3040*4882a593Smuzhiyuniea_dacc:
3041*4882a593Smuzhiyun	lea		-LOCAL_SIZE(%a6),%sp
3042*4882a593Smuzhiyun
3043*4882a593Smuzhiyun	movc		%pcr,%d1
3044*4882a593Smuzhiyun	btst		&0x1,%d1
3045*4882a593Smuzhiyun	bne.b		iea_dacc_cont
3046*4882a593Smuzhiyun	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1 on stack
3047*4882a593Smuzhiyun	fmovm.l		LOCAL_SIZE+USER_FPCR(%sp),%fpcr,%fpsr,%fpiar # restore ctrl regs
3048*4882a593Smuzhiyuniea_dacc_cont:
3049*4882a593Smuzhiyun	mov.l		(%a6),%a6
3050*4882a593Smuzhiyun
3051*4882a593Smuzhiyun	mov.l		0x4+LOCAL_SIZE(%sp),-0x8+0x4+LOCAL_SIZE(%sp)
3052*4882a593Smuzhiyun	mov.w		0x8+LOCAL_SIZE(%sp),-0x8+0x8+LOCAL_SIZE(%sp)
3053*4882a593Smuzhiyun	mov.w		&0x4008,-0x8+0xa+LOCAL_SIZE(%sp)
3054*4882a593Smuzhiyun	mov.l		%a0,-0x8+0xc+LOCAL_SIZE(%sp)
3055*4882a593Smuzhiyun	mov.w		%d0,-0x8+0x10+LOCAL_SIZE(%sp)
3056*4882a593Smuzhiyun	mov.w		&0x0001,-0x8+0x12+LOCAL_SIZE(%sp)
3057*4882a593Smuzhiyun
3058*4882a593Smuzhiyun	movm.l		LOCAL_SIZE+EXC_DREGS(%sp),&0x0303 # restore d0-d1/a0-a1
3059*4882a593Smuzhiyun	add.w		&LOCAL_SIZE-0x4,%sp
3060*4882a593Smuzhiyun
3061*4882a593Smuzhiyun	bra.b		iea_acc_done
3062*4882a593Smuzhiyun
3063*4882a593Smuzhiyun#########################################################################
3064*4882a593Smuzhiyun# XDEF ****************************************************************	#
3065*4882a593Smuzhiyun#	_fpsp_operr(): 060FPSP entry point for FP Operr exception.	#
3066*4882a593Smuzhiyun#									#
3067*4882a593Smuzhiyun#	This handler should be the first code executed upon taking the	#
3068*4882a593Smuzhiyun#	FP Operand Error exception in an operating system.		#
3069*4882a593Smuzhiyun#									#
3070*4882a593Smuzhiyun# XREF ****************************************************************	#
3071*4882a593Smuzhiyun#	_imem_read_long() - read instruction longword			#
3072*4882a593Smuzhiyun#	fix_skewed_ops() - adjust src operand in fsave frame		#
3073*4882a593Smuzhiyun#	_real_operr() - "callout" to operating system operr handler	#
3074*4882a593Smuzhiyun#	_dmem_write_{byte,word,long}() - store data to mem (opclass 3)	#
3075*4882a593Smuzhiyun#	store_dreg_{b,w,l}() - store data to data regfile (opclass 3)	#
3076*4882a593Smuzhiyun#	facc_out_{b,w,l}() - store to memory took access error (opcl 3)	#
3077*4882a593Smuzhiyun#									#
3078*4882a593Smuzhiyun# INPUT ***************************************************************	#
3079*4882a593Smuzhiyun#	- The system stack contains the FP Operr exception frame	#
3080*4882a593Smuzhiyun#	- The fsave frame contains the source operand			#
3081*4882a593Smuzhiyun#									#
3082*4882a593Smuzhiyun# OUTPUT **************************************************************	#
3083*4882a593Smuzhiyun#	No access error:						#
3084*4882a593Smuzhiyun#	- The system stack is unchanged					#
3085*4882a593Smuzhiyun#	- The fsave frame contains the adjusted src op for opclass 0,2	#
3086*4882a593Smuzhiyun#									#
3087*4882a593Smuzhiyun# ALGORITHM ***********************************************************	#
3088*4882a593Smuzhiyun#	In a system where the FP Operr exception is enabled, the goal	#
3089*4882a593Smuzhiyun# is to get to the handler specified at _real_operr(). But, on the 060,	#
3090*4882a593Smuzhiyun# for opclass zero and two instruction taking this exception, the	#
3091*4882a593Smuzhiyun# input operand in the fsave frame may be incorrect for some cases	#
3092*4882a593Smuzhiyun# and needs to be corrected. This handler calls fix_skewed_ops() to	#
3093*4882a593Smuzhiyun# do just this and then exits through _real_operr().			#
3094*4882a593Smuzhiyun#	For opclass 3 instructions, the 060 doesn't store the default	#
3095*4882a593Smuzhiyun# operr result out to memory or data register file as it should.	#
3096*4882a593Smuzhiyun# This code must emulate the move out before finally exiting through	#
3097*4882a593Smuzhiyun# _real_inex(). The move out, if to memory, is performed using		#
3098*4882a593Smuzhiyun# _mem_write() "callout" routines that may return a failing result.	#
3099*4882a593Smuzhiyun# In this special case, the handler must exit through facc_out()	#
3100*4882a593Smuzhiyun# which creates an access error stack frame from the current operr	#
3101*4882a593Smuzhiyun# stack frame.								#
3102*4882a593Smuzhiyun#									#
3103*4882a593Smuzhiyun#########################################################################
3104*4882a593Smuzhiyun
3105*4882a593Smuzhiyun	global		_fpsp_operr
3106*4882a593Smuzhiyun_fpsp_operr:
3107*4882a593Smuzhiyun
3108*4882a593Smuzhiyun	link.w		%a6,&-LOCAL_SIZE	# init stack frame
3109*4882a593Smuzhiyun
3110*4882a593Smuzhiyun	fsave		FP_SRC(%a6)		# grab the "busy" frame
3111*4882a593Smuzhiyun
3112*4882a593Smuzhiyun	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
3113*4882a593Smuzhiyun	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3114*4882a593Smuzhiyun	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
3115*4882a593Smuzhiyun
3116*4882a593Smuzhiyun# the FPIAR holds the "current PC" of the faulting instruction
3117*4882a593Smuzhiyun	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3118*4882a593Smuzhiyun
3119*4882a593Smuzhiyun	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
3120*4882a593Smuzhiyun	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
3121*4882a593Smuzhiyun	bsr.l		_imem_read_long		# fetch the instruction words
3122*4882a593Smuzhiyun	mov.l		%d0,EXC_OPWORD(%a6)
3123*4882a593Smuzhiyun
3124*4882a593Smuzhiyun##############################################################################
3125*4882a593Smuzhiyun
3126*4882a593Smuzhiyun	btst		&13,%d0			# is instr an fmove out?
3127*4882a593Smuzhiyun	bne.b		foperr_out		# fmove out
3128*4882a593Smuzhiyun
3129*4882a593Smuzhiyun
3130*4882a593Smuzhiyun# here, we simply see if the operand in the fsave frame needs to be "unskewed".
3131*4882a593Smuzhiyun# this would be the case for opclass two operations with a source infinity or
3132*4882a593Smuzhiyun# denorm operand in the sgl or dbl format. NANs also become skewed, but can't
3133*4882a593Smuzhiyun# cause an operr so we don't need to check for them here.
3134*4882a593Smuzhiyun	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
3135*4882a593Smuzhiyun	bsr.l		fix_skewed_ops		# fix src op
3136*4882a593Smuzhiyun
3137*4882a593Smuzhiyunfoperr_exit:
3138*4882a593Smuzhiyun	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
3139*4882a593Smuzhiyun	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3140*4882a593Smuzhiyun	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
3141*4882a593Smuzhiyun
3142*4882a593Smuzhiyun	frestore	FP_SRC(%a6)
3143*4882a593Smuzhiyun
3144*4882a593Smuzhiyun	unlk		%a6
3145*4882a593Smuzhiyun	bra.l		_real_operr
3146*4882a593Smuzhiyun
3147*4882a593Smuzhiyun########################################################################
3148*4882a593Smuzhiyun
3149*4882a593Smuzhiyun#
3150*4882a593Smuzhiyun# the hardware does not save the default result to memory on enabled
3151*4882a593Smuzhiyun# operand error exceptions. we do this here before passing control to
3152*4882a593Smuzhiyun# the user operand error handler.
3153*4882a593Smuzhiyun#
3154*4882a593Smuzhiyun# byte, word, and long destination format operations can pass
3155*4882a593Smuzhiyun# through here. we simply need to test the sign of the src
3156*4882a593Smuzhiyun# operand and save the appropriate minimum or maximum integer value
3157*4882a593Smuzhiyun# to the effective address as pointed to by the stacked effective address.
3158*4882a593Smuzhiyun#
3159*4882a593Smuzhiyun# although packed opclass three operations can take operand error
3160*4882a593Smuzhiyun# exceptions, they won't pass through here since they are caught
3161*4882a593Smuzhiyun# first by the unsupported data format exception handler. that handler
3162*4882a593Smuzhiyun# sends them directly to _real_operr() if necessary.
3163*4882a593Smuzhiyun#
3164*4882a593Smuzhiyunfoperr_out:
3165*4882a593Smuzhiyun
3166*4882a593Smuzhiyun	mov.w		FP_SRC_EX(%a6),%d1	# fetch exponent
3167*4882a593Smuzhiyun	andi.w		&0x7fff,%d1
3168*4882a593Smuzhiyun	cmpi.w		%d1,&0x7fff
3169*4882a593Smuzhiyun	bne.b		foperr_out_not_qnan
3170*4882a593Smuzhiyun# the operand is either an infinity or a QNAN.
3171*4882a593Smuzhiyun	tst.l		FP_SRC_LO(%a6)
3172*4882a593Smuzhiyun	bne.b		foperr_out_qnan
3173*4882a593Smuzhiyun	mov.l		FP_SRC_HI(%a6),%d1
3174*4882a593Smuzhiyun	andi.l		&0x7fffffff,%d1
3175*4882a593Smuzhiyun	beq.b		foperr_out_not_qnan
3176*4882a593Smuzhiyunfoperr_out_qnan:
3177*4882a593Smuzhiyun	mov.l		FP_SRC_HI(%a6),L_SCR1(%a6)
3178*4882a593Smuzhiyun	bra.b		foperr_out_jmp
3179*4882a593Smuzhiyun
3180*4882a593Smuzhiyunfoperr_out_not_qnan:
3181*4882a593Smuzhiyun	mov.l		&0x7fffffff,%d1
3182*4882a593Smuzhiyun	tst.b		FP_SRC_EX(%a6)
3183*4882a593Smuzhiyun	bpl.b		foperr_out_not_qnan2
3184*4882a593Smuzhiyun	addq.l		&0x1,%d1
3185*4882a593Smuzhiyunfoperr_out_not_qnan2:
3186*4882a593Smuzhiyun	mov.l		%d1,L_SCR1(%a6)
3187*4882a593Smuzhiyun
3188*4882a593Smuzhiyunfoperr_out_jmp:
3189*4882a593Smuzhiyun	bfextu		%d0{&19:&3},%d0		# extract dst format field
3190*4882a593Smuzhiyun	mov.b		1+EXC_OPWORD(%a6),%d1	# extract <ea> mode,reg
3191*4882a593Smuzhiyun	mov.w		(tbl_operr.b,%pc,%d0.w*2),%a0
3192*4882a593Smuzhiyun	jmp		(tbl_operr.b,%pc,%a0)
3193*4882a593Smuzhiyun
3194*4882a593Smuzhiyuntbl_operr:
3195*4882a593Smuzhiyun	short		foperr_out_l - tbl_operr # long word integer
3196*4882a593Smuzhiyun	short		tbl_operr    - tbl_operr # sgl prec shouldn't happen
3197*4882a593Smuzhiyun	short		tbl_operr    - tbl_operr # ext prec shouldn't happen
3198*4882a593Smuzhiyun	short		foperr_exit  - tbl_operr # packed won't enter here
3199*4882a593Smuzhiyun	short		foperr_out_w - tbl_operr # word integer
3200*4882a593Smuzhiyun	short		tbl_operr    - tbl_operr # dbl prec shouldn't happen
3201*4882a593Smuzhiyun	short		foperr_out_b - tbl_operr # byte integer
3202*4882a593Smuzhiyun	short		tbl_operr    - tbl_operr # packed won't enter here
3203*4882a593Smuzhiyun
3204*4882a593Smuzhiyunfoperr_out_b:
3205*4882a593Smuzhiyun	mov.b		L_SCR1(%a6),%d0		# load positive default result
3206*4882a593Smuzhiyun	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
3207*4882a593Smuzhiyun	ble.b		foperr_out_b_save_dn	# yes
3208*4882a593Smuzhiyun	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
3209*4882a593Smuzhiyun	bsr.l		_dmem_write_byte	# write the default result
3210*4882a593Smuzhiyun
3211*4882a593Smuzhiyun	tst.l		%d1			# did dstore fail?
3212*4882a593Smuzhiyun	bne.l		facc_out_b		# yes
3213*4882a593Smuzhiyun
3214*4882a593Smuzhiyun	bra.w		foperr_exit
3215*4882a593Smuzhiyunfoperr_out_b_save_dn:
3216*4882a593Smuzhiyun	andi.w		&0x0007,%d1
3217*4882a593Smuzhiyun	bsr.l		store_dreg_b		# store result to regfile
3218*4882a593Smuzhiyun	bra.w		foperr_exit
3219*4882a593Smuzhiyun
3220*4882a593Smuzhiyunfoperr_out_w:
3221*4882a593Smuzhiyun	mov.w		L_SCR1(%a6),%d0		# load positive default result
3222*4882a593Smuzhiyun	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
3223*4882a593Smuzhiyun	ble.b		foperr_out_w_save_dn	# yes
3224*4882a593Smuzhiyun	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
3225*4882a593Smuzhiyun	bsr.l		_dmem_write_word	# write the default result
3226*4882a593Smuzhiyun
3227*4882a593Smuzhiyun	tst.l		%d1			# did dstore fail?
3228*4882a593Smuzhiyun	bne.l		facc_out_w		# yes
3229*4882a593Smuzhiyun
3230*4882a593Smuzhiyun	bra.w		foperr_exit
3231*4882a593Smuzhiyunfoperr_out_w_save_dn:
3232*4882a593Smuzhiyun	andi.w		&0x0007,%d1
3233*4882a593Smuzhiyun	bsr.l		store_dreg_w		# store result to regfile
3234*4882a593Smuzhiyun	bra.w		foperr_exit
3235*4882a593Smuzhiyun
3236*4882a593Smuzhiyunfoperr_out_l:
3237*4882a593Smuzhiyun	mov.l		L_SCR1(%a6),%d0		# load positive default result
3238*4882a593Smuzhiyun	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
3239*4882a593Smuzhiyun	ble.b		foperr_out_l_save_dn	# yes
3240*4882a593Smuzhiyun	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
3241*4882a593Smuzhiyun	bsr.l		_dmem_write_long	# write the default result
3242*4882a593Smuzhiyun
3243*4882a593Smuzhiyun	tst.l		%d1			# did dstore fail?
3244*4882a593Smuzhiyun	bne.l		facc_out_l		# yes
3245*4882a593Smuzhiyun
3246*4882a593Smuzhiyun	bra.w		foperr_exit
3247*4882a593Smuzhiyunfoperr_out_l_save_dn:
3248*4882a593Smuzhiyun	andi.w		&0x0007,%d1
3249*4882a593Smuzhiyun	bsr.l		store_dreg_l		# store result to regfile
3250*4882a593Smuzhiyun	bra.w		foperr_exit
3251*4882a593Smuzhiyun
3252*4882a593Smuzhiyun#########################################################################
3253*4882a593Smuzhiyun# XDEF ****************************************************************	#
3254*4882a593Smuzhiyun#	_fpsp_snan(): 060FPSP entry point for FP SNAN exception.	#
3255*4882a593Smuzhiyun#									#
3256*4882a593Smuzhiyun#	This handler should be the first code executed upon taking the	#
3257*4882a593Smuzhiyun#	FP Signalling NAN exception in an operating system.		#
3258*4882a593Smuzhiyun#									#
3259*4882a593Smuzhiyun# XREF ****************************************************************	#
3260*4882a593Smuzhiyun#	_imem_read_long() - read instruction longword			#
3261*4882a593Smuzhiyun#	fix_skewed_ops() - adjust src operand in fsave frame		#
3262*4882a593Smuzhiyun#	_real_snan() - "callout" to operating system SNAN handler	#
3263*4882a593Smuzhiyun#	_dmem_write_{byte,word,long}() - store data to mem (opclass 3)	#
3264*4882a593Smuzhiyun#	store_dreg_{b,w,l}() - store data to data regfile (opclass 3)	#
3265*4882a593Smuzhiyun#	facc_out_{b,w,l,d,x}() - store to mem took acc error (opcl 3)	#
3266*4882a593Smuzhiyun#	_calc_ea_fout() - fix An if <ea> is -() or ()+; also get <ea>	#
3267*4882a593Smuzhiyun#									#
3268*4882a593Smuzhiyun# INPUT ***************************************************************	#
3269*4882a593Smuzhiyun#	- The system stack contains the FP SNAN exception frame		#
3270*4882a593Smuzhiyun#	- The fsave frame contains the source operand			#
3271*4882a593Smuzhiyun#									#
3272*4882a593Smuzhiyun# OUTPUT **************************************************************	#
3273*4882a593Smuzhiyun#	No access error:						#
3274*4882a593Smuzhiyun#	- The system stack is unchanged					#
3275*4882a593Smuzhiyun#	- The fsave frame contains the adjusted src op for opclass 0,2	#
3276*4882a593Smuzhiyun#									#
3277*4882a593Smuzhiyun# ALGORITHM ***********************************************************	#
3278*4882a593Smuzhiyun#	In a system where the FP SNAN exception is enabled, the goal	#
3279*4882a593Smuzhiyun# is to get to the handler specified at _real_snan(). But, on the 060,	#
3280*4882a593Smuzhiyun# for opclass zero and two instructions taking this exception, the	#
3281*4882a593Smuzhiyun# input operand in the fsave frame may be incorrect for some cases	#
3282*4882a593Smuzhiyun# and needs to be corrected. This handler calls fix_skewed_ops() to	#
3283*4882a593Smuzhiyun# do just this and then exits through _real_snan().			#
3284*4882a593Smuzhiyun#	For opclass 3 instructions, the 060 doesn't store the default	#
3285*4882a593Smuzhiyun# SNAN result out to memory or data register file as it should.		#
3286*4882a593Smuzhiyun# This code must emulate the move out before finally exiting through	#
3287*4882a593Smuzhiyun# _real_snan(). The move out, if to memory, is performed using		#
3288*4882a593Smuzhiyun# _mem_write() "callout" routines that may return a failing result.	#
3289*4882a593Smuzhiyun# In this special case, the handler must exit through facc_out()	#
3290*4882a593Smuzhiyun# which creates an access error stack frame from the current SNAN	#
3291*4882a593Smuzhiyun# stack frame.								#
3292*4882a593Smuzhiyun#	For the case of an extended precision opclass 3 instruction,	#
3293*4882a593Smuzhiyun# if the effective addressing mode was -() or ()+, then the address	#
3294*4882a593Smuzhiyun# register must get updated by calling _calc_ea_fout(). If the <ea>	#
3295*4882a593Smuzhiyun# was -(a7) from supervisor mode, then the exception frame currently	#
3296*4882a593Smuzhiyun# on the system stack must be carefully moved "down" to make room	#
3297*4882a593Smuzhiyun# for the operand being moved.						#
3298*4882a593Smuzhiyun#									#
3299*4882a593Smuzhiyun#########################################################################
3300*4882a593Smuzhiyun
3301*4882a593Smuzhiyun	global		_fpsp_snan
3302*4882a593Smuzhiyun_fpsp_snan:
3303*4882a593Smuzhiyun
3304*4882a593Smuzhiyun	link.w		%a6,&-LOCAL_SIZE	# init stack frame
3305*4882a593Smuzhiyun
3306*4882a593Smuzhiyun	fsave		FP_SRC(%a6)		# grab the "busy" frame
3307*4882a593Smuzhiyun
3308*4882a593Smuzhiyun	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
3309*4882a593Smuzhiyun	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3310*4882a593Smuzhiyun	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
3311*4882a593Smuzhiyun
3312*4882a593Smuzhiyun# the FPIAR holds the "current PC" of the faulting instruction
3313*4882a593Smuzhiyun	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3314*4882a593Smuzhiyun
3315*4882a593Smuzhiyun	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
3316*4882a593Smuzhiyun	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
3317*4882a593Smuzhiyun	bsr.l		_imem_read_long		# fetch the instruction words
3318*4882a593Smuzhiyun	mov.l		%d0,EXC_OPWORD(%a6)
3319*4882a593Smuzhiyun
3320*4882a593Smuzhiyun##############################################################################
3321*4882a593Smuzhiyun
3322*4882a593Smuzhiyun	btst		&13,%d0			# is instr an fmove out?
3323*4882a593Smuzhiyun	bne.w		fsnan_out		# fmove out
3324*4882a593Smuzhiyun
3325*4882a593Smuzhiyun
3326*4882a593Smuzhiyun# here, we simply see if the operand in the fsave frame needs to be "unskewed".
3327*4882a593Smuzhiyun# this would be the case for opclass two operations with a source infinity or
3328*4882a593Smuzhiyun# denorm operand in the sgl or dbl format. NANs also become skewed and must be
3329*4882a593Smuzhiyun# fixed here.
3330*4882a593Smuzhiyun	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
3331*4882a593Smuzhiyun	bsr.l		fix_skewed_ops		# fix src op
3332*4882a593Smuzhiyun
3333*4882a593Smuzhiyunfsnan_exit:
3334*4882a593Smuzhiyun	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
3335*4882a593Smuzhiyun	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3336*4882a593Smuzhiyun	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
3337*4882a593Smuzhiyun
3338*4882a593Smuzhiyun	frestore	FP_SRC(%a6)
3339*4882a593Smuzhiyun
3340*4882a593Smuzhiyun	unlk		%a6
3341*4882a593Smuzhiyun	bra.l		_real_snan
3342*4882a593Smuzhiyun
3343*4882a593Smuzhiyun########################################################################
3344*4882a593Smuzhiyun
3345*4882a593Smuzhiyun#
3346*4882a593Smuzhiyun# the hardware does not save the default result to memory on enabled
3347*4882a593Smuzhiyun# snan exceptions. we do this here before passing control to
3348*4882a593Smuzhiyun# the user snan handler.
3349*4882a593Smuzhiyun#
3350*4882a593Smuzhiyun# byte, word, long, and packed destination format operations can pass
3351*4882a593Smuzhiyun# through here. since packed format operations already were handled by
3352*4882a593Smuzhiyun# fpsp_unsupp(), then we need to do nothing else for them here.
3353*4882a593Smuzhiyun# for byte, word, and long, we simply need to test the sign of the src
3354*4882a593Smuzhiyun# operand and save the appropriate minimum or maximum integer value
3355*4882a593Smuzhiyun# to the effective address as pointed to by the stacked effective address.
3356*4882a593Smuzhiyun#
3357*4882a593Smuzhiyunfsnan_out:
3358*4882a593Smuzhiyun
3359*4882a593Smuzhiyun	bfextu		%d0{&19:&3},%d0		# extract dst format field
3360*4882a593Smuzhiyun	mov.b		1+EXC_OPWORD(%a6),%d1	# extract <ea> mode,reg
3361*4882a593Smuzhiyun	mov.w		(tbl_snan.b,%pc,%d0.w*2),%a0
3362*4882a593Smuzhiyun	jmp		(tbl_snan.b,%pc,%a0)
3363*4882a593Smuzhiyun
3364*4882a593Smuzhiyuntbl_snan:
3365*4882a593Smuzhiyun	short		fsnan_out_l - tbl_snan # long word integer
3366*4882a593Smuzhiyun	short		fsnan_out_s - tbl_snan # sgl prec shouldn't happen
3367*4882a593Smuzhiyun	short		fsnan_out_x - tbl_snan # ext prec shouldn't happen
3368*4882a593Smuzhiyun	short		tbl_snan    - tbl_snan # packed needs no help
3369*4882a593Smuzhiyun	short		fsnan_out_w - tbl_snan # word integer
3370*4882a593Smuzhiyun	short		fsnan_out_d - tbl_snan # dbl prec shouldn't happen
3371*4882a593Smuzhiyun	short		fsnan_out_b - tbl_snan # byte integer
3372*4882a593Smuzhiyun	short		tbl_snan    - tbl_snan # packed needs no help
3373*4882a593Smuzhiyun
3374*4882a593Smuzhiyunfsnan_out_b:
3375*4882a593Smuzhiyun	mov.b		FP_SRC_HI(%a6),%d0	# load upper byte of SNAN
3376*4882a593Smuzhiyun	bset		&6,%d0			# set SNAN bit
3377*4882a593Smuzhiyun	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
3378*4882a593Smuzhiyun	ble.b		fsnan_out_b_dn		# yes
3379*4882a593Smuzhiyun	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
3380*4882a593Smuzhiyun	bsr.l		_dmem_write_byte	# write the default result
3381*4882a593Smuzhiyun
3382*4882a593Smuzhiyun	tst.l		%d1			# did dstore fail?
3383*4882a593Smuzhiyun	bne.l		facc_out_b		# yes
3384*4882a593Smuzhiyun
3385*4882a593Smuzhiyun	bra.w		fsnan_exit
3386*4882a593Smuzhiyunfsnan_out_b_dn:
3387*4882a593Smuzhiyun	andi.w		&0x0007,%d1
3388*4882a593Smuzhiyun	bsr.l		store_dreg_b		# store result to regfile
3389*4882a593Smuzhiyun	bra.w		fsnan_exit
3390*4882a593Smuzhiyun
3391*4882a593Smuzhiyunfsnan_out_w:
3392*4882a593Smuzhiyun	mov.w		FP_SRC_HI(%a6),%d0	# load upper word of SNAN
3393*4882a593Smuzhiyun	bset		&14,%d0			# set SNAN bit
3394*4882a593Smuzhiyun	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
3395*4882a593Smuzhiyun	ble.b		fsnan_out_w_dn		# yes
3396*4882a593Smuzhiyun	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
3397*4882a593Smuzhiyun	bsr.l		_dmem_write_word	# write the default result
3398*4882a593Smuzhiyun
3399*4882a593Smuzhiyun	tst.l		%d1			# did dstore fail?
3400*4882a593Smuzhiyun	bne.l		facc_out_w		# yes
3401*4882a593Smuzhiyun
3402*4882a593Smuzhiyun	bra.w		fsnan_exit
3403*4882a593Smuzhiyunfsnan_out_w_dn:
3404*4882a593Smuzhiyun	andi.w		&0x0007,%d1
3405*4882a593Smuzhiyun	bsr.l		store_dreg_w		# store result to regfile
3406*4882a593Smuzhiyun	bra.w		fsnan_exit
3407*4882a593Smuzhiyun
3408*4882a593Smuzhiyunfsnan_out_l:
3409*4882a593Smuzhiyun	mov.l		FP_SRC_HI(%a6),%d0	# load upper longword of SNAN
3410*4882a593Smuzhiyun	bset		&30,%d0			# set SNAN bit
3411*4882a593Smuzhiyun	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
3412*4882a593Smuzhiyun	ble.b		fsnan_out_l_dn		# yes
3413*4882a593Smuzhiyun	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
3414*4882a593Smuzhiyun	bsr.l		_dmem_write_long	# write the default result
3415*4882a593Smuzhiyun
3416*4882a593Smuzhiyun	tst.l		%d1			# did dstore fail?
3417*4882a593Smuzhiyun	bne.l		facc_out_l		# yes
3418*4882a593Smuzhiyun
3419*4882a593Smuzhiyun	bra.w		fsnan_exit
3420*4882a593Smuzhiyunfsnan_out_l_dn:
3421*4882a593Smuzhiyun	andi.w		&0x0007,%d1
3422*4882a593Smuzhiyun	bsr.l		store_dreg_l		# store result to regfile
3423*4882a593Smuzhiyun	bra.w		fsnan_exit
3424*4882a593Smuzhiyun
3425*4882a593Smuzhiyunfsnan_out_s:
3426*4882a593Smuzhiyun	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
3427*4882a593Smuzhiyun	ble.b		fsnan_out_d_dn		# yes
3428*4882a593Smuzhiyun	mov.l		FP_SRC_EX(%a6),%d0	# fetch SNAN sign
3429*4882a593Smuzhiyun	andi.l		&0x80000000,%d0		# keep sign
3430*4882a593Smuzhiyun	ori.l		&0x7fc00000,%d0		# insert new exponent,SNAN bit
3431*4882a593Smuzhiyun	mov.l		FP_SRC_HI(%a6),%d1	# load mantissa
3432*4882a593Smuzhiyun	lsr.l		&0x8,%d1		# shift mantissa for sgl
3433*4882a593Smuzhiyun	or.l		%d1,%d0			# create sgl SNAN
3434*4882a593Smuzhiyun	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
3435*4882a593Smuzhiyun	bsr.l		_dmem_write_long	# write the default result
3436*4882a593Smuzhiyun
3437*4882a593Smuzhiyun	tst.l		%d1			# did dstore fail?
3438*4882a593Smuzhiyun	bne.l		facc_out_l		# yes
3439*4882a593Smuzhiyun
3440*4882a593Smuzhiyun	bra.w		fsnan_exit
3441*4882a593Smuzhiyunfsnan_out_d_dn:
3442*4882a593Smuzhiyun	mov.l		FP_SRC_EX(%a6),%d0	# fetch SNAN sign
3443*4882a593Smuzhiyun	andi.l		&0x80000000,%d0		# keep sign
3444*4882a593Smuzhiyun	ori.l		&0x7fc00000,%d0		# insert new exponent,SNAN bit
3445*4882a593Smuzhiyun	mov.l		%d1,-(%sp)
3446*4882a593Smuzhiyun	mov.l		FP_SRC_HI(%a6),%d1	# load mantissa
3447*4882a593Smuzhiyun	lsr.l		&0x8,%d1		# shift mantissa for sgl
3448*4882a593Smuzhiyun	or.l		%d1,%d0			# create sgl SNAN
3449*4882a593Smuzhiyun	mov.l		(%sp)+,%d1
3450*4882a593Smuzhiyun	andi.w		&0x0007,%d1
3451*4882a593Smuzhiyun	bsr.l		store_dreg_l		# store result to regfile
3452*4882a593Smuzhiyun	bra.w		fsnan_exit
3453*4882a593Smuzhiyun
3454*4882a593Smuzhiyunfsnan_out_d:
3455*4882a593Smuzhiyun	mov.l		FP_SRC_EX(%a6),%d0	# fetch SNAN sign
3456*4882a593Smuzhiyun	andi.l		&0x80000000,%d0		# keep sign
3457*4882a593Smuzhiyun	ori.l		&0x7ff80000,%d0		# insert new exponent,SNAN bit
3458*4882a593Smuzhiyun	mov.l		FP_SRC_HI(%a6),%d1	# load hi mantissa
3459*4882a593Smuzhiyun	mov.l		%d0,FP_SCR0_EX(%a6)	# store to temp space
3460*4882a593Smuzhiyun	mov.l		&11,%d0			# load shift amt
3461*4882a593Smuzhiyun	lsr.l		%d0,%d1
3462*4882a593Smuzhiyun	or.l		%d1,FP_SCR0_EX(%a6)	# create dbl hi
3463*4882a593Smuzhiyun	mov.l		FP_SRC_HI(%a6),%d1	# load hi mantissa
3464*4882a593Smuzhiyun	andi.l		&0x000007ff,%d1
3465*4882a593Smuzhiyun	ror.l		%d0,%d1
3466*4882a593Smuzhiyun	mov.l		%d1,FP_SCR0_HI(%a6)	# store to temp space
3467*4882a593Smuzhiyun	mov.l		FP_SRC_LO(%a6),%d1	# load lo mantissa
3468*4882a593Smuzhiyun	lsr.l		%d0,%d1
3469*4882a593Smuzhiyun	or.l		%d1,FP_SCR0_HI(%a6)	# create dbl lo
3470*4882a593Smuzhiyun	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
3471*4882a593Smuzhiyun	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
3472*4882a593Smuzhiyun	movq.l		&0x8,%d0		# pass: size of 8 bytes
3473*4882a593Smuzhiyun	bsr.l		_dmem_write		# write the default result
3474*4882a593Smuzhiyun
3475*4882a593Smuzhiyun	tst.l		%d1			# did dstore fail?
3476*4882a593Smuzhiyun	bne.l		facc_out_d		# yes
3477*4882a593Smuzhiyun
3478*4882a593Smuzhiyun	bra.w		fsnan_exit
3479*4882a593Smuzhiyun
3480*4882a593Smuzhiyun# for extended precision, if the addressing mode is pre-decrement or
3481*4882a593Smuzhiyun# post-increment, then the address register did not get updated.
3482*4882a593Smuzhiyun# in addition, for pre-decrement, the stacked <ea> is incorrect.
3483*4882a593Smuzhiyunfsnan_out_x:
3484*4882a593Smuzhiyun	clr.b		SPCOND_FLG(%a6)		# clear special case flag
3485*4882a593Smuzhiyun
3486*4882a593Smuzhiyun	mov.w		FP_SRC_EX(%a6),FP_SCR0_EX(%a6)
3487*4882a593Smuzhiyun	clr.w		2+FP_SCR0(%a6)
3488*4882a593Smuzhiyun	mov.l		FP_SRC_HI(%a6),%d0
3489*4882a593Smuzhiyun	bset		&30,%d0
3490*4882a593Smuzhiyun	mov.l		%d0,FP_SCR0_HI(%a6)
3491*4882a593Smuzhiyun	mov.l		FP_SRC_LO(%a6),FP_SCR0_LO(%a6)
3492*4882a593Smuzhiyun
3493*4882a593Smuzhiyun	btst		&0x5,EXC_SR(%a6)	# supervisor mode exception?
3494*4882a593Smuzhiyun	bne.b		fsnan_out_x_s		# yes
3495*4882a593Smuzhiyun
3496*4882a593Smuzhiyun	mov.l		%usp,%a0		# fetch user stack pointer
3497*4882a593Smuzhiyun	mov.l		%a0,EXC_A7(%a6)		# save on stack for calc_ea()
3498*4882a593Smuzhiyun	mov.l		(%a6),EXC_A6(%a6)
3499*4882a593Smuzhiyun
3500*4882a593Smuzhiyun	bsr.l		_calc_ea_fout		# find the correct ea,update An
3501*4882a593Smuzhiyun	mov.l		%a0,%a1
3502*4882a593Smuzhiyun	mov.l		%a0,EXC_EA(%a6)		# stack correct <ea>
3503*4882a593Smuzhiyun
3504*4882a593Smuzhiyun	mov.l		EXC_A7(%a6),%a0
3505*4882a593Smuzhiyun	mov.l		%a0,%usp		# restore user stack pointer
3506*4882a593Smuzhiyun	mov.l		EXC_A6(%a6),(%a6)
3507*4882a593Smuzhiyun
3508*4882a593Smuzhiyunfsnan_out_x_save:
3509*4882a593Smuzhiyun	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
3510*4882a593Smuzhiyun	movq.l		&0xc,%d0		# pass: size of extended
3511*4882a593Smuzhiyun	bsr.l		_dmem_write		# write the default result
3512*4882a593Smuzhiyun
3513*4882a593Smuzhiyun	tst.l		%d1			# did dstore fail?
3514*4882a593Smuzhiyun	bne.l		facc_out_x		# yes
3515*4882a593Smuzhiyun
3516*4882a593Smuzhiyun	bra.w		fsnan_exit
3517*4882a593Smuzhiyun
3518*4882a593Smuzhiyunfsnan_out_x_s:
3519*4882a593Smuzhiyun	mov.l		(%a6),EXC_A6(%a6)
3520*4882a593Smuzhiyun
3521*4882a593Smuzhiyun	bsr.l		_calc_ea_fout		# find the correct ea,update An
3522*4882a593Smuzhiyun	mov.l		%a0,%a1
3523*4882a593Smuzhiyun	mov.l		%a0,EXC_EA(%a6)		# stack correct <ea>
3524*4882a593Smuzhiyun
3525*4882a593Smuzhiyun	mov.l		EXC_A6(%a6),(%a6)
3526*4882a593Smuzhiyun
3527*4882a593Smuzhiyun	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
3528*4882a593Smuzhiyun	bne.b		fsnan_out_x_save	# no
3529*4882a593Smuzhiyun
3530*4882a593Smuzhiyun# the operation was "fmove.x SNAN,-(a7)" from supervisor mode.
3531*4882a593Smuzhiyun	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
3532*4882a593Smuzhiyun	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3533*4882a593Smuzhiyun	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
3534*4882a593Smuzhiyun
3535*4882a593Smuzhiyun	frestore	FP_SRC(%a6)
3536*4882a593Smuzhiyun
3537*4882a593Smuzhiyun	mov.l		EXC_A6(%a6),%a6		# restore frame pointer
3538*4882a593Smuzhiyun
3539*4882a593Smuzhiyun	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
3540*4882a593Smuzhiyun	mov.l		LOCAL_SIZE+EXC_PC+0x2(%sp),LOCAL_SIZE+EXC_PC+0x2-0xc(%sp)
3541*4882a593Smuzhiyun	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
3542*4882a593Smuzhiyun
3543*4882a593Smuzhiyun	mov.l		LOCAL_SIZE+FP_SCR0_EX(%sp),LOCAL_SIZE+EXC_SR(%sp)
3544*4882a593Smuzhiyun	mov.l		LOCAL_SIZE+FP_SCR0_HI(%sp),LOCAL_SIZE+EXC_PC+0x2(%sp)
3545*4882a593Smuzhiyun	mov.l		LOCAL_SIZE+FP_SCR0_LO(%sp),LOCAL_SIZE+EXC_EA(%sp)
3546*4882a593Smuzhiyun
3547*4882a593Smuzhiyun	add.l		&LOCAL_SIZE-0x8,%sp
3548*4882a593Smuzhiyun
3549*4882a593Smuzhiyun	bra.l		_real_snan
3550*4882a593Smuzhiyun
3551*4882a593Smuzhiyun#########################################################################
3552*4882a593Smuzhiyun# XDEF ****************************************************************	#
3553*4882a593Smuzhiyun#	_fpsp_inex(): 060FPSP entry point for FP Inexact exception.	#
3554*4882a593Smuzhiyun#									#
3555*4882a593Smuzhiyun#	This handler should be the first code executed upon taking the	#
3556*4882a593Smuzhiyun#	FP Inexact exception in an operating system.			#
3557*4882a593Smuzhiyun#									#
3558*4882a593Smuzhiyun# XREF ****************************************************************	#
3559*4882a593Smuzhiyun#	_imem_read_long() - read instruction longword			#
3560*4882a593Smuzhiyun#	fix_skewed_ops() - adjust src operand in fsave frame		#
3561*4882a593Smuzhiyun#	set_tag_x() - determine optype of src/dst operands		#
3562*4882a593Smuzhiyun#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
3563*4882a593Smuzhiyun#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
3564*4882a593Smuzhiyun#	load_fpn2() - load dst operand from FP regfile			#
3565*4882a593Smuzhiyun#	smovcr() - emulate an "fmovcr" instruction			#
3566*4882a593Smuzhiyun#	fout() - emulate an opclass 3 instruction			#
3567*4882a593Smuzhiyun#	tbl_unsupp - add of table of emulation routines for opclass 0,2	#
3568*4882a593Smuzhiyun#	_real_inex() - "callout" to operating system inexact handler	#
3569*4882a593Smuzhiyun#									#
3570*4882a593Smuzhiyun# INPUT ***************************************************************	#
3571*4882a593Smuzhiyun#	- The system stack contains the FP Inexact exception frame	#
3572*4882a593Smuzhiyun#	- The fsave frame contains the source operand			#
3573*4882a593Smuzhiyun#									#
3574*4882a593Smuzhiyun# OUTPUT **************************************************************	#
3575*4882a593Smuzhiyun#	- The system stack is unchanged					#
3576*4882a593Smuzhiyun#	- The fsave frame contains the adjusted src op for opclass 0,2	#
3577*4882a593Smuzhiyun#									#
3578*4882a593Smuzhiyun# ALGORITHM ***********************************************************	#
3579*4882a593Smuzhiyun#	In a system where the FP Inexact exception is enabled, the goal	#
3580*4882a593Smuzhiyun# is to get to the handler specified at _real_inex(). But, on the 060,	#
3581*4882a593Smuzhiyun# for opclass zero and two instruction taking this exception, the	#
3582*4882a593Smuzhiyun# hardware doesn't store the correct result to the destination FP	#
3583*4882a593Smuzhiyun# register as did the '040 and '881/2. This handler must emulate the	#
3584*4882a593Smuzhiyun# instruction in order to get this value and then store it to the	#
3585*4882a593Smuzhiyun# correct register before calling _real_inex().				#
3586*4882a593Smuzhiyun#	For opclass 3 instructions, the 060 doesn't store the default	#
3587*4882a593Smuzhiyun# inexact result out to memory or data register file as it should.	#
3588*4882a593Smuzhiyun# This code must emulate the move out by calling fout() before finally	#
3589*4882a593Smuzhiyun# exiting through _real_inex().						#
3590*4882a593Smuzhiyun#									#
3591*4882a593Smuzhiyun#########################################################################
3592*4882a593Smuzhiyun
3593*4882a593Smuzhiyun	global		_fpsp_inex
3594*4882a593Smuzhiyun_fpsp_inex:
3595*4882a593Smuzhiyun
3596*4882a593Smuzhiyun	link.w		%a6,&-LOCAL_SIZE	# init stack frame
3597*4882a593Smuzhiyun
3598*4882a593Smuzhiyun	fsave		FP_SRC(%a6)		# grab the "busy" frame
3599*4882a593Smuzhiyun
3600*4882a593Smuzhiyun	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
3601*4882a593Smuzhiyun	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3602*4882a593Smuzhiyun	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
3603*4882a593Smuzhiyun
3604*4882a593Smuzhiyun# the FPIAR holds the "current PC" of the faulting instruction
3605*4882a593Smuzhiyun	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3606*4882a593Smuzhiyun
3607*4882a593Smuzhiyun	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
3608*4882a593Smuzhiyun	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
3609*4882a593Smuzhiyun	bsr.l		_imem_read_long		# fetch the instruction words
3610*4882a593Smuzhiyun	mov.l		%d0,EXC_OPWORD(%a6)
3611*4882a593Smuzhiyun
3612*4882a593Smuzhiyun##############################################################################
3613*4882a593Smuzhiyun
3614*4882a593Smuzhiyun	btst		&13,%d0			# is instr an fmove out?
3615*4882a593Smuzhiyun	bne.w		finex_out		# fmove out
3616*4882a593Smuzhiyun
3617*4882a593Smuzhiyun
3618*4882a593Smuzhiyun# the hardware, for "fabs" and "fneg" w/ a long source format, puts the
3619*4882a593Smuzhiyun# longword integer directly into the upper longword of the mantissa along
3620*4882a593Smuzhiyun# w/ an exponent value of 0x401e. we convert this to extended precision here.
3621*4882a593Smuzhiyun	bfextu		%d0{&19:&3},%d0		# fetch instr size
3622*4882a593Smuzhiyun	bne.b		finex_cont		# instr size is not long
3623*4882a593Smuzhiyun	cmpi.w		FP_SRC_EX(%a6),&0x401e	# is exponent 0x401e?
3624*4882a593Smuzhiyun	bne.b		finex_cont		# no
3625*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr
3626*4882a593Smuzhiyun	fmov.l		FP_SRC_HI(%a6),%fp0	# load integer src
3627*4882a593Smuzhiyun	fmov.x		%fp0,FP_SRC(%a6)	# store integer as extended precision
3628*4882a593Smuzhiyun	mov.w		&0xe001,0x2+FP_SRC(%a6)
3629*4882a593Smuzhiyun
3630*4882a593Smuzhiyunfinex_cont:
3631*4882a593Smuzhiyun	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
3632*4882a593Smuzhiyun	bsr.l		fix_skewed_ops		# fix src op
3633*4882a593Smuzhiyun
3634*4882a593Smuzhiyun# Here, we zero the ccode and exception byte field since we're going to
3635*4882a593Smuzhiyun# emulate the whole instruction. Notice, though, that we don't kill the
3636*4882a593Smuzhiyun# INEX1 bit. This is because a packed op has long since been converted
3637*4882a593Smuzhiyun# to extended before arriving here. Therefore, we need to retain the
3638*4882a593Smuzhiyun# INEX1 bit from when the operand was first converted.
3639*4882a593Smuzhiyun	andi.l		&0x00ff01ff,USER_FPSR(%a6) # zero all but accured field
3640*4882a593Smuzhiyun
3641*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# zero current control regs
3642*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr
3643*4882a593Smuzhiyun
3644*4882a593Smuzhiyun	bfextu		EXC_EXTWORD(%a6){&0:&6},%d1 # extract upper 6 of cmdreg
3645*4882a593Smuzhiyun	cmpi.b		%d1,&0x17		# is op an fmovecr?
3646*4882a593Smuzhiyun	beq.w		finex_fmovcr		# yes
3647*4882a593Smuzhiyun
3648*4882a593Smuzhiyun	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
3649*4882a593Smuzhiyun	bsr.l		set_tag_x		# tag the operand type
3650*4882a593Smuzhiyun	mov.b		%d0,STAG(%a6)		# maybe NORM,DENORM
3651*4882a593Smuzhiyun
3652*4882a593Smuzhiyun# bits four and five of the fp extension word separate the monadic and dyadic
3653*4882a593Smuzhiyun# operations that can pass through fpsp_inex(). remember that fcmp and ftst
3654*4882a593Smuzhiyun# will never take this exception, but fsincos will.
3655*4882a593Smuzhiyun	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
3656*4882a593Smuzhiyun	beq.b		finex_extract		# monadic
3657*4882a593Smuzhiyun
3658*4882a593Smuzhiyun	btst		&0x4,1+EXC_CMDREG(%a6)	# is operation an fsincos?
3659*4882a593Smuzhiyun	bne.b		finex_extract		# yes
3660*4882a593Smuzhiyun
3661*4882a593Smuzhiyun	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
3662*4882a593Smuzhiyun	bsr.l		load_fpn2		# load dst into FP_DST
3663*4882a593Smuzhiyun
3664*4882a593Smuzhiyun	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
3665*4882a593Smuzhiyun	bsr.l		set_tag_x		# tag the operand type
3666*4882a593Smuzhiyun	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
3667*4882a593Smuzhiyun	bne.b		finex_op2_done		# no
3668*4882a593Smuzhiyun	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
3669*4882a593Smuzhiyunfinex_op2_done:
3670*4882a593Smuzhiyun	mov.b		%d0,DTAG(%a6)		# save dst optype tag
3671*4882a593Smuzhiyun
3672*4882a593Smuzhiyunfinex_extract:
3673*4882a593Smuzhiyun	clr.l		%d0
3674*4882a593Smuzhiyun	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
3675*4882a593Smuzhiyun
3676*4882a593Smuzhiyun	mov.b		1+EXC_CMDREG(%a6),%d1
3677*4882a593Smuzhiyun	andi.w		&0x007f,%d1		# extract extension
3678*4882a593Smuzhiyun
3679*4882a593Smuzhiyun	lea		FP_SRC(%a6),%a0
3680*4882a593Smuzhiyun	lea		FP_DST(%a6),%a1
3681*4882a593Smuzhiyun
3682*4882a593Smuzhiyun	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
3683*4882a593Smuzhiyun	jsr		(tbl_unsupp.l,%pc,%d1.l*1)
3684*4882a593Smuzhiyun
3685*4882a593Smuzhiyun# the operation has been emulated. the result is in fp0.
3686*4882a593Smuzhiyunfinex_save:
3687*4882a593Smuzhiyun	bfextu		EXC_CMDREG(%a6){&6:&3},%d0
3688*4882a593Smuzhiyun	bsr.l		store_fpreg
3689*4882a593Smuzhiyun
3690*4882a593Smuzhiyunfinex_exit:
3691*4882a593Smuzhiyun	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
3692*4882a593Smuzhiyun	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3693*4882a593Smuzhiyun	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
3694*4882a593Smuzhiyun
3695*4882a593Smuzhiyun	frestore	FP_SRC(%a6)
3696*4882a593Smuzhiyun
3697*4882a593Smuzhiyun	unlk		%a6
3698*4882a593Smuzhiyun	bra.l		_real_inex
3699*4882a593Smuzhiyun
3700*4882a593Smuzhiyunfinex_fmovcr:
3701*4882a593Smuzhiyun	clr.l		%d0
3702*4882a593Smuzhiyun	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec,mode
3703*4882a593Smuzhiyun	mov.b		1+EXC_CMDREG(%a6),%d1
3704*4882a593Smuzhiyun	andi.l		&0x0000007f,%d1		# pass rom offset
3705*4882a593Smuzhiyun	bsr.l		smovcr
3706*4882a593Smuzhiyun	bra.b		finex_save
3707*4882a593Smuzhiyun
3708*4882a593Smuzhiyun########################################################################
3709*4882a593Smuzhiyun
3710*4882a593Smuzhiyun#
3711*4882a593Smuzhiyun# the hardware does not save the default result to memory on enabled
3712*4882a593Smuzhiyun# inexact exceptions. we do this here before passing control to
3713*4882a593Smuzhiyun# the user inexact handler.
3714*4882a593Smuzhiyun#
3715*4882a593Smuzhiyun# byte, word, and long destination format operations can pass
3716*4882a593Smuzhiyun# through here. so can double and single precision.
3717*4882a593Smuzhiyun# although packed opclass three operations can take inexact
3718*4882a593Smuzhiyun# exceptions, they won't pass through here since they are caught
3719*4882a593Smuzhiyun# first by the unsupported data format exception handler. that handler
3720*4882a593Smuzhiyun# sends them directly to _real_inex() if necessary.
3721*4882a593Smuzhiyun#
3722*4882a593Smuzhiyunfinex_out:
3723*4882a593Smuzhiyun
3724*4882a593Smuzhiyun	mov.b		&NORM,STAG(%a6)		# src is a NORM
3725*4882a593Smuzhiyun
3726*4882a593Smuzhiyun	clr.l		%d0
3727*4882a593Smuzhiyun	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec,mode
3728*4882a593Smuzhiyun
3729*4882a593Smuzhiyun	andi.l		&0xffff00ff,USER_FPSR(%a6) # zero exception field
3730*4882a593Smuzhiyun
3731*4882a593Smuzhiyun	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
3732*4882a593Smuzhiyun
3733*4882a593Smuzhiyun	bsr.l		fout			# store the default result
3734*4882a593Smuzhiyun
3735*4882a593Smuzhiyun	bra.b		finex_exit
3736*4882a593Smuzhiyun
3737*4882a593Smuzhiyun#########################################################################
3738*4882a593Smuzhiyun# XDEF ****************************************************************	#
3739*4882a593Smuzhiyun#	_fpsp_dz(): 060FPSP entry point for FP DZ exception.		#
3740*4882a593Smuzhiyun#									#
3741*4882a593Smuzhiyun#	This handler should be the first code executed upon taking	#
3742*4882a593Smuzhiyun#	the FP DZ exception in an operating system.			#
3743*4882a593Smuzhiyun#									#
3744*4882a593Smuzhiyun# XREF ****************************************************************	#
3745*4882a593Smuzhiyun#	_imem_read_long() - read instruction longword from memory	#
3746*4882a593Smuzhiyun#	fix_skewed_ops() - adjust fsave operand				#
3747*4882a593Smuzhiyun#	_real_dz() - "callout" exit point from FP DZ handler		#
3748*4882a593Smuzhiyun#									#
3749*4882a593Smuzhiyun# INPUT ***************************************************************	#
3750*4882a593Smuzhiyun#	- The system stack contains the FP DZ exception stack.		#
3751*4882a593Smuzhiyun#	- The fsave frame contains the source operand.			#
3752*4882a593Smuzhiyun#									#
3753*4882a593Smuzhiyun# OUTPUT **************************************************************	#
3754*4882a593Smuzhiyun#	- The system stack contains the FP DZ exception stack.		#
3755*4882a593Smuzhiyun#	- The fsave frame contains the adjusted source operand.		#
3756*4882a593Smuzhiyun#									#
3757*4882a593Smuzhiyun# ALGORITHM ***********************************************************	#
3758*4882a593Smuzhiyun#	In a system where the DZ exception is enabled, the goal is to	#
3759*4882a593Smuzhiyun# get to the handler specified at _real_dz(). But, on the 060, when the	#
3760*4882a593Smuzhiyun# exception is taken, the input operand in the fsave state frame may	#
3761*4882a593Smuzhiyun# be incorrect for some cases and need to be adjusted. So, this package	#
3762*4882a593Smuzhiyun# adjusts the operand using fix_skewed_ops() and then branches to	#
3763*4882a593Smuzhiyun# _real_dz().								#
3764*4882a593Smuzhiyun#									#
3765*4882a593Smuzhiyun#########################################################################
3766*4882a593Smuzhiyun
3767*4882a593Smuzhiyun	global		_fpsp_dz
3768*4882a593Smuzhiyun_fpsp_dz:
3769*4882a593Smuzhiyun
3770*4882a593Smuzhiyun	link.w		%a6,&-LOCAL_SIZE	# init stack frame
3771*4882a593Smuzhiyun
3772*4882a593Smuzhiyun	fsave		FP_SRC(%a6)		# grab the "busy" frame
3773*4882a593Smuzhiyun
3774*4882a593Smuzhiyun	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
3775*4882a593Smuzhiyun	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3776*4882a593Smuzhiyun	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
3777*4882a593Smuzhiyun
3778*4882a593Smuzhiyun# the FPIAR holds the "current PC" of the faulting instruction
3779*4882a593Smuzhiyun	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3780*4882a593Smuzhiyun
3781*4882a593Smuzhiyun	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
3782*4882a593Smuzhiyun	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
3783*4882a593Smuzhiyun	bsr.l		_imem_read_long		# fetch the instruction words
3784*4882a593Smuzhiyun	mov.l		%d0,EXC_OPWORD(%a6)
3785*4882a593Smuzhiyun
3786*4882a593Smuzhiyun##############################################################################
3787*4882a593Smuzhiyun
3788*4882a593Smuzhiyun
3789*4882a593Smuzhiyun# here, we simply see if the operand in the fsave frame needs to be "unskewed".
3790*4882a593Smuzhiyun# this would be the case for opclass two operations with a source zero
3791*4882a593Smuzhiyun# in the sgl or dbl format.
3792*4882a593Smuzhiyun	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
3793*4882a593Smuzhiyun	bsr.l		fix_skewed_ops		# fix src op
3794*4882a593Smuzhiyun
3795*4882a593Smuzhiyunfdz_exit:
3796*4882a593Smuzhiyun	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
3797*4882a593Smuzhiyun	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3798*4882a593Smuzhiyun	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
3799*4882a593Smuzhiyun
3800*4882a593Smuzhiyun	frestore	FP_SRC(%a6)
3801*4882a593Smuzhiyun
3802*4882a593Smuzhiyun	unlk		%a6
3803*4882a593Smuzhiyun	bra.l		_real_dz
3804*4882a593Smuzhiyun
3805*4882a593Smuzhiyun#########################################################################
3806*4882a593Smuzhiyun# XDEF ****************************************************************	#
3807*4882a593Smuzhiyun#	_fpsp_fline(): 060FPSP entry point for "Line F emulator"	#
3808*4882a593Smuzhiyun#		       exception when the "reduced" version of the	#
3809*4882a593Smuzhiyun#		       FPSP is implemented that does not emulate	#
3810*4882a593Smuzhiyun#		       FP unimplemented instructions.			#
3811*4882a593Smuzhiyun#									#
3812*4882a593Smuzhiyun#	This handler should be the first code executed upon taking a	#
3813*4882a593Smuzhiyun#	"Line F Emulator" exception in an operating system integrating	#
3814*4882a593Smuzhiyun#	the reduced version of 060FPSP.					#
3815*4882a593Smuzhiyun#									#
3816*4882a593Smuzhiyun# XREF ****************************************************************	#
3817*4882a593Smuzhiyun#	_real_fpu_disabled() - Handle "FPU disabled" exceptions		#
3818*4882a593Smuzhiyun#	_real_fline() - Handle all other cases (treated equally)	#
3819*4882a593Smuzhiyun#									#
3820*4882a593Smuzhiyun# INPUT ***************************************************************	#
3821*4882a593Smuzhiyun#	- The system stack contains a "Line F Emulator" exception	#
3822*4882a593Smuzhiyun#	  stack frame.							#
3823*4882a593Smuzhiyun#									#
3824*4882a593Smuzhiyun# OUTPUT **************************************************************	#
3825*4882a593Smuzhiyun#	- The system stack is unchanged.				#
3826*4882a593Smuzhiyun#									#
3827*4882a593Smuzhiyun# ALGORITHM ***********************************************************	#
3828*4882a593Smuzhiyun#	When a "Line F Emulator" exception occurs in a system where	#
3829*4882a593Smuzhiyun# "FPU Unimplemented" instructions will not be emulated, the exception	#
3830*4882a593Smuzhiyun# can occur because then FPU is disabled or the instruction is to be	#
3831*4882a593Smuzhiyun# classifed as "Line F". This module determines which case exists and	#
3832*4882a593Smuzhiyun# calls the appropriate "callout".					#
3833*4882a593Smuzhiyun#									#
3834*4882a593Smuzhiyun#########################################################################
3835*4882a593Smuzhiyun
3836*4882a593Smuzhiyun	global		_fpsp_fline
3837*4882a593Smuzhiyun_fpsp_fline:
3838*4882a593Smuzhiyun
3839*4882a593Smuzhiyun# check to see if the FPU is disabled. if so, jump to the OS entry
3840*4882a593Smuzhiyun# point for that condition.
3841*4882a593Smuzhiyun	cmpi.w		0x6(%sp),&0x402c
3842*4882a593Smuzhiyun	beq.l		_real_fpu_disabled
3843*4882a593Smuzhiyun
3844*4882a593Smuzhiyun	bra.l		_real_fline
3845*4882a593Smuzhiyun
3846*4882a593Smuzhiyun#########################################################################
3847*4882a593Smuzhiyun# XDEF ****************************************************************	#
3848*4882a593Smuzhiyun#	_dcalc_ea(): calc correct <ea> from <ea> stacked on exception	#
3849*4882a593Smuzhiyun#									#
3850*4882a593Smuzhiyun# XREF ****************************************************************	#
3851*4882a593Smuzhiyun#	inc_areg() - increment an address register			#
3852*4882a593Smuzhiyun#	dec_areg() - decrement an address register			#
3853*4882a593Smuzhiyun#									#
3854*4882a593Smuzhiyun# INPUT ***************************************************************	#
3855*4882a593Smuzhiyun#	d0 = number of bytes to adjust <ea> by				#
3856*4882a593Smuzhiyun#									#
3857*4882a593Smuzhiyun# OUTPUT **************************************************************	#
3858*4882a593Smuzhiyun#	None								#
3859*4882a593Smuzhiyun#									#
3860*4882a593Smuzhiyun# ALGORITHM ***********************************************************	#
3861*4882a593Smuzhiyun# "Dummy" CALCulate Effective Address:					#
3862*4882a593Smuzhiyun#	The stacked <ea> for FP unimplemented instructions and opclass	#
3863*4882a593Smuzhiyun#	two packed instructions is correct with the exception of...	#
3864*4882a593Smuzhiyun#									#
3865*4882a593Smuzhiyun#	1) -(An)   : The register is not updated regardless of size.	#
3866*4882a593Smuzhiyun#		     Also, for extended precision and packed, the	#
3867*4882a593Smuzhiyun#		     stacked <ea> value is 8 bytes too big		#
3868*4882a593Smuzhiyun#	2) (An)+   : The register is not updated.			#
3869*4882a593Smuzhiyun#	3) #<data> : The upper longword of the immediate operand is	#
3870*4882a593Smuzhiyun#		     stacked b,w,l and s sizes are completely stacked.	#
3871*4882a593Smuzhiyun#		     d,x, and p are not.				#
3872*4882a593Smuzhiyun#									#
3873*4882a593Smuzhiyun#########################################################################
3874*4882a593Smuzhiyun
3875*4882a593Smuzhiyun	global		_dcalc_ea
3876*4882a593Smuzhiyun_dcalc_ea:
3877*4882a593Smuzhiyun	mov.l		%d0, %a0		# move # bytes to %a0
3878*4882a593Smuzhiyun
3879*4882a593Smuzhiyun	mov.b		1+EXC_OPWORD(%a6), %d0	# fetch opcode word
3880*4882a593Smuzhiyun	mov.l		%d0, %d1		# make a copy
3881*4882a593Smuzhiyun
3882*4882a593Smuzhiyun	andi.w		&0x38, %d0		# extract mode field
3883*4882a593Smuzhiyun	andi.l		&0x7, %d1		# extract reg  field
3884*4882a593Smuzhiyun
3885*4882a593Smuzhiyun	cmpi.b		%d0,&0x18		# is mode (An)+ ?
3886*4882a593Smuzhiyun	beq.b		dcea_pi			# yes
3887*4882a593Smuzhiyun
3888*4882a593Smuzhiyun	cmpi.b		%d0,&0x20		# is mode -(An) ?
3889*4882a593Smuzhiyun	beq.b		dcea_pd			# yes
3890*4882a593Smuzhiyun
3891*4882a593Smuzhiyun	or.w		%d1,%d0			# concat mode,reg
3892*4882a593Smuzhiyun	cmpi.b		%d0,&0x3c		# is mode #<data>?
3893*4882a593Smuzhiyun
3894*4882a593Smuzhiyun	beq.b		dcea_imm		# yes
3895*4882a593Smuzhiyun
3896*4882a593Smuzhiyun	mov.l		EXC_EA(%a6),%a0		# return <ea>
3897*4882a593Smuzhiyun	rts
3898*4882a593Smuzhiyun
3899*4882a593Smuzhiyun# need to set immediate data flag here since we'll need to do
3900*4882a593Smuzhiyun# an imem_read to fetch this later.
3901*4882a593Smuzhiyundcea_imm:
3902*4882a593Smuzhiyun	mov.b		&immed_flg,SPCOND_FLG(%a6)
3903*4882a593Smuzhiyun	lea		([USER_FPIAR,%a6],0x4),%a0 # no; return <ea>
3904*4882a593Smuzhiyun	rts
3905*4882a593Smuzhiyun
3906*4882a593Smuzhiyun# here, the <ea> is stacked correctly. however, we must update the
3907*4882a593Smuzhiyun# address register...
3908*4882a593Smuzhiyundcea_pi:
3909*4882a593Smuzhiyun	mov.l		%a0,%d0			# pass amt to inc by
3910*4882a593Smuzhiyun	bsr.l		inc_areg		# inc addr register
3911*4882a593Smuzhiyun
3912*4882a593Smuzhiyun	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
3913*4882a593Smuzhiyun	rts
3914*4882a593Smuzhiyun
3915*4882a593Smuzhiyun# the <ea> is stacked correctly for all but extended and packed which
3916*4882a593Smuzhiyun# the <ea>s are 8 bytes too large.
3917*4882a593Smuzhiyun# it would make no sense to have a pre-decrement to a7 in supervisor
3918*4882a593Smuzhiyun# mode so we don't even worry about this tricky case here : )
3919*4882a593Smuzhiyundcea_pd:
3920*4882a593Smuzhiyun	mov.l		%a0,%d0			# pass amt to dec by
3921*4882a593Smuzhiyun	bsr.l		dec_areg		# dec addr register
3922*4882a593Smuzhiyun
3923*4882a593Smuzhiyun	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
3924*4882a593Smuzhiyun
3925*4882a593Smuzhiyun	cmpi.b		%d0,&0xc		# is opsize ext or packed?
3926*4882a593Smuzhiyun	beq.b		dcea_pd2		# yes
3927*4882a593Smuzhiyun	rts
3928*4882a593Smuzhiyundcea_pd2:
3929*4882a593Smuzhiyun	sub.l		&0x8,%a0		# correct <ea>
3930*4882a593Smuzhiyun	mov.l		%a0,EXC_EA(%a6)		# put correct <ea> on stack
3931*4882a593Smuzhiyun	rts
3932*4882a593Smuzhiyun
3933*4882a593Smuzhiyun#########################################################################
3934*4882a593Smuzhiyun# XDEF ****************************************************************	#
3935*4882a593Smuzhiyun#	_calc_ea_fout(): calculate correct stacked <ea> for extended	#
3936*4882a593Smuzhiyun#			 and packed data opclass 3 operations.		#
3937*4882a593Smuzhiyun#									#
3938*4882a593Smuzhiyun# XREF ****************************************************************	#
3939*4882a593Smuzhiyun#	None								#
3940*4882a593Smuzhiyun#									#
3941*4882a593Smuzhiyun# INPUT ***************************************************************	#
3942*4882a593Smuzhiyun#	None								#
3943*4882a593Smuzhiyun#									#
3944*4882a593Smuzhiyun# OUTPUT **************************************************************	#
3945*4882a593Smuzhiyun#	a0 = return correct effective address				#
3946*4882a593Smuzhiyun#									#
3947*4882a593Smuzhiyun# ALGORITHM ***********************************************************	#
3948*4882a593Smuzhiyun#	For opclass 3 extended and packed data operations, the <ea>	#
3949*4882a593Smuzhiyun# stacked for the exception is incorrect for -(an) and (an)+ addressing	#
3950*4882a593Smuzhiyun# modes. Also, while we're at it, the index register itself must get	#
3951*4882a593Smuzhiyun# updated.								#
3952*4882a593Smuzhiyun#	So, for -(an), we must subtract 8 off of the stacked <ea> value	#
3953*4882a593Smuzhiyun# and return that value as the correct <ea> and store that value in An.	#
3954*4882a593Smuzhiyun# For (an)+, the stacked <ea> is correct but we must adjust An by +12.	#
3955*4882a593Smuzhiyun#									#
3956*4882a593Smuzhiyun#########################################################################
3957*4882a593Smuzhiyun
3958*4882a593Smuzhiyun# This calc_ea is currently used to retrieve the correct <ea>
3959*4882a593Smuzhiyun# for fmove outs of type extended and packed.
3960*4882a593Smuzhiyun	global		_calc_ea_fout
3961*4882a593Smuzhiyun_calc_ea_fout:
3962*4882a593Smuzhiyun	mov.b		1+EXC_OPWORD(%a6),%d0	# fetch opcode word
3963*4882a593Smuzhiyun	mov.l		%d0,%d1			# make a copy
3964*4882a593Smuzhiyun
3965*4882a593Smuzhiyun	andi.w		&0x38,%d0		# extract mode field
3966*4882a593Smuzhiyun	andi.l		&0x7,%d1		# extract reg  field
3967*4882a593Smuzhiyun
3968*4882a593Smuzhiyun	cmpi.b		%d0,&0x18		# is mode (An)+ ?
3969*4882a593Smuzhiyun	beq.b		ceaf_pi			# yes
3970*4882a593Smuzhiyun
3971*4882a593Smuzhiyun	cmpi.b		%d0,&0x20		# is mode -(An) ?
3972*4882a593Smuzhiyun	beq.w		ceaf_pd			# yes
3973*4882a593Smuzhiyun
3974*4882a593Smuzhiyun	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
3975*4882a593Smuzhiyun	rts
3976*4882a593Smuzhiyun
3977*4882a593Smuzhiyun# (An)+ : extended and packed fmove out
3978*4882a593Smuzhiyun#	: stacked <ea> is correct
3979*4882a593Smuzhiyun#	: "An" not updated
3980*4882a593Smuzhiyunceaf_pi:
3981*4882a593Smuzhiyun	mov.w		(tbl_ceaf_pi.b,%pc,%d1.w*2),%d1
3982*4882a593Smuzhiyun	mov.l		EXC_EA(%a6),%a0
3983*4882a593Smuzhiyun	jmp		(tbl_ceaf_pi.b,%pc,%d1.w*1)
3984*4882a593Smuzhiyun
3985*4882a593Smuzhiyun	swbeg		&0x8
3986*4882a593Smuzhiyuntbl_ceaf_pi:
3987*4882a593Smuzhiyun	short		ceaf_pi0 - tbl_ceaf_pi
3988*4882a593Smuzhiyun	short		ceaf_pi1 - tbl_ceaf_pi
3989*4882a593Smuzhiyun	short		ceaf_pi2 - tbl_ceaf_pi
3990*4882a593Smuzhiyun	short		ceaf_pi3 - tbl_ceaf_pi
3991*4882a593Smuzhiyun	short		ceaf_pi4 - tbl_ceaf_pi
3992*4882a593Smuzhiyun	short		ceaf_pi5 - tbl_ceaf_pi
3993*4882a593Smuzhiyun	short		ceaf_pi6 - tbl_ceaf_pi
3994*4882a593Smuzhiyun	short		ceaf_pi7 - tbl_ceaf_pi
3995*4882a593Smuzhiyun
3996*4882a593Smuzhiyunceaf_pi0:
3997*4882a593Smuzhiyun	addi.l		&0xc,EXC_DREGS+0x8(%a6)
3998*4882a593Smuzhiyun	rts
3999*4882a593Smuzhiyunceaf_pi1:
4000*4882a593Smuzhiyun	addi.l		&0xc,EXC_DREGS+0xc(%a6)
4001*4882a593Smuzhiyun	rts
4002*4882a593Smuzhiyunceaf_pi2:
4003*4882a593Smuzhiyun	add.l		&0xc,%a2
4004*4882a593Smuzhiyun	rts
4005*4882a593Smuzhiyunceaf_pi3:
4006*4882a593Smuzhiyun	add.l		&0xc,%a3
4007*4882a593Smuzhiyun	rts
4008*4882a593Smuzhiyunceaf_pi4:
4009*4882a593Smuzhiyun	add.l		&0xc,%a4
4010*4882a593Smuzhiyun	rts
4011*4882a593Smuzhiyunceaf_pi5:
4012*4882a593Smuzhiyun	add.l		&0xc,%a5
4013*4882a593Smuzhiyun	rts
4014*4882a593Smuzhiyunceaf_pi6:
4015*4882a593Smuzhiyun	addi.l		&0xc,EXC_A6(%a6)
4016*4882a593Smuzhiyun	rts
4017*4882a593Smuzhiyunceaf_pi7:
4018*4882a593Smuzhiyun	mov.b		&mia7_flg,SPCOND_FLG(%a6)
4019*4882a593Smuzhiyun	addi.l		&0xc,EXC_A7(%a6)
4020*4882a593Smuzhiyun	rts
4021*4882a593Smuzhiyun
4022*4882a593Smuzhiyun# -(An) : extended and packed fmove out
4023*4882a593Smuzhiyun#	: stacked <ea> = actual <ea> + 8
4024*4882a593Smuzhiyun#	: "An" not updated
4025*4882a593Smuzhiyunceaf_pd:
4026*4882a593Smuzhiyun	mov.w		(tbl_ceaf_pd.b,%pc,%d1.w*2),%d1
4027*4882a593Smuzhiyun	mov.l		EXC_EA(%a6),%a0
4028*4882a593Smuzhiyun	sub.l		&0x8,%a0
4029*4882a593Smuzhiyun	sub.l		&0x8,EXC_EA(%a6)
4030*4882a593Smuzhiyun	jmp		(tbl_ceaf_pd.b,%pc,%d1.w*1)
4031*4882a593Smuzhiyun
4032*4882a593Smuzhiyun	swbeg		&0x8
4033*4882a593Smuzhiyuntbl_ceaf_pd:
4034*4882a593Smuzhiyun	short		ceaf_pd0 - tbl_ceaf_pd
4035*4882a593Smuzhiyun	short		ceaf_pd1 - tbl_ceaf_pd
4036*4882a593Smuzhiyun	short		ceaf_pd2 - tbl_ceaf_pd
4037*4882a593Smuzhiyun	short		ceaf_pd3 - tbl_ceaf_pd
4038*4882a593Smuzhiyun	short		ceaf_pd4 - tbl_ceaf_pd
4039*4882a593Smuzhiyun	short		ceaf_pd5 - tbl_ceaf_pd
4040*4882a593Smuzhiyun	short		ceaf_pd6 - tbl_ceaf_pd
4041*4882a593Smuzhiyun	short		ceaf_pd7 - tbl_ceaf_pd
4042*4882a593Smuzhiyun
4043*4882a593Smuzhiyunceaf_pd0:
4044*4882a593Smuzhiyun	mov.l		%a0,EXC_DREGS+0x8(%a6)
4045*4882a593Smuzhiyun	rts
4046*4882a593Smuzhiyunceaf_pd1:
4047*4882a593Smuzhiyun	mov.l		%a0,EXC_DREGS+0xc(%a6)
4048*4882a593Smuzhiyun	rts
4049*4882a593Smuzhiyunceaf_pd2:
4050*4882a593Smuzhiyun	mov.l		%a0,%a2
4051*4882a593Smuzhiyun	rts
4052*4882a593Smuzhiyunceaf_pd3:
4053*4882a593Smuzhiyun	mov.l		%a0,%a3
4054*4882a593Smuzhiyun	rts
4055*4882a593Smuzhiyunceaf_pd4:
4056*4882a593Smuzhiyun	mov.l		%a0,%a4
4057*4882a593Smuzhiyun	rts
4058*4882a593Smuzhiyunceaf_pd5:
4059*4882a593Smuzhiyun	mov.l		%a0,%a5
4060*4882a593Smuzhiyun	rts
4061*4882a593Smuzhiyunceaf_pd6:
4062*4882a593Smuzhiyun	mov.l		%a0,EXC_A6(%a6)
4063*4882a593Smuzhiyun	rts
4064*4882a593Smuzhiyunceaf_pd7:
4065*4882a593Smuzhiyun	mov.l		%a0,EXC_A7(%a6)
4066*4882a593Smuzhiyun	mov.b		&mda7_flg,SPCOND_FLG(%a6)
4067*4882a593Smuzhiyun	rts
4068*4882a593Smuzhiyun
4069*4882a593Smuzhiyun#
4070*4882a593Smuzhiyun# This table holds the offsets of the emulation routines for each individual
4071*4882a593Smuzhiyun# math operation relative to the address of this table. Included are
4072*4882a593Smuzhiyun# routines like fadd/fmul/fabs. The transcendentals ARE NOT. This is because
4073*4882a593Smuzhiyun# this table is for the version if the 060FPSP without transcendentals.
4074*4882a593Smuzhiyun# The location within the table is determined by the extension bits of the
4075*4882a593Smuzhiyun# operation longword.
4076*4882a593Smuzhiyun#
4077*4882a593Smuzhiyun
4078*4882a593Smuzhiyun	swbeg		&109
4079*4882a593Smuzhiyuntbl_unsupp:
4080*4882a593Smuzhiyun	long		fin		- tbl_unsupp	# 00: fmove
4081*4882a593Smuzhiyun	long		fint		- tbl_unsupp	# 01: fint
4082*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp	# 02: fsinh
4083*4882a593Smuzhiyun	long		fintrz		- tbl_unsupp	# 03: fintrz
4084*4882a593Smuzhiyun	long		fsqrt		- tbl_unsupp	# 04: fsqrt
4085*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp
4086*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp	# 06: flognp1
4087*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp
4088*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp	# 08: fetoxm1
4089*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp	# 09: ftanh
4090*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp	# 0a: fatan
4091*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp
4092*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp	# 0c: fasin
4093*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp	# 0d: fatanh
4094*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp	# 0e: fsin
4095*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp	# 0f: ftan
4096*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp	# 10: fetox
4097*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp	# 11: ftwotox
4098*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp	# 12: ftentox
4099*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp
4100*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp	# 14: flogn
4101*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp	# 15: flog10
4102*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp	# 16: flog2
4103*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp
4104*4882a593Smuzhiyun	long		fabs		- tbl_unsupp	# 18: fabs
4105*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp	# 19: fcosh
4106*4882a593Smuzhiyun	long		fneg		- tbl_unsupp	# 1a: fneg
4107*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp
4108*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp	# 1c: facos
4109*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp	# 1d: fcos
4110*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp	# 1e: fgetexp
4111*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp	# 1f: fgetman
4112*4882a593Smuzhiyun	long		fdiv		- tbl_unsupp	# 20: fdiv
4113*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp	# 21: fmod
4114*4882a593Smuzhiyun	long		fadd		- tbl_unsupp	# 22: fadd
4115*4882a593Smuzhiyun	long		fmul		- tbl_unsupp	# 23: fmul
4116*4882a593Smuzhiyun	long		fsgldiv		- tbl_unsupp	# 24: fsgldiv
4117*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp	# 25: frem
4118*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp	# 26: fscale
4119*4882a593Smuzhiyun	long		fsglmul		- tbl_unsupp	# 27: fsglmul
4120*4882a593Smuzhiyun	long		fsub		- tbl_unsupp	# 28: fsub
4121*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp
4122*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp
4123*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp
4124*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp
4125*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp
4126*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp
4127*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp
4128*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp	# 30: fsincos
4129*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp	# 31: fsincos
4130*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp	# 32: fsincos
4131*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp	# 33: fsincos
4132*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp	# 34: fsincos
4133*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp	# 35: fsincos
4134*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp	# 36: fsincos
4135*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp	# 37: fsincos
4136*4882a593Smuzhiyun	long		fcmp		- tbl_unsupp	# 38: fcmp
4137*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp
4138*4882a593Smuzhiyun	long		ftst		- tbl_unsupp	# 3a: ftst
4139*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp
4140*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp
4141*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp
4142*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp
4143*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp
4144*4882a593Smuzhiyun	long		fsin		- tbl_unsupp	# 40: fsmove
4145*4882a593Smuzhiyun	long		fssqrt		- tbl_unsupp	# 41: fssqrt
4146*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp
4147*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp
4148*4882a593Smuzhiyun	long		fdin		- tbl_unsupp	# 44: fdmove
4149*4882a593Smuzhiyun	long		fdsqrt		- tbl_unsupp	# 45: fdsqrt
4150*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp
4151*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp
4152*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp
4153*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp
4154*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp
4155*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp
4156*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp
4157*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp
4158*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp
4159*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp
4160*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp
4161*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp
4162*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp
4163*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp
4164*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp
4165*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp
4166*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp
4167*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp
4168*4882a593Smuzhiyun	long		fsabs		- tbl_unsupp	# 58: fsabs
4169*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp
4170*4882a593Smuzhiyun	long		fsneg		- tbl_unsupp	# 5a: fsneg
4171*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp
4172*4882a593Smuzhiyun	long		fdabs		- tbl_unsupp	# 5c: fdabs
4173*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp
4174*4882a593Smuzhiyun	long		fdneg		- tbl_unsupp	# 5e: fdneg
4175*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp
4176*4882a593Smuzhiyun	long		fsdiv		- tbl_unsupp	# 60: fsdiv
4177*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp
4178*4882a593Smuzhiyun	long		fsadd		- tbl_unsupp	# 62: fsadd
4179*4882a593Smuzhiyun	long		fsmul		- tbl_unsupp	# 63: fsmul
4180*4882a593Smuzhiyun	long		fddiv		- tbl_unsupp	# 64: fddiv
4181*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp
4182*4882a593Smuzhiyun	long		fdadd		- tbl_unsupp	# 66: fdadd
4183*4882a593Smuzhiyun	long		fdmul		- tbl_unsupp	# 67: fdmul
4184*4882a593Smuzhiyun	long		fssub		- tbl_unsupp	# 68: fssub
4185*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp
4186*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp
4187*4882a593Smuzhiyun	long		tbl_unsupp	- tbl_unsupp
4188*4882a593Smuzhiyun	long		fdsub		- tbl_unsupp	# 6c: fdsub
4189*4882a593Smuzhiyun
4190*4882a593Smuzhiyun#################################################
4191*4882a593Smuzhiyun# Add this here so non-fp modules can compile.
4192*4882a593Smuzhiyun# (smovcr is called from fpsp_inex.)
4193*4882a593Smuzhiyun	global		smovcr
4194*4882a593Smuzhiyunsmovcr:
4195*4882a593Smuzhiyun	bra.b		smovcr
4196*4882a593Smuzhiyun
4197*4882a593Smuzhiyun#########################################################################
4198*4882a593Smuzhiyun# XDEF ****************************************************************	#
4199*4882a593Smuzhiyun#	fmovm_dynamic(): emulate "fmovm" dynamic instruction		#
4200*4882a593Smuzhiyun#									#
4201*4882a593Smuzhiyun# XREF ****************************************************************	#
4202*4882a593Smuzhiyun#	fetch_dreg() - fetch data register				#
4203*4882a593Smuzhiyun#	{i,d,}mem_read() - fetch data from memory			#
4204*4882a593Smuzhiyun#	_mem_write() - write data to memory				#
4205*4882a593Smuzhiyun#	iea_iacc() - instruction memory access error occurred		#
4206*4882a593Smuzhiyun#	iea_dacc() - data memory access error occurred			#
4207*4882a593Smuzhiyun#	restore() - restore An index regs if access error occurred	#
4208*4882a593Smuzhiyun#									#
4209*4882a593Smuzhiyun# INPUT ***************************************************************	#
4210*4882a593Smuzhiyun#	None								#
4211*4882a593Smuzhiyun#									#
4212*4882a593Smuzhiyun# OUTPUT **************************************************************	#
4213*4882a593Smuzhiyun#	If instr is "fmovm Dn,-(A7)" from supervisor mode,		#
4214*4882a593Smuzhiyun#		d0 = size of dump					#
4215*4882a593Smuzhiyun#		d1 = Dn							#
4216*4882a593Smuzhiyun#	Else if instruction access error,				#
4217*4882a593Smuzhiyun#		d0 = FSLW						#
4218*4882a593Smuzhiyun#	Else if data access error,					#
4219*4882a593Smuzhiyun#		d0 = FSLW						#
4220*4882a593Smuzhiyun#		a0 = address of fault					#
4221*4882a593Smuzhiyun#	Else								#
4222*4882a593Smuzhiyun#		none.							#
4223*4882a593Smuzhiyun#									#
4224*4882a593Smuzhiyun# ALGORITHM ***********************************************************	#
4225*4882a593Smuzhiyun#	The effective address must be calculated since this is entered	#
4226*4882a593Smuzhiyun# from an "Unimplemented Effective Address" exception handler. So, we	#
4227*4882a593Smuzhiyun# have our own fcalc_ea() routine here. If an access error is flagged	#
4228*4882a593Smuzhiyun# by a _{i,d,}mem_read() call, we must exit through the special		#
4229*4882a593Smuzhiyun# handler.								#
4230*4882a593Smuzhiyun#	The data register is determined and its value loaded to get the	#
4231*4882a593Smuzhiyun# string of FP registers affected. This value is used as an index into	#
4232*4882a593Smuzhiyun# a lookup table such that we can determine the number of bytes		#
4233*4882a593Smuzhiyun# involved.								#
4234*4882a593Smuzhiyun#	If the instruction is "fmovm.x <ea>,Dn", a _mem_read() is used	#
4235*4882a593Smuzhiyun# to read in all FP values. Again, _mem_read() may fail and require a	#
4236*4882a593Smuzhiyun# special exit.								#
4237*4882a593Smuzhiyun#	If the instruction is "fmovm.x DN,<ea>", a _mem_write() is used	#
4238*4882a593Smuzhiyun# to write all FP values. _mem_write() may also fail.			#
4239*4882a593Smuzhiyun#	If the instruction is "fmovm.x DN,-(a7)" from supervisor mode,	#
4240*4882a593Smuzhiyun# then we return the size of the dump and the string to the caller	#
4241*4882a593Smuzhiyun# so that the move can occur outside of this routine. This special	#
4242*4882a593Smuzhiyun# case is required so that moves to the system stack are handled	#
4243*4882a593Smuzhiyun# correctly.								#
4244*4882a593Smuzhiyun#									#
4245*4882a593Smuzhiyun# DYNAMIC:								#
4246*4882a593Smuzhiyun#	fmovm.x	dn, <ea>						#
4247*4882a593Smuzhiyun#	fmovm.x	<ea>, dn						#
4248*4882a593Smuzhiyun#									#
4249*4882a593Smuzhiyun#	      <WORD 1>		      <WORD2>				#
4250*4882a593Smuzhiyun#	1111 0010 00 |<ea>|	11@& 1000 0$$$ 0000			#
4251*4882a593Smuzhiyun#									#
4252*4882a593Smuzhiyun#	& = (0): predecrement addressing mode				#
4253*4882a593Smuzhiyun#	    (1): postincrement or control addressing mode		#
4254*4882a593Smuzhiyun#	@ = (0): move listed regs from memory to the FPU		#
4255*4882a593Smuzhiyun#	    (1): move listed regs from the FPU to memory		#
4256*4882a593Smuzhiyun#	$$$    : index of data register holding reg select mask		#
4257*4882a593Smuzhiyun#									#
4258*4882a593Smuzhiyun# NOTES:								#
4259*4882a593Smuzhiyun#	If the data register holds a zero, then the			#
4260*4882a593Smuzhiyun#	instruction is a nop.						#
4261*4882a593Smuzhiyun#									#
4262*4882a593Smuzhiyun#########################################################################
4263*4882a593Smuzhiyun
4264*4882a593Smuzhiyun	global		fmovm_dynamic
4265*4882a593Smuzhiyunfmovm_dynamic:
4266*4882a593Smuzhiyun
4267*4882a593Smuzhiyun# extract the data register in which the bit string resides...
4268*4882a593Smuzhiyun	mov.b		1+EXC_EXTWORD(%a6),%d1	# fetch extword
4269*4882a593Smuzhiyun	andi.w		&0x70,%d1		# extract reg bits
4270*4882a593Smuzhiyun	lsr.b		&0x4,%d1		# shift into lo bits
4271*4882a593Smuzhiyun
4272*4882a593Smuzhiyun# fetch the bit string into d0...
4273*4882a593Smuzhiyun	bsr.l		fetch_dreg		# fetch reg string
4274*4882a593Smuzhiyun
4275*4882a593Smuzhiyun	andi.l		&0x000000ff,%d0		# keep only lo byte
4276*4882a593Smuzhiyun
4277*4882a593Smuzhiyun	mov.l		%d0,-(%sp)		# save strg
4278*4882a593Smuzhiyun	mov.b		(tbl_fmovm_size.w,%pc,%d0),%d0
4279*4882a593Smuzhiyun	mov.l		%d0,-(%sp)		# save size
4280*4882a593Smuzhiyun	bsr.l		fmovm_calc_ea		# calculate <ea>
4281*4882a593Smuzhiyun	mov.l		(%sp)+,%d0		# restore size
4282*4882a593Smuzhiyun	mov.l		(%sp)+,%d1		# restore strg
4283*4882a593Smuzhiyun
4284*4882a593Smuzhiyun# if the bit string is a zero, then the operation is a no-op
4285*4882a593Smuzhiyun# but, make sure that we've calculated ea and advanced the opword pointer
4286*4882a593Smuzhiyun	beq.w		fmovm_data_done
4287*4882a593Smuzhiyun
4288*4882a593Smuzhiyun# separate move ins from move outs...
4289*4882a593Smuzhiyun	btst		&0x5,EXC_EXTWORD(%a6)	# is it a move in or out?
4290*4882a593Smuzhiyun	beq.w		fmovm_data_in		# it's a move out
4291*4882a593Smuzhiyun
4292*4882a593Smuzhiyun#############
4293*4882a593Smuzhiyun# MOVE OUT: #
4294*4882a593Smuzhiyun#############
4295*4882a593Smuzhiyunfmovm_data_out:
4296*4882a593Smuzhiyun	btst		&0x4,EXC_EXTWORD(%a6)	# control or predecrement?
4297*4882a593Smuzhiyun	bne.w		fmovm_out_ctrl		# control
4298*4882a593Smuzhiyun
4299*4882a593Smuzhiyun############################
4300*4882a593Smuzhiyunfmovm_out_predec:
4301*4882a593Smuzhiyun# for predecrement mode, the bit string is the opposite of both control
4302*4882a593Smuzhiyun# operations and postincrement mode. (bit7 = FP7 ... bit0 = FP0)
4303*4882a593Smuzhiyun# here, we convert it to be just like the others...
4304*4882a593Smuzhiyun	mov.b		(tbl_fmovm_convert.w,%pc,%d1.w*1),%d1
4305*4882a593Smuzhiyun
4306*4882a593Smuzhiyun	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode?
4307*4882a593Smuzhiyun	beq.b		fmovm_out_ctrl		# user
4308*4882a593Smuzhiyun
4309*4882a593Smuzhiyunfmovm_out_predec_s:
4310*4882a593Smuzhiyun	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
4311*4882a593Smuzhiyun	bne.b		fmovm_out_ctrl
4312*4882a593Smuzhiyun
4313*4882a593Smuzhiyun# the operation was unfortunately an: fmovm.x dn,-(sp)
4314*4882a593Smuzhiyun# called from supervisor mode.
4315*4882a593Smuzhiyun# we're also passing "size" and "strg" back to the calling routine
4316*4882a593Smuzhiyun	rts
4317*4882a593Smuzhiyun
4318*4882a593Smuzhiyun############################
4319*4882a593Smuzhiyunfmovm_out_ctrl:
4320*4882a593Smuzhiyun	mov.l		%a0,%a1			# move <ea> to a1
4321*4882a593Smuzhiyun
4322*4882a593Smuzhiyun	sub.l		%d0,%sp			# subtract size of dump
4323*4882a593Smuzhiyun	lea		(%sp),%a0
4324*4882a593Smuzhiyun
4325*4882a593Smuzhiyun	tst.b		%d1			# should FP0 be moved?
4326*4882a593Smuzhiyun	bpl.b		fmovm_out_ctrl_fp1	# no
4327*4882a593Smuzhiyun
4328*4882a593Smuzhiyun	mov.l		0x0+EXC_FP0(%a6),(%a0)+	# yes
4329*4882a593Smuzhiyun	mov.l		0x4+EXC_FP0(%a6),(%a0)+
4330*4882a593Smuzhiyun	mov.l		0x8+EXC_FP0(%a6),(%a0)+
4331*4882a593Smuzhiyun
4332*4882a593Smuzhiyunfmovm_out_ctrl_fp1:
4333*4882a593Smuzhiyun	lsl.b		&0x1,%d1		# should FP1 be moved?
4334*4882a593Smuzhiyun	bpl.b		fmovm_out_ctrl_fp2	# no
4335*4882a593Smuzhiyun
4336*4882a593Smuzhiyun	mov.l		0x0+EXC_FP1(%a6),(%a0)+	# yes
4337*4882a593Smuzhiyun	mov.l		0x4+EXC_FP1(%a6),(%a0)+
4338*4882a593Smuzhiyun	mov.l		0x8+EXC_FP1(%a6),(%a0)+
4339*4882a593Smuzhiyun
4340*4882a593Smuzhiyunfmovm_out_ctrl_fp2:
4341*4882a593Smuzhiyun	lsl.b		&0x1,%d1		# should FP2 be moved?
4342*4882a593Smuzhiyun	bpl.b		fmovm_out_ctrl_fp3	# no
4343*4882a593Smuzhiyun
4344*4882a593Smuzhiyun	fmovm.x		&0x20,(%a0)		# yes
4345*4882a593Smuzhiyun	add.l		&0xc,%a0
4346*4882a593Smuzhiyun
4347*4882a593Smuzhiyunfmovm_out_ctrl_fp3:
4348*4882a593Smuzhiyun	lsl.b		&0x1,%d1		# should FP3 be moved?
4349*4882a593Smuzhiyun	bpl.b		fmovm_out_ctrl_fp4	# no
4350*4882a593Smuzhiyun
4351*4882a593Smuzhiyun	fmovm.x		&0x10,(%a0)		# yes
4352*4882a593Smuzhiyun	add.l		&0xc,%a0
4353*4882a593Smuzhiyun
4354*4882a593Smuzhiyunfmovm_out_ctrl_fp4:
4355*4882a593Smuzhiyun	lsl.b		&0x1,%d1		# should FP4 be moved?
4356*4882a593Smuzhiyun	bpl.b		fmovm_out_ctrl_fp5	# no
4357*4882a593Smuzhiyun
4358*4882a593Smuzhiyun	fmovm.x		&0x08,(%a0)		# yes
4359*4882a593Smuzhiyun	add.l		&0xc,%a0
4360*4882a593Smuzhiyun
4361*4882a593Smuzhiyunfmovm_out_ctrl_fp5:
4362*4882a593Smuzhiyun	lsl.b		&0x1,%d1		# should FP5 be moved?
4363*4882a593Smuzhiyun	bpl.b		fmovm_out_ctrl_fp6	# no
4364*4882a593Smuzhiyun
4365*4882a593Smuzhiyun	fmovm.x		&0x04,(%a0)		# yes
4366*4882a593Smuzhiyun	add.l		&0xc,%a0
4367*4882a593Smuzhiyun
4368*4882a593Smuzhiyunfmovm_out_ctrl_fp6:
4369*4882a593Smuzhiyun	lsl.b		&0x1,%d1		# should FP6 be moved?
4370*4882a593Smuzhiyun	bpl.b		fmovm_out_ctrl_fp7	# no
4371*4882a593Smuzhiyun
4372*4882a593Smuzhiyun	fmovm.x		&0x02,(%a0)		# yes
4373*4882a593Smuzhiyun	add.l		&0xc,%a0
4374*4882a593Smuzhiyun
4375*4882a593Smuzhiyunfmovm_out_ctrl_fp7:
4376*4882a593Smuzhiyun	lsl.b		&0x1,%d1		# should FP7 be moved?
4377*4882a593Smuzhiyun	bpl.b		fmovm_out_ctrl_done	# no
4378*4882a593Smuzhiyun
4379*4882a593Smuzhiyun	fmovm.x		&0x01,(%a0)		# yes
4380*4882a593Smuzhiyun	add.l		&0xc,%a0
4381*4882a593Smuzhiyun
4382*4882a593Smuzhiyunfmovm_out_ctrl_done:
4383*4882a593Smuzhiyun	mov.l		%a1,L_SCR1(%a6)
4384*4882a593Smuzhiyun
4385*4882a593Smuzhiyun	lea		(%sp),%a0		# pass: supervisor src
4386*4882a593Smuzhiyun	mov.l		%d0,-(%sp)		# save size
4387*4882a593Smuzhiyun	bsr.l		_dmem_write		# copy data to user mem
4388*4882a593Smuzhiyun
4389*4882a593Smuzhiyun	mov.l		(%sp)+,%d0
4390*4882a593Smuzhiyun	add.l		%d0,%sp			# clear fpreg data from stack
4391*4882a593Smuzhiyun
4392*4882a593Smuzhiyun	tst.l		%d1			# did dstore err?
4393*4882a593Smuzhiyun	bne.w		fmovm_out_err		# yes
4394*4882a593Smuzhiyun
4395*4882a593Smuzhiyun	rts
4396*4882a593Smuzhiyun
4397*4882a593Smuzhiyun############
4398*4882a593Smuzhiyun# MOVE IN: #
4399*4882a593Smuzhiyun############
4400*4882a593Smuzhiyunfmovm_data_in:
4401*4882a593Smuzhiyun	mov.l		%a0,L_SCR1(%a6)
4402*4882a593Smuzhiyun
4403*4882a593Smuzhiyun	sub.l		%d0,%sp			# make room for fpregs
4404*4882a593Smuzhiyun	lea		(%sp),%a1
4405*4882a593Smuzhiyun
4406*4882a593Smuzhiyun	mov.l		%d1,-(%sp)		# save bit string for later
4407*4882a593Smuzhiyun	mov.l		%d0,-(%sp)		# save # of bytes
4408*4882a593Smuzhiyun
4409*4882a593Smuzhiyun	bsr.l		_dmem_read		# copy data from user mem
4410*4882a593Smuzhiyun
4411*4882a593Smuzhiyun	mov.l		(%sp)+,%d0		# retrieve # of bytes
4412*4882a593Smuzhiyun
4413*4882a593Smuzhiyun	tst.l		%d1			# did dfetch fail?
4414*4882a593Smuzhiyun	bne.w		fmovm_in_err		# yes
4415*4882a593Smuzhiyun
4416*4882a593Smuzhiyun	mov.l		(%sp)+,%d1		# load bit string
4417*4882a593Smuzhiyun
4418*4882a593Smuzhiyun	lea		(%sp),%a0		# addr of stack
4419*4882a593Smuzhiyun
4420*4882a593Smuzhiyun	tst.b		%d1			# should FP0 be moved?
4421*4882a593Smuzhiyun	bpl.b		fmovm_data_in_fp1	# no
4422*4882a593Smuzhiyun
4423*4882a593Smuzhiyun	mov.l		(%a0)+,0x0+EXC_FP0(%a6)	# yes
4424*4882a593Smuzhiyun	mov.l		(%a0)+,0x4+EXC_FP0(%a6)
4425*4882a593Smuzhiyun	mov.l		(%a0)+,0x8+EXC_FP0(%a6)
4426*4882a593Smuzhiyun
4427*4882a593Smuzhiyunfmovm_data_in_fp1:
4428*4882a593Smuzhiyun	lsl.b		&0x1,%d1		# should FP1 be moved?
4429*4882a593Smuzhiyun	bpl.b		fmovm_data_in_fp2	# no
4430*4882a593Smuzhiyun
4431*4882a593Smuzhiyun	mov.l		(%a0)+,0x0+EXC_FP1(%a6)	# yes
4432*4882a593Smuzhiyun	mov.l		(%a0)+,0x4+EXC_FP1(%a6)
4433*4882a593Smuzhiyun	mov.l		(%a0)+,0x8+EXC_FP1(%a6)
4434*4882a593Smuzhiyun
4435*4882a593Smuzhiyunfmovm_data_in_fp2:
4436*4882a593Smuzhiyun	lsl.b		&0x1,%d1		# should FP2 be moved?
4437*4882a593Smuzhiyun	bpl.b		fmovm_data_in_fp3	# no
4438*4882a593Smuzhiyun
4439*4882a593Smuzhiyun	fmovm.x		(%a0)+,&0x20		# yes
4440*4882a593Smuzhiyun
4441*4882a593Smuzhiyunfmovm_data_in_fp3:
4442*4882a593Smuzhiyun	lsl.b		&0x1,%d1		# should FP3 be moved?
4443*4882a593Smuzhiyun	bpl.b		fmovm_data_in_fp4	# no
4444*4882a593Smuzhiyun
4445*4882a593Smuzhiyun	fmovm.x		(%a0)+,&0x10		# yes
4446*4882a593Smuzhiyun
4447*4882a593Smuzhiyunfmovm_data_in_fp4:
4448*4882a593Smuzhiyun	lsl.b		&0x1,%d1		# should FP4 be moved?
4449*4882a593Smuzhiyun	bpl.b		fmovm_data_in_fp5	# no
4450*4882a593Smuzhiyun
4451*4882a593Smuzhiyun	fmovm.x		(%a0)+,&0x08		# yes
4452*4882a593Smuzhiyun
4453*4882a593Smuzhiyunfmovm_data_in_fp5:
4454*4882a593Smuzhiyun	lsl.b		&0x1,%d1		# should FP5 be moved?
4455*4882a593Smuzhiyun	bpl.b		fmovm_data_in_fp6	# no
4456*4882a593Smuzhiyun
4457*4882a593Smuzhiyun	fmovm.x		(%a0)+,&0x04		# yes
4458*4882a593Smuzhiyun
4459*4882a593Smuzhiyunfmovm_data_in_fp6:
4460*4882a593Smuzhiyun	lsl.b		&0x1,%d1		# should FP6 be moved?
4461*4882a593Smuzhiyun	bpl.b		fmovm_data_in_fp7	# no
4462*4882a593Smuzhiyun
4463*4882a593Smuzhiyun	fmovm.x		(%a0)+,&0x02		# yes
4464*4882a593Smuzhiyun
4465*4882a593Smuzhiyunfmovm_data_in_fp7:
4466*4882a593Smuzhiyun	lsl.b		&0x1,%d1		# should FP7 be moved?
4467*4882a593Smuzhiyun	bpl.b		fmovm_data_in_done	# no
4468*4882a593Smuzhiyun
4469*4882a593Smuzhiyun	fmovm.x		(%a0)+,&0x01		# yes
4470*4882a593Smuzhiyun
4471*4882a593Smuzhiyunfmovm_data_in_done:
4472*4882a593Smuzhiyun	add.l		%d0,%sp			# remove fpregs from stack
4473*4882a593Smuzhiyun	rts
4474*4882a593Smuzhiyun
4475*4882a593Smuzhiyun#####################################
4476*4882a593Smuzhiyun
4477*4882a593Smuzhiyunfmovm_data_done:
4478*4882a593Smuzhiyun	rts
4479*4882a593Smuzhiyun
4480*4882a593Smuzhiyun##############################################################################
4481*4882a593Smuzhiyun
4482*4882a593Smuzhiyun#
4483*4882a593Smuzhiyun# table indexed by the operation's bit string that gives the number
4484*4882a593Smuzhiyun# of bytes that will be moved.
4485*4882a593Smuzhiyun#
4486*4882a593Smuzhiyun# number of bytes = (# of 1's in bit string) * 12(bytes/fpreg)
4487*4882a593Smuzhiyun#
4488*4882a593Smuzhiyuntbl_fmovm_size:
4489*4882a593Smuzhiyun	byte	0x00,0x0c,0x0c,0x18,0x0c,0x18,0x18,0x24
4490*4882a593Smuzhiyun	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
4491*4882a593Smuzhiyun	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
4492*4882a593Smuzhiyun	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4493*4882a593Smuzhiyun	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
4494*4882a593Smuzhiyun	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4495*4882a593Smuzhiyun	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4496*4882a593Smuzhiyun	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4497*4882a593Smuzhiyun	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
4498*4882a593Smuzhiyun	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4499*4882a593Smuzhiyun	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4500*4882a593Smuzhiyun	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4501*4882a593Smuzhiyun	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4502*4882a593Smuzhiyun	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4503*4882a593Smuzhiyun	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4504*4882a593Smuzhiyun	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
4505*4882a593Smuzhiyun	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
4506*4882a593Smuzhiyun	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4507*4882a593Smuzhiyun	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4508*4882a593Smuzhiyun	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4509*4882a593Smuzhiyun	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4510*4882a593Smuzhiyun	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4511*4882a593Smuzhiyun	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4512*4882a593Smuzhiyun	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
4513*4882a593Smuzhiyun	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4514*4882a593Smuzhiyun	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4515*4882a593Smuzhiyun	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4516*4882a593Smuzhiyun	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
4517*4882a593Smuzhiyun	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4518*4882a593Smuzhiyun	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
4519*4882a593Smuzhiyun	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
4520*4882a593Smuzhiyun	byte	0x3c,0x48,0x48,0x54,0x48,0x54,0x54,0x60
4521*4882a593Smuzhiyun
4522*4882a593Smuzhiyun#
4523*4882a593Smuzhiyun# table to convert a pre-decrement bit string into a post-increment
4524*4882a593Smuzhiyun# or control bit string.
4525*4882a593Smuzhiyun# ex:	0x00	==>	0x00
4526*4882a593Smuzhiyun#	0x01	==>	0x80
4527*4882a593Smuzhiyun#	0x02	==>	0x40
4528*4882a593Smuzhiyun#		.
4529*4882a593Smuzhiyun#		.
4530*4882a593Smuzhiyun#	0xfd	==>	0xbf
4531*4882a593Smuzhiyun#	0xfe	==>	0x7f
4532*4882a593Smuzhiyun#	0xff	==>	0xff
4533*4882a593Smuzhiyun#
4534*4882a593Smuzhiyuntbl_fmovm_convert:
4535*4882a593Smuzhiyun	byte	0x00,0x80,0x40,0xc0,0x20,0xa0,0x60,0xe0
4536*4882a593Smuzhiyun	byte	0x10,0x90,0x50,0xd0,0x30,0xb0,0x70,0xf0
4537*4882a593Smuzhiyun	byte	0x08,0x88,0x48,0xc8,0x28,0xa8,0x68,0xe8
4538*4882a593Smuzhiyun	byte	0x18,0x98,0x58,0xd8,0x38,0xb8,0x78,0xf8
4539*4882a593Smuzhiyun	byte	0x04,0x84,0x44,0xc4,0x24,0xa4,0x64,0xe4
4540*4882a593Smuzhiyun	byte	0x14,0x94,0x54,0xd4,0x34,0xb4,0x74,0xf4
4541*4882a593Smuzhiyun	byte	0x0c,0x8c,0x4c,0xcc,0x2c,0xac,0x6c,0xec
4542*4882a593Smuzhiyun	byte	0x1c,0x9c,0x5c,0xdc,0x3c,0xbc,0x7c,0xfc
4543*4882a593Smuzhiyun	byte	0x02,0x82,0x42,0xc2,0x22,0xa2,0x62,0xe2
4544*4882a593Smuzhiyun	byte	0x12,0x92,0x52,0xd2,0x32,0xb2,0x72,0xf2
4545*4882a593Smuzhiyun	byte	0x0a,0x8a,0x4a,0xca,0x2a,0xaa,0x6a,0xea
4546*4882a593Smuzhiyun	byte	0x1a,0x9a,0x5a,0xda,0x3a,0xba,0x7a,0xfa
4547*4882a593Smuzhiyun	byte	0x06,0x86,0x46,0xc6,0x26,0xa6,0x66,0xe6
4548*4882a593Smuzhiyun	byte	0x16,0x96,0x56,0xd6,0x36,0xb6,0x76,0xf6
4549*4882a593Smuzhiyun	byte	0x0e,0x8e,0x4e,0xce,0x2e,0xae,0x6e,0xee
4550*4882a593Smuzhiyun	byte	0x1e,0x9e,0x5e,0xde,0x3e,0xbe,0x7e,0xfe
4551*4882a593Smuzhiyun	byte	0x01,0x81,0x41,0xc1,0x21,0xa1,0x61,0xe1
4552*4882a593Smuzhiyun	byte	0x11,0x91,0x51,0xd1,0x31,0xb1,0x71,0xf1
4553*4882a593Smuzhiyun	byte	0x09,0x89,0x49,0xc9,0x29,0xa9,0x69,0xe9
4554*4882a593Smuzhiyun	byte	0x19,0x99,0x59,0xd9,0x39,0xb9,0x79,0xf9
4555*4882a593Smuzhiyun	byte	0x05,0x85,0x45,0xc5,0x25,0xa5,0x65,0xe5
4556*4882a593Smuzhiyun	byte	0x15,0x95,0x55,0xd5,0x35,0xb5,0x75,0xf5
4557*4882a593Smuzhiyun	byte	0x0d,0x8d,0x4d,0xcd,0x2d,0xad,0x6d,0xed
4558*4882a593Smuzhiyun	byte	0x1d,0x9d,0x5d,0xdd,0x3d,0xbd,0x7d,0xfd
4559*4882a593Smuzhiyun	byte	0x03,0x83,0x43,0xc3,0x23,0xa3,0x63,0xe3
4560*4882a593Smuzhiyun	byte	0x13,0x93,0x53,0xd3,0x33,0xb3,0x73,0xf3
4561*4882a593Smuzhiyun	byte	0x0b,0x8b,0x4b,0xcb,0x2b,0xab,0x6b,0xeb
4562*4882a593Smuzhiyun	byte	0x1b,0x9b,0x5b,0xdb,0x3b,0xbb,0x7b,0xfb
4563*4882a593Smuzhiyun	byte	0x07,0x87,0x47,0xc7,0x27,0xa7,0x67,0xe7
4564*4882a593Smuzhiyun	byte	0x17,0x97,0x57,0xd7,0x37,0xb7,0x77,0xf7
4565*4882a593Smuzhiyun	byte	0x0f,0x8f,0x4f,0xcf,0x2f,0xaf,0x6f,0xef
4566*4882a593Smuzhiyun	byte	0x1f,0x9f,0x5f,0xdf,0x3f,0xbf,0x7f,0xff
4567*4882a593Smuzhiyun
4568*4882a593Smuzhiyun	global		fmovm_calc_ea
4569*4882a593Smuzhiyun###############################################
4570*4882a593Smuzhiyun# _fmovm_calc_ea: calculate effective address #
4571*4882a593Smuzhiyun###############################################
4572*4882a593Smuzhiyunfmovm_calc_ea:
4573*4882a593Smuzhiyun	mov.l		%d0,%a0			# move # bytes to a0
4574*4882a593Smuzhiyun
4575*4882a593Smuzhiyun# currently, MODE and REG are taken from the EXC_OPWORD. this could be
4576*4882a593Smuzhiyun# easily changed if they were inputs passed in registers.
4577*4882a593Smuzhiyun	mov.w		EXC_OPWORD(%a6),%d0	# fetch opcode word
4578*4882a593Smuzhiyun	mov.w		%d0,%d1			# make a copy
4579*4882a593Smuzhiyun
4580*4882a593Smuzhiyun	andi.w		&0x3f,%d0		# extract mode field
4581*4882a593Smuzhiyun	andi.l		&0x7,%d1		# extract reg  field
4582*4882a593Smuzhiyun
4583*4882a593Smuzhiyun# jump to the corresponding function for each {MODE,REG} pair.
4584*4882a593Smuzhiyun	mov.w		(tbl_fea_mode.b,%pc,%d0.w*2),%d0 # fetch jmp distance
4585*4882a593Smuzhiyun	jmp		(tbl_fea_mode.b,%pc,%d0.w*1) # jmp to correct ea mode
4586*4882a593Smuzhiyun
4587*4882a593Smuzhiyun	swbeg		&64
4588*4882a593Smuzhiyuntbl_fea_mode:
4589*4882a593Smuzhiyun	short		tbl_fea_mode	-	tbl_fea_mode
4590*4882a593Smuzhiyun	short		tbl_fea_mode	-	tbl_fea_mode
4591*4882a593Smuzhiyun	short		tbl_fea_mode	-	tbl_fea_mode
4592*4882a593Smuzhiyun	short		tbl_fea_mode	-	tbl_fea_mode
4593*4882a593Smuzhiyun	short		tbl_fea_mode	-	tbl_fea_mode
4594*4882a593Smuzhiyun	short		tbl_fea_mode	-	tbl_fea_mode
4595*4882a593Smuzhiyun	short		tbl_fea_mode	-	tbl_fea_mode
4596*4882a593Smuzhiyun	short		tbl_fea_mode	-	tbl_fea_mode
4597*4882a593Smuzhiyun
4598*4882a593Smuzhiyun	short		tbl_fea_mode	-	tbl_fea_mode
4599*4882a593Smuzhiyun	short		tbl_fea_mode	-	tbl_fea_mode
4600*4882a593Smuzhiyun	short		tbl_fea_mode	-	tbl_fea_mode
4601*4882a593Smuzhiyun	short		tbl_fea_mode	-	tbl_fea_mode
4602*4882a593Smuzhiyun	short		tbl_fea_mode	-	tbl_fea_mode
4603*4882a593Smuzhiyun	short		tbl_fea_mode	-	tbl_fea_mode
4604*4882a593Smuzhiyun	short		tbl_fea_mode	-	tbl_fea_mode
4605*4882a593Smuzhiyun	short		tbl_fea_mode	-	tbl_fea_mode
4606*4882a593Smuzhiyun
4607*4882a593Smuzhiyun	short		faddr_ind_a0	-	tbl_fea_mode
4608*4882a593Smuzhiyun	short		faddr_ind_a1	-	tbl_fea_mode
4609*4882a593Smuzhiyun	short		faddr_ind_a2	-	tbl_fea_mode
4610*4882a593Smuzhiyun	short		faddr_ind_a3	-	tbl_fea_mode
4611*4882a593Smuzhiyun	short		faddr_ind_a4	-	tbl_fea_mode
4612*4882a593Smuzhiyun	short		faddr_ind_a5	-	tbl_fea_mode
4613*4882a593Smuzhiyun	short		faddr_ind_a6	-	tbl_fea_mode
4614*4882a593Smuzhiyun	short		faddr_ind_a7	-	tbl_fea_mode
4615*4882a593Smuzhiyun
4616*4882a593Smuzhiyun	short		faddr_ind_p_a0	-	tbl_fea_mode
4617*4882a593Smuzhiyun	short		faddr_ind_p_a1	-	tbl_fea_mode
4618*4882a593Smuzhiyun	short		faddr_ind_p_a2	-	tbl_fea_mode
4619*4882a593Smuzhiyun	short		faddr_ind_p_a3	-	tbl_fea_mode
4620*4882a593Smuzhiyun	short		faddr_ind_p_a4	-	tbl_fea_mode
4621*4882a593Smuzhiyun	short		faddr_ind_p_a5	-	tbl_fea_mode
4622*4882a593Smuzhiyun	short		faddr_ind_p_a6	-	tbl_fea_mode
4623*4882a593Smuzhiyun	short		faddr_ind_p_a7	-	tbl_fea_mode
4624*4882a593Smuzhiyun
4625*4882a593Smuzhiyun	short		faddr_ind_m_a0	-	tbl_fea_mode
4626*4882a593Smuzhiyun	short		faddr_ind_m_a1	-	tbl_fea_mode
4627*4882a593Smuzhiyun	short		faddr_ind_m_a2	-	tbl_fea_mode
4628*4882a593Smuzhiyun	short		faddr_ind_m_a3	-	tbl_fea_mode
4629*4882a593Smuzhiyun	short		faddr_ind_m_a4	-	tbl_fea_mode
4630*4882a593Smuzhiyun	short		faddr_ind_m_a5	-	tbl_fea_mode
4631*4882a593Smuzhiyun	short		faddr_ind_m_a6	-	tbl_fea_mode
4632*4882a593Smuzhiyun	short		faddr_ind_m_a7	-	tbl_fea_mode
4633*4882a593Smuzhiyun
4634*4882a593Smuzhiyun	short		faddr_ind_disp_a0	-	tbl_fea_mode
4635*4882a593Smuzhiyun	short		faddr_ind_disp_a1	-	tbl_fea_mode
4636*4882a593Smuzhiyun	short		faddr_ind_disp_a2	-	tbl_fea_mode
4637*4882a593Smuzhiyun	short		faddr_ind_disp_a3	-	tbl_fea_mode
4638*4882a593Smuzhiyun	short		faddr_ind_disp_a4	-	tbl_fea_mode
4639*4882a593Smuzhiyun	short		faddr_ind_disp_a5	-	tbl_fea_mode
4640*4882a593Smuzhiyun	short		faddr_ind_disp_a6	-	tbl_fea_mode
4641*4882a593Smuzhiyun	short		faddr_ind_disp_a7	-	tbl_fea_mode
4642*4882a593Smuzhiyun
4643*4882a593Smuzhiyun	short		faddr_ind_ext	-	tbl_fea_mode
4644*4882a593Smuzhiyun	short		faddr_ind_ext	-	tbl_fea_mode
4645*4882a593Smuzhiyun	short		faddr_ind_ext	-	tbl_fea_mode
4646*4882a593Smuzhiyun	short		faddr_ind_ext	-	tbl_fea_mode
4647*4882a593Smuzhiyun	short		faddr_ind_ext	-	tbl_fea_mode
4648*4882a593Smuzhiyun	short		faddr_ind_ext	-	tbl_fea_mode
4649*4882a593Smuzhiyun	short		faddr_ind_ext	-	tbl_fea_mode
4650*4882a593Smuzhiyun	short		faddr_ind_ext	-	tbl_fea_mode
4651*4882a593Smuzhiyun
4652*4882a593Smuzhiyun	short		fabs_short	-	tbl_fea_mode
4653*4882a593Smuzhiyun	short		fabs_long	-	tbl_fea_mode
4654*4882a593Smuzhiyun	short		fpc_ind		-	tbl_fea_mode
4655*4882a593Smuzhiyun	short		fpc_ind_ext	-	tbl_fea_mode
4656*4882a593Smuzhiyun	short		tbl_fea_mode	-	tbl_fea_mode
4657*4882a593Smuzhiyun	short		tbl_fea_mode	-	tbl_fea_mode
4658*4882a593Smuzhiyun	short		tbl_fea_mode	-	tbl_fea_mode
4659*4882a593Smuzhiyun	short		tbl_fea_mode	-	tbl_fea_mode
4660*4882a593Smuzhiyun
4661*4882a593Smuzhiyun###################################
4662*4882a593Smuzhiyun# Address register indirect: (An) #
4663*4882a593Smuzhiyun###################################
4664*4882a593Smuzhiyunfaddr_ind_a0:
4665*4882a593Smuzhiyun	mov.l		EXC_DREGS+0x8(%a6),%a0	# Get current a0
4666*4882a593Smuzhiyun	rts
4667*4882a593Smuzhiyun
4668*4882a593Smuzhiyunfaddr_ind_a1:
4669*4882a593Smuzhiyun	mov.l		EXC_DREGS+0xc(%a6),%a0	# Get current a1
4670*4882a593Smuzhiyun	rts
4671*4882a593Smuzhiyun
4672*4882a593Smuzhiyunfaddr_ind_a2:
4673*4882a593Smuzhiyun	mov.l		%a2,%a0			# Get current a2
4674*4882a593Smuzhiyun	rts
4675*4882a593Smuzhiyun
4676*4882a593Smuzhiyunfaddr_ind_a3:
4677*4882a593Smuzhiyun	mov.l		%a3,%a0			# Get current a3
4678*4882a593Smuzhiyun	rts
4679*4882a593Smuzhiyun
4680*4882a593Smuzhiyunfaddr_ind_a4:
4681*4882a593Smuzhiyun	mov.l		%a4,%a0			# Get current a4
4682*4882a593Smuzhiyun	rts
4683*4882a593Smuzhiyun
4684*4882a593Smuzhiyunfaddr_ind_a5:
4685*4882a593Smuzhiyun	mov.l		%a5,%a0			# Get current a5
4686*4882a593Smuzhiyun	rts
4687*4882a593Smuzhiyun
4688*4882a593Smuzhiyunfaddr_ind_a6:
4689*4882a593Smuzhiyun	mov.l		(%a6),%a0		# Get current a6
4690*4882a593Smuzhiyun	rts
4691*4882a593Smuzhiyun
4692*4882a593Smuzhiyunfaddr_ind_a7:
4693*4882a593Smuzhiyun	mov.l		EXC_A7(%a6),%a0		# Get current a7
4694*4882a593Smuzhiyun	rts
4695*4882a593Smuzhiyun
4696*4882a593Smuzhiyun#####################################################
4697*4882a593Smuzhiyun# Address register indirect w/ postincrement: (An)+ #
4698*4882a593Smuzhiyun#####################################################
4699*4882a593Smuzhiyunfaddr_ind_p_a0:
4700*4882a593Smuzhiyun	mov.l		EXC_DREGS+0x8(%a6),%d0	# Get current a0
4701*4882a593Smuzhiyun	mov.l		%d0,%d1
4702*4882a593Smuzhiyun	add.l		%a0,%d1			# Increment
4703*4882a593Smuzhiyun	mov.l		%d1,EXC_DREGS+0x8(%a6)	# Save incr value
4704*4882a593Smuzhiyun	mov.l		%d0,%a0
4705*4882a593Smuzhiyun	rts
4706*4882a593Smuzhiyun
4707*4882a593Smuzhiyunfaddr_ind_p_a1:
4708*4882a593Smuzhiyun	mov.l		EXC_DREGS+0xc(%a6),%d0	# Get current a1
4709*4882a593Smuzhiyun	mov.l		%d0,%d1
4710*4882a593Smuzhiyun	add.l		%a0,%d1			# Increment
4711*4882a593Smuzhiyun	mov.l		%d1,EXC_DREGS+0xc(%a6)	# Save incr value
4712*4882a593Smuzhiyun	mov.l		%d0,%a0
4713*4882a593Smuzhiyun	rts
4714*4882a593Smuzhiyun
4715*4882a593Smuzhiyunfaddr_ind_p_a2:
4716*4882a593Smuzhiyun	mov.l		%a2,%d0			# Get current a2
4717*4882a593Smuzhiyun	mov.l		%d0,%d1
4718*4882a593Smuzhiyun	add.l		%a0,%d1			# Increment
4719*4882a593Smuzhiyun	mov.l		%d1,%a2			# Save incr value
4720*4882a593Smuzhiyun	mov.l		%d0,%a0
4721*4882a593Smuzhiyun	rts
4722*4882a593Smuzhiyun
4723*4882a593Smuzhiyunfaddr_ind_p_a3:
4724*4882a593Smuzhiyun	mov.l		%a3,%d0			# Get current a3
4725*4882a593Smuzhiyun	mov.l		%d0,%d1
4726*4882a593Smuzhiyun	add.l		%a0,%d1			# Increment
4727*4882a593Smuzhiyun	mov.l		%d1,%a3			# Save incr value
4728*4882a593Smuzhiyun	mov.l		%d0,%a0
4729*4882a593Smuzhiyun	rts
4730*4882a593Smuzhiyun
4731*4882a593Smuzhiyunfaddr_ind_p_a4:
4732*4882a593Smuzhiyun	mov.l		%a4,%d0			# Get current a4
4733*4882a593Smuzhiyun	mov.l		%d0,%d1
4734*4882a593Smuzhiyun	add.l		%a0,%d1			# Increment
4735*4882a593Smuzhiyun	mov.l		%d1,%a4			# Save incr value
4736*4882a593Smuzhiyun	mov.l		%d0,%a0
4737*4882a593Smuzhiyun	rts
4738*4882a593Smuzhiyun
4739*4882a593Smuzhiyunfaddr_ind_p_a5:
4740*4882a593Smuzhiyun	mov.l		%a5,%d0			# Get current a5
4741*4882a593Smuzhiyun	mov.l		%d0,%d1
4742*4882a593Smuzhiyun	add.l		%a0,%d1			# Increment
4743*4882a593Smuzhiyun	mov.l		%d1,%a5			# Save incr value
4744*4882a593Smuzhiyun	mov.l		%d0,%a0
4745*4882a593Smuzhiyun	rts
4746*4882a593Smuzhiyun
4747*4882a593Smuzhiyunfaddr_ind_p_a6:
4748*4882a593Smuzhiyun	mov.l		(%a6),%d0		# Get current a6
4749*4882a593Smuzhiyun	mov.l		%d0,%d1
4750*4882a593Smuzhiyun	add.l		%a0,%d1			# Increment
4751*4882a593Smuzhiyun	mov.l		%d1,(%a6)		# Save incr value
4752*4882a593Smuzhiyun	mov.l		%d0,%a0
4753*4882a593Smuzhiyun	rts
4754*4882a593Smuzhiyun
4755*4882a593Smuzhiyunfaddr_ind_p_a7:
4756*4882a593Smuzhiyun	mov.b		&mia7_flg,SPCOND_FLG(%a6) # set "special case" flag
4757*4882a593Smuzhiyun
4758*4882a593Smuzhiyun	mov.l		EXC_A7(%a6),%d0		# Get current a7
4759*4882a593Smuzhiyun	mov.l		%d0,%d1
4760*4882a593Smuzhiyun	add.l		%a0,%d1			# Increment
4761*4882a593Smuzhiyun	mov.l		%d1,EXC_A7(%a6)		# Save incr value
4762*4882a593Smuzhiyun	mov.l		%d0,%a0
4763*4882a593Smuzhiyun	rts
4764*4882a593Smuzhiyun
4765*4882a593Smuzhiyun####################################################
4766*4882a593Smuzhiyun# Address register indirect w/ predecrement: -(An) #
4767*4882a593Smuzhiyun####################################################
4768*4882a593Smuzhiyunfaddr_ind_m_a0:
4769*4882a593Smuzhiyun	mov.l		EXC_DREGS+0x8(%a6),%d0	# Get current a0
4770*4882a593Smuzhiyun	sub.l		%a0,%d0			# Decrement
4771*4882a593Smuzhiyun	mov.l		%d0,EXC_DREGS+0x8(%a6)	# Save decr value
4772*4882a593Smuzhiyun	mov.l		%d0,%a0
4773*4882a593Smuzhiyun	rts
4774*4882a593Smuzhiyun
4775*4882a593Smuzhiyunfaddr_ind_m_a1:
4776*4882a593Smuzhiyun	mov.l		EXC_DREGS+0xc(%a6),%d0	# Get current a1
4777*4882a593Smuzhiyun	sub.l		%a0,%d0			# Decrement
4778*4882a593Smuzhiyun	mov.l		%d0,EXC_DREGS+0xc(%a6)	# Save decr value
4779*4882a593Smuzhiyun	mov.l		%d0,%a0
4780*4882a593Smuzhiyun	rts
4781*4882a593Smuzhiyun
4782*4882a593Smuzhiyunfaddr_ind_m_a2:
4783*4882a593Smuzhiyun	mov.l		%a2,%d0			# Get current a2
4784*4882a593Smuzhiyun	sub.l		%a0,%d0			# Decrement
4785*4882a593Smuzhiyun	mov.l		%d0,%a2			# Save decr value
4786*4882a593Smuzhiyun	mov.l		%d0,%a0
4787*4882a593Smuzhiyun	rts
4788*4882a593Smuzhiyun
4789*4882a593Smuzhiyunfaddr_ind_m_a3:
4790*4882a593Smuzhiyun	mov.l		%a3,%d0			# Get current a3
4791*4882a593Smuzhiyun	sub.l		%a0,%d0			# Decrement
4792*4882a593Smuzhiyun	mov.l		%d0,%a3			# Save decr value
4793*4882a593Smuzhiyun	mov.l		%d0,%a0
4794*4882a593Smuzhiyun	rts
4795*4882a593Smuzhiyun
4796*4882a593Smuzhiyunfaddr_ind_m_a4:
4797*4882a593Smuzhiyun	mov.l		%a4,%d0			# Get current a4
4798*4882a593Smuzhiyun	sub.l		%a0,%d0			# Decrement
4799*4882a593Smuzhiyun	mov.l		%d0,%a4			# Save decr value
4800*4882a593Smuzhiyun	mov.l		%d0,%a0
4801*4882a593Smuzhiyun	rts
4802*4882a593Smuzhiyun
4803*4882a593Smuzhiyunfaddr_ind_m_a5:
4804*4882a593Smuzhiyun	mov.l		%a5,%d0			# Get current a5
4805*4882a593Smuzhiyun	sub.l		%a0,%d0			# Decrement
4806*4882a593Smuzhiyun	mov.l		%d0,%a5			# Save decr value
4807*4882a593Smuzhiyun	mov.l		%d0,%a0
4808*4882a593Smuzhiyun	rts
4809*4882a593Smuzhiyun
4810*4882a593Smuzhiyunfaddr_ind_m_a6:
4811*4882a593Smuzhiyun	mov.l		(%a6),%d0		# Get current a6
4812*4882a593Smuzhiyun	sub.l		%a0,%d0			# Decrement
4813*4882a593Smuzhiyun	mov.l		%d0,(%a6)		# Save decr value
4814*4882a593Smuzhiyun	mov.l		%d0,%a0
4815*4882a593Smuzhiyun	rts
4816*4882a593Smuzhiyun
4817*4882a593Smuzhiyunfaddr_ind_m_a7:
4818*4882a593Smuzhiyun	mov.b		&mda7_flg,SPCOND_FLG(%a6) # set "special case" flag
4819*4882a593Smuzhiyun
4820*4882a593Smuzhiyun	mov.l		EXC_A7(%a6),%d0		# Get current a7
4821*4882a593Smuzhiyun	sub.l		%a0,%d0			# Decrement
4822*4882a593Smuzhiyun	mov.l		%d0,EXC_A7(%a6)		# Save decr value
4823*4882a593Smuzhiyun	mov.l		%d0,%a0
4824*4882a593Smuzhiyun	rts
4825*4882a593Smuzhiyun
4826*4882a593Smuzhiyun########################################################
4827*4882a593Smuzhiyun# Address register indirect w/ displacement: (d16, An) #
4828*4882a593Smuzhiyun########################################################
4829*4882a593Smuzhiyunfaddr_ind_disp_a0:
4830*4882a593Smuzhiyun	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
4831*4882a593Smuzhiyun	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
4832*4882a593Smuzhiyun	bsr.l		_imem_read_word
4833*4882a593Smuzhiyun
4834*4882a593Smuzhiyun	tst.l		%d1			# did ifetch fail?
4835*4882a593Smuzhiyun	bne.l		iea_iacc		# yes
4836*4882a593Smuzhiyun
4837*4882a593Smuzhiyun	mov.w		%d0,%a0			# sign extend displacement
4838*4882a593Smuzhiyun
4839*4882a593Smuzhiyun	add.l		EXC_DREGS+0x8(%a6),%a0	# a0 + d16
4840*4882a593Smuzhiyun	rts
4841*4882a593Smuzhiyun
4842*4882a593Smuzhiyunfaddr_ind_disp_a1:
4843*4882a593Smuzhiyun	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
4844*4882a593Smuzhiyun	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
4845*4882a593Smuzhiyun	bsr.l		_imem_read_word
4846*4882a593Smuzhiyun
4847*4882a593Smuzhiyun	tst.l		%d1			# did ifetch fail?
4848*4882a593Smuzhiyun	bne.l		iea_iacc		# yes
4849*4882a593Smuzhiyun
4850*4882a593Smuzhiyun	mov.w		%d0,%a0			# sign extend displacement
4851*4882a593Smuzhiyun
4852*4882a593Smuzhiyun	add.l		EXC_DREGS+0xc(%a6),%a0	# a1 + d16
4853*4882a593Smuzhiyun	rts
4854*4882a593Smuzhiyun
4855*4882a593Smuzhiyunfaddr_ind_disp_a2:
4856*4882a593Smuzhiyun	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
4857*4882a593Smuzhiyun	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
4858*4882a593Smuzhiyun	bsr.l		_imem_read_word
4859*4882a593Smuzhiyun
4860*4882a593Smuzhiyun	tst.l		%d1			# did ifetch fail?
4861*4882a593Smuzhiyun	bne.l		iea_iacc		# yes
4862*4882a593Smuzhiyun
4863*4882a593Smuzhiyun	mov.w		%d0,%a0			# sign extend displacement
4864*4882a593Smuzhiyun
4865*4882a593Smuzhiyun	add.l		%a2,%a0			# a2 + d16
4866*4882a593Smuzhiyun	rts
4867*4882a593Smuzhiyun
4868*4882a593Smuzhiyunfaddr_ind_disp_a3:
4869*4882a593Smuzhiyun	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
4870*4882a593Smuzhiyun	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
4871*4882a593Smuzhiyun	bsr.l		_imem_read_word
4872*4882a593Smuzhiyun
4873*4882a593Smuzhiyun	tst.l		%d1			# did ifetch fail?
4874*4882a593Smuzhiyun	bne.l		iea_iacc		# yes
4875*4882a593Smuzhiyun
4876*4882a593Smuzhiyun	mov.w		%d0,%a0			# sign extend displacement
4877*4882a593Smuzhiyun
4878*4882a593Smuzhiyun	add.l		%a3,%a0			# a3 + d16
4879*4882a593Smuzhiyun	rts
4880*4882a593Smuzhiyun
4881*4882a593Smuzhiyunfaddr_ind_disp_a4:
4882*4882a593Smuzhiyun	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
4883*4882a593Smuzhiyun	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
4884*4882a593Smuzhiyun	bsr.l		_imem_read_word
4885*4882a593Smuzhiyun
4886*4882a593Smuzhiyun	tst.l		%d1			# did ifetch fail?
4887*4882a593Smuzhiyun	bne.l		iea_iacc		# yes
4888*4882a593Smuzhiyun
4889*4882a593Smuzhiyun	mov.w		%d0,%a0			# sign extend displacement
4890*4882a593Smuzhiyun
4891*4882a593Smuzhiyun	add.l		%a4,%a0			# a4 + d16
4892*4882a593Smuzhiyun	rts
4893*4882a593Smuzhiyun
4894*4882a593Smuzhiyunfaddr_ind_disp_a5:
4895*4882a593Smuzhiyun	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
4896*4882a593Smuzhiyun	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
4897*4882a593Smuzhiyun	bsr.l		_imem_read_word
4898*4882a593Smuzhiyun
4899*4882a593Smuzhiyun	tst.l		%d1			# did ifetch fail?
4900*4882a593Smuzhiyun	bne.l		iea_iacc		# yes
4901*4882a593Smuzhiyun
4902*4882a593Smuzhiyun	mov.w		%d0,%a0			# sign extend displacement
4903*4882a593Smuzhiyun
4904*4882a593Smuzhiyun	add.l		%a5,%a0			# a5 + d16
4905*4882a593Smuzhiyun	rts
4906*4882a593Smuzhiyun
4907*4882a593Smuzhiyunfaddr_ind_disp_a6:
4908*4882a593Smuzhiyun	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
4909*4882a593Smuzhiyun	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
4910*4882a593Smuzhiyun	bsr.l		_imem_read_word
4911*4882a593Smuzhiyun
4912*4882a593Smuzhiyun	tst.l		%d1			# did ifetch fail?
4913*4882a593Smuzhiyun	bne.l		iea_iacc		# yes
4914*4882a593Smuzhiyun
4915*4882a593Smuzhiyun	mov.w		%d0,%a0			# sign extend displacement
4916*4882a593Smuzhiyun
4917*4882a593Smuzhiyun	add.l		(%a6),%a0		# a6 + d16
4918*4882a593Smuzhiyun	rts
4919*4882a593Smuzhiyun
4920*4882a593Smuzhiyunfaddr_ind_disp_a7:
4921*4882a593Smuzhiyun	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
4922*4882a593Smuzhiyun	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
4923*4882a593Smuzhiyun	bsr.l		_imem_read_word
4924*4882a593Smuzhiyun
4925*4882a593Smuzhiyun	tst.l		%d1			# did ifetch fail?
4926*4882a593Smuzhiyun	bne.l		iea_iacc		# yes
4927*4882a593Smuzhiyun
4928*4882a593Smuzhiyun	mov.w		%d0,%a0			# sign extend displacement
4929*4882a593Smuzhiyun
4930*4882a593Smuzhiyun	add.l		EXC_A7(%a6),%a0		# a7 + d16
4931*4882a593Smuzhiyun	rts
4932*4882a593Smuzhiyun
4933*4882a593Smuzhiyun########################################################################
4934*4882a593Smuzhiyun# Address register indirect w/ index(8-bit displacement): (d8, An, Xn) #
4935*4882a593Smuzhiyun#    "       "         "    w/   "  (base displacement): (bd, An, Xn)  #
4936*4882a593Smuzhiyun# Memory indirect postindexed: ([bd, An], Xn, od)		       #
4937*4882a593Smuzhiyun# Memory indirect preindexed: ([bd, An, Xn], od)		       #
4938*4882a593Smuzhiyun########################################################################
4939*4882a593Smuzhiyunfaddr_ind_ext:
4940*4882a593Smuzhiyun	addq.l		&0x8,%d1
4941*4882a593Smuzhiyun	bsr.l		fetch_dreg		# fetch base areg
4942*4882a593Smuzhiyun	mov.l		%d0,-(%sp)
4943*4882a593Smuzhiyun
4944*4882a593Smuzhiyun	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
4945*4882a593Smuzhiyun	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
4946*4882a593Smuzhiyun	bsr.l		_imem_read_word		# fetch extword in d0
4947*4882a593Smuzhiyun
4948*4882a593Smuzhiyun	tst.l		%d1			# did ifetch fail?
4949*4882a593Smuzhiyun	bne.l		iea_iacc		# yes
4950*4882a593Smuzhiyun
4951*4882a593Smuzhiyun	mov.l		(%sp)+,%a0
4952*4882a593Smuzhiyun
4953*4882a593Smuzhiyun	btst		&0x8,%d0
4954*4882a593Smuzhiyun	bne.w		fcalc_mem_ind
4955*4882a593Smuzhiyun
4956*4882a593Smuzhiyun	mov.l		%d0,L_SCR1(%a6)		# hold opword
4957*4882a593Smuzhiyun
4958*4882a593Smuzhiyun	mov.l		%d0,%d1
4959*4882a593Smuzhiyun	rol.w		&0x4,%d1
4960*4882a593Smuzhiyun	andi.w		&0xf,%d1		# extract index regno
4961*4882a593Smuzhiyun
4962*4882a593Smuzhiyun# count on fetch_dreg() not to alter a0...
4963*4882a593Smuzhiyun	bsr.l		fetch_dreg		# fetch index
4964*4882a593Smuzhiyun
4965*4882a593Smuzhiyun	mov.l		%d2,-(%sp)		# save d2
4966*4882a593Smuzhiyun	mov.l		L_SCR1(%a6),%d2		# fetch opword
4967*4882a593Smuzhiyun
4968*4882a593Smuzhiyun	btst		&0xb,%d2		# is it word or long?
4969*4882a593Smuzhiyun	bne.b		faii8_long
4970*4882a593Smuzhiyun	ext.l		%d0			# sign extend word index
4971*4882a593Smuzhiyunfaii8_long:
4972*4882a593Smuzhiyun	mov.l		%d2,%d1
4973*4882a593Smuzhiyun	rol.w		&0x7,%d1
4974*4882a593Smuzhiyun	andi.l		&0x3,%d1		# extract scale value
4975*4882a593Smuzhiyun
4976*4882a593Smuzhiyun	lsl.l		%d1,%d0			# shift index by scale
4977*4882a593Smuzhiyun
4978*4882a593Smuzhiyun	extb.l		%d2			# sign extend displacement
4979*4882a593Smuzhiyun	add.l		%d2,%d0			# index + disp
4980*4882a593Smuzhiyun	add.l		%d0,%a0			# An + (index + disp)
4981*4882a593Smuzhiyun
4982*4882a593Smuzhiyun	mov.l		(%sp)+,%d2		# restore old d2
4983*4882a593Smuzhiyun	rts
4984*4882a593Smuzhiyun
4985*4882a593Smuzhiyun###########################
4986*4882a593Smuzhiyun# Absolute short: (XXX).W #
4987*4882a593Smuzhiyun###########################
4988*4882a593Smuzhiyunfabs_short:
4989*4882a593Smuzhiyun	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
4990*4882a593Smuzhiyun	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
4991*4882a593Smuzhiyun	bsr.l		_imem_read_word		# fetch short address
4992*4882a593Smuzhiyun
4993*4882a593Smuzhiyun	tst.l		%d1			# did ifetch fail?
4994*4882a593Smuzhiyun	bne.l		iea_iacc		# yes
4995*4882a593Smuzhiyun
4996*4882a593Smuzhiyun	mov.w		%d0,%a0			# return <ea> in a0
4997*4882a593Smuzhiyun	rts
4998*4882a593Smuzhiyun
4999*4882a593Smuzhiyun##########################
5000*4882a593Smuzhiyun# Absolute long: (XXX).L #
5001*4882a593Smuzhiyun##########################
5002*4882a593Smuzhiyunfabs_long:
5003*4882a593Smuzhiyun	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
5004*4882a593Smuzhiyun	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
5005*4882a593Smuzhiyun	bsr.l		_imem_read_long		# fetch long address
5006*4882a593Smuzhiyun
5007*4882a593Smuzhiyun	tst.l		%d1			# did ifetch fail?
5008*4882a593Smuzhiyun	bne.l		iea_iacc		# yes
5009*4882a593Smuzhiyun
5010*4882a593Smuzhiyun	mov.l		%d0,%a0			# return <ea> in a0
5011*4882a593Smuzhiyun	rts
5012*4882a593Smuzhiyun
5013*4882a593Smuzhiyun#######################################################
5014*4882a593Smuzhiyun# Program counter indirect w/ displacement: (d16, PC) #
5015*4882a593Smuzhiyun#######################################################
5016*4882a593Smuzhiyunfpc_ind:
5017*4882a593Smuzhiyun	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
5018*4882a593Smuzhiyun	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
5019*4882a593Smuzhiyun	bsr.l		_imem_read_word		# fetch word displacement
5020*4882a593Smuzhiyun
5021*4882a593Smuzhiyun	tst.l		%d1			# did ifetch fail?
5022*4882a593Smuzhiyun	bne.l		iea_iacc		# yes
5023*4882a593Smuzhiyun
5024*4882a593Smuzhiyun	mov.w		%d0,%a0			# sign extend displacement
5025*4882a593Smuzhiyun
5026*4882a593Smuzhiyun	add.l		EXC_EXTWPTR(%a6),%a0	# pc + d16
5027*4882a593Smuzhiyun
5028*4882a593Smuzhiyun# _imem_read_word() increased the extwptr by 2. need to adjust here.
5029*4882a593Smuzhiyun	subq.l		&0x2,%a0		# adjust <ea>
5030*4882a593Smuzhiyun	rts
5031*4882a593Smuzhiyun
5032*4882a593Smuzhiyun##########################################################
5033*4882a593Smuzhiyun# PC indirect w/ index(8-bit displacement): (d8, PC, An) #
5034*4882a593Smuzhiyun# "     "     w/   "  (base displacement): (bd, PC, An)  #
5035*4882a593Smuzhiyun# PC memory indirect postindexed: ([bd, PC], Xn, od)     #
5036*4882a593Smuzhiyun# PC memory indirect preindexed: ([bd, PC, Xn], od)      #
5037*4882a593Smuzhiyun##########################################################
5038*4882a593Smuzhiyunfpc_ind_ext:
5039*4882a593Smuzhiyun	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
5040*4882a593Smuzhiyun	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
5041*4882a593Smuzhiyun	bsr.l		_imem_read_word		# fetch ext word
5042*4882a593Smuzhiyun
5043*4882a593Smuzhiyun	tst.l		%d1			# did ifetch fail?
5044*4882a593Smuzhiyun	bne.l		iea_iacc		# yes
5045*4882a593Smuzhiyun
5046*4882a593Smuzhiyun	mov.l		EXC_EXTWPTR(%a6),%a0	# put base in a0
5047*4882a593Smuzhiyun	subq.l		&0x2,%a0		# adjust base
5048*4882a593Smuzhiyun
5049*4882a593Smuzhiyun	btst		&0x8,%d0		# is disp only 8 bits?
5050*4882a593Smuzhiyun	bne.w		fcalc_mem_ind		# calc memory indirect
5051*4882a593Smuzhiyun
5052*4882a593Smuzhiyun	mov.l		%d0,L_SCR1(%a6)		# store opword
5053*4882a593Smuzhiyun
5054*4882a593Smuzhiyun	mov.l		%d0,%d1			# make extword copy
5055*4882a593Smuzhiyun	rol.w		&0x4,%d1		# rotate reg num into place
5056*4882a593Smuzhiyun	andi.w		&0xf,%d1		# extract register number
5057*4882a593Smuzhiyun
5058*4882a593Smuzhiyun# count on fetch_dreg() not to alter a0...
5059*4882a593Smuzhiyun	bsr.l		fetch_dreg		# fetch index
5060*4882a593Smuzhiyun
5061*4882a593Smuzhiyun	mov.l		%d2,-(%sp)		# save d2
5062*4882a593Smuzhiyun	mov.l		L_SCR1(%a6),%d2		# fetch opword
5063*4882a593Smuzhiyun
5064*4882a593Smuzhiyun	btst		&0xb,%d2		# is index word or long?
5065*4882a593Smuzhiyun	bne.b		fpii8_long		# long
5066*4882a593Smuzhiyun	ext.l		%d0			# sign extend word index
5067*4882a593Smuzhiyunfpii8_long:
5068*4882a593Smuzhiyun	mov.l		%d2,%d1
5069*4882a593Smuzhiyun	rol.w		&0x7,%d1		# rotate scale value into place
5070*4882a593Smuzhiyun	andi.l		&0x3,%d1		# extract scale value
5071*4882a593Smuzhiyun
5072*4882a593Smuzhiyun	lsl.l		%d1,%d0			# shift index by scale
5073*4882a593Smuzhiyun
5074*4882a593Smuzhiyun	extb.l		%d2			# sign extend displacement
5075*4882a593Smuzhiyun	add.l		%d2,%d0			# disp + index
5076*4882a593Smuzhiyun	add.l		%d0,%a0			# An + (index + disp)
5077*4882a593Smuzhiyun
5078*4882a593Smuzhiyun	mov.l		(%sp)+,%d2		# restore temp register
5079*4882a593Smuzhiyun	rts
5080*4882a593Smuzhiyun
5081*4882a593Smuzhiyun# d2 = index
5082*4882a593Smuzhiyun# d3 = base
5083*4882a593Smuzhiyun# d4 = od
5084*4882a593Smuzhiyun# d5 = extword
5085*4882a593Smuzhiyunfcalc_mem_ind:
5086*4882a593Smuzhiyun	btst		&0x6,%d0		# is the index suppressed?
5087*4882a593Smuzhiyun	beq.b		fcalc_index
5088*4882a593Smuzhiyun
5089*4882a593Smuzhiyun	movm.l		&0x3c00,-(%sp)		# save d2-d5
5090*4882a593Smuzhiyun
5091*4882a593Smuzhiyun	mov.l		%d0,%d5			# put extword in d5
5092*4882a593Smuzhiyun	mov.l		%a0,%d3			# put base in d3
5093*4882a593Smuzhiyun
5094*4882a593Smuzhiyun	clr.l		%d2			# yes, so index = 0
5095*4882a593Smuzhiyun	bra.b		fbase_supp_ck
5096*4882a593Smuzhiyun
5097*4882a593Smuzhiyun# index:
5098*4882a593Smuzhiyunfcalc_index:
5099*4882a593Smuzhiyun	mov.l		%d0,L_SCR1(%a6)		# save d0 (opword)
5100*4882a593Smuzhiyun	bfextu		%d0{&16:&4},%d1		# fetch dreg index
5101*4882a593Smuzhiyun	bsr.l		fetch_dreg
5102*4882a593Smuzhiyun
5103*4882a593Smuzhiyun	movm.l		&0x3c00,-(%sp)		# save d2-d5
5104*4882a593Smuzhiyun	mov.l		%d0,%d2			# put index in d2
5105*4882a593Smuzhiyun	mov.l		L_SCR1(%a6),%d5
5106*4882a593Smuzhiyun	mov.l		%a0,%d3
5107*4882a593Smuzhiyun
5108*4882a593Smuzhiyun	btst		&0xb,%d5		# is index word or long?
5109*4882a593Smuzhiyun	bne.b		fno_ext
5110*4882a593Smuzhiyun	ext.l		%d2
5111*4882a593Smuzhiyun
5112*4882a593Smuzhiyunfno_ext:
5113*4882a593Smuzhiyun	bfextu		%d5{&21:&2},%d0
5114*4882a593Smuzhiyun	lsl.l		%d0,%d2
5115*4882a593Smuzhiyun
5116*4882a593Smuzhiyun# base address (passed as parameter in d3):
5117*4882a593Smuzhiyun# we clear the value here if it should actually be suppressed.
5118*4882a593Smuzhiyunfbase_supp_ck:
5119*4882a593Smuzhiyun	btst		&0x7,%d5		# is the bd suppressed?
5120*4882a593Smuzhiyun	beq.b		fno_base_sup
5121*4882a593Smuzhiyun	clr.l		%d3
5122*4882a593Smuzhiyun
5123*4882a593Smuzhiyun# base displacement:
5124*4882a593Smuzhiyunfno_base_sup:
5125*4882a593Smuzhiyun	bfextu		%d5{&26:&2},%d0		# get bd size
5126*4882a593Smuzhiyun#	beq.l		fmovm_error		# if (size == 0) it's reserved
5127*4882a593Smuzhiyun
5128*4882a593Smuzhiyun	cmpi.b		%d0,&0x2
5129*4882a593Smuzhiyun	blt.b		fno_bd
5130*4882a593Smuzhiyun	beq.b		fget_word_bd
5131*4882a593Smuzhiyun
5132*4882a593Smuzhiyun	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
5133*4882a593Smuzhiyun	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
5134*4882a593Smuzhiyun	bsr.l		_imem_read_long
5135*4882a593Smuzhiyun
5136*4882a593Smuzhiyun	tst.l		%d1			# did ifetch fail?
5137*4882a593Smuzhiyun	bne.l		fcea_iacc		# yes
5138*4882a593Smuzhiyun
5139*4882a593Smuzhiyun	bra.b		fchk_ind
5140*4882a593Smuzhiyun
5141*4882a593Smuzhiyunfget_word_bd:
5142*4882a593Smuzhiyun	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
5143*4882a593Smuzhiyun	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
5144*4882a593Smuzhiyun	bsr.l		_imem_read_word
5145*4882a593Smuzhiyun
5146*4882a593Smuzhiyun	tst.l		%d1			# did ifetch fail?
5147*4882a593Smuzhiyun	bne.l		fcea_iacc		# yes
5148*4882a593Smuzhiyun
5149*4882a593Smuzhiyun	ext.l		%d0			# sign extend bd
5150*4882a593Smuzhiyun
5151*4882a593Smuzhiyunfchk_ind:
5152*4882a593Smuzhiyun	add.l		%d0,%d3			# base += bd
5153*4882a593Smuzhiyun
5154*4882a593Smuzhiyun# outer displacement:
5155*4882a593Smuzhiyunfno_bd:
5156*4882a593Smuzhiyun	bfextu		%d5{&30:&2},%d0		# is od suppressed?
5157*4882a593Smuzhiyun	beq.w		faii_bd
5158*4882a593Smuzhiyun
5159*4882a593Smuzhiyun	cmpi.b		%d0,&0x2
5160*4882a593Smuzhiyun	blt.b		fnull_od
5161*4882a593Smuzhiyun	beq.b		fword_od
5162*4882a593Smuzhiyun
5163*4882a593Smuzhiyun	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
5164*4882a593Smuzhiyun	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
5165*4882a593Smuzhiyun	bsr.l		_imem_read_long
5166*4882a593Smuzhiyun
5167*4882a593Smuzhiyun	tst.l		%d1			# did ifetch fail?
5168*4882a593Smuzhiyun	bne.l		fcea_iacc		# yes
5169*4882a593Smuzhiyun
5170*4882a593Smuzhiyun	bra.b		fadd_them
5171*4882a593Smuzhiyun
5172*4882a593Smuzhiyunfword_od:
5173*4882a593Smuzhiyun	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
5174*4882a593Smuzhiyun	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
5175*4882a593Smuzhiyun	bsr.l		_imem_read_word
5176*4882a593Smuzhiyun
5177*4882a593Smuzhiyun	tst.l		%d1			# did ifetch fail?
5178*4882a593Smuzhiyun	bne.l		fcea_iacc		# yes
5179*4882a593Smuzhiyun
5180*4882a593Smuzhiyun	ext.l		%d0			# sign extend od
5181*4882a593Smuzhiyun	bra.b		fadd_them
5182*4882a593Smuzhiyun
5183*4882a593Smuzhiyunfnull_od:
5184*4882a593Smuzhiyun	clr.l		%d0
5185*4882a593Smuzhiyun
5186*4882a593Smuzhiyunfadd_them:
5187*4882a593Smuzhiyun	mov.l		%d0,%d4
5188*4882a593Smuzhiyun
5189*4882a593Smuzhiyun	btst		&0x2,%d5		# pre or post indexing?
5190*4882a593Smuzhiyun	beq.b		fpre_indexed
5191*4882a593Smuzhiyun
5192*4882a593Smuzhiyun	mov.l		%d3,%a0
5193*4882a593Smuzhiyun	bsr.l		_dmem_read_long
5194*4882a593Smuzhiyun
5195*4882a593Smuzhiyun	tst.l		%d1			# did dfetch fail?
5196*4882a593Smuzhiyun	bne.w		fcea_err		# yes
5197*4882a593Smuzhiyun
5198*4882a593Smuzhiyun	add.l		%d2,%d0			# <ea> += index
5199*4882a593Smuzhiyun	add.l		%d4,%d0			# <ea> += od
5200*4882a593Smuzhiyun	bra.b		fdone_ea
5201*4882a593Smuzhiyun
5202*4882a593Smuzhiyunfpre_indexed:
5203*4882a593Smuzhiyun	add.l		%d2,%d3			# preindexing
5204*4882a593Smuzhiyun	mov.l		%d3,%a0
5205*4882a593Smuzhiyun	bsr.l		_dmem_read_long
5206*4882a593Smuzhiyun
5207*4882a593Smuzhiyun	tst.l		%d1			# did dfetch fail?
5208*4882a593Smuzhiyun	bne.w		fcea_err		# yes
5209*4882a593Smuzhiyun
5210*4882a593Smuzhiyun	add.l		%d4,%d0			# ea += od
5211*4882a593Smuzhiyun	bra.b		fdone_ea
5212*4882a593Smuzhiyun
5213*4882a593Smuzhiyunfaii_bd:
5214*4882a593Smuzhiyun	add.l		%d2,%d3			# ea = (base + bd) + index
5215*4882a593Smuzhiyun	mov.l		%d3,%d0
5216*4882a593Smuzhiyunfdone_ea:
5217*4882a593Smuzhiyun	mov.l		%d0,%a0
5218*4882a593Smuzhiyun
5219*4882a593Smuzhiyun	movm.l		(%sp)+,&0x003c		# restore d2-d5
5220*4882a593Smuzhiyun	rts
5221*4882a593Smuzhiyun
5222*4882a593Smuzhiyun#########################################################
5223*4882a593Smuzhiyunfcea_err:
5224*4882a593Smuzhiyun	mov.l		%d3,%a0
5225*4882a593Smuzhiyun
5226*4882a593Smuzhiyun	movm.l		(%sp)+,&0x003c		# restore d2-d5
5227*4882a593Smuzhiyun	mov.w		&0x0101,%d0
5228*4882a593Smuzhiyun	bra.l		iea_dacc
5229*4882a593Smuzhiyun
5230*4882a593Smuzhiyunfcea_iacc:
5231*4882a593Smuzhiyun	movm.l		(%sp)+,&0x003c		# restore d2-d5
5232*4882a593Smuzhiyun	bra.l		iea_iacc
5233*4882a593Smuzhiyun
5234*4882a593Smuzhiyunfmovm_out_err:
5235*4882a593Smuzhiyun	bsr.l		restore
5236*4882a593Smuzhiyun	mov.w		&0x00e1,%d0
5237*4882a593Smuzhiyun	bra.b		fmovm_err
5238*4882a593Smuzhiyun
5239*4882a593Smuzhiyunfmovm_in_err:
5240*4882a593Smuzhiyun	bsr.l		restore
5241*4882a593Smuzhiyun	mov.w		&0x0161,%d0
5242*4882a593Smuzhiyun
5243*4882a593Smuzhiyunfmovm_err:
5244*4882a593Smuzhiyun	mov.l		L_SCR1(%a6),%a0
5245*4882a593Smuzhiyun	bra.l		iea_dacc
5246*4882a593Smuzhiyun
5247*4882a593Smuzhiyun#########################################################################
5248*4882a593Smuzhiyun# XDEF ****************************************************************	#
5249*4882a593Smuzhiyun#	fmovm_ctrl(): emulate fmovm.l of control registers instr	#
5250*4882a593Smuzhiyun#									#
5251*4882a593Smuzhiyun# XREF ****************************************************************	#
5252*4882a593Smuzhiyun#	_imem_read_long() - read longword from memory			#
5253*4882a593Smuzhiyun#	iea_iacc() - _imem_read_long() failed; error recovery		#
5254*4882a593Smuzhiyun#									#
5255*4882a593Smuzhiyun# INPUT ***************************************************************	#
5256*4882a593Smuzhiyun#	None								#
5257*4882a593Smuzhiyun#									#
5258*4882a593Smuzhiyun# OUTPUT **************************************************************	#
5259*4882a593Smuzhiyun#	If _imem_read_long() doesn't fail:				#
5260*4882a593Smuzhiyun#		USER_FPCR(a6)  = new FPCR value				#
5261*4882a593Smuzhiyun#		USER_FPSR(a6)  = new FPSR value				#
5262*4882a593Smuzhiyun#		USER_FPIAR(a6) = new FPIAR value			#
5263*4882a593Smuzhiyun#									#
5264*4882a593Smuzhiyun# ALGORITHM ***********************************************************	#
5265*4882a593Smuzhiyun#	Decode the instruction type by looking at the extension word	#
5266*4882a593Smuzhiyun# in order to see how many control registers to fetch from memory.	#
5267*4882a593Smuzhiyun# Fetch them using _imem_read_long(). If this fetch fails, exit through	#
5268*4882a593Smuzhiyun# the special access error exit handler iea_iacc().			#
5269*4882a593Smuzhiyun#									#
5270*4882a593Smuzhiyun# Instruction word decoding:						#
5271*4882a593Smuzhiyun#									#
5272*4882a593Smuzhiyun#	fmovem.l #<data>, {FPIAR&|FPCR&|FPSR}				#
5273*4882a593Smuzhiyun#									#
5274*4882a593Smuzhiyun#		WORD1			WORD2				#
5275*4882a593Smuzhiyun#	1111 0010 00 111100	100$ $$00 0000 0000			#
5276*4882a593Smuzhiyun#									#
5277*4882a593Smuzhiyun#	$$$ (100): FPCR							#
5278*4882a593Smuzhiyun#	    (010): FPSR							#
5279*4882a593Smuzhiyun#	    (001): FPIAR						#
5280*4882a593Smuzhiyun#	    (000): FPIAR						#
5281*4882a593Smuzhiyun#									#
5282*4882a593Smuzhiyun#########################################################################
5283*4882a593Smuzhiyun
5284*4882a593Smuzhiyun	global		fmovm_ctrl
5285*4882a593Smuzhiyunfmovm_ctrl:
5286*4882a593Smuzhiyun	mov.b		EXC_EXTWORD(%a6),%d0	# fetch reg select bits
5287*4882a593Smuzhiyun	cmpi.b		%d0,&0x9c		# fpcr & fpsr & fpiar ?
5288*4882a593Smuzhiyun	beq.w		fctrl_in_7		# yes
5289*4882a593Smuzhiyun	cmpi.b		%d0,&0x98		# fpcr & fpsr ?
5290*4882a593Smuzhiyun	beq.w		fctrl_in_6		# yes
5291*4882a593Smuzhiyun	cmpi.b		%d0,&0x94		# fpcr & fpiar ?
5292*4882a593Smuzhiyun	beq.b		fctrl_in_5		# yes
5293*4882a593Smuzhiyun
5294*4882a593Smuzhiyun# fmovem.l #<data>, fpsr/fpiar
5295*4882a593Smuzhiyunfctrl_in_3:
5296*4882a593Smuzhiyun	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
5297*4882a593Smuzhiyun	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
5298*4882a593Smuzhiyun	bsr.l		_imem_read_long		# fetch FPSR from mem
5299*4882a593Smuzhiyun
5300*4882a593Smuzhiyun	tst.l		%d1			# did ifetch fail?
5301*4882a593Smuzhiyun	bne.l		iea_iacc		# yes
5302*4882a593Smuzhiyun
5303*4882a593Smuzhiyun	mov.l		%d0,USER_FPSR(%a6)	# store new FPSR to stack
5304*4882a593Smuzhiyun	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
5305*4882a593Smuzhiyun	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
5306*4882a593Smuzhiyun	bsr.l		_imem_read_long		# fetch FPIAR from mem
5307*4882a593Smuzhiyun
5308*4882a593Smuzhiyun	tst.l		%d1			# did ifetch fail?
5309*4882a593Smuzhiyun	bne.l		iea_iacc		# yes
5310*4882a593Smuzhiyun
5311*4882a593Smuzhiyun	mov.l		%d0,USER_FPIAR(%a6)	# store new FPIAR to stack
5312*4882a593Smuzhiyun	rts
5313*4882a593Smuzhiyun
5314*4882a593Smuzhiyun# fmovem.l #<data>, fpcr/fpiar
5315*4882a593Smuzhiyunfctrl_in_5:
5316*4882a593Smuzhiyun	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
5317*4882a593Smuzhiyun	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
5318*4882a593Smuzhiyun	bsr.l		_imem_read_long		# fetch FPCR from mem
5319*4882a593Smuzhiyun
5320*4882a593Smuzhiyun	tst.l		%d1			# did ifetch fail?
5321*4882a593Smuzhiyun	bne.l		iea_iacc		# yes
5322*4882a593Smuzhiyun
5323*4882a593Smuzhiyun	mov.l		%d0,USER_FPCR(%a6)	# store new FPCR to stack
5324*4882a593Smuzhiyun	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
5325*4882a593Smuzhiyun	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
5326*4882a593Smuzhiyun	bsr.l		_imem_read_long		# fetch FPIAR from mem
5327*4882a593Smuzhiyun
5328*4882a593Smuzhiyun	tst.l		%d1			# did ifetch fail?
5329*4882a593Smuzhiyun	bne.l		iea_iacc		# yes
5330*4882a593Smuzhiyun
5331*4882a593Smuzhiyun	mov.l		%d0,USER_FPIAR(%a6)	# store new FPIAR to stack
5332*4882a593Smuzhiyun	rts
5333*4882a593Smuzhiyun
5334*4882a593Smuzhiyun# fmovem.l #<data>, fpcr/fpsr
5335*4882a593Smuzhiyunfctrl_in_6:
5336*4882a593Smuzhiyun	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
5337*4882a593Smuzhiyun	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
5338*4882a593Smuzhiyun	bsr.l		_imem_read_long		# fetch FPCR from mem
5339*4882a593Smuzhiyun
5340*4882a593Smuzhiyun	tst.l		%d1			# did ifetch fail?
5341*4882a593Smuzhiyun	bne.l		iea_iacc		# yes
5342*4882a593Smuzhiyun
5343*4882a593Smuzhiyun	mov.l		%d0,USER_FPCR(%a6)	# store new FPCR to mem
5344*4882a593Smuzhiyun	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
5345*4882a593Smuzhiyun	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
5346*4882a593Smuzhiyun	bsr.l		_imem_read_long		# fetch FPSR from mem
5347*4882a593Smuzhiyun
5348*4882a593Smuzhiyun	tst.l		%d1			# did ifetch fail?
5349*4882a593Smuzhiyun	bne.l		iea_iacc		# yes
5350*4882a593Smuzhiyun
5351*4882a593Smuzhiyun	mov.l		%d0,USER_FPSR(%a6)	# store new FPSR to mem
5352*4882a593Smuzhiyun	rts
5353*4882a593Smuzhiyun
5354*4882a593Smuzhiyun# fmovem.l #<data>, fpcr/fpsr/fpiar
5355*4882a593Smuzhiyunfctrl_in_7:
5356*4882a593Smuzhiyun	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
5357*4882a593Smuzhiyun	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
5358*4882a593Smuzhiyun	bsr.l		_imem_read_long		# fetch FPCR from mem
5359*4882a593Smuzhiyun
5360*4882a593Smuzhiyun	tst.l		%d1			# did ifetch fail?
5361*4882a593Smuzhiyun	bne.l		iea_iacc		# yes
5362*4882a593Smuzhiyun
5363*4882a593Smuzhiyun	mov.l		%d0,USER_FPCR(%a6)	# store new FPCR to mem
5364*4882a593Smuzhiyun	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
5365*4882a593Smuzhiyun	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
5366*4882a593Smuzhiyun	bsr.l		_imem_read_long		# fetch FPSR from mem
5367*4882a593Smuzhiyun
5368*4882a593Smuzhiyun	tst.l		%d1			# did ifetch fail?
5369*4882a593Smuzhiyun	bne.l		iea_iacc		# yes
5370*4882a593Smuzhiyun
5371*4882a593Smuzhiyun	mov.l		%d0,USER_FPSR(%a6)	# store new FPSR to mem
5372*4882a593Smuzhiyun	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
5373*4882a593Smuzhiyun	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
5374*4882a593Smuzhiyun	bsr.l		_imem_read_long		# fetch FPIAR from mem
5375*4882a593Smuzhiyun
5376*4882a593Smuzhiyun	tst.l		%d1			# did ifetch fail?
5377*4882a593Smuzhiyun	bne.l		iea_iacc		# yes
5378*4882a593Smuzhiyun
5379*4882a593Smuzhiyun	mov.l		%d0,USER_FPIAR(%a6)	# store new FPIAR to mem
5380*4882a593Smuzhiyun	rts
5381*4882a593Smuzhiyun
5382*4882a593Smuzhiyun##########################################################################
5383*4882a593Smuzhiyun
5384*4882a593Smuzhiyun#########################################################################
5385*4882a593Smuzhiyun# XDEF ****************************************************************	#
5386*4882a593Smuzhiyun#	addsub_scaler2(): scale inputs to fadd/fsub such that no	#
5387*4882a593Smuzhiyun#			  OVFL/UNFL exceptions will result		#
5388*4882a593Smuzhiyun#									#
5389*4882a593Smuzhiyun# XREF ****************************************************************	#
5390*4882a593Smuzhiyun#	norm() - normalize mantissa after adjusting exponent		#
5391*4882a593Smuzhiyun#									#
5392*4882a593Smuzhiyun# INPUT ***************************************************************	#
5393*4882a593Smuzhiyun#	FP_SRC(a6) = fp op1(src)					#
5394*4882a593Smuzhiyun#	FP_DST(a6) = fp op2(dst)					#
5395*4882a593Smuzhiyun#									#
5396*4882a593Smuzhiyun# OUTPUT **************************************************************	#
5397*4882a593Smuzhiyun#	FP_SRC(a6) = fp op1 scaled(src)					#
5398*4882a593Smuzhiyun#	FP_DST(a6) = fp op2 scaled(dst)					#
5399*4882a593Smuzhiyun#	d0         = scale amount					#
5400*4882a593Smuzhiyun#									#
5401*4882a593Smuzhiyun# ALGORITHM ***********************************************************	#
5402*4882a593Smuzhiyun#	If the DST exponent is > the SRC exponent, set the DST exponent	#
5403*4882a593Smuzhiyun# equal to 0x3fff and scale the SRC exponent by the value that the	#
5404*4882a593Smuzhiyun# DST exponent was scaled by. If the SRC exponent is greater or equal,	#
5405*4882a593Smuzhiyun# do the opposite. Return this scale factor in d0.			#
5406*4882a593Smuzhiyun#	If the two exponents differ by > the number of mantissa bits	#
5407*4882a593Smuzhiyun# plus two, then set the smallest exponent to a very small value as a	#
5408*4882a593Smuzhiyun# quick shortcut.							#
5409*4882a593Smuzhiyun#									#
5410*4882a593Smuzhiyun#########################################################################
5411*4882a593Smuzhiyun
	global		addsub_scaler2
addsub_scaler2:
# Copy both operands into the scratch areas:
# FP_SCR0(a6) <- src op (a0), FP_SCR1(a6) <- dst op (a1).
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
	mov.w		SRC_EX(%a0),%d0
	mov.w		DST_EX(%a1),%d1
	mov.w		%d0,FP_SCR0_EX(%a6)
	mov.w		%d1,FP_SCR1_EX(%a6)

# strip the sign bits and save both biased exponents in L_SCR1:
# upper word = src exponent, lower word (2+L_SCR1) = dst exponent.
	andi.w		&0x7fff,%d0
	andi.w		&0x7fff,%d1
	mov.w		%d0,L_SCR1(%a6)		# store src exponent
	mov.w		%d1,2+L_SCR1(%a6)	# store dst exponent

	cmp.w		%d0, %d1		# is src exp >= dst exp?
	bge.l		src_exp_ge2

# dst exp is >  src exp; scale dst to exp = 0x3fff
dst_exp_gt2:
	bsr.l		scale_to_zero_dst
	mov.l		%d0,-(%sp)		# save scale factor

	cmpi.b		STAG(%a6),&DENORM	# is src denormalized?
	bne.b		cmpexp12

# src is a DENORM: normalize it and record its true exponent.
	lea		FP_SCR0(%a6),%a0
	bsr.l		norm			# normalize the denorm; result is new exp
	neg.w		%d0			# new exp = -(shft val)
	mov.w		%d0,L_SCR1(%a6)		# insert new src exp

cmpexp12:
	mov.w		2+L_SCR1(%a6),%d0
	subi.w		&mantissalen+2,%d0	# subtract mantissalen+2 from larger exp

	cmp.w		%d0,L_SCR1(%a6)		# is difference >= len(mantissa)+2?
	bge.b		quick_scale12

# exponents are close enough: scale the src exponent by the same
# amount the dst exponent was scaled by (saved on the stack).
	mov.w		L_SCR1(%a6),%d0
	add.w		0x2(%sp),%d0		# scale src exponent by scale factor
	mov.w		FP_SCR0_EX(%a6),%d1
	and.w		&0x8000,%d1
	or.w		%d1,%d0			# concat {sgn,new exp}
	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new src exponent

	mov.l		(%sp)+,%d0		# return SCALE factor
	rts

# exponent difference is too large to affect the result: force the
# smaller (src) exponent to the minimum value of 1 as a shortcut.
quick_scale12:
	andi.w		&0x8000,FP_SCR0_EX(%a6)	# zero src exponent
	bset		&0x0,1+FP_SCR0_EX(%a6)	# set exp = 1

	mov.l		(%sp)+,%d0		# return SCALE factor
	rts

# src exp is >= dst exp; scale src to exp = 0x3fff
src_exp_ge2:
	bsr.l		scale_to_zero_src
	mov.l		%d0,-(%sp)		# save scale factor

	cmpi.b		DTAG(%a6),&DENORM	# is dst denormalized?
	bne.b		cmpexp22
# dst is a DENORM: normalize it and record its true exponent.
	lea		FP_SCR1(%a6),%a0
	bsr.l		norm			# normalize the denorm; result is new exp
	neg.w		%d0			# new exp = -(shft val)
	mov.w		%d0,2+L_SCR1(%a6)	# insert new dst exp

cmpexp22:
	mov.w		L_SCR1(%a6),%d0
	subi.w		&mantissalen+2,%d0	# subtract mantissalen+2 from larger exp

	cmp.w		%d0,2+L_SCR1(%a6)	# is difference >= len(mantissa)+2?
	bge.b		quick_scale22

# exponents are close enough: scale the dst exponent by the same
# amount the src exponent was scaled by (saved on the stack).
	mov.w		2+L_SCR1(%a6),%d0
	add.w		0x2(%sp),%d0		# scale dst exponent by scale factor
	mov.w		FP_SCR1_EX(%a6),%d1
	andi.w		&0x8000,%d1
	or.w		%d1,%d0			# concat {sgn,new exp}
	mov.w		%d0,FP_SCR1_EX(%a6)	# insert new dst exponent

	mov.l		(%sp)+,%d0		# return SCALE factor
	rts

# exponent difference is too large to affect the result: force the
# smaller (dst) exponent to the minimum value of 1 as a shortcut.
quick_scale22:
	andi.w		&0x8000,FP_SCR1_EX(%a6)	# zero dst exponent
	bset		&0x0,1+FP_SCR1_EX(%a6)	# set exp = 1

	mov.l		(%sp)+,%d0		# return SCALE factor
	rts
5503*4882a593Smuzhiyun
5504*4882a593Smuzhiyun##########################################################################
5505*4882a593Smuzhiyun
5506*4882a593Smuzhiyun#########################################################################
5507*4882a593Smuzhiyun# XDEF ****************************************************************	#
5508*4882a593Smuzhiyun#	scale_to_zero_src(): scale the exponent of extended precision	#
5509*4882a593Smuzhiyun#			     value at FP_SCR0(a6).			#
5510*4882a593Smuzhiyun#									#
5511*4882a593Smuzhiyun# XREF ****************************************************************	#
5512*4882a593Smuzhiyun#	norm() - normalize the mantissa if the operand was a DENORM	#
5513*4882a593Smuzhiyun#									#
5514*4882a593Smuzhiyun# INPUT ***************************************************************	#
5515*4882a593Smuzhiyun#	FP_SCR0(a6) = extended precision operand to be scaled		#
5516*4882a593Smuzhiyun#									#
5517*4882a593Smuzhiyun# OUTPUT **************************************************************	#
5518*4882a593Smuzhiyun#	FP_SCR0(a6) = scaled extended precision operand			#
5519*4882a593Smuzhiyun#	d0	    = scale value					#
5520*4882a593Smuzhiyun#									#
5521*4882a593Smuzhiyun# ALGORITHM ***********************************************************	#
5522*4882a593Smuzhiyun#	Set the exponent of the input operand to 0x3fff. Save the value	#
5523*4882a593Smuzhiyun# of the difference between the original and new exponent. Then,	#
5524*4882a593Smuzhiyun# normalize the operand if it was a DENORM. Add this normalization	#
5525*4882a593Smuzhiyun# value to the previous value. Return the result.			#
5526*4882a593Smuzhiyun#									#
5527*4882a593Smuzhiyun#########################################################################
5528*4882a593Smuzhiyun
	global		scale_to_zero_src
# Force the exponent of the extended-precision value in FP_SCR0(a6) to
# the bias value 0x3fff (true exponent zero) and return the net scale
# amount in d0. A DENORM input is normalized first so the returned
# scale accounts for the normalization shift as well.
scale_to_zero_src:
	mov.w		FP_SCR0_EX(%a6),%d1	# extract operand's {sgn,exp}
	mov.w		%d1,%d0			# make a copy

	andi.l		&0x7fff,%d1		# extract operand's exponent

	andi.w		&0x8000,%d0		# extract operand's sgn
	or.w		&0x3fff,%d0		# insert new exponent = 0x3fff (true exp 0)

	mov.w		%d0,FP_SCR0_EX(%a6)	# insert biased exponent

	cmpi.b		STAG(%a6),&DENORM	# is operand a DENORM?
	beq.b		stzs_denorm		# yes; normalize it first

stzs_norm:
	mov.l		&0x3fff,%d0
	sub.l		%d1,%d0			# scale = BIAS + (-exp)

	rts

stzs_denorm:
	lea		FP_SCR0(%a6),%a0	# pass ptr to src op
	bsr.l		norm			# normalize denorm
	neg.l		%d0			# new exponent = -(shft val)
	mov.l		%d0,%d1			# prepare for op_norm call
	bra.b		stzs_norm		# finish scaling
5556*4882a593Smuzhiyun
5557*4882a593Smuzhiyun###
5558*4882a593Smuzhiyun
5559*4882a593Smuzhiyun#########################################################################
5560*4882a593Smuzhiyun# XDEF ****************************************************************	#
5561*4882a593Smuzhiyun#	scale_sqrt(): scale the input operand exponent so a subsequent	#
5562*4882a593Smuzhiyun#		      fsqrt operation won't take an exception.		#
5563*4882a593Smuzhiyun#									#
5564*4882a593Smuzhiyun# XREF ****************************************************************	#
5565*4882a593Smuzhiyun#	norm() - normalize the mantissa if the operand was a DENORM	#
5566*4882a593Smuzhiyun#									#
5567*4882a593Smuzhiyun# INPUT ***************************************************************	#
5568*4882a593Smuzhiyun#	FP_SCR0(a6) = extended precision operand to be scaled		#
5569*4882a593Smuzhiyun#									#
5570*4882a593Smuzhiyun# OUTPUT **************************************************************	#
5571*4882a593Smuzhiyun#	FP_SCR0(a6) = scaled extended precision operand			#
5572*4882a593Smuzhiyun#	d0	    = scale value					#
5573*4882a593Smuzhiyun#									#
5574*4882a593Smuzhiyun# ALGORITHM ***********************************************************	#
5575*4882a593Smuzhiyun#	If the input operand is a DENORM, normalize it.			#
5576*4882a593Smuzhiyun#	If the exponent of the input operand is even, set the exponent	#
5577*4882a593Smuzhiyun# to 0x3ffe and return a scale factor of "(exp-0x3ffe)/2". If the	#
# exponent of the input operand is odd, set the exponent to 0x3fff and	#
5579*4882a593Smuzhiyun# return a scale factor of "(exp-0x3fff)/2".				#
5580*4882a593Smuzhiyun#									#
5581*4882a593Smuzhiyun#########################################################################
5582*4882a593Smuzhiyun
	global		scale_sqrt
# Scale the extended-precision operand in FP_SCR0(a6) so a following
# fsqrt cannot overflow/underflow. A DENORM is normalized first. The
# exponent is forced to 0x3fff (odd exponent) or 0x3ffe (even exponent)
# and the scale factor, (old exp - new exp)/2, is returned in d0.
scale_sqrt:
	cmpi.b		STAG(%a6),&DENORM	# is operand a DENORM?
	beq.b		ss_denorm		# yes; normalize it first

	mov.w		FP_SCR0_EX(%a6),%d1	# extract operand's {sgn,exp}
	andi.l		&0x7fff,%d1		# extract operand's exponent

	andi.w		&0x8000,FP_SCR0_EX(%a6)	# extract operand's sgn

	btst		&0x0,%d1		# is exp even or odd?
	beq.b		ss_norm_even

	ori.w		&0x3fff,FP_SCR0_EX(%a6)	# insert new exponent = 0x3fff

	mov.l		&0x3fff,%d0
	sub.l		%d1,%d0			# scale = BIAS + (-exp)
	asr.l		&0x1,%d0		# divide scale factor by 2
	rts

ss_norm_even:
	ori.w		&0x3ffe,FP_SCR0_EX(%a6)	# insert new exponent = 0x3ffe

	mov.l		&0x3ffe,%d0
	sub.l		%d1,%d0			# scale = BIAS + (-exp)
	asr.l		&0x1,%d0		# divide scale factor by 2
	rts

ss_denorm:
	lea		FP_SCR0(%a6),%a0	# pass ptr to src op
	bsr.l		norm			# normalize denorm

	btst		&0x0,%d0		# is exp even or odd?
	beq.b		ss_denorm_even

	ori.w		&0x3fff,FP_SCR0_EX(%a6)	# insert new exponent = 0x3fff

	add.l		&0x3fff,%d0
	asr.l		&0x1,%d0		# divide scale factor by 2
	rts

ss_denorm_even:
	ori.w		&0x3ffe,FP_SCR0_EX(%a6)	# insert new exponent = 0x3ffe

	add.l		&0x3ffe,%d0
	asr.l		&0x1,%d0		# divide scale factor by 2
	rts
5630*4882a593Smuzhiyun
5631*4882a593Smuzhiyun###
5632*4882a593Smuzhiyun
5633*4882a593Smuzhiyun#########################################################################
5634*4882a593Smuzhiyun# XDEF ****************************************************************	#
5635*4882a593Smuzhiyun#	scale_to_zero_dst(): scale the exponent of extended precision	#
5636*4882a593Smuzhiyun#			     value at FP_SCR1(a6).			#
5637*4882a593Smuzhiyun#									#
5638*4882a593Smuzhiyun# XREF ****************************************************************	#
5639*4882a593Smuzhiyun#	norm() - normalize the mantissa if the operand was a DENORM	#
5640*4882a593Smuzhiyun#									#
5641*4882a593Smuzhiyun# INPUT ***************************************************************	#
5642*4882a593Smuzhiyun#	FP_SCR1(a6) = extended precision operand to be scaled		#
5643*4882a593Smuzhiyun#									#
5644*4882a593Smuzhiyun# OUTPUT **************************************************************	#
5645*4882a593Smuzhiyun#	FP_SCR1(a6) = scaled extended precision operand			#
5646*4882a593Smuzhiyun#	d0	    = scale value					#
5647*4882a593Smuzhiyun#									#
5648*4882a593Smuzhiyun# ALGORITHM ***********************************************************	#
5649*4882a593Smuzhiyun#	Set the exponent of the input operand to 0x3fff. Save the value	#
5650*4882a593Smuzhiyun# of the difference between the original and new exponent. Then,	#
5651*4882a593Smuzhiyun# normalize the operand if it was a DENORM. Add this normalization	#
5652*4882a593Smuzhiyun# value to the previous value. Return the result.			#
5653*4882a593Smuzhiyun#									#
5654*4882a593Smuzhiyun#########################################################################
5655*4882a593Smuzhiyun
	global		scale_to_zero_dst
# Force the exponent of the extended-precision value in FP_SCR1(a6) to
# the bias value 0x3fff (true exponent zero) and return the net scale
# amount in d0. A DENORM input is normalized first so the returned
# scale accounts for the normalization shift as well.
scale_to_zero_dst:
	mov.w		FP_SCR1_EX(%a6),%d1	# extract operand's {sgn,exp}
	mov.w		%d1,%d0			# make a copy

	andi.l		&0x7fff,%d1		# extract operand's exponent

	andi.w		&0x8000,%d0		# extract operand's sgn
	or.w		&0x3fff,%d0		# insert new exponent = 0x3fff (true exp 0)

	mov.w		%d0,FP_SCR1_EX(%a6)	# insert biased exponent

	cmpi.b		DTAG(%a6),&DENORM	# is operand a DENORM?
	beq.b		stzd_denorm		# yes; normalize it first

stzd_norm:
	mov.l		&0x3fff,%d0
	sub.l		%d1,%d0			# scale = BIAS + (-exp)
	rts

stzd_denorm:
	lea		FP_SCR1(%a6),%a0	# pass ptr to dst op
	bsr.l		norm			# normalize denorm
	neg.l		%d0			# new exponent = -(shft val)
	mov.l		%d0,%d1			# prepare for op_norm call
	bra.b		stzd_norm		# finish scaling
5682*4882a593Smuzhiyun
5683*4882a593Smuzhiyun##########################################################################
5684*4882a593Smuzhiyun
5685*4882a593Smuzhiyun#########################################################################
5686*4882a593Smuzhiyun# XDEF ****************************************************************	#
5687*4882a593Smuzhiyun#	res_qnan(): return default result w/ QNAN operand for dyadic	#
5688*4882a593Smuzhiyun#	res_snan(): return default result w/ SNAN operand for dyadic	#
5689*4882a593Smuzhiyun#	res_qnan_1op(): return dflt result w/ QNAN operand for monadic	#
5690*4882a593Smuzhiyun#	res_snan_1op(): return dflt result w/ SNAN operand for monadic	#
5691*4882a593Smuzhiyun#									#
5692*4882a593Smuzhiyun# XREF ****************************************************************	#
5693*4882a593Smuzhiyun#	None								#
5694*4882a593Smuzhiyun#									#
5695*4882a593Smuzhiyun# INPUT ***************************************************************	#
5696*4882a593Smuzhiyun#	FP_SRC(a6) = pointer to extended precision src operand		#
5697*4882a593Smuzhiyun#	FP_DST(a6) = pointer to extended precision dst operand		#
5698*4882a593Smuzhiyun#									#
5699*4882a593Smuzhiyun# OUTPUT **************************************************************	#
5700*4882a593Smuzhiyun#	fp0 = default result						#
5701*4882a593Smuzhiyun#									#
5702*4882a593Smuzhiyun# ALGORITHM ***********************************************************	#
5703*4882a593Smuzhiyun#	If either operand (but not both operands) of an operation is a	#
5704*4882a593Smuzhiyun# nonsignalling NAN, then that NAN is returned as the result. If both	#
5705*4882a593Smuzhiyun# operands are nonsignalling NANs, then the destination operand		#
5706*4882a593Smuzhiyun# nonsignalling NAN is returned as the result.				#
5707*4882a593Smuzhiyun#	If either operand to an operation is a signalling NAN (SNAN),	#
5708*4882a593Smuzhiyun# then, the SNAN bit is set in the FPSR EXC byte. If the SNAN trap	#
5709*4882a593Smuzhiyun# enable bit is set in the FPCR, then the trap is taken and the		#
5710*4882a593Smuzhiyun# destination is not modified. If the SNAN trap enable bit is not set,	#
5711*4882a593Smuzhiyun# then the SNAN is converted to a nonsignalling NAN (by setting the	#
5712*4882a593Smuzhiyun# SNAN bit in the operand to one), and the operation continues as	#
5713*4882a593Smuzhiyun# described in the preceding paragraph, for nonsignalling NANs.		#
5714*4882a593Smuzhiyun#	Make sure the appropriate FPSR bits are set before exiting.	#
5715*4882a593Smuzhiyun#									#
5716*4882a593Smuzhiyun#########################################################################
5717*4882a593Smuzhiyun
	global		res_qnan
	global		res_snan
# Dyadic NAN default result: a dst NAN takes precedence over a src NAN.
# An SNAN operand has its SNAN bit forced to one (making it
# nonsignalling) and sets the SNAN/AIOP bits in the FPSR; a QNAN sets
# only the NAN bit. The chosen NAN is returned in fp0.
res_qnan:
res_snan:
	cmp.b		DTAG(%a6), &SNAN	# is the dst an SNAN?
	beq.b		dst_snan2
	cmp.b		DTAG(%a6), &QNAN	# is the dst a  QNAN?
	beq.b		dst_qnan2
# dst is not a NAN, so the src must be; decide QNAN vs SNAN.
src_nan:
	cmp.b		STAG(%a6), &QNAN
	beq.b		src_qnan2
# monadic entry point: SNAN operand.
	global		res_snan_1op
res_snan_1op:
src_snan2:
	bset		&0x6, FP_SRC_HI(%a6)	# set SNAN bit
	or.l		&nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6)
	lea		FP_SRC(%a6), %a0
	bra.b		nan_comp
# monadic entry point: QNAN operand.
	global		res_qnan_1op
res_qnan_1op:
src_qnan2:
	or.l		&nan_mask, USER_FPSR(%a6)
	lea		FP_SRC(%a6), %a0
	bra.b		nan_comp
dst_snan2:
	or.l		&nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6)
	bset		&0x6, FP_DST_HI(%a6)	# set SNAN bit
	lea		FP_DST(%a6), %a0
	bra.b		nan_comp
dst_qnan2:
	lea		FP_DST(%a6), %a0
	cmp.b		STAG(%a6), &SNAN	# a src SNAN still sets SNAN/AIOP
	bne		nan_done
	or.l		&aiop_mask+snan_mask, USER_FPSR(%a6)
nan_done:
	or.l		&nan_mask, USER_FPSR(%a6)
# common exit: record the result NAN's sign, return the NAN in fp0.
nan_comp:
	btst		&0x7, FTEMP_EX(%a0)	# is NAN neg?
	beq.b		nan_not_neg
	or.l		&neg_mask, USER_FPSR(%a6)
nan_not_neg:
	fmovm.x		(%a0), &0x80		# load result NAN into fp0
	rts
5761*4882a593Smuzhiyun
5762*4882a593Smuzhiyun#########################################################################
5763*4882a593Smuzhiyun# XDEF ****************************************************************	#
5764*4882a593Smuzhiyun#	res_operr(): return default result during operand error		#
5765*4882a593Smuzhiyun#									#
5766*4882a593Smuzhiyun# XREF ****************************************************************	#
5767*4882a593Smuzhiyun#	None								#
5768*4882a593Smuzhiyun#									#
5769*4882a593Smuzhiyun# INPUT ***************************************************************	#
5770*4882a593Smuzhiyun#	None								#
5771*4882a593Smuzhiyun#									#
5772*4882a593Smuzhiyun# OUTPUT **************************************************************	#
5773*4882a593Smuzhiyun#	fp0 = default operand error result				#
5774*4882a593Smuzhiyun#									#
5775*4882a593Smuzhiyun# ALGORITHM ***********************************************************	#
#	A nonsignalling NAN is returned as the default result when	#
5777*4882a593Smuzhiyun# an operand error occurs for the following cases:			#
5778*4882a593Smuzhiyun#									#
5779*4882a593Smuzhiyun#	Multiply: (Infinity x Zero)					#
5780*4882a593Smuzhiyun#	Divide  : (Zero / Zero) || (Infinity / Infinity)		#
5781*4882a593Smuzhiyun#									#
5782*4882a593Smuzhiyun#########################################################################
5783*4882a593Smuzhiyun
	global		res_operr
# Operand-error default result: set NAN/OPERR/AIOP in the FPSR and
# return the default nonsignalling NAN in fp0.
res_operr:
	or.l		&nan_mask+operr_mask+aiop_mask, USER_FPSR(%a6)
	fmovm.x		nan_return(%pc), &0x80	# fp0 = default QNAN
	rts

# default NAN image: exponent 0x7fff, all-ones mantissa.
nan_return:
	long		0x7fff0000, 0xffffffff, 0xffffffff
5792*4882a593Smuzhiyun
5793*4882a593Smuzhiyun#########################################################################
5794*4882a593Smuzhiyun# XDEF ****************************************************************	#
5795*4882a593Smuzhiyun#	_denorm(): denormalize an intermediate result			#
5796*4882a593Smuzhiyun#									#
5797*4882a593Smuzhiyun# XREF ****************************************************************	#
5798*4882a593Smuzhiyun#	None								#
5799*4882a593Smuzhiyun#									#
5800*4882a593Smuzhiyun# INPUT *************************************************************** #
5801*4882a593Smuzhiyun#	a0 = points to the operand to be denormalized			#
5802*4882a593Smuzhiyun#		(in the internal extended format)			#
5803*4882a593Smuzhiyun#									#
5804*4882a593Smuzhiyun#	d0 = rounding precision						#
5805*4882a593Smuzhiyun#									#
5806*4882a593Smuzhiyun# OUTPUT **************************************************************	#
5807*4882a593Smuzhiyun#	a0 = pointer to the denormalized result				#
5808*4882a593Smuzhiyun#		(in the internal extended format)			#
5809*4882a593Smuzhiyun#									#
5810*4882a593Smuzhiyun#	d0 = guard,round,sticky						#
5811*4882a593Smuzhiyun#									#
5812*4882a593Smuzhiyun# ALGORITHM ***********************************************************	#
5813*4882a593Smuzhiyun#	According to the exponent underflow threshold for the given	#
# precision, shift the mantissa bits to the right in order to raise the	#
5815*4882a593Smuzhiyun# exponent of the operand to the threshold value. While shifting the	#
5816*4882a593Smuzhiyun# mantissa bits right, maintain the value of the guard, round, and	#
5817*4882a593Smuzhiyun# sticky bits.								#
5818*4882a593Smuzhiyun# other notes:								#
5819*4882a593Smuzhiyun#	(1) _denorm() is called by the underflow routines		#
5820*4882a593Smuzhiyun#	(2) _denorm() does NOT affect the status register		#
5821*4882a593Smuzhiyun#									#
5822*4882a593Smuzhiyun#########################################################################
5823*4882a593Smuzhiyun
#
# table of exponent threshold values for each precision
# (indexed by rounding precision field: ext = 0x0, then sgl, dbl)
#
tbl_thresh:
	short		0x0
	short		sgl_thresh
	short		dbl_thresh

	global		_denorm
_denorm:
#
# Load the exponent threshold for the precision selected and check
# to see if (threshold - exponent) is > 65 in which case we can
# simply calculate the sticky bit and zero the mantissa. otherwise
# we have to call the denormalization routine.
#
	lsr.b		&0x2, %d0		# shift prec to lo bits
	mov.w		(tbl_thresh.b,%pc,%d0.w*2), %d1 # load prec threshold
	mov.w		%d1, %d0		# copy d1 into d0
	sub.w		FTEMP_EX(%a0), %d0	# diff = threshold - exp
	cmpi.w		%d0, &66		# is diff > 65? (mant + g,r bits)
	bpl.b		denorm_set_stky		# yes; just calc sticky

	clr.l		%d0			# clear g,r,s
	btst		&inex2_bit, FPSR_EXCEPT(%a6) # was INEX2 already set?
	beq.b		denorm_call		# no; don't change anything
	bset		&29, %d0		# yes; set sticky bit

denorm_call:
	bsr.l		dnrm_lp			# denormalize the number
	rts

#
# all bits would have been shifted off during the denorm so simply
# calculate if the sticky should be set and clear the entire mantissa.
#
denorm_set_stky:
	mov.l		&0x20000000, %d0	# set sticky bit in return value
	mov.w		%d1, FTEMP_EX(%a0)	# load exp with threshold
	clr.l		FTEMP_HI(%a0)		# zero the ms mantissa longword
	clr.l		FTEMP_LO(%a0)		# zero the ls mantissa longword
	rts
5866*4882a593Smuzhiyun
5867*4882a593Smuzhiyun#									#
5868*4882a593Smuzhiyun# dnrm_lp(): normalize exponent/mantissa to specified threshold		#
5869*4882a593Smuzhiyun#									#
5870*4882a593Smuzhiyun# INPUT:								#
5871*4882a593Smuzhiyun#	%a0	   : points to the operand to be denormalized		#
5872*4882a593Smuzhiyun#	%d0{31:29} : initial guard,round,sticky				#
5873*4882a593Smuzhiyun#	%d1{15:0}  : denormalization threshold				#
5874*4882a593Smuzhiyun# OUTPUT:								#
5875*4882a593Smuzhiyun#	%a0	   : points to the denormalized operand			#
5876*4882a593Smuzhiyun#	%d0{31:29} : final guard,round,sticky				#
5877*4882a593Smuzhiyun#									#
5878*4882a593Smuzhiyun
5879*4882a593Smuzhiyun# *** Local Equates *** #
5880*4882a593Smuzhiyunset	GRS,		L_SCR2			# g,r,s temp storage
5881*4882a593Smuzhiyunset	FTEMP_LO2,	L_SCR1			# FTEMP_LO copy
5882*4882a593Smuzhiyun
5883*4882a593Smuzhiyun	global		dnrm_lp
5884*4882a593Smuzhiyundnrm_lp:
5885*4882a593Smuzhiyun
5886*4882a593Smuzhiyun#
5887*4882a593Smuzhiyun# make a copy of FTEMP_LO and place the g,r,s bits directly after it
5888*4882a593Smuzhiyun# in memory so as to make the bitfield extraction for denormalization easier.
5889*4882a593Smuzhiyun#
5890*4882a593Smuzhiyun	mov.l		FTEMP_LO(%a0), FTEMP_LO2(%a6) # make FTEMP_LO copy
5891*4882a593Smuzhiyun	mov.l		%d0, GRS(%a6)		# place g,r,s after it
5892*4882a593Smuzhiyun
5893*4882a593Smuzhiyun#
5894*4882a593Smuzhiyun# check to see how much less than the underflow threshold the operand
5895*4882a593Smuzhiyun# exponent is.
5896*4882a593Smuzhiyun#
5897*4882a593Smuzhiyun	mov.l		%d1, %d0		# copy the denorm threshold
5898*4882a593Smuzhiyun	sub.w		FTEMP_EX(%a0), %d1	# d1 = threshold - uns exponent
5899*4882a593Smuzhiyun	ble.b		dnrm_no_lp		# d1 <= 0
5900*4882a593Smuzhiyun	cmpi.w		%d1, &0x20		# is ( 0 <= d1 < 32) ?
5901*4882a593Smuzhiyun	blt.b		case_1			# yes
5902*4882a593Smuzhiyun	cmpi.w		%d1, &0x40		# is (32 <= d1 < 64) ?
5903*4882a593Smuzhiyun	blt.b		case_2			# yes
5904*4882a593Smuzhiyun	bra.w		case_3			# (d1 >= 64)
5905*4882a593Smuzhiyun
5906*4882a593Smuzhiyun#
5907*4882a593Smuzhiyun# No normalization necessary
5908*4882a593Smuzhiyun#
5909*4882a593Smuzhiyundnrm_no_lp:
5910*4882a593Smuzhiyun	mov.l		GRS(%a6), %d0		# restore original g,r,s
5911*4882a593Smuzhiyun	rts
5912*4882a593Smuzhiyun
5913*4882a593Smuzhiyun#
5914*4882a593Smuzhiyun# case (0<d1<32)
5915*4882a593Smuzhiyun#
5916*4882a593Smuzhiyun# %d0 = denorm threshold
5917*4882a593Smuzhiyun# %d1 = "n" = amt to shift
5918*4882a593Smuzhiyun#
5919*4882a593Smuzhiyun#	---------------------------------------------------------
5920*4882a593Smuzhiyun#	|     FTEMP_HI	  |	FTEMP_LO     |grs000.........000|
5921*4882a593Smuzhiyun#	---------------------------------------------------------
5922*4882a593Smuzhiyun#	<-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
5923*4882a593Smuzhiyun#	\	   \		      \			 \
5924*4882a593Smuzhiyun#	 \	    \		       \		  \
5925*4882a593Smuzhiyun#	  \	     \			\		   \
5926*4882a593Smuzhiyun#	   \	      \			 \		    \
5927*4882a593Smuzhiyun#	    \	       \		  \		     \
5928*4882a593Smuzhiyun#	     \		\		   \		      \
5929*4882a593Smuzhiyun#	      \		 \		    \		       \
5930*4882a593Smuzhiyun#	       \	  \		     \			\
5931*4882a593Smuzhiyun#	<-(n)-><-(32 - n)-><------(32)-------><------(32)------->
5932*4882a593Smuzhiyun#	---------------------------------------------------------
5933*4882a593Smuzhiyun#	|0.....0| NEW_HI  |  NEW_FTEMP_LO     |grs		|
5934*4882a593Smuzhiyun#	---------------------------------------------------------
5935*4882a593Smuzhiyun#
5936*4882a593Smuzhiyuncase_1:
5937*4882a593Smuzhiyun	mov.l		%d2, -(%sp)		# create temp storage
5938*4882a593Smuzhiyun
5939*4882a593Smuzhiyun	mov.w		%d0, FTEMP_EX(%a0)	# exponent = denorm threshold
5940*4882a593Smuzhiyun	mov.l		&32, %d0
5941*4882a593Smuzhiyun	sub.w		%d1, %d0		# %d0 = 32 - %d1
5942*4882a593Smuzhiyun
5943*4882a593Smuzhiyun	cmpi.w		%d1, &29		# is shft amt >= 29
5944*4882a593Smuzhiyun	blt.b		case1_extract		# no; no fix needed
5945*4882a593Smuzhiyun	mov.b		GRS(%a6), %d2
5946*4882a593Smuzhiyun	or.b		%d2, 3+FTEMP_LO2(%a6)
5947*4882a593Smuzhiyun
5948*4882a593Smuzhiyuncase1_extract:
5949*4882a593Smuzhiyun	bfextu		FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_HI
5950*4882a593Smuzhiyun	bfextu		FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new FTEMP_LO
5951*4882a593Smuzhiyun	bfextu		FTEMP_LO2(%a6){%d0:&32}, %d0 # %d0 = new G,R,S
5952*4882a593Smuzhiyun
5953*4882a593Smuzhiyun	mov.l		%d2, FTEMP_HI(%a0)	# store new FTEMP_HI
5954*4882a593Smuzhiyun	mov.l		%d1, FTEMP_LO(%a0)	# store new FTEMP_LO
5955*4882a593Smuzhiyun
5956*4882a593Smuzhiyun	bftst		%d0{&2:&30}		# were bits shifted off?
5957*4882a593Smuzhiyun	beq.b		case1_sticky_clear	# no; go finish
5958*4882a593Smuzhiyun	bset		&rnd_stky_bit, %d0	# yes; set sticky bit
5959*4882a593Smuzhiyun
5960*4882a593Smuzhiyuncase1_sticky_clear:
5961*4882a593Smuzhiyun	and.l		&0xe0000000, %d0	# clear all but G,R,S
5962*4882a593Smuzhiyun	mov.l		(%sp)+, %d2		# restore temp register
5963*4882a593Smuzhiyun	rts
5964*4882a593Smuzhiyun
5965*4882a593Smuzhiyun#
5966*4882a593Smuzhiyun# case (32<=d1<64)
5967*4882a593Smuzhiyun#
5968*4882a593Smuzhiyun# %d0 = denorm threshold
5969*4882a593Smuzhiyun# %d1 = "n" = amt to shift
5970*4882a593Smuzhiyun#
5971*4882a593Smuzhiyun#	---------------------------------------------------------
5972*4882a593Smuzhiyun#	|     FTEMP_HI	  |	FTEMP_LO     |grs000.........000|
5973*4882a593Smuzhiyun#	---------------------------------------------------------
5974*4882a593Smuzhiyun#	<-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
5975*4882a593Smuzhiyun#	\	   \		      \
5976*4882a593Smuzhiyun#	 \	    \		       \
5977*4882a593Smuzhiyun#	  \	     \			-------------------
5978*4882a593Smuzhiyun#	   \	      --------------------		   \
5979*4882a593Smuzhiyun#	    -------------------		  \		    \
5980*4882a593Smuzhiyun#			       \	   \		     \
5981*4882a593Smuzhiyun#				\	    \		      \
5982*4882a593Smuzhiyun#				 \	     \		       \
5983*4882a593Smuzhiyun#	<-------(32)------><-(n)-><-(32 - n)-><------(32)------->
5984*4882a593Smuzhiyun#	---------------------------------------------------------
5985*4882a593Smuzhiyun#	|0...............0|0....0| NEW_LO     |grs		|
5986*4882a593Smuzhiyun#	---------------------------------------------------------
5987*4882a593Smuzhiyun#
5988*4882a593Smuzhiyuncase_2:
5989*4882a593Smuzhiyun	mov.l		%d2, -(%sp)		# create temp storage
5990*4882a593Smuzhiyun
5991*4882a593Smuzhiyun	mov.w		%d0, FTEMP_EX(%a0)	# exponent = denorm threshold
5992*4882a593Smuzhiyun	subi.w		&0x20, %d1		# %d1 now between 0 and 32
5993*4882a593Smuzhiyun	mov.l		&0x20, %d0
5994*4882a593Smuzhiyun	sub.w		%d1, %d0		# %d0 = 32 - %d1
5995*4882a593Smuzhiyun
5996*4882a593Smuzhiyun# subtle step here; or in the g,r,s at the bottom of FTEMP_LO to minimize
5997*4882a593Smuzhiyun# the number of bits to check for the sticky detect.
5998*4882a593Smuzhiyun# it only plays a role in shift amounts of 61-63.
5999*4882a593Smuzhiyun	mov.b		GRS(%a6), %d2
6000*4882a593Smuzhiyun	or.b		%d2, 3+FTEMP_LO2(%a6)
6001*4882a593Smuzhiyun
6002*4882a593Smuzhiyun	bfextu		FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_LO
6003*4882a593Smuzhiyun	bfextu		FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new G,R,S
6004*4882a593Smuzhiyun
6005*4882a593Smuzhiyun	bftst		%d1{&2:&30}		# were any bits shifted off?
6006*4882a593Smuzhiyun	bne.b		case2_set_sticky	# yes; set sticky bit
6007*4882a593Smuzhiyun	bftst		FTEMP_LO2(%a6){%d0:&31}	# were any bits shifted off?
6008*4882a593Smuzhiyun	bne.b		case2_set_sticky	# yes; set sticky bit
6009*4882a593Smuzhiyun
6010*4882a593Smuzhiyun	mov.l		%d1, %d0		# move new G,R,S to %d0
6011*4882a593Smuzhiyun	bra.b		case2_end
6012*4882a593Smuzhiyun
6013*4882a593Smuzhiyuncase2_set_sticky:
6014*4882a593Smuzhiyun	mov.l		%d1, %d0		# move new G,R,S to %d0
6015*4882a593Smuzhiyun	bset		&rnd_stky_bit, %d0	# set sticky bit
6016*4882a593Smuzhiyun
6017*4882a593Smuzhiyuncase2_end:
6018*4882a593Smuzhiyun	clr.l		FTEMP_HI(%a0)		# store FTEMP_HI = 0
6019*4882a593Smuzhiyun	mov.l		%d2, FTEMP_LO(%a0)	# store FTEMP_LO
6020*4882a593Smuzhiyun	and.l		&0xe0000000, %d0	# clear all but G,R,S
6021*4882a593Smuzhiyun
6022*4882a593Smuzhiyun	mov.l		(%sp)+,%d2		# restore temp register
6023*4882a593Smuzhiyun	rts
6024*4882a593Smuzhiyun
6025*4882a593Smuzhiyun#
6026*4882a593Smuzhiyun# case (d1>=64)
6027*4882a593Smuzhiyun#
6028*4882a593Smuzhiyun# %d0 = denorm threshold
6029*4882a593Smuzhiyun# %d1 = amt to shift
6030*4882a593Smuzhiyun#
6031*4882a593Smuzhiyuncase_3:
6032*4882a593Smuzhiyun	mov.w		%d0, FTEMP_EX(%a0)	# insert denorm threshold
6033*4882a593Smuzhiyun
6034*4882a593Smuzhiyun	cmpi.w		%d1, &65		# is shift amt > 65?
6035*4882a593Smuzhiyun	blt.b		case3_64		# no; it's == 64
6036*4882a593Smuzhiyun	beq.b		case3_65		# no; it's == 65
6037*4882a593Smuzhiyun
6038*4882a593Smuzhiyun#
6039*4882a593Smuzhiyun# case (d1>65)
6040*4882a593Smuzhiyun#
6041*4882a593Smuzhiyun# Shift value is > 65 and out of range. All bits are shifted off.
6042*4882a593Smuzhiyun# Return a zero mantissa with the sticky bit set
6043*4882a593Smuzhiyun#
6044*4882a593Smuzhiyun	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
6045*4882a593Smuzhiyun	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
6046*4882a593Smuzhiyun	mov.l		&0x20000000, %d0	# set sticky bit
6047*4882a593Smuzhiyun	rts
6048*4882a593Smuzhiyun
6049*4882a593Smuzhiyun#
6050*4882a593Smuzhiyun# case (d1 == 64)
6051*4882a593Smuzhiyun#
6052*4882a593Smuzhiyun#	---------------------------------------------------------
6053*4882a593Smuzhiyun#	|     FTEMP_HI	  |	FTEMP_LO     |grs000.........000|
6054*4882a593Smuzhiyun#	---------------------------------------------------------
6055*4882a593Smuzhiyun#	<-------(32)------>
6056*4882a593Smuzhiyun#	\		   \
6057*4882a593Smuzhiyun#	 \		    \
6058*4882a593Smuzhiyun#	  \		     \
6059*4882a593Smuzhiyun#	   \		      ------------------------------
6060*4882a593Smuzhiyun#	    -------------------------------		    \
6061*4882a593Smuzhiyun#					   \		     \
6062*4882a593Smuzhiyun#					    \		      \
6063*4882a593Smuzhiyun#					     \		       \
6064*4882a593Smuzhiyun#					      <-------(32)------>
6065*4882a593Smuzhiyun#	---------------------------------------------------------
6066*4882a593Smuzhiyun#	|0...............0|0................0|grs		|
6067*4882a593Smuzhiyun#	---------------------------------------------------------
6068*4882a593Smuzhiyun#
6069*4882a593Smuzhiyuncase3_64:
6070*4882a593Smuzhiyun	mov.l		FTEMP_HI(%a0), %d0	# fetch hi(mantissa)
6071*4882a593Smuzhiyun	mov.l		%d0, %d1		# make a copy
6072*4882a593Smuzhiyun	and.l		&0xc0000000, %d0	# extract G,R
6073*4882a593Smuzhiyun	and.l		&0x3fffffff, %d1	# extract other bits
6074*4882a593Smuzhiyun
6075*4882a593Smuzhiyun	bra.b		case3_complete
6076*4882a593Smuzhiyun
6077*4882a593Smuzhiyun#
6078*4882a593Smuzhiyun# case (d1 == 65)
6079*4882a593Smuzhiyun#
6080*4882a593Smuzhiyun#	---------------------------------------------------------
6081*4882a593Smuzhiyun#	|     FTEMP_HI	  |	FTEMP_LO     |grs000.........000|
6082*4882a593Smuzhiyun#	---------------------------------------------------------
6083*4882a593Smuzhiyun#	<-------(32)------>
6084*4882a593Smuzhiyun#	\		   \
6085*4882a593Smuzhiyun#	 \		    \
6086*4882a593Smuzhiyun#	  \		     \
6087*4882a593Smuzhiyun#	   \		      ------------------------------
6088*4882a593Smuzhiyun#	    --------------------------------		    \
6089*4882a593Smuzhiyun#					    \		     \
6090*4882a593Smuzhiyun#					     \		      \
6091*4882a593Smuzhiyun#					      \		       \
6092*4882a593Smuzhiyun#					       <-------(31)----->
6093*4882a593Smuzhiyun#	---------------------------------------------------------
6094*4882a593Smuzhiyun#	|0...............0|0................0|0rs		|
6095*4882a593Smuzhiyun#	---------------------------------------------------------
6096*4882a593Smuzhiyun#
6097*4882a593Smuzhiyuncase3_65:
6098*4882a593Smuzhiyun	mov.l		FTEMP_HI(%a0), %d0	# fetch hi(mantissa)
6099*4882a593Smuzhiyun	and.l		&0x80000000, %d0	# extract R bit
6100*4882a593Smuzhiyun	lsr.l		&0x1, %d0		# shift high bit into R bit
6101*4882a593Smuzhiyun	and.l		&0x7fffffff, %d1	# extract other bits
6102*4882a593Smuzhiyun
6103*4882a593Smuzhiyuncase3_complete:
6104*4882a593Smuzhiyun# last operation done was an "and" of the bits shifted off so the condition
6105*4882a593Smuzhiyun# codes are already set so branch accordingly.
6106*4882a593Smuzhiyun	bne.b		case3_set_sticky	# yes; go set new sticky
6107*4882a593Smuzhiyun	tst.l		FTEMP_LO(%a0)		# were any bits shifted off?
6108*4882a593Smuzhiyun	bne.b		case3_set_sticky	# yes; go set new sticky
6109*4882a593Smuzhiyun	tst.b		GRS(%a6)		# were any bits shifted off?
6110*4882a593Smuzhiyun	bne.b		case3_set_sticky	# yes; go set new sticky
6111*4882a593Smuzhiyun
6112*4882a593Smuzhiyun#
6113*4882a593Smuzhiyun# no bits were shifted off so don't set the sticky bit.
6114*4882a593Smuzhiyun# the guard and
6115*4882a593Smuzhiyun# the entire mantissa is zero.
6116*4882a593Smuzhiyun#
6117*4882a593Smuzhiyun	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
6118*4882a593Smuzhiyun	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
6119*4882a593Smuzhiyun	rts
6120*4882a593Smuzhiyun
6121*4882a593Smuzhiyun#
6122*4882a593Smuzhiyun# some bits were shifted off so set the sticky bit.
6123*4882a593Smuzhiyun# the entire mantissa is zero.
6124*4882a593Smuzhiyun#
6125*4882a593Smuzhiyuncase3_set_sticky:
6126*4882a593Smuzhiyun	bset		&rnd_stky_bit,%d0	# set new sticky bit
6127*4882a593Smuzhiyun	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
6128*4882a593Smuzhiyun	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
6129*4882a593Smuzhiyun	rts
6130*4882a593Smuzhiyun
6131*4882a593Smuzhiyun#########################################################################
6132*4882a593Smuzhiyun# XDEF ****************************************************************	#
6133*4882a593Smuzhiyun#	_round(): round result according to precision/mode		#
6134*4882a593Smuzhiyun#									#
6135*4882a593Smuzhiyun# XREF ****************************************************************	#
6136*4882a593Smuzhiyun#	None								#
6137*4882a593Smuzhiyun#									#
6138*4882a593Smuzhiyun# INPUT ***************************************************************	#
6139*4882a593Smuzhiyun#	a0	  = ptr to input operand in internal extended format	#
6140*4882a593Smuzhiyun#	d1(hi)    = contains rounding precision:			#
6141*4882a593Smuzhiyun#			ext = $0000xxxx					#
6142*4882a593Smuzhiyun#			sgl = $0004xxxx					#
6143*4882a593Smuzhiyun#			dbl = $0008xxxx					#
6144*4882a593Smuzhiyun#	d1(lo)	  = contains rounding mode:				#
6145*4882a593Smuzhiyun#			RN  = $xxxx0000					#
6146*4882a593Smuzhiyun#			RZ  = $xxxx0001					#
6147*4882a593Smuzhiyun#			RM  = $xxxx0002					#
6148*4882a593Smuzhiyun#			RP  = $xxxx0003					#
6149*4882a593Smuzhiyun#	d0{31:29} = contains the g,r,s bits (extended)			#
6150*4882a593Smuzhiyun#									#
6151*4882a593Smuzhiyun# OUTPUT **************************************************************	#
6152*4882a593Smuzhiyun#	a0 = pointer to rounded result					#
6153*4882a593Smuzhiyun#									#
6154*4882a593Smuzhiyun# ALGORITHM ***********************************************************	#
6155*4882a593Smuzhiyun#	On return the value pointed to by a0 is correctly rounded,	#
6156*4882a593Smuzhiyun#	a0 is preserved and the g-r-s bits in d0 are cleared.		#
6157*4882a593Smuzhiyun#	The result is not typed - the tag field is invalid.  The	#
6158*4882a593Smuzhiyun#	result is still in the internal extended format.		#
6159*4882a593Smuzhiyun#									#
6160*4882a593Smuzhiyun#	The INEX bit of USER_FPSR will be set if the rounded result was	#
6161*4882a593Smuzhiyun#	inexact (i.e. if any of the g-r-s bits were set).		#
6162*4882a593Smuzhiyun#									#
6163*4882a593Smuzhiyun#########################################################################
6164*4882a593Smuzhiyun
6165*4882a593Smuzhiyun	global		_round
6166*4882a593Smuzhiyun_round:
6167*4882a593Smuzhiyun#
6168*4882a593Smuzhiyun# ext_grs() looks at the rounding precision and sets the appropriate
6169*4882a593Smuzhiyun# G,R,S bits.
6170*4882a593Smuzhiyun# If (G,R,S == 0) then result is exact and round is done, else set
6171*4882a593Smuzhiyun# the inex flag in status reg and continue.
6172*4882a593Smuzhiyun#
6173*4882a593Smuzhiyun	bsr.l		ext_grs			# extract G,R,S
6174*4882a593Smuzhiyun
6175*4882a593Smuzhiyun	tst.l		%d0			# are G,R,S zero?
6176*4882a593Smuzhiyun	beq.w		truncate		# yes; round is complete
6177*4882a593Smuzhiyun
6178*4882a593Smuzhiyun	or.w		&inx2a_mask, 2+USER_FPSR(%a6) # set inex2/ainex
6179*4882a593Smuzhiyun
6180*4882a593Smuzhiyun#
6181*4882a593Smuzhiyun# Use rounding mode as an index into a jump table for these modes.
6182*4882a593Smuzhiyun# All of the following assumes grs != 0.
6183*4882a593Smuzhiyun#
6184*4882a593Smuzhiyun	mov.w		(tbl_mode.b,%pc,%d1.w*2), %a1 # load jump offset
6185*4882a593Smuzhiyun	jmp		(tbl_mode.b,%pc,%a1)	# jmp to rnd mode handler
6186*4882a593Smuzhiyun
6187*4882a593Smuzhiyuntbl_mode:
6188*4882a593Smuzhiyun	short		rnd_near - tbl_mode
6189*4882a593Smuzhiyun	short		truncate - tbl_mode	# RZ always truncates
6190*4882a593Smuzhiyun	short		rnd_mnus - tbl_mode
6191*4882a593Smuzhiyun	short		rnd_plus - tbl_mode
6192*4882a593Smuzhiyun
6193*4882a593Smuzhiyun#################################################################
6194*4882a593Smuzhiyun#	ROUND PLUS INFINITY					#
6195*4882a593Smuzhiyun#								#
6196*4882a593Smuzhiyun#	If sign of fp number = 0 (positive), then add 1 to l.	#
6197*4882a593Smuzhiyun#################################################################
6198*4882a593Smuzhiyunrnd_plus:
6199*4882a593Smuzhiyun	tst.b		FTEMP_SGN(%a0)		# check for sign
6200*4882a593Smuzhiyun	bmi.w		truncate		# if positive then truncate
6201*4882a593Smuzhiyun
6202*4882a593Smuzhiyun	mov.l		&0xffffffff, %d0	# force g,r,s to be all f's
6203*4882a593Smuzhiyun	swap		%d1			# set up d1 for round prec.
6204*4882a593Smuzhiyun
6205*4882a593Smuzhiyun	cmpi.b		%d1, &s_mode		# is prec = sgl?
6206*4882a593Smuzhiyun	beq.w		add_sgl			# yes
6207*4882a593Smuzhiyun	bgt.w		add_dbl			# no; it's dbl
6208*4882a593Smuzhiyun	bra.w		add_ext			# no; it's ext
6209*4882a593Smuzhiyun
6210*4882a593Smuzhiyun#################################################################
6211*4882a593Smuzhiyun#	ROUND MINUS INFINITY					#
6212*4882a593Smuzhiyun#								#
6213*4882a593Smuzhiyun#	If sign of fp number = 1 (negative), then add 1 to l.	#
6214*4882a593Smuzhiyun#################################################################
6215*4882a593Smuzhiyunrnd_mnus:
6216*4882a593Smuzhiyun	tst.b		FTEMP_SGN(%a0)		# check for sign
6217*4882a593Smuzhiyun	bpl.w		truncate		# if negative then truncate
6218*4882a593Smuzhiyun
6219*4882a593Smuzhiyun	mov.l		&0xffffffff, %d0	# force g,r,s to be all f's
6220*4882a593Smuzhiyun	swap		%d1			# set up d1 for round prec.
6221*4882a593Smuzhiyun
6222*4882a593Smuzhiyun	cmpi.b		%d1, &s_mode		# is prec = sgl?
6223*4882a593Smuzhiyun	beq.w		add_sgl			# yes
6224*4882a593Smuzhiyun	bgt.w		add_dbl			# no; it's dbl
6225*4882a593Smuzhiyun	bra.w		add_ext			# no; it's ext
6226*4882a593Smuzhiyun
6227*4882a593Smuzhiyun#################################################################
6228*4882a593Smuzhiyun#	ROUND NEAREST						#
6229*4882a593Smuzhiyun#								#
6230*4882a593Smuzhiyun#	If (g=1), then add 1 to l and if (r=s=0), then clear l	#
6231*4882a593Smuzhiyun#	Note that this will round to even in case of a tie.	#
6232*4882a593Smuzhiyun#################################################################
6233*4882a593Smuzhiyunrnd_near:
6234*4882a593Smuzhiyun	asl.l		&0x1, %d0		# shift g-bit to c-bit
6235*4882a593Smuzhiyun	bcc.w		truncate		# if (g=1) then
6236*4882a593Smuzhiyun
6237*4882a593Smuzhiyun	swap		%d1			# set up d1 for round prec.
6238*4882a593Smuzhiyun
6239*4882a593Smuzhiyun	cmpi.b		%d1, &s_mode		# is prec = sgl?
6240*4882a593Smuzhiyun	beq.w		add_sgl			# yes
6241*4882a593Smuzhiyun	bgt.w		add_dbl			# no; it's dbl
6242*4882a593Smuzhiyun	bra.w		add_ext			# no; it's ext
6243*4882a593Smuzhiyun
6244*4882a593Smuzhiyun# *** LOCAL EQUATES ***
6245*4882a593Smuzhiyunset	ad_1_sgl,	0x00000100	# constant to add 1 to l-bit in sgl prec
6246*4882a593Smuzhiyunset	ad_1_dbl,	0x00000800	# constant to add 1 to l-bit in dbl prec
6247*4882a593Smuzhiyun
6248*4882a593Smuzhiyun#########################
6249*4882a593Smuzhiyun#	ADD SINGLE	#
6250*4882a593Smuzhiyun#########################
6251*4882a593Smuzhiyunadd_sgl:
6252*4882a593Smuzhiyun	add.l		&ad_1_sgl, FTEMP_HI(%a0)
6253*4882a593Smuzhiyun	bcc.b		scc_clr			# no mantissa overflow
6254*4882a593Smuzhiyun	roxr.w		FTEMP_HI(%a0)		# shift v-bit back in
6255*4882a593Smuzhiyun	roxr.w		FTEMP_HI+2(%a0)		# shift v-bit back in
6256*4882a593Smuzhiyun	add.w		&0x1, FTEMP_EX(%a0)	# and incr exponent
6257*4882a593Smuzhiyunscc_clr:
6258*4882a593Smuzhiyun	tst.l		%d0			# test for rs = 0
6259*4882a593Smuzhiyun	bne.b		sgl_done
6260*4882a593Smuzhiyun	and.w		&0xfe00, FTEMP_HI+2(%a0) # clear the l-bit
6261*4882a593Smuzhiyunsgl_done:
6262*4882a593Smuzhiyun	and.l		&0xffffff00, FTEMP_HI(%a0) # truncate bits beyond sgl limit
6263*4882a593Smuzhiyun	clr.l		FTEMP_LO(%a0)		# clear d2
6264*4882a593Smuzhiyun	rts
6265*4882a593Smuzhiyun
6266*4882a593Smuzhiyun#########################
6267*4882a593Smuzhiyun#	ADD EXTENDED	#
6268*4882a593Smuzhiyun#########################
6269*4882a593Smuzhiyunadd_ext:
6270*4882a593Smuzhiyun	addq.l		&1,FTEMP_LO(%a0)	# add 1 to l-bit
6271*4882a593Smuzhiyun	bcc.b		xcc_clr			# test for carry out
6272*4882a593Smuzhiyun	addq.l		&1,FTEMP_HI(%a0)	# propagate carry
6273*4882a593Smuzhiyun	bcc.b		xcc_clr
6274*4882a593Smuzhiyun	roxr.w		FTEMP_HI(%a0)		# mant is 0 so restore v-bit
6275*4882a593Smuzhiyun	roxr.w		FTEMP_HI+2(%a0)		# mant is 0 so restore v-bit
6276*4882a593Smuzhiyun	roxr.w		FTEMP_LO(%a0)
6277*4882a593Smuzhiyun	roxr.w		FTEMP_LO+2(%a0)
6278*4882a593Smuzhiyun	add.w		&0x1,FTEMP_EX(%a0)	# and inc exp
6279*4882a593Smuzhiyunxcc_clr:
6280*4882a593Smuzhiyun	tst.l		%d0			# test rs = 0
6281*4882a593Smuzhiyun	bne.b		add_ext_done
6282*4882a593Smuzhiyun	and.b		&0xfe,FTEMP_LO+3(%a0)	# clear the l bit
6283*4882a593Smuzhiyunadd_ext_done:
6284*4882a593Smuzhiyun	rts
6285*4882a593Smuzhiyun
6286*4882a593Smuzhiyun#########################
6287*4882a593Smuzhiyun#	ADD DOUBLE	#
6288*4882a593Smuzhiyun#########################
6289*4882a593Smuzhiyunadd_dbl:
6290*4882a593Smuzhiyun	add.l		&ad_1_dbl, FTEMP_LO(%a0) # add 1 to lsb
6291*4882a593Smuzhiyun	bcc.b		dcc_clr			# no carry
6292*4882a593Smuzhiyun	addq.l		&0x1, FTEMP_HI(%a0)	# propagate carry
6293*4882a593Smuzhiyun	bcc.b		dcc_clr			# no carry
6294*4882a593Smuzhiyun
6295*4882a593Smuzhiyun	roxr.w		FTEMP_HI(%a0)		# mant is 0 so restore v-bit
6296*4882a593Smuzhiyun	roxr.w		FTEMP_HI+2(%a0)		# mant is 0 so restore v-bit
6297*4882a593Smuzhiyun	roxr.w		FTEMP_LO(%a0)
6298*4882a593Smuzhiyun	roxr.w		FTEMP_LO+2(%a0)
6299*4882a593Smuzhiyun	addq.w		&0x1, FTEMP_EX(%a0)	# incr exponent
6300*4882a593Smuzhiyundcc_clr:
6301*4882a593Smuzhiyun	tst.l		%d0			# test for rs = 0
6302*4882a593Smuzhiyun	bne.b		dbl_done
6303*4882a593Smuzhiyun	and.w		&0xf000, FTEMP_LO+2(%a0) # clear the l-bit
6304*4882a593Smuzhiyun
6305*4882a593Smuzhiyundbl_done:
6306*4882a593Smuzhiyun	and.l		&0xfffff800,FTEMP_LO(%a0) # truncate bits beyond dbl limit
6307*4882a593Smuzhiyun	rts
6308*4882a593Smuzhiyun
6309*4882a593Smuzhiyun###########################
6310*4882a593Smuzhiyun# Truncate all other bits #
6311*4882a593Smuzhiyun###########################
6312*4882a593Smuzhiyuntruncate:
6313*4882a593Smuzhiyun	swap		%d1			# select rnd prec
6314*4882a593Smuzhiyun
6315*4882a593Smuzhiyun	cmpi.b		%d1, &s_mode		# is prec sgl?
6316*4882a593Smuzhiyun	beq.w		sgl_done		# yes
6317*4882a593Smuzhiyun	bgt.b		dbl_done		# no; it's dbl
6318*4882a593Smuzhiyun	rts					# no; it's ext
6319*4882a593Smuzhiyun
6320*4882a593Smuzhiyun
6321*4882a593Smuzhiyun#
6322*4882a593Smuzhiyun# ext_grs(): extract guard, round and sticky bits according to
6323*4882a593Smuzhiyun#	     rounding precision.
6324*4882a593Smuzhiyun#
6325*4882a593Smuzhiyun# INPUT
6326*4882a593Smuzhiyun#	d0	   = extended precision g,r,s (in d0{31:29})
6327*4882a593Smuzhiyun#	d1	   = {PREC,ROUND}
6328*4882a593Smuzhiyun# OUTPUT
6329*4882a593Smuzhiyun#	d0{31:29}  = guard, round, sticky
6330*4882a593Smuzhiyun#
6331*4882a593Smuzhiyun# The ext_grs extract the guard/round/sticky bits according to the
6332*4882a593Smuzhiyun# selected rounding precision. It is called by the round subroutine
6333*4882a593Smuzhiyun# only.  All registers except d0 are kept intact. d0 becomes an
6334*4882a593Smuzhiyun# updated guard,round,sticky in d0{31:29}
6335*4882a593Smuzhiyun#
6336*4882a593Smuzhiyun# Notes: the ext_grs uses the round PREC, and therefore has to swap d1
6337*4882a593Smuzhiyun#	 prior to usage, and needs to restore d1 to original. this
6338*4882a593Smuzhiyun#	 routine is tightly tied to the round routine and not meant to
6339*4882a593Smuzhiyun#	 uphold standard subroutine calling practices.
6340*4882a593Smuzhiyun#
6341*4882a593Smuzhiyun
6342*4882a593Smuzhiyunext_grs:
6343*4882a593Smuzhiyun	swap		%d1			# have d1.w point to round precision
6344*4882a593Smuzhiyun	tst.b		%d1			# is rnd prec = extended?
6345*4882a593Smuzhiyun	bne.b		ext_grs_not_ext		# no; go handle sgl or dbl
6346*4882a593Smuzhiyun
6347*4882a593Smuzhiyun#
6348*4882a593Smuzhiyun# %d0 actually already hold g,r,s since _round() had it before calling
6349*4882a593Smuzhiyun# this function. so, as long as we don't disturb it, we are "returning" it.
6350*4882a593Smuzhiyun#
6351*4882a593Smuzhiyunext_grs_ext:
6352*4882a593Smuzhiyun	swap		%d1			# yes; return to correct positions
6353*4882a593Smuzhiyun	rts
6354*4882a593Smuzhiyun
6355*4882a593Smuzhiyunext_grs_not_ext:
6356*4882a593Smuzhiyun	movm.l		&0x3000, -(%sp)		# make some temp registers {d2/d3}
6357*4882a593Smuzhiyun
6358*4882a593Smuzhiyun	cmpi.b		%d1, &s_mode		# is rnd prec = sgl?
6359*4882a593Smuzhiyun	bne.b		ext_grs_dbl		# no; go handle dbl
6360*4882a593Smuzhiyun
6361*4882a593Smuzhiyun#
6362*4882a593Smuzhiyun# sgl:
6363*4882a593Smuzhiyun#	96		64	  40	32		0
6364*4882a593Smuzhiyun#	-----------------------------------------------------
6365*4882a593Smuzhiyun#	| EXP	|XXXXXXX|	  |xx	|		|grs|
6366*4882a593Smuzhiyun#	-----------------------------------------------------
6367*4882a593Smuzhiyun#			<--(24)--->nn\			   /
6368*4882a593Smuzhiyun#				   ee ---------------------
6369*4882a593Smuzhiyun#				   ww		|
6370*4882a593Smuzhiyun#						v
6371*4882a593Smuzhiyun#				   gr	   new sticky
6372*4882a593Smuzhiyun#
6373*4882a593Smuzhiyunext_grs_sgl:
6374*4882a593Smuzhiyun	bfextu		FTEMP_HI(%a0){&24:&2}, %d3 # sgl prec. g-r are 2 bits right
6375*4882a593Smuzhiyun	mov.l		&30, %d2		# of the sgl prec. limits
6376*4882a593Smuzhiyun	lsl.l		%d2, %d3		# shift g-r bits to MSB of d3
6377*4882a593Smuzhiyun	mov.l		FTEMP_HI(%a0), %d2	# get word 2 for s-bit test
6378*4882a593Smuzhiyun	and.l		&0x0000003f, %d2	# s bit is the or of all other
6379*4882a593Smuzhiyun	bne.b		ext_grs_st_stky		# bits to the right of g-r
6380*4882a593Smuzhiyun	tst.l		FTEMP_LO(%a0)		# test lower mantissa
6381*4882a593Smuzhiyun	bne.b		ext_grs_st_stky		# if any are set, set sticky
6382*4882a593Smuzhiyun	tst.l		%d0			# test original g,r,s
6383*4882a593Smuzhiyun	bne.b		ext_grs_st_stky		# if any are set, set sticky
6384*4882a593Smuzhiyun	bra.b		ext_grs_end_sd		# if words 3 and 4 are clr, exit
6385*4882a593Smuzhiyun
6386*4882a593Smuzhiyun#
6387*4882a593Smuzhiyun# dbl:
6388*4882a593Smuzhiyun#	96		64		32	 11	0
6389*4882a593Smuzhiyun#	-----------------------------------------------------
6390*4882a593Smuzhiyun#	| EXP	|XXXXXXX|		|	 |xx	|grs|
6391*4882a593Smuzhiyun#	-----------------------------------------------------
6392*4882a593Smuzhiyun#						  nn\	    /
6393*4882a593Smuzhiyun#						  ee -------
6394*4882a593Smuzhiyun#						  ww	|
6395*4882a593Smuzhiyun#							v
6396*4882a593Smuzhiyun#						  gr	new sticky
6397*4882a593Smuzhiyun#
6398*4882a593Smuzhiyunext_grs_dbl:
6399*4882a593Smuzhiyun	bfextu		FTEMP_LO(%a0){&21:&2}, %d3 # dbl-prec. g-r are 2 bits right
6400*4882a593Smuzhiyun	mov.l		&30, %d2		# of the dbl prec. limits
6401*4882a593Smuzhiyun	lsl.l		%d2, %d3		# shift g-r bits to the MSB of d3
6402*4882a593Smuzhiyun	mov.l		FTEMP_LO(%a0), %d2	# get lower mantissa  for s-bit test
6403*4882a593Smuzhiyun	and.l		&0x000001ff, %d2	# s bit is the or-ing of all
6404*4882a593Smuzhiyun	bne.b		ext_grs_st_stky		# other bits to the right of g-r
6405*4882a593Smuzhiyun	tst.l		%d0			# test word original g,r,s
6406*4882a593Smuzhiyun	bne.b		ext_grs_st_stky		# if any are set, set sticky
6407*4882a593Smuzhiyun	bra.b		ext_grs_end_sd		# if clear, exit
6408*4882a593Smuzhiyun
6409*4882a593Smuzhiyunext_grs_st_stky:
6410*4882a593Smuzhiyun	bset		&rnd_stky_bit, %d3	# set sticky bit
6411*4882a593Smuzhiyunext_grs_end_sd:
6412*4882a593Smuzhiyun	mov.l		%d3, %d0		# return grs to d0
6413*4882a593Smuzhiyun
6414*4882a593Smuzhiyun	movm.l		(%sp)+, &0xc		# restore scratch registers {d2/d3}
6415*4882a593Smuzhiyun
6416*4882a593Smuzhiyun	swap		%d1			# restore d1 to original
6417*4882a593Smuzhiyun	rts
6418*4882a593Smuzhiyun
6419*4882a593Smuzhiyun#########################################################################
6420*4882a593Smuzhiyun# norm(): normalize the mantissa of an extended precision input. the	#
6421*4882a593Smuzhiyun#	  input operand should not be normalized already.		#
6422*4882a593Smuzhiyun#									#
6423*4882a593Smuzhiyun# XDEF ****************************************************************	#
6424*4882a593Smuzhiyun#	norm()								#
6425*4882a593Smuzhiyun#									#
6426*4882a593Smuzhiyun# XREF **************************************************************** #
6427*4882a593Smuzhiyun#	none								#
6428*4882a593Smuzhiyun#									#
6429*4882a593Smuzhiyun# INPUT *************************************************************** #
6430*4882a593Smuzhiyun#	a0 = pointer fp extended precision operand to normalize		#
6431*4882a593Smuzhiyun#									#
6432*4882a593Smuzhiyun# OUTPUT ************************************************************** #
6433*4882a593Smuzhiyun#	d0 = number of bit positions the mantissa was shifted		#
6434*4882a593Smuzhiyun#	a0 = the input operand's mantissa is normalized; the exponent	#
6435*4882a593Smuzhiyun#	     is unchanged.						#
6436*4882a593Smuzhiyun#									#
6437*4882a593Smuzhiyun#########################################################################
6438*4882a593Smuzhiyun	global		norm
6439*4882a593Smuzhiyunnorm:
6440*4882a593Smuzhiyun	mov.l		%d2, -(%sp)		# create some temp regs
6441*4882a593Smuzhiyun	mov.l		%d3, -(%sp)
6442*4882a593Smuzhiyun
6443*4882a593Smuzhiyun	mov.l		FTEMP_HI(%a0), %d0	# load hi(mantissa)
6444*4882a593Smuzhiyun	mov.l		FTEMP_LO(%a0), %d1	# load lo(mantissa)
6445*4882a593Smuzhiyun
6446*4882a593Smuzhiyun	bfffo		%d0{&0:&32}, %d2	# how many places to shift?
6447*4882a593Smuzhiyun	beq.b		norm_lo			# hi(man) is all zeroes!
6448*4882a593Smuzhiyun
6449*4882a593Smuzhiyunnorm_hi:
6450*4882a593Smuzhiyun	lsl.l		%d2, %d0		# left shift hi(man)
6451*4882a593Smuzhiyun	bfextu		%d1{&0:%d2}, %d3	# extract lo bits
6452*4882a593Smuzhiyun
6453*4882a593Smuzhiyun	or.l		%d3, %d0		# create hi(man)
6454*4882a593Smuzhiyun	lsl.l		%d2, %d1		# create lo(man)
6455*4882a593Smuzhiyun
6456*4882a593Smuzhiyun	mov.l		%d0, FTEMP_HI(%a0)	# store new hi(man)
6457*4882a593Smuzhiyun	mov.l		%d1, FTEMP_LO(%a0)	# store new lo(man)
6458*4882a593Smuzhiyun
6459*4882a593Smuzhiyun	mov.l		%d2, %d0		# return shift amount
6460*4882a593Smuzhiyun
6461*4882a593Smuzhiyun	mov.l		(%sp)+, %d3		# restore temp regs
6462*4882a593Smuzhiyun	mov.l		(%sp)+, %d2
6463*4882a593Smuzhiyun
6464*4882a593Smuzhiyun	rts
6465*4882a593Smuzhiyun
6466*4882a593Smuzhiyunnorm_lo:
6467*4882a593Smuzhiyun	bfffo		%d1{&0:&32}, %d2	# how many places to shift?
6468*4882a593Smuzhiyun	lsl.l		%d2, %d1		# shift lo(man)
6469*4882a593Smuzhiyun	add.l		&32, %d2		# add 32 to shft amount
6470*4882a593Smuzhiyun
6471*4882a593Smuzhiyun	mov.l		%d1, FTEMP_HI(%a0)	# store hi(man)
6472*4882a593Smuzhiyun	clr.l		FTEMP_LO(%a0)		# lo(man) is now zero
6473*4882a593Smuzhiyun
6474*4882a593Smuzhiyun	mov.l		%d2, %d0		# return shift amount
6475*4882a593Smuzhiyun
6476*4882a593Smuzhiyun	mov.l		(%sp)+, %d3		# restore temp regs
6477*4882a593Smuzhiyun	mov.l		(%sp)+, %d2
6478*4882a593Smuzhiyun
6479*4882a593Smuzhiyun	rts
6480*4882a593Smuzhiyun
6481*4882a593Smuzhiyun#########################################################################
6482*4882a593Smuzhiyun# unnorm_fix(): - changes an UNNORM to one of NORM, DENORM, or ZERO	#
6483*4882a593Smuzhiyun#		- returns corresponding optype tag			#
6484*4882a593Smuzhiyun#									#
6485*4882a593Smuzhiyun# XDEF ****************************************************************	#
6486*4882a593Smuzhiyun#	unnorm_fix()							#
6487*4882a593Smuzhiyun#									#
6488*4882a593Smuzhiyun# XREF **************************************************************** #
6489*4882a593Smuzhiyun#	norm() - normalize the mantissa					#
6490*4882a593Smuzhiyun#									#
6491*4882a593Smuzhiyun# INPUT *************************************************************** #
6492*4882a593Smuzhiyun#	a0 = pointer to unnormalized extended precision number		#
6493*4882a593Smuzhiyun#									#
6494*4882a593Smuzhiyun# OUTPUT ************************************************************** #
6495*4882a593Smuzhiyun#	d0 = optype tag - is corrected to one of NORM, DENORM, or ZERO	#
6496*4882a593Smuzhiyun#	a0 = input operand has been converted to a norm, denorm, or	#
6497*4882a593Smuzhiyun#	     zero; both the exponent and mantissa are changed.		#
6498*4882a593Smuzhiyun#									#
6499*4882a593Smuzhiyun#########################################################################
6500*4882a593Smuzhiyun
6501*4882a593Smuzhiyun	global		unnorm_fix
6502*4882a593Smuzhiyununnorm_fix:
6503*4882a593Smuzhiyun	bfffo		FTEMP_HI(%a0){&0:&32}, %d0 # how many shifts are needed?
6504*4882a593Smuzhiyun	bne.b		unnorm_shift		# hi(man) is not all zeroes
6505*4882a593Smuzhiyun
6506*4882a593Smuzhiyun#
6507*4882a593Smuzhiyun# hi(man) is all zeroes so see if any bits in lo(man) are set
6508*4882a593Smuzhiyun#
6509*4882a593Smuzhiyununnorm_chk_lo:
6510*4882a593Smuzhiyun	bfffo		FTEMP_LO(%a0){&0:&32}, %d0 # is operand really a zero?
6511*4882a593Smuzhiyun	beq.w		unnorm_zero		# yes
6512*4882a593Smuzhiyun
6513*4882a593Smuzhiyun	add.w		&32, %d0		# no; fix shift distance
6514*4882a593Smuzhiyun
6515*4882a593Smuzhiyun#
6516*4882a593Smuzhiyun# d0 = # shifts needed for complete normalization
6517*4882a593Smuzhiyun#
6518*4882a593Smuzhiyununnorm_shift:
6519*4882a593Smuzhiyun	clr.l		%d1			# clear top word
6520*4882a593Smuzhiyun	mov.w		FTEMP_EX(%a0), %d1	# extract exponent
6521*4882a593Smuzhiyun	and.w		&0x7fff, %d1		# strip off sgn
6522*4882a593Smuzhiyun
6523*4882a593Smuzhiyun	cmp.w		%d0, %d1		# will denorm push exp < 0?
6524*4882a593Smuzhiyun	bgt.b		unnorm_nrm_zero		# yes; denorm only until exp = 0
6525*4882a593Smuzhiyun
6526*4882a593Smuzhiyun#
6527*4882a593Smuzhiyun# exponent would not go < 0. Therefore, number stays normalized
6528*4882a593Smuzhiyun#
6529*4882a593Smuzhiyun	sub.w		%d0, %d1		# shift exponent value
6530*4882a593Smuzhiyun	mov.w		FTEMP_EX(%a0), %d0	# load old exponent
6531*4882a593Smuzhiyun	and.w		&0x8000, %d0		# save old sign
6532*4882a593Smuzhiyun	or.w		%d0, %d1		# {sgn,new exp}
6533*4882a593Smuzhiyun	mov.w		%d1, FTEMP_EX(%a0)	# insert new exponent
6534*4882a593Smuzhiyun
6535*4882a593Smuzhiyun	bsr.l		norm			# normalize UNNORM
6536*4882a593Smuzhiyun
6537*4882a593Smuzhiyun	mov.b		&NORM, %d0		# return new optype tag
6538*4882a593Smuzhiyun	rts
6539*4882a593Smuzhiyun
6540*4882a593Smuzhiyun#
6541*4882a593Smuzhiyun# exponent would go < 0, so only denormalize until exp = 0
6542*4882a593Smuzhiyun#
6543*4882a593Smuzhiyununnorm_nrm_zero:
6544*4882a593Smuzhiyun	cmp.b		%d1, &32		# is exp <= 32?
6545*4882a593Smuzhiyun	bgt.b		unnorm_nrm_zero_lrg	# no; go handle large exponent
6546*4882a593Smuzhiyun
6547*4882a593Smuzhiyun	bfextu		FTEMP_HI(%a0){%d1:&32}, %d0 # extract new hi(man)
6548*4882a593Smuzhiyun	mov.l		%d0, FTEMP_HI(%a0)	# save new hi(man)
6549*4882a593Smuzhiyun
6550*4882a593Smuzhiyun	mov.l		FTEMP_LO(%a0), %d0	# fetch old lo(man)
6551*4882a593Smuzhiyun	lsl.l		%d1, %d0		# extract new lo(man)
6552*4882a593Smuzhiyun	mov.l		%d0, FTEMP_LO(%a0)	# save new lo(man)
6553*4882a593Smuzhiyun
6554*4882a593Smuzhiyun	and.w		&0x8000, FTEMP_EX(%a0)	# set exp = 0
6555*4882a593Smuzhiyun
6556*4882a593Smuzhiyun	mov.b		&DENORM, %d0		# return new optype tag
6557*4882a593Smuzhiyun	rts
6558*4882a593Smuzhiyun
#
# only mantissa bits set are in lo(man): the shift amount exceeds 32,
# so hi(man) is shifted out entirely and lo(man) becomes the new hi(man).
#
unnorm_nrm_zero_lrg:
	sub.w		&32, %d1		# adjust shft amt by 32

	mov.l		FTEMP_LO(%a0), %d0	# fetch old lo(man)
	lsl.l		%d1, %d0		# left shift lo(man)

	mov.l		%d0, FTEMP_HI(%a0)	# store new hi(man)
	clr.l		FTEMP_LO(%a0)		# lo(man) = 0

	and.w		&0x8000, FTEMP_EX(%a0)	# set exp = 0; preserve sign bit

	mov.b		&DENORM, %d0		# return new optype tag
	rts
6575*4882a593Smuzhiyun
#
# whole mantissa is zero so this UNNORM is actually a zero:
# force the operand to a true zero and return the ZERO optype tag.
#
unnorm_zero:
	and.w		&0x8000, FTEMP_EX(%a0)	# force exponent to zero; keep sign

	mov.b		&ZERO, %d0		# fix optype tag
	rts
6584*4882a593Smuzhiyun
#########################################################################
# XDEF ****************************************************************	#
#	set_tag_x(): return the optype of the input ext fp number	#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision operand			#
#									#
# OUTPUT **************************************************************	#
#	d0 = value of type tag						#
#		one of: NORM, INF, QNAN, SNAN, DENORM, UNNORM, ZERO	#
#									#
# ALGORITHM ***********************************************************	#
#	Simply test the exponent, j-bit, and mantissa values to		#
# determine the type of operand.					#
#	If it's an unnormalized zero, alter the operand and force it	#
# to be a normal zero.							#
#									#
#########################################################################

	global		set_tag_x
set_tag_x:
	mov.w		FTEMP_EX(%a0), %d0	# extract exponent
	andi.w		&0x7fff, %d0		# strip off sign
	cmpi.w		%d0, &0x7fff		# is (EXP == MAX)?
	beq.b		inf_or_nan_x
not_inf_or_nan_x:
	btst		&0x7,FTEMP_HI(%a0)	# test j-bit (explicit integer bit)
	beq.b		not_norm_x		# j-bit clear; not a NORM
is_norm_x:
	mov.b		&NORM, %d0
	rts
not_norm_x:
	tst.w		%d0			# is exponent = 0?
	bne.b		is_unnorm_x		# exp != 0 w/ j-bit clear => UNNORM
not_unnorm_x:
	tst.l		FTEMP_HI(%a0)		# exp = 0; test mantissa for zero
	bne.b		is_denorm_x
	tst.l		FTEMP_LO(%a0)
	bne.b		is_denorm_x
is_zero_x:
	mov.b		&ZERO, %d0
	rts
is_denorm_x:
	mov.b		&DENORM, %d0
	rts
# must distinguish now "Unnormalized zeroes" which we
# must convert to zero.
is_unnorm_x:
	tst.l		FTEMP_HI(%a0)		# any mantissa bit set?
	bne.b		is_unnorm_reg_x
	tst.l		FTEMP_LO(%a0)
	bne.b		is_unnorm_reg_x
# it's an "unnormalized zero". let's convert it to an actual zero...
	andi.w		&0x8000,FTEMP_EX(%a0)	# clear exponent; keep sign bit
	mov.b		&ZERO, %d0
	rts
is_unnorm_reg_x:
	mov.b		&UNNORM, %d0
	rts
inf_or_nan_x:
	tst.l		FTEMP_LO(%a0)		# any mantissa bit set => NAN
	bne.b		is_nan_x
	mov.l		FTEMP_HI(%a0), %d0
	and.l		&0x7fffffff, %d0	# msb is a don't care!
	bne.b		is_nan_x
is_inf_x:
	mov.b		&INF, %d0
	rts
is_nan_x:
	btst		&0x6, FTEMP_HI(%a0)	# is the quiet bit set?
	beq.b		is_snan_x
	mov.b		&QNAN, %d0
	rts
is_snan_x:
	mov.b		&SNAN, %d0
	rts
6664*4882a593Smuzhiyun
#########################################################################
# XDEF ****************************************************************	#
#	set_tag_d(): return the optype of the input dbl fp number	#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	a0 = points to double precision operand				#
#									#
# OUTPUT **************************************************************	#
#	d0 = value of type tag						#
#		one of: NORM, INF, QNAN, SNAN, DENORM, ZERO		#
#									#
# ALGORITHM ***********************************************************	#
#	Simply test the exponent, j-bit, and mantissa values to		#
# determine the type of operand.					#
#									#
#########################################################################

	global		set_tag_d
set_tag_d:
	mov.l		FTEMP(%a0), %d0		# load hi longword: sgn,exp,hi(man)
	mov.l		%d0, %d1		# keep copy for mantissa tests

	andi.l		&0x7ff00000, %d0	# isolate 11-bit exponent
	beq.b		zero_or_denorm_d	# exp = 0 => zero or denorm

	cmpi.l		%d0, &0x7ff00000	# exp = max => inf or nan
	beq.b		inf_or_nan_d

is_norm_d:
	mov.b		&NORM, %d0
	rts
zero_or_denorm_d:
	and.l		&0x000fffff, %d1	# any hi(man) bits set?
	bne		is_denorm_d
	tst.l		4+FTEMP(%a0)		# any lo(man) bits set?
	bne		is_denorm_d
is_zero_d:
	mov.b		&ZERO, %d0
	rts
is_denorm_d:
	mov.b		&DENORM, %d0
	rts
inf_or_nan_d:
	and.l		&0x000fffff, %d1	# any hi(man) bits set?
	bne		is_nan_d
	tst.l		4+FTEMP(%a0)		# any lo(man) bits set?
	bne		is_nan_d
is_inf_d:
	mov.b		&INF, %d0
	rts
is_nan_d:
	btst		&19, %d1		# quiet bit (msb of mantissa) set?
	bne		is_qnan_d
is_snan_d:
	mov.b		&SNAN, %d0
	rts
is_qnan_d:
	mov.b		&QNAN, %d0
	rts
6727*4882a593Smuzhiyun
#########################################################################
# XDEF ****************************************************************	#
#	set_tag_s(): return the optype of the input sgl fp number	#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to single precision operand			#
#									#
# OUTPUT **************************************************************	#
#	d0 = value of type tag						#
#		one of: NORM, INF, QNAN, SNAN, DENORM, ZERO		#
#									#
# ALGORITHM ***********************************************************	#
#	Simply test the exponent, j-bit, and mantissa values to		#
# determine the type of operand.					#
#									#
#########################################################################

	global		set_tag_s
set_tag_s:
	mov.l		FTEMP(%a0), %d0		# load the 32-bit sgl operand
	mov.l		%d0, %d1		# keep copy for mantissa tests

	andi.l		&0x7f800000, %d0	# isolate 8-bit exponent
	beq.b		zero_or_denorm_s	# exp = 0 => zero or denorm

	cmpi.l		%d0, &0x7f800000	# exp = max => inf or nan
	beq.b		inf_or_nan_s

is_norm_s:
	mov.b		&NORM, %d0
	rts
zero_or_denorm_s:
	and.l		&0x007fffff, %d1	# any mantissa bits set?
	bne		is_denorm_s
is_zero_s:
	mov.b		&ZERO, %d0
	rts
is_denorm_s:
	mov.b		&DENORM, %d0
	rts
inf_or_nan_s:
	and.l		&0x007fffff, %d1	# any mantissa bits set => NAN
	bne		is_nan_s
is_inf_s:
	mov.b		&INF, %d0
	rts
is_nan_s:
	btst		&22, %d1		# quiet bit (msb of mantissa) set?
	bne		is_qnan_s
is_snan_s:
	mov.b		&SNAN, %d0
	rts
is_qnan_s:
	mov.b		&QNAN, %d0
	rts
6786*4882a593Smuzhiyun
#########################################################################
# XDEF ****************************************************************	#
#	unf_res(): routine to produce default underflow result of a	#
#		   scaled extended precision number; this is used by	#
#		   fadd/fdiv/fmul/etc. emulation routines.		#
#	unf_res4(): same as above but for fsglmul/fsgldiv which use	#
#		    single round prec and extended prec mode.		#
#									#
# XREF ****************************************************************	#
#	_denorm() - denormalize according to scale factor		#
#	_round() - round denormalized number according to rnd prec	#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precison operand			#
#	d0 = scale factor						#
#	d1 = rounding precision/mode					#
#									#
# OUTPUT **************************************************************	#
#	a0 = pointer to default underflow result in extended precision	#
#	d0.b = result FPSR_cc which caller may or may not want to save	#
#									#
# ALGORITHM ***********************************************************	#
#	Convert the input operand to "internal format" which means the	#
# exponent is extended to 16 bits and the sign is stored in the unused	#
# portion of the extended precison operand. Denormalize the number	#
# according to the scale factor passed in d0. Then, round the		#
# denormalized result.							#
#	Set the FPSR_exc bits as appropriate but return the cc bits in	#
# d0 in case the caller doesn't want to save them (as is the case for	#
# fmove out).								#
#	unf_res4() for fsglmul/fsgldiv forces the denorm to extended	#
# precision and the rounding mode to single.				#
#									#
# Stack layout during the _denorm/_round calls:				#
#	0x0(%sp) = saved operand ptr (a0)				#
#	0x4(%sp) = saved rnd prec/mode longword (d1 on entry);		#
#		   0x6(%sp) addresses its low word			#
#########################################################################
	global		unf_res
unf_res:
	mov.l		%d1, -(%sp)		# save rnd prec,mode on stack

	btst		&0x7, FTEMP_EX(%a0)	# make "internal" format:
	sne		FTEMP_SGN(%a0)		# sign byte <- 0x00(+)/0xff(-)

	mov.w		FTEMP_EX(%a0), %d1	# extract exponent
	and.w		&0x7fff, %d1		# strip the sign bit
	sub.w		%d0, %d1		# bias down by scale factor
	mov.w		%d1, FTEMP_EX(%a0)	# insert 16 bit exponent

	mov.l		%a0, -(%sp)		# save operand ptr during calls

	mov.l		0x4(%sp),%d0		# pass rnd prec.
	andi.w		&0x00c0,%d0		# isolate precision field
	lsr.w		&0x4,%d0		# shift into position for _denorm
	bsr.l		_denorm			# denorm result

	mov.l		(%sp),%a0		# restore operand ptr
	mov.w		0x6(%sp),%d1		# load prec:mode into %d1
	andi.w		&0xc0,%d1		# extract rnd prec
	lsr.w		&0x4,%d1
	swap		%d1			# rnd prec goes in hi word
	mov.w		0x6(%sp),%d1		# reload prec:mode
	andi.w		&0x30,%d1		# extract rnd mode bits
	lsr.w		&0x4,%d1		# lo word = rnd mode
	bsr.l		_round			# round the denorm

	mov.l		(%sp)+, %a0		# restore operand ptr; pop it

# result is now rounded properly. convert back to normal format
	bclr		&0x7, FTEMP_EX(%a0)	# clear sgn first; may have residue
	tst.b		FTEMP_SGN(%a0)		# is "internal result" sign set?
	beq.b		unf_res_chkifzero	# no; result is positive
	bset		&0x7, FTEMP_EX(%a0)	# set result sgn
	clr.b		FTEMP_SGN(%a0)		# clear temp sign

# the number may have become zero after rounding. set ccodes accordingly.
unf_res_chkifzero:
	clr.l		%d0
	tst.l		FTEMP_HI(%a0)		# is value now a zero?
	bne.b		unf_res_cont		# no
	tst.l		FTEMP_LO(%a0)
	bne.b		unf_res_cont		# no
#	bset		&z_bit, FPSR_CC(%a6)	# yes; set zero ccode bit
	bset		&z_bit, %d0		# yes; set zero ccode bit

unf_res_cont:

#
# can inex1 also be set along with unfl and inex2???
#
# we know that underflow has occurred. aunfl should be set if INEX2 is also set.
#
	btst		&inex2_bit, FPSR_EXCEPT(%a6) # is INEX2 set?
	beq.b		unf_res_end		# no
	bset		&aunfl_bit, FPSR_AEXCEPT(%a6) # yes; set aunfl

unf_res_end:
	add.l		&0x4, %sp		# clear stack
	rts
6883*4882a593Smuzhiyun
# unf_res() for fsglmul() and fsgldiv().
# Identical to unf_res() above except that the denormalization is forced
# to extended precision and the rounding precision is forced to single;
# only the caller's rounding MODE (stacked d1) is honored.
	global		unf_res4
unf_res4:
	mov.l		%d1,-(%sp)		# save rnd prec,mode on stack

	btst		&0x7,FTEMP_EX(%a0)	# make "internal" format:
	sne		FTEMP_SGN(%a0)		# sign byte <- 0x00(+)/0xff(-)

	mov.w		FTEMP_EX(%a0),%d1	# extract exponent
	and.w		&0x7fff,%d1		# strip the sign bit
	sub.w		%d0,%d1			# bias down by scale factor
	mov.w		%d1,FTEMP_EX(%a0)	# insert 16 bit exponent

	mov.l		%a0,-(%sp)		# save operand ptr during calls

	clr.l		%d0			# force rnd prec = ext
	bsr.l		_denorm			# denorm result

	mov.l		(%sp),%a0		# restore operand ptr
	mov.w		&s_mode,%d1		# force rnd prec = sgl
	swap		%d1			# rnd prec goes in hi word
	mov.w		0x6(%sp),%d1		# load rnd mode
	andi.w		&0x30,%d1		# extract rnd mode bits
	lsr.w		&0x4,%d1		# lo word = rnd mode
	bsr.l		_round			# round the denorm

	mov.l		(%sp)+,%a0		# restore operand ptr; pop it

# result is now rounded properly. convert back to normal format
	bclr		&0x7,FTEMP_EX(%a0)	# clear sgn first; may have residue
	tst.b		FTEMP_SGN(%a0)		# is "internal result" sign set?
	beq.b		unf_res4_chkifzero	# no; result is positive
	bset		&0x7,FTEMP_EX(%a0)	# set result sgn
	clr.b		FTEMP_SGN(%a0)		# clear temp sign

# the number may have become zero after rounding. set ccodes accordingly.
unf_res4_chkifzero:
	clr.l		%d0
	tst.l		FTEMP_HI(%a0)		# is value now a zero?
	bne.b		unf_res4_cont		# no
	tst.l		FTEMP_LO(%a0)
	bne.b		unf_res4_cont		# no
#	bset		&z_bit,FPSR_CC(%a6)	# yes; set zero ccode bit
	bset		&z_bit,%d0		# yes; set zero ccode bit

unf_res4_cont:

#
# can inex1 also be set along with unfl and inex2???
#
# we know that underflow has occurred. aunfl should be set if INEX2 is also set.
#
	btst		&inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
	beq.b		unf_res4_end		# no
	bset		&aunfl_bit,FPSR_AEXCEPT(%a6) # yes; set aunfl

unf_res4_end:
	add.l		&0x4,%sp		# clear stack
	rts
6943*4882a593Smuzhiyun
#########################################################################
# XDEF ****************************************************************	#
#	ovf_res(): routine to produce the default overflow result of	#
#		   an overflowing number.				#
#	ovf_res2(): same as above but the rnd mode/prec are passed	#
#		    differently.					#
#									#
# XREF ****************************************************************	#
#	none								#
#									#
# INPUT ***************************************************************	#
#	d1.b	= '-1' => (-); '0' => (+)				#
#   ovf_res():								#
#	d0	= rnd mode/prec						#
#   ovf_res2():								#
#	hi(d0)	= rnd prec						#
#	lo(d0)	= rnd mode						#
#									#
# OUTPUT **************************************************************	#
#	a0	= points to extended precision result			#
#	d0.b	= condition code bits					#
#									#
# ALGORITHM ***********************************************************	#
#	The default overflow result can be determined by the sign of	#
# the result and the rounding mode/prec in effect. These bits are	#
# concatenated together to create an index into the default result	#
# table. A pointer to the correct result is returned in a0. The		#
# resulting condition codes are returned in d0 in case the caller	#
# doesn't want FPSR_cc altered (as is the case for fmove out).		#
#									#
# Index layout: bit 4 = sign, bits 3:2 = rnd prec, bits 1:0 = rnd mode.	#
#########################################################################

	global		ovf_res
ovf_res:
	andi.w		&0x10,%d1		# keep result sign
	lsr.b		&0x4,%d0		# shift prec/mode down from bits 7:4
	or.b		%d0,%d1			# concat the two
	mov.w		%d1,%d0			# make a copy
	lsl.b		&0x1,%d1		# multiply d1 by 2
	bra.b		ovf_res_load

	global		ovf_res2
ovf_res2:
	and.w		&0x10, %d1		# keep result sign
	or.b		%d0, %d1		# insert rnd mode
	swap		%d0			# bring rnd prec into lo word
	or.b		%d0, %d1		# insert rnd prec
	mov.w		%d1, %d0		# make a copy
	lsl.b		&0x1, %d1		# shift left by 1

#
# use the rounding mode, precision, and result sign as in index into the
# two tables below to fetch the default result and the result ccodes.
# d0 indexes the 1-byte cc entries; d1 (= index*2) scaled by 8 indexes
# the 16-byte extended precision result entries.
#
ovf_res_load:
	mov.b		(tbl_ovfl_cc.b,%pc,%d0.w*1), %d0 # fetch result ccodes
	lea		(tbl_ovfl_result.b,%pc,%d1.w*8), %a0 # return result ptr

	rts

# condition code bytes, one per (sign,prec,mode) combination; rows
# parallel tbl_ovfl_result below: 0x2 marks an INF result, 0x8 marks a
# negative result. the all-zero 4th row pads the unused prec encoding.
tbl_ovfl_cc:
	byte		0x2, 0x0, 0x0, 0x2
	byte		0x2, 0x0, 0x0, 0x2
	byte		0x2, 0x0, 0x0, 0x2
	byte		0x0, 0x0, 0x0, 0x0
	byte		0x2+0x8, 0x8, 0x2+0x8, 0x8
	byte		0x2+0x8, 0x8, 0x2+0x8, 0x8
	byte		0x2+0x8, 0x8, 0x2+0x8, 0x8

# default results in extended precision, 16 bytes each; four rounding
# modes (RN,RZ,RM,RP) per precision (ext,sgl,dbl), positive then negative.
tbl_ovfl_result:
	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
	long		0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RZ
	long		0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RM
	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP

	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
	long		0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RZ
	long		0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RM
	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP

	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
	long		0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RZ
	long		0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RM
	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP

	long		0x00000000,0x00000000,0x00000000,0x00000000
	long		0x00000000,0x00000000,0x00000000,0x00000000
	long		0x00000000,0x00000000,0x00000000,0x00000000
	long		0x00000000,0x00000000,0x00000000,0x00000000

	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
	long		0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RZ
	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
	long		0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RP

	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
	long		0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RZ
	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
	long		0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RP

	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
	long		0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RZ
	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
	long		0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RP
7048*4882a593Smuzhiyun
#########################################################################
# XDEF ****************************************************************	#
#	fout(): move from fp register to memory or data register	#
#									#
# XREF ****************************************************************	#
#	_round() - needed to create EXOP for sgl/dbl precision		#
#	norm() - needed to create EXOP for extended precision		#
#	ovf_res() - create default overflow result for sgl/dbl precision#
#	unf_res() - create default underflow result for sgl/dbl prec.	#
#	dst_dbl() - create rounded dbl precision result.		#
#	dst_sgl() - create rounded sgl precision result.		#
#	fetch_dreg() - fetch dynamic k-factor reg for packed.		#
#	bindec() - convert FP binary number to packed number.		#
#	_mem_write() - write data to memory.				#
#	_mem_write2() - write data to memory unless supv mode -(a7) exc.#
#	_dmem_write_{byte,word,long}() - write data to memory.		#
#	store_dreg_{b,w,l}() - store data to data register file.	#
#	facc_out_{b,w,l,d,x}() - data access error occurred.		#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision source operand		#
#	d0 = round prec,mode						#
#									#
# OUTPUT **************************************************************	#
#	fp0 : intermediate underflow or overflow result if		#
#	      OVFL/UNFL occurred for a sgl or dbl operand		#
#									#
# ALGORITHM ***********************************************************	#
#	This routine is accessed by many handlers that need to do an	#
# opclass three move of an operand out to memory.			#
#	Decode an fmove out (opclass 3) instruction to determine if	#
# it's b,w,l,s,d,x, or p in size. b,w,l can be stored to either a data	#
# register or memory. The algorithm uses a standard "fmove" to create	#
# the rounded result. Also, since exceptions are disabled, this also	#
# create the correct OPERR default result if appropriate.		#
#	For sgl or dbl precision, overflow or underflow can occur. If	#
# either occurs and is enabled, the EXOP.				#
#	For extended precision, the stacked <ea> must be fixed along	#
# w/ the address index register as appropriate w/ _calc_ea_fout(). If	#
# the source is a denorm and if underflow is enabled, an EXOP must be	#
# created.								#
#	For packed, the k-factor must be fetched from the instruction	#
# word or a data register. The <ea> must be fixed as w/ extended	#
# precision. Then, bindec() is called to create the appropriate		#
# packed result.							#
#	If at any time an access error is flagged by one of the move-	#
# to-memory routines, then a special exit must be made so that the	#
# access error can be handled properly.					#
#									#
#########################################################################

	global		fout
fout:
	bfextu		EXC_CMDREG(%a6){&3:&3},%d1 # extract dst fmt
	mov.w		(tbl_fout.b,%pc,%d1.w*2),%a1 # use as index
	jmp		(tbl_fout.b,%pc,%a1)	# jump to routine

	swbeg		&0x8
# dispatch table ordered by the 3-bit destination format field:
# l, s, x, p(static k), w, d, b, p(dynamic k)
tbl_fout:
	short		fout_long	-	tbl_fout
	short		fout_sgl	-	tbl_fout
	short		fout_ext	-	tbl_fout
	short		fout_pack	-	tbl_fout
	short		fout_word	-	tbl_fout
	short		fout_dbl	-	tbl_fout
	short		fout_byte	-	tbl_fout
	short		fout_pack	-	tbl_fout
7116*4882a593Smuzhiyun
#################################################################
# fmove.b out ###################################################
#################################################################

# Only "Unimplemented Data Type" exceptions enter here. The operand
# is either a DENORM or a NORM.
fout_byte:
	tst.b		STAG(%a6)		# is operand normalized?
	bne.b		fout_byte_denorm	# no

	fmovm.x		SRC(%a0),&0x80		# load source value into fp0

fout_byte_norm:
	fmov.l		%d0,%fpcr		# insert rnd prec,mode

	fmov.b		%fp0,%d0		# exec move out w/ correct rnd mode

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# fetch FPSR
	or.w		%d1,2+USER_FPSR(%a6)	# save new exc,accrued bits

	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
	beq.b		fout_byte_dn		# must save to integer regfile

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
	bsr.l		_dmem_write_byte	# write byte

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_b		# yes

	rts

fout_byte_dn:
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
	andi.w		&0x7,%d1		# keep register number only
	bsr.l		store_dreg_b		# store byte to Dn
	rts

# source is a DENORM: substitute the smallest-magnitude normalized
# single w/ the denorm's sign before doing the move out
fout_byte_denorm:
	mov.l		SRC_EX(%a0),%d1		# fetch sign/exponent word
	andi.l		&0x80000000,%d1		# keep DENORM sign
	ori.l		&0x00800000,%d1		# make smallest sgl
	fmov.s		%d1,%fp0		# load substitute into fp0
	bra.b		fout_byte_norm		# continue w/ normal path
7162*4882a593Smuzhiyun
7163*4882a593Smuzhiyun#################################################################
7164*4882a593Smuzhiyun# fmove.w out ###################################################
7165*4882a593Smuzhiyun#################################################################
7166*4882a593Smuzhiyun
7167*4882a593Smuzhiyun# Only "Unimplemented Data Type" exceptions enter here. The operand
7168*4882a593Smuzhiyun# is either a DENORM or a NORM.
7169*4882a593Smuzhiyunfout_word:
7170*4882a593Smuzhiyun	tst.b		STAG(%a6)		# is operand normalized?
7171*4882a593Smuzhiyun	bne.b		fout_word_denorm	# no
7172*4882a593Smuzhiyun
7173*4882a593Smuzhiyun	fmovm.x		SRC(%a0),&0x80		# load value
7174*4882a593Smuzhiyun
7175*4882a593Smuzhiyunfout_word_norm:
7176*4882a593Smuzhiyun	fmov.l		%d0,%fpcr		# insert rnd prec:mode
7177*4882a593Smuzhiyun
7178*4882a593Smuzhiyun	fmov.w		%fp0,%d0		# exec move out w/ correct rnd mode
7179*4882a593Smuzhiyun
7180*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# clear FPCR
7181*4882a593Smuzhiyun	fmov.l		%fpsr,%d1		# fetch FPSR
7182*4882a593Smuzhiyun	or.w		%d1,2+USER_FPSR(%a6)	# save new exc,accrued bits
7183*4882a593Smuzhiyun
7184*4882a593Smuzhiyun	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
7185*4882a593Smuzhiyun	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
7186*4882a593Smuzhiyun	beq.b		fout_word_dn		# must save to integer regfile
7187*4882a593Smuzhiyun
7188*4882a593Smuzhiyun	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
7189*4882a593Smuzhiyun	bsr.l		_dmem_write_word	# write word
7190*4882a593Smuzhiyun
7191*4882a593Smuzhiyun	tst.l		%d1			# did dstore fail?
7192*4882a593Smuzhiyun	bne.l		facc_out_w		# yes
7193*4882a593Smuzhiyun
7194*4882a593Smuzhiyun	rts
7195*4882a593Smuzhiyun
7196*4882a593Smuzhiyunfout_word_dn:
7197*4882a593Smuzhiyun	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
7198*4882a593Smuzhiyun	andi.w		&0x7,%d1
7199*4882a593Smuzhiyun	bsr.l		store_dreg_w
7200*4882a593Smuzhiyun	rts
7201*4882a593Smuzhiyun
7202*4882a593Smuzhiyunfout_word_denorm:
7203*4882a593Smuzhiyun	mov.l		SRC_EX(%a0),%d1
7204*4882a593Smuzhiyun	andi.l		&0x80000000,%d1		# keep DENORM sign
7205*4882a593Smuzhiyun	ori.l		&0x00800000,%d1		# make smallest sgl
7206*4882a593Smuzhiyun	fmov.s		%d1,%fp0
7207*4882a593Smuzhiyun	bra.b		fout_word_norm
7208*4882a593Smuzhiyun
7209*4882a593Smuzhiyun#################################################################
7210*4882a593Smuzhiyun# fmove.l out ###################################################
7211*4882a593Smuzhiyun#################################################################
7212*4882a593Smuzhiyun
7213*4882a593Smuzhiyun# Only "Unimplemented Data Type" exceptions enter here. The operand
7214*4882a593Smuzhiyun# is either a DENORM or a NORM.
7215*4882a593Smuzhiyunfout_long:
7216*4882a593Smuzhiyun	tst.b		STAG(%a6)		# is operand normalized?
7217*4882a593Smuzhiyun	bne.b		fout_long_denorm	# no
7218*4882a593Smuzhiyun
7219*4882a593Smuzhiyun	fmovm.x		SRC(%a0),&0x80		# load value
7220*4882a593Smuzhiyun
7221*4882a593Smuzhiyunfout_long_norm:
7222*4882a593Smuzhiyun	fmov.l		%d0,%fpcr		# insert rnd prec:mode
7223*4882a593Smuzhiyun
7224*4882a593Smuzhiyun	fmov.l		%fp0,%d0		# exec move out w/ correct rnd mode
7225*4882a593Smuzhiyun
7226*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# clear FPCR
7227*4882a593Smuzhiyun	fmov.l		%fpsr,%d1		# fetch FPSR
7228*4882a593Smuzhiyun	or.w		%d1,2+USER_FPSR(%a6)	# save new exc,accrued bits
7229*4882a593Smuzhiyun
7230*4882a593Smuzhiyunfout_long_write:
7231*4882a593Smuzhiyun	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
7232*4882a593Smuzhiyun	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
7233*4882a593Smuzhiyun	beq.b		fout_long_dn		# must save to integer regfile
7234*4882a593Smuzhiyun
7235*4882a593Smuzhiyun	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
7236*4882a593Smuzhiyun	bsr.l		_dmem_write_long	# write long
7237*4882a593Smuzhiyun
7238*4882a593Smuzhiyun	tst.l		%d1			# did dstore fail?
7239*4882a593Smuzhiyun	bne.l		facc_out_l		# yes
7240*4882a593Smuzhiyun
7241*4882a593Smuzhiyun	rts
7242*4882a593Smuzhiyun
7243*4882a593Smuzhiyunfout_long_dn:
7244*4882a593Smuzhiyun	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
7245*4882a593Smuzhiyun	andi.w		&0x7,%d1
7246*4882a593Smuzhiyun	bsr.l		store_dreg_l
7247*4882a593Smuzhiyun	rts
7248*4882a593Smuzhiyun
7249*4882a593Smuzhiyunfout_long_denorm:
7250*4882a593Smuzhiyun	mov.l		SRC_EX(%a0),%d1
7251*4882a593Smuzhiyun	andi.l		&0x80000000,%d1		# keep DENORM sign
7252*4882a593Smuzhiyun	ori.l		&0x00800000,%d1		# make smallest sgl
7253*4882a593Smuzhiyun	fmov.s		%d1,%fp0
7254*4882a593Smuzhiyun	bra.b		fout_long_norm
7255*4882a593Smuzhiyun
7256*4882a593Smuzhiyun#################################################################
7257*4882a593Smuzhiyun# fmove.x out ###################################################
7258*4882a593Smuzhiyun#################################################################
7259*4882a593Smuzhiyun
7260*4882a593Smuzhiyun# Only "Unimplemented Data Type" exceptions enter here. The operand
7261*4882a593Smuzhiyun# is either a DENORM or a NORM.
7262*4882a593Smuzhiyun# The DENORM causes an Underflow exception.
7263*4882a593Smuzhiyunfout_ext:
7264*4882a593Smuzhiyun
7265*4882a593Smuzhiyun# we copy the extended precision result to FP_SCR0 so that the reserved
7266*4882a593Smuzhiyun# 16-bit field gets zeroed. we do this since we promise not to disturb
7267*4882a593Smuzhiyun# what's at SRC(a0).
7268*4882a593Smuzhiyun	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
7269*4882a593Smuzhiyun	clr.w		2+FP_SCR0_EX(%a6)	# clear reserved field
7270*4882a593Smuzhiyun	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
7271*4882a593Smuzhiyun	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
7272*4882a593Smuzhiyun
7273*4882a593Smuzhiyun	fmovm.x		SRC(%a0),&0x80		# return result
7274*4882a593Smuzhiyun
7275*4882a593Smuzhiyun	bsr.l		_calc_ea_fout		# fix stacked <ea>
7276*4882a593Smuzhiyun
7277*4882a593Smuzhiyun	mov.l		%a0,%a1			# pass: dst addr
7278*4882a593Smuzhiyun	lea		FP_SCR0(%a6),%a0	# pass: src addr
7279*4882a593Smuzhiyun	mov.l		&0xc,%d0		# pass: opsize is 12 bytes
7280*4882a593Smuzhiyun
7281*4882a593Smuzhiyun# we must not yet write the extended precision data to the stack
7282*4882a593Smuzhiyun# in the pre-decrement case from supervisor mode or else we'll corrupt
7283*4882a593Smuzhiyun# the stack frame. so, leave it in FP_SRC for now and deal with it later...
7284*4882a593Smuzhiyun	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
7285*4882a593Smuzhiyun	beq.b		fout_ext_a7
7286*4882a593Smuzhiyun
7287*4882a593Smuzhiyun	bsr.l		_dmem_write		# write ext prec number to memory
7288*4882a593Smuzhiyun
7289*4882a593Smuzhiyun	tst.l		%d1			# did dstore fail?
7290*4882a593Smuzhiyun	bne.w		fout_ext_err		# yes
7291*4882a593Smuzhiyun
7292*4882a593Smuzhiyun	tst.b		STAG(%a6)		# is operand normalized?
7293*4882a593Smuzhiyun	bne.b		fout_ext_denorm		# no
7294*4882a593Smuzhiyun	rts
7295*4882a593Smuzhiyun
7296*4882a593Smuzhiyun# the number is a DENORM. must set the underflow exception bit
7297*4882a593Smuzhiyunfout_ext_denorm:
7298*4882a593Smuzhiyun	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set underflow exc bit
7299*4882a593Smuzhiyun
7300*4882a593Smuzhiyun	mov.b		FPCR_ENABLE(%a6),%d0
7301*4882a593Smuzhiyun	andi.b		&0x0a,%d0		# is UNFL or INEX enabled?
7302*4882a593Smuzhiyun	bne.b		fout_ext_exc		# yes
7303*4882a593Smuzhiyun	rts
7304*4882a593Smuzhiyun
7305*4882a593Smuzhiyun# we don't want to do the write if the exception occurred in supervisor mode
7306*4882a593Smuzhiyun# so _mem_write2() handles this for us.
7307*4882a593Smuzhiyunfout_ext_a7:
7308*4882a593Smuzhiyun	bsr.l		_mem_write2		# write ext prec number to memory
7309*4882a593Smuzhiyun
7310*4882a593Smuzhiyun	tst.l		%d1			# did dstore fail?
7311*4882a593Smuzhiyun	bne.w		fout_ext_err		# yes
7312*4882a593Smuzhiyun
7313*4882a593Smuzhiyun	tst.b		STAG(%a6)		# is operand normalized?
7314*4882a593Smuzhiyun	bne.b		fout_ext_denorm		# no
7315*4882a593Smuzhiyun	rts
7316*4882a593Smuzhiyun
7317*4882a593Smuzhiyunfout_ext_exc:
7318*4882a593Smuzhiyun	lea		FP_SCR0(%a6),%a0
7319*4882a593Smuzhiyun	bsr.l		norm			# normalize the mantissa
7320*4882a593Smuzhiyun	neg.w		%d0			# new exp = -(shft amt)
7321*4882a593Smuzhiyun	andi.w		&0x7fff,%d0
7322*4882a593Smuzhiyun	andi.w		&0x8000,FP_SCR0_EX(%a6)	# keep only old sign
7323*4882a593Smuzhiyun	or.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
7324*4882a593Smuzhiyun	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
7325*4882a593Smuzhiyun	rts
7326*4882a593Smuzhiyun
7327*4882a593Smuzhiyunfout_ext_err:
7328*4882a593Smuzhiyun	mov.l		EXC_A6(%a6),(%a6)	# fix stacked a6
7329*4882a593Smuzhiyun	bra.l		facc_out_x
7330*4882a593Smuzhiyun
7331*4882a593Smuzhiyun#########################################################################
7332*4882a593Smuzhiyun# fmove.s out ###########################################################
7333*4882a593Smuzhiyun#########################################################################
7334*4882a593Smuzhiyunfout_sgl:
7335*4882a593Smuzhiyun	andi.b		&0x30,%d0		# clear rnd prec
7336*4882a593Smuzhiyun	ori.b		&s_mode*0x10,%d0	# insert sgl prec
7337*4882a593Smuzhiyun	mov.l		%d0,L_SCR3(%a6)		# save rnd prec,mode on stack
7338*4882a593Smuzhiyun
7339*4882a593Smuzhiyun#
7340*4882a593Smuzhiyun# operand is a normalized number. first, we check to see if the move out
7341*4882a593Smuzhiyun# would cause either an underflow or overflow. these cases are handled
7342*4882a593Smuzhiyun# separately. otherwise, set the FPCR to the proper rounding mode and
7343*4882a593Smuzhiyun# execute the move.
7344*4882a593Smuzhiyun#
7345*4882a593Smuzhiyun	mov.w		SRC_EX(%a0),%d0		# extract exponent
7346*4882a593Smuzhiyun	andi.w		&0x7fff,%d0		# strip sign
7347*4882a593Smuzhiyun
7348*4882a593Smuzhiyun	cmpi.w		%d0,&SGL_HI		# will operand overflow?
7349*4882a593Smuzhiyun	bgt.w		fout_sgl_ovfl		# yes; go handle OVFL
7350*4882a593Smuzhiyun	beq.w		fout_sgl_may_ovfl	# maybe; go handle possible OVFL
7351*4882a593Smuzhiyun	cmpi.w		%d0,&SGL_LO		# will operand underflow?
7352*4882a593Smuzhiyun	blt.w		fout_sgl_unfl		# yes; go handle underflow
7353*4882a593Smuzhiyun
7354*4882a593Smuzhiyun#
7355*4882a593Smuzhiyun# NORMs(in range) can be stored out by a simple "fmov.s"
7356*4882a593Smuzhiyun# Unnormalized inputs can come through this point.
7357*4882a593Smuzhiyun#
7358*4882a593Smuzhiyunfout_sgl_exg:
7359*4882a593Smuzhiyun	fmovm.x		SRC(%a0),&0x80		# fetch fop from stack
7360*4882a593Smuzhiyun
7361*4882a593Smuzhiyun	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
7362*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr		# clear FPSR
7363*4882a593Smuzhiyun
7364*4882a593Smuzhiyun	fmov.s		%fp0,%d0		# store does convert and round
7365*4882a593Smuzhiyun
7366*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# clear FPCR
7367*4882a593Smuzhiyun	fmov.l		%fpsr,%d1		# save FPSR
7368*4882a593Smuzhiyun
7369*4882a593Smuzhiyun	or.w		%d1,2+USER_FPSR(%a6)	# set possible inex2/ainex
7370*4882a593Smuzhiyun
7371*4882a593Smuzhiyunfout_sgl_exg_write:
7372*4882a593Smuzhiyun	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
7373*4882a593Smuzhiyun	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
7374*4882a593Smuzhiyun	beq.b		fout_sgl_exg_write_dn	# must save to integer regfile
7375*4882a593Smuzhiyun
7376*4882a593Smuzhiyun	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
7377*4882a593Smuzhiyun	bsr.l		_dmem_write_long	# write long
7378*4882a593Smuzhiyun
7379*4882a593Smuzhiyun	tst.l		%d1			# did dstore fail?
7380*4882a593Smuzhiyun	bne.l		facc_out_l		# yes
7381*4882a593Smuzhiyun
7382*4882a593Smuzhiyun	rts
7383*4882a593Smuzhiyun
7384*4882a593Smuzhiyunfout_sgl_exg_write_dn:
7385*4882a593Smuzhiyun	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
7386*4882a593Smuzhiyun	andi.w		&0x7,%d1
7387*4882a593Smuzhiyun	bsr.l		store_dreg_l
7388*4882a593Smuzhiyun	rts
7389*4882a593Smuzhiyun
7390*4882a593Smuzhiyun#
7391*4882a593Smuzhiyun# here, we know that the operand would UNFL if moved out to single prec,
7392*4882a593Smuzhiyun# so, denorm and round and then use generic store single routine to
7393*4882a593Smuzhiyun# write the value to memory.
7394*4882a593Smuzhiyun#
7395*4882a593Smuzhiyunfout_sgl_unfl:
7396*4882a593Smuzhiyun	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set UNFL
7397*4882a593Smuzhiyun
7398*4882a593Smuzhiyun	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
7399*4882a593Smuzhiyun	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
7400*4882a593Smuzhiyun	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
7401*4882a593Smuzhiyun	mov.l		%a0,-(%sp)
7402*4882a593Smuzhiyun
7403*4882a593Smuzhiyun	clr.l		%d0			# pass: S.F. = 0
7404*4882a593Smuzhiyun
7405*4882a593Smuzhiyun	cmpi.b		STAG(%a6),&DENORM	# fetch src optype tag
7406*4882a593Smuzhiyun	bne.b		fout_sgl_unfl_cont	# let DENORMs fall through
7407*4882a593Smuzhiyun
7408*4882a593Smuzhiyun	lea		FP_SCR0(%a6),%a0
7409*4882a593Smuzhiyun	bsr.l		norm			# normalize the DENORM
7410*4882a593Smuzhiyun
7411*4882a593Smuzhiyunfout_sgl_unfl_cont:
7412*4882a593Smuzhiyun	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
7413*4882a593Smuzhiyun	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
7414*4882a593Smuzhiyun	bsr.l		unf_res			# calc default underflow result
7415*4882a593Smuzhiyun
7416*4882a593Smuzhiyun	lea		FP_SCR0(%a6),%a0	# pass: ptr to fop
7417*4882a593Smuzhiyun	bsr.l		dst_sgl			# convert to single prec
7418*4882a593Smuzhiyun
7419*4882a593Smuzhiyun	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
7420*4882a593Smuzhiyun	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
7421*4882a593Smuzhiyun	beq.b		fout_sgl_unfl_dn	# must save to integer regfile
7422*4882a593Smuzhiyun
7423*4882a593Smuzhiyun	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
7424*4882a593Smuzhiyun	bsr.l		_dmem_write_long	# write long
7425*4882a593Smuzhiyun
7426*4882a593Smuzhiyun	tst.l		%d1			# did dstore fail?
7427*4882a593Smuzhiyun	bne.l		facc_out_l		# yes
7428*4882a593Smuzhiyun
7429*4882a593Smuzhiyun	bra.b		fout_sgl_unfl_chkexc
7430*4882a593Smuzhiyun
7431*4882a593Smuzhiyunfout_sgl_unfl_dn:
7432*4882a593Smuzhiyun	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
7433*4882a593Smuzhiyun	andi.w		&0x7,%d1
7434*4882a593Smuzhiyun	bsr.l		store_dreg_l
7435*4882a593Smuzhiyun
7436*4882a593Smuzhiyunfout_sgl_unfl_chkexc:
7437*4882a593Smuzhiyun	mov.b		FPCR_ENABLE(%a6),%d1
7438*4882a593Smuzhiyun	andi.b		&0x0a,%d1		# is UNFL or INEX enabled?
7439*4882a593Smuzhiyun	bne.w		fout_sd_exc_unfl	# yes
7440*4882a593Smuzhiyun	addq.l		&0x4,%sp
7441*4882a593Smuzhiyun	rts
7442*4882a593Smuzhiyun
7443*4882a593Smuzhiyun#
7444*4882a593Smuzhiyun# it's definitely an overflow so call ovf_res to get the correct answer
7445*4882a593Smuzhiyun#
7446*4882a593Smuzhiyunfout_sgl_ovfl:
7447*4882a593Smuzhiyun	tst.b		3+SRC_HI(%a0)		# is result inexact?
7448*4882a593Smuzhiyun	bne.b		fout_sgl_ovfl_inex2
7449*4882a593Smuzhiyun	tst.l		SRC_LO(%a0)		# is result inexact?
7450*4882a593Smuzhiyun	bne.b		fout_sgl_ovfl_inex2
7451*4882a593Smuzhiyun	ori.w		&ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
7452*4882a593Smuzhiyun	bra.b		fout_sgl_ovfl_cont
7453*4882a593Smuzhiyunfout_sgl_ovfl_inex2:
7454*4882a593Smuzhiyun	ori.w		&ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2
7455*4882a593Smuzhiyun
7456*4882a593Smuzhiyunfout_sgl_ovfl_cont:
7457*4882a593Smuzhiyun	mov.l		%a0,-(%sp)
7458*4882a593Smuzhiyun
7459*4882a593Smuzhiyun# call ovf_res() w/ sgl prec and the correct rnd mode to create the default
7460*4882a593Smuzhiyun# overflow result. DON'T save the returned ccodes from ovf_res() since
7461*4882a593Smuzhiyun# fmove out doesn't alter them.
7462*4882a593Smuzhiyun	tst.b		SRC_EX(%a0)		# is operand negative?
7463*4882a593Smuzhiyun	smi		%d1			# set if so
7464*4882a593Smuzhiyun	mov.l		L_SCR3(%a6),%d0		# pass: sgl prec,rnd mode
7465*4882a593Smuzhiyun	bsr.l		ovf_res			# calc OVFL result
7466*4882a593Smuzhiyun	fmovm.x		(%a0),&0x80		# load default overflow result
7467*4882a593Smuzhiyun	fmov.s		%fp0,%d0		# store to single
7468*4882a593Smuzhiyun
7469*4882a593Smuzhiyun	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
7470*4882a593Smuzhiyun	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
7471*4882a593Smuzhiyun	beq.b		fout_sgl_ovfl_dn	# must save to integer regfile
7472*4882a593Smuzhiyun
7473*4882a593Smuzhiyun	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
7474*4882a593Smuzhiyun	bsr.l		_dmem_write_long	# write long
7475*4882a593Smuzhiyun
7476*4882a593Smuzhiyun	tst.l		%d1			# did dstore fail?
7477*4882a593Smuzhiyun	bne.l		facc_out_l		# yes
7478*4882a593Smuzhiyun
7479*4882a593Smuzhiyun	bra.b		fout_sgl_ovfl_chkexc
7480*4882a593Smuzhiyun
7481*4882a593Smuzhiyunfout_sgl_ovfl_dn:
7482*4882a593Smuzhiyun	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
7483*4882a593Smuzhiyun	andi.w		&0x7,%d1
7484*4882a593Smuzhiyun	bsr.l		store_dreg_l
7485*4882a593Smuzhiyun
7486*4882a593Smuzhiyunfout_sgl_ovfl_chkexc:
7487*4882a593Smuzhiyun	mov.b		FPCR_ENABLE(%a6),%d1
7488*4882a593Smuzhiyun	andi.b		&0x0a,%d1		# is UNFL or INEX enabled?
7489*4882a593Smuzhiyun	bne.w		fout_sd_exc_ovfl	# yes
7490*4882a593Smuzhiyun	addq.l		&0x4,%sp
7491*4882a593Smuzhiyun	rts
7492*4882a593Smuzhiyun
7493*4882a593Smuzhiyun#
7494*4882a593Smuzhiyun# move out MAY overflow:
7495*4882a593Smuzhiyun# (1) force the exp to 0x3fff
7496*4882a593Smuzhiyun# (2) do a move w/ appropriate rnd mode
7497*4882a593Smuzhiyun# (3) if exp still equals zero, then insert original exponent
7498*4882a593Smuzhiyun#	for the correct result.
7499*4882a593Smuzhiyun#     if exp now equals one, then it overflowed so call ovf_res.
7500*4882a593Smuzhiyun#
7501*4882a593Smuzhiyunfout_sgl_may_ovfl:
7502*4882a593Smuzhiyun	mov.w		SRC_EX(%a0),%d1		# fetch current sign
7503*4882a593Smuzhiyun	andi.w		&0x8000,%d1		# keep it,clear exp
7504*4882a593Smuzhiyun	ori.w		&0x3fff,%d1		# insert exp = 0
7505*4882a593Smuzhiyun	mov.w		%d1,FP_SCR0_EX(%a6)	# insert scaled exp
7506*4882a593Smuzhiyun	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
7507*4882a593Smuzhiyun	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)
7508*4882a593Smuzhiyun
7509*4882a593Smuzhiyun	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
7510*4882a593Smuzhiyun
7511*4882a593Smuzhiyun	fmov.x		FP_SCR0(%a6),%fp0	# force fop to be rounded
7512*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# clear FPCR
7513*4882a593Smuzhiyun
7514*4882a593Smuzhiyun	fabs.x		%fp0			# need absolute value
7515*4882a593Smuzhiyun	fcmp.b		%fp0,&0x2		# did exponent increase?
7516*4882a593Smuzhiyun	fblt.w		fout_sgl_exg		# no; go finish NORM
7517*4882a593Smuzhiyun	bra.w		fout_sgl_ovfl		# yes; go handle overflow
7518*4882a593Smuzhiyun
7519*4882a593Smuzhiyun################
7520*4882a593Smuzhiyun
7521*4882a593Smuzhiyunfout_sd_exc_unfl:
7522*4882a593Smuzhiyun	mov.l		(%sp)+,%a0
7523*4882a593Smuzhiyun
7524*4882a593Smuzhiyun	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
7525*4882a593Smuzhiyun	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
7526*4882a593Smuzhiyun	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
7527*4882a593Smuzhiyun
7528*4882a593Smuzhiyun	cmpi.b		STAG(%a6),&DENORM	# was src a DENORM?
7529*4882a593Smuzhiyun	bne.b		fout_sd_exc_cont	# no
7530*4882a593Smuzhiyun
7531*4882a593Smuzhiyun	lea		FP_SCR0(%a6),%a0
7532*4882a593Smuzhiyun	bsr.l		norm
7533*4882a593Smuzhiyun	neg.l		%d0
7534*4882a593Smuzhiyun	andi.w		&0x7fff,%d0
7535*4882a593Smuzhiyun	bfins		%d0,FP_SCR0_EX(%a6){&1:&15}
7536*4882a593Smuzhiyun	bra.b		fout_sd_exc_cont
7537*4882a593Smuzhiyun
7538*4882a593Smuzhiyunfout_sd_exc:
7539*4882a593Smuzhiyunfout_sd_exc_ovfl:
7540*4882a593Smuzhiyun	mov.l		(%sp)+,%a0		# restore a0
7541*4882a593Smuzhiyun
7542*4882a593Smuzhiyun	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
7543*4882a593Smuzhiyun	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
7544*4882a593Smuzhiyun	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
7545*4882a593Smuzhiyun
7546*4882a593Smuzhiyunfout_sd_exc_cont:
7547*4882a593Smuzhiyun	bclr		&0x7,FP_SCR0_EX(%a6)	# clear sign bit
7548*4882a593Smuzhiyun	sne.b		2+FP_SCR0_EX(%a6)	# set internal sign bit
7549*4882a593Smuzhiyun	lea		FP_SCR0(%a6),%a0	# pass: ptr to DENORM
7550*4882a593Smuzhiyun
7551*4882a593Smuzhiyun	mov.b		3+L_SCR3(%a6),%d1
7552*4882a593Smuzhiyun	lsr.b		&0x4,%d1
7553*4882a593Smuzhiyun	andi.w		&0x0c,%d1
7554*4882a593Smuzhiyun	swap		%d1
7555*4882a593Smuzhiyun	mov.b		3+L_SCR3(%a6),%d1
7556*4882a593Smuzhiyun	lsr.b		&0x4,%d1
7557*4882a593Smuzhiyun	andi.w		&0x03,%d1
7558*4882a593Smuzhiyun	clr.l		%d0			# pass: zero g,r,s
7559*4882a593Smuzhiyun	bsr.l		_round			# round the DENORM
7560*4882a593Smuzhiyun
7561*4882a593Smuzhiyun	tst.b		2+FP_SCR0_EX(%a6)	# is EXOP negative?
7562*4882a593Smuzhiyun	beq.b		fout_sd_exc_done	# no
7563*4882a593Smuzhiyun	bset		&0x7,FP_SCR0_EX(%a6)	# yes
7564*4882a593Smuzhiyun
7565*4882a593Smuzhiyunfout_sd_exc_done:
7566*4882a593Smuzhiyun	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
7567*4882a593Smuzhiyun	rts
7568*4882a593Smuzhiyun
7569*4882a593Smuzhiyun#################################################################
7570*4882a593Smuzhiyun# fmove.d out ###################################################
7571*4882a593Smuzhiyun#################################################################
7572*4882a593Smuzhiyunfout_dbl:
7573*4882a593Smuzhiyun	andi.b		&0x30,%d0		# clear rnd prec
7574*4882a593Smuzhiyun	ori.b		&d_mode*0x10,%d0	# insert dbl prec
7575*4882a593Smuzhiyun	mov.l		%d0,L_SCR3(%a6)		# save rnd prec,mode on stack
7576*4882a593Smuzhiyun
7577*4882a593Smuzhiyun#
7578*4882a593Smuzhiyun# operand is a normalized number. first, we check to see if the move out
7579*4882a593Smuzhiyun# would cause either an underflow or overflow. these cases are handled
7580*4882a593Smuzhiyun# separately. otherwise, set the FPCR to the proper rounding mode and
7581*4882a593Smuzhiyun# execute the move.
7582*4882a593Smuzhiyun#
7583*4882a593Smuzhiyun	mov.w		SRC_EX(%a0),%d0		# extract exponent
7584*4882a593Smuzhiyun	andi.w		&0x7fff,%d0		# strip sign
7585*4882a593Smuzhiyun
7586*4882a593Smuzhiyun	cmpi.w		%d0,&DBL_HI		# will operand overflow?
7587*4882a593Smuzhiyun	bgt.w		fout_dbl_ovfl		# yes; go handle OVFL
7588*4882a593Smuzhiyun	beq.w		fout_dbl_may_ovfl	# maybe; go handle possible OVFL
7589*4882a593Smuzhiyun	cmpi.w		%d0,&DBL_LO		# will operand underflow?
7590*4882a593Smuzhiyun	blt.w		fout_dbl_unfl		# yes; go handle underflow
7591*4882a593Smuzhiyun
7592*4882a593Smuzhiyun#
7593*4882a593Smuzhiyun# NORMs(in range) can be stored out by a simple "fmov.d"
7594*4882a593Smuzhiyun# Unnormalized inputs can come through this point.
7595*4882a593Smuzhiyun#
7596*4882a593Smuzhiyunfout_dbl_exg:
7597*4882a593Smuzhiyun	fmovm.x		SRC(%a0),&0x80		# fetch fop from stack
7598*4882a593Smuzhiyun
7599*4882a593Smuzhiyun	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
7600*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr		# clear FPSR
7601*4882a593Smuzhiyun
7602*4882a593Smuzhiyun	fmov.d		%fp0,L_SCR1(%a6)	# store does convert and round
7603*4882a593Smuzhiyun
7604*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# clear FPCR
7605*4882a593Smuzhiyun	fmov.l		%fpsr,%d0		# save FPSR
7606*4882a593Smuzhiyun
7607*4882a593Smuzhiyun	or.w		%d0,2+USER_FPSR(%a6)	# set possible inex2/ainex
7608*4882a593Smuzhiyun
7609*4882a593Smuzhiyun	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
7610*4882a593Smuzhiyun	lea		L_SCR1(%a6),%a0		# pass: src addr
7611*4882a593Smuzhiyun	movq.l		&0x8,%d0		# pass: opsize is 8 bytes
7612*4882a593Smuzhiyun	bsr.l		_dmem_write		# store dbl fop to memory
7613*4882a593Smuzhiyun
7614*4882a593Smuzhiyun	tst.l		%d1			# did dstore fail?
7615*4882a593Smuzhiyun	bne.l		facc_out_d		# yes
7616*4882a593Smuzhiyun
7617*4882a593Smuzhiyun	rts					# no; so we're finished
7618*4882a593Smuzhiyun
7619*4882a593Smuzhiyun#
7620*4882a593Smuzhiyun# here, we know that the operand would UNFL if moved out to double prec,
7621*4882a593Smuzhiyun# so, denorm and round and then use generic store double routine to
7622*4882a593Smuzhiyun# write the value to memory.
7623*4882a593Smuzhiyun#
7624*4882a593Smuzhiyunfout_dbl_unfl:
7625*4882a593Smuzhiyun	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set UNFL
7626*4882a593Smuzhiyun
7627*4882a593Smuzhiyun	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
7628*4882a593Smuzhiyun	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
7629*4882a593Smuzhiyun	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
7630*4882a593Smuzhiyun	mov.l		%a0,-(%sp)
7631*4882a593Smuzhiyun
7632*4882a593Smuzhiyun	clr.l		%d0			# pass: S.F. = 0
7633*4882a593Smuzhiyun
7634*4882a593Smuzhiyun	cmpi.b		STAG(%a6),&DENORM	# fetch src optype tag
7635*4882a593Smuzhiyun	bne.b		fout_dbl_unfl_cont	# let DENORMs fall through
7636*4882a593Smuzhiyun
7637*4882a593Smuzhiyun	lea		FP_SCR0(%a6),%a0
7638*4882a593Smuzhiyun	bsr.l		norm			# normalize the DENORM
7639*4882a593Smuzhiyun
7640*4882a593Smuzhiyunfout_dbl_unfl_cont:
7641*4882a593Smuzhiyun	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
7642*4882a593Smuzhiyun	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
7643*4882a593Smuzhiyun	bsr.l		unf_res			# calc default underflow result
7644*4882a593Smuzhiyun
7645*4882a593Smuzhiyun	lea		FP_SCR0(%a6),%a0	# pass: ptr to fop
7646*4882a593Smuzhiyun	bsr.l		dst_dbl			# convert to single prec
7647*4882a593Smuzhiyun	mov.l		%d0,L_SCR1(%a6)
7648*4882a593Smuzhiyun	mov.l		%d1,L_SCR2(%a6)
7649*4882a593Smuzhiyun
7650*4882a593Smuzhiyun	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
7651*4882a593Smuzhiyun	lea		L_SCR1(%a6),%a0		# pass: src addr
7652*4882a593Smuzhiyun	movq.l		&0x8,%d0		# pass: opsize is 8 bytes
7653*4882a593Smuzhiyun	bsr.l		_dmem_write		# store dbl fop to memory
7654*4882a593Smuzhiyun
7655*4882a593Smuzhiyun	tst.l		%d1			# did dstore fail?
7656*4882a593Smuzhiyun	bne.l		facc_out_d		# yes
7657*4882a593Smuzhiyun
7658*4882a593Smuzhiyun	mov.b		FPCR_ENABLE(%a6),%d1
7659*4882a593Smuzhiyun	andi.b		&0x0a,%d1		# is UNFL or INEX enabled?
7660*4882a593Smuzhiyun	bne.w		fout_sd_exc_unfl	# yes
7661*4882a593Smuzhiyun	addq.l		&0x4,%sp
7662*4882a593Smuzhiyun	rts
7663*4882a593Smuzhiyun
7664*4882a593Smuzhiyun#
7665*4882a593Smuzhiyun# it's definitely an overflow so call ovf_res to get the correct answer
7666*4882a593Smuzhiyun#
7667*4882a593Smuzhiyunfout_dbl_ovfl:
7668*4882a593Smuzhiyun	mov.w		2+SRC_LO(%a0),%d0
7669*4882a593Smuzhiyun	andi.w		&0x7ff,%d0
7670*4882a593Smuzhiyun	bne.b		fout_dbl_ovfl_inex2
7671*4882a593Smuzhiyun
7672*4882a593Smuzhiyun	ori.w		&ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
7673*4882a593Smuzhiyun	bra.b		fout_dbl_ovfl_cont
7674*4882a593Smuzhiyunfout_dbl_ovfl_inex2:
7675*4882a593Smuzhiyun	ori.w		&ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2
7676*4882a593Smuzhiyun
7677*4882a593Smuzhiyunfout_dbl_ovfl_cont:
7678*4882a593Smuzhiyun	mov.l		%a0,-(%sp)
7679*4882a593Smuzhiyun
7680*4882a593Smuzhiyun# call ovf_res() w/ dbl prec and the correct rnd mode to create the default
7681*4882a593Smuzhiyun# overflow result. DON'T save the returned ccodes from ovf_res() since
7682*4882a593Smuzhiyun# fmove out doesn't alter them.
7683*4882a593Smuzhiyun	tst.b		SRC_EX(%a0)		# is operand negative?
7684*4882a593Smuzhiyun	smi		%d1			# set if so
7685*4882a593Smuzhiyun	mov.l		L_SCR3(%a6),%d0		# pass: dbl prec,rnd mode
7686*4882a593Smuzhiyun	bsr.l		ovf_res			# calc OVFL result
7687*4882a593Smuzhiyun	fmovm.x		(%a0),&0x80		# load default overflow result
7688*4882a593Smuzhiyun	fmov.d		%fp0,L_SCR1(%a6)	# store to double
7689*4882a593Smuzhiyun
7690*4882a593Smuzhiyun	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
7691*4882a593Smuzhiyun	lea		L_SCR1(%a6),%a0		# pass: src addr
7692*4882a593Smuzhiyun	movq.l		&0x8,%d0		# pass: opsize is 8 bytes
7693*4882a593Smuzhiyun	bsr.l		_dmem_write		# store dbl fop to memory
7694*4882a593Smuzhiyun
7695*4882a593Smuzhiyun	tst.l		%d1			# did dstore fail?
7696*4882a593Smuzhiyun	bne.l		facc_out_d		# yes
7697*4882a593Smuzhiyun
7698*4882a593Smuzhiyun	mov.b		FPCR_ENABLE(%a6),%d1
7699*4882a593Smuzhiyun	andi.b		&0x0a,%d1		# is UNFL or INEX enabled?
7700*4882a593Smuzhiyun	bne.w		fout_sd_exc_ovfl	# yes
7701*4882a593Smuzhiyun	addq.l		&0x4,%sp
7702*4882a593Smuzhiyun	rts
7703*4882a593Smuzhiyun
7704*4882a593Smuzhiyun#
7705*4882a593Smuzhiyun# move out MAY overflow:
7706*4882a593Smuzhiyun# (1) force the exp to 0x3fff
7707*4882a593Smuzhiyun# (2) do a move w/ appropriate rnd mode
7708*4882a593Smuzhiyun# (3) if exp still equals zero, then insert original exponent
7709*4882a593Smuzhiyun#	for the correct result.
7710*4882a593Smuzhiyun#     if exp now equals one, then it overflowed so call ovf_res.
7711*4882a593Smuzhiyun#
7712*4882a593Smuzhiyunfout_dbl_may_ovfl:
7713*4882a593Smuzhiyun	mov.w		SRC_EX(%a0),%d1		# fetch current sign
7714*4882a593Smuzhiyun	andi.w		&0x8000,%d1		# keep it,clear exp
7715*4882a593Smuzhiyun	ori.w		&0x3fff,%d1		# insert exp = 0
7716*4882a593Smuzhiyun	mov.w		%d1,FP_SCR0_EX(%a6)	# insert scaled exp
7717*4882a593Smuzhiyun	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
7718*4882a593Smuzhiyun	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)
7719*4882a593Smuzhiyun
7720*4882a593Smuzhiyun	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
7721*4882a593Smuzhiyun
7722*4882a593Smuzhiyun	fmov.x		FP_SCR0(%a6),%fp0	# force fop to be rounded
7723*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# clear FPCR
7724*4882a593Smuzhiyun
7725*4882a593Smuzhiyun	fabs.x		%fp0			# need absolute value
7726*4882a593Smuzhiyun	fcmp.b		%fp0,&0x2		# did exponent increase?
7727*4882a593Smuzhiyun	fblt.w		fout_dbl_exg		# no; go finish NORM
7728*4882a593Smuzhiyun	bra.w		fout_dbl_ovfl		# yes; go handle overflow
7729*4882a593Smuzhiyun
7730*4882a593Smuzhiyun#########################################################################
7731*4882a593Smuzhiyun# XDEF ****************************************************************	#
7732*4882a593Smuzhiyun#	dst_dbl(): create double precision value from extended prec.	#
7733*4882a593Smuzhiyun#									#
7734*4882a593Smuzhiyun# XREF ****************************************************************	#
7735*4882a593Smuzhiyun#	None								#
7736*4882a593Smuzhiyun#									#
7737*4882a593Smuzhiyun# INPUT ***************************************************************	#
7738*4882a593Smuzhiyun#	a0 = pointer to source operand in extended precision		#
7739*4882a593Smuzhiyun#									#
7740*4882a593Smuzhiyun# OUTPUT **************************************************************	#
7741*4882a593Smuzhiyun#	d0 = hi(double precision result)				#
7742*4882a593Smuzhiyun#	d1 = lo(double precision result)				#
7743*4882a593Smuzhiyun#									#
7744*4882a593Smuzhiyun# ALGORITHM ***********************************************************	#
7745*4882a593Smuzhiyun#									#
7746*4882a593Smuzhiyun#  Changes extended precision to double precision.			#
7747*4882a593Smuzhiyun#  Note: no attempt is made to round the extended value to double.	#
7748*4882a593Smuzhiyun#	dbl_sign = ext_sign						#
7749*4882a593Smuzhiyun#	dbl_exp = ext_exp - $3fff(ext bias) + $7ff(dbl bias)		#
7750*4882a593Smuzhiyun#	get rid of ext integer bit					#
7751*4882a593Smuzhiyun#	dbl_mant = ext_mant{62:12}					#
7752*4882a593Smuzhiyun#									#
7753*4882a593Smuzhiyun#		---------------   ---------------    ---------------	#
7754*4882a593Smuzhiyun#  extended ->  |s|    exp    |   |1| ms mant   |    | ls mant     |	#
7755*4882a593Smuzhiyun#		---------------   ---------------    ---------------	#
7756*4882a593Smuzhiyun#		 95	    64    63 62	      32      31     11	  0	#
7757*4882a593Smuzhiyun#				     |			     |		#
7758*4882a593Smuzhiyun#				     |			     |		#
7759*4882a593Smuzhiyun#				     |			     |		#
7760*4882a593Smuzhiyun#			             v			     v		#
7761*4882a593Smuzhiyun#			      ---------------   ---------------		#
7762*4882a593Smuzhiyun#  double   ->		      |s|exp| mant  |   |  mant       |		#
7763*4882a593Smuzhiyun#			      ---------------   ---------------		#
7764*4882a593Smuzhiyun#			      63     51   32   31	       0	#
7765*4882a593Smuzhiyun#									#
7766*4882a593Smuzhiyun#########################################################################
7767*4882a593Smuzhiyun
7768*4882a593Smuzhiyundst_dbl:
7769*4882a593Smuzhiyun	clr.l		%d0			# clear d0
7770*4882a593Smuzhiyun	mov.w		FTEMP_EX(%a0),%d0	# get exponent
7771*4882a593Smuzhiyun	subi.w		&EXT_BIAS,%d0		# subtract extended precision bias
7772*4882a593Smuzhiyun	addi.w		&DBL_BIAS,%d0		# add double precision bias
7773*4882a593Smuzhiyun	tst.b		FTEMP_HI(%a0)		# is number a denorm?
7774*4882a593Smuzhiyun	bmi.b		dst_get_dupper		# no
7775*4882a593Smuzhiyun	subq.w		&0x1,%d0		# yes; denorm bias = DBL_BIAS - 1
7776*4882a593Smuzhiyundst_get_dupper:
7777*4882a593Smuzhiyun	swap		%d0			# d0 now in upper word
7778*4882a593Smuzhiyun	lsl.l		&0x4,%d0		# d0 in proper place for dbl prec exp
7779*4882a593Smuzhiyun	tst.b		FTEMP_EX(%a0)		# test sign
7780*4882a593Smuzhiyun	bpl.b		dst_get_dman		# if positive, go process mantissa
7781*4882a593Smuzhiyun	bset		&0x1f,%d0		# if negative, set sign
7782*4882a593Smuzhiyundst_get_dman:
7783*4882a593Smuzhiyun	mov.l		FTEMP_HI(%a0),%d1	# get ms mantissa
7784*4882a593Smuzhiyun	bfextu		%d1{&1:&20},%d1		# get upper 20 bits of ms
7785*4882a593Smuzhiyun	or.l		%d1,%d0			# put these bits in ms word of double
7786*4882a593Smuzhiyun	mov.l		%d0,L_SCR1(%a6)		# put the new exp back on the stack
7787*4882a593Smuzhiyun	mov.l		FTEMP_HI(%a0),%d1	# get ms mantissa
7788*4882a593Smuzhiyun	mov.l		&21,%d0			# load shift count
7789*4882a593Smuzhiyun	lsl.l		%d0,%d1			# put lower 11 bits in upper bits
7790*4882a593Smuzhiyun	mov.l		%d1,L_SCR2(%a6)		# build lower lword in memory
7791*4882a593Smuzhiyun	mov.l		FTEMP_LO(%a0),%d1	# get ls mantissa
7792*4882a593Smuzhiyun	bfextu		%d1{&0:&21},%d0		# get ls 21 bits of double
7793*4882a593Smuzhiyun	mov.l		L_SCR2(%a6),%d1
7794*4882a593Smuzhiyun	or.l		%d0,%d1			# put them in double result
7795*4882a593Smuzhiyun	mov.l		L_SCR1(%a6),%d0
7796*4882a593Smuzhiyun	rts
7797*4882a593Smuzhiyun
7798*4882a593Smuzhiyun#########################################################################
7799*4882a593Smuzhiyun# XDEF ****************************************************************	#
7800*4882a593Smuzhiyun#	dst_sgl(): create single precision value from extended prec	#
7801*4882a593Smuzhiyun#									#
7802*4882a593Smuzhiyun# XREF ****************************************************************	#
7803*4882a593Smuzhiyun#									#
7804*4882a593Smuzhiyun# INPUT ***************************************************************	#
7805*4882a593Smuzhiyun#	a0 = pointer to source operand in extended precision		#
7806*4882a593Smuzhiyun#									#
7807*4882a593Smuzhiyun# OUTPUT **************************************************************	#
7808*4882a593Smuzhiyun#	d0 = single precision result					#
7809*4882a593Smuzhiyun#									#
7810*4882a593Smuzhiyun# ALGORITHM ***********************************************************	#
7811*4882a593Smuzhiyun#									#
7812*4882a593Smuzhiyun# Changes extended precision to single precision.			#
7813*4882a593Smuzhiyun#	sgl_sign = ext_sign						#
7814*4882a593Smuzhiyun#	sgl_exp = ext_exp - $3fff(ext bias) + $7f(sgl bias)		#
7815*4882a593Smuzhiyun#	get rid of ext integer bit					#
7816*4882a593Smuzhiyun#	sgl_mant = ext_mant{62:40}					#
7817*4882a593Smuzhiyun#									#
7818*4882a593Smuzhiyun#		---------------   ---------------    ---------------	#
7819*4882a593Smuzhiyun#  extended ->  |s|    exp    |   |1| ms mant   |    | ls mant     |	#
7820*4882a593Smuzhiyun#		---------------   ---------------    ---------------	#
7821*4882a593Smuzhiyun#		 95	    64    63 62	   40 32      31     12	  0	#
7822*4882a593Smuzhiyun#				     |	   |				#
7823*4882a593Smuzhiyun#				     |	   |				#
7824*4882a593Smuzhiyun#				     |	   |				#
7825*4882a593Smuzhiyun#			             v     v				#
7826*4882a593Smuzhiyun#			      ---------------				#
7827*4882a593Smuzhiyun#  single   ->		      |s|exp| mant  |				#
7828*4882a593Smuzhiyun#			      ---------------				#
7829*4882a593Smuzhiyun#			      31     22     0				#
7830*4882a593Smuzhiyun#									#
7831*4882a593Smuzhiyun#########################################################################
7832*4882a593Smuzhiyun
# dst_sgl(): convert the extended precision operand at (%a0) to a single
# precision bit pattern returned in %d0. The mantissa is truncated,
# not rounded (see ALGORITHM above).
7833*4882a593Smuzhiyundst_sgl:
7834*4882a593Smuzhiyun	clr.l		%d0			# clear result reg
7835*4882a593Smuzhiyun	mov.w		FTEMP_EX(%a0),%d0	# get exponent
7836*4882a593Smuzhiyun	subi.w		&EXT_BIAS,%d0		# subtract extended precision bias
7837*4882a593Smuzhiyun	addi.w		&SGL_BIAS,%d0		# add single precision bias
# a clear j-bit (msb of the hi mantissa lword) means the input is a
# denormalized number, whose effective bias is one smaller.
7838*4882a593Smuzhiyun	tst.b		FTEMP_HI(%a0)		# is number a denorm?
7839*4882a593Smuzhiyun	bmi.b		dst_get_supper		# no
7840*4882a593Smuzhiyun	subq.w		&0x1,%d0		# yes; denorm bias = SGL_BIAS - 1
7841*4882a593Smuzhiyundst_get_supper:
7842*4882a593Smuzhiyun	swap		%d0			# put exp in upper word of d0
7843*4882a593Smuzhiyun	lsl.l		&0x7,%d0		# shift it into single exp bits
7844*4882a593Smuzhiyun	tst.b		FTEMP_EX(%a0)		# test sign
7845*4882a593Smuzhiyun	bpl.b		dst_get_sman		# if positive, continue
7846*4882a593Smuzhiyun	bset		&0x1f,%d0		# if negative, put in sign first
7847*4882a593Smuzhiyundst_get_sman:
# fold in the 23 mantissa bits just below the extended j-bit
# (ext mantissa bits 62:40), flush against the exponent field.
7848*4882a593Smuzhiyun	mov.l		FTEMP_HI(%a0),%d1	# get ms mantissa
7849*4882a593Smuzhiyun	andi.l		&0x7fffff00,%d1		# get upper 23 bits of ms
7850*4882a593Smuzhiyun	lsr.l		&0x8,%d1		# and put them flush right
7851*4882a593Smuzhiyun	or.l		%d1,%d0			# put these bits in ms word of single
7852*4882a593Smuzhiyun	rts
7853*4882a593Smuzhiyun
7854*4882a593Smuzhiyun##############################################################################
# fout_pack(): store the FP_SRC operand to memory in packed BCD format.
# NORMs/DENORMs are converted through bindec() using the static or
# dynamic k-factor from the instruction; ZERO/INF/NANs are written out
# directly with the middle word of the extended exponent cleared.
# A write through -(%a7) in user mode is routed via _mem_write2().
7855*4882a593Smuzhiyunfout_pack:
7856*4882a593Smuzhiyun	bsr.l		_calc_ea_fout		# fetch the <ea>
7857*4882a593Smuzhiyun	mov.l		%a0,-(%sp)		# save dst addr for the write below
7858*4882a593Smuzhiyun
7859*4882a593Smuzhiyun	mov.b		STAG(%a6),%d0		# fetch input type
7860*4882a593Smuzhiyun	bne.w		fout_pack_not_norm	# input is not NORM
7861*4882a593Smuzhiyun
7862*4882a593Smuzhiyunfout_pack_norm:
7863*4882a593Smuzhiyun	btst		&0x4,EXC_CMDREG(%a6)	# static or dynamic?
7864*4882a593Smuzhiyun	beq.b		fout_pack_s		# static
7865*4882a593Smuzhiyun
# dynamic k-factor: bits 6:4 of the command word name the Dn holding it
7866*4882a593Smuzhiyunfout_pack_d:
7867*4882a593Smuzhiyun	mov.b		1+EXC_CMDREG(%a6),%d1	# fetch dynamic reg
7868*4882a593Smuzhiyun	lsr.b		&0x4,%d1		# shift reg field to lo nibble
7869*4882a593Smuzhiyun	andi.w		&0x7,%d1		# isolate register number
7870*4882a593Smuzhiyun
7871*4882a593Smuzhiyun	bsr.l		fetch_dreg		# fetch Dn w/ k-factor
7872*4882a593Smuzhiyun
7873*4882a593Smuzhiyun	bra.b		fout_pack_type
7874*4882a593Smuzhiyunfout_pack_s:
7875*4882a593Smuzhiyun	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch static field
7876*4882a593Smuzhiyun
7877*4882a593Smuzhiyunfout_pack_type:
7878*4882a593Smuzhiyun	bfexts		%d0{&25:&7},%d0		# extract k-factor
7879*4882a593Smuzhiyun	mov.l	%d0,-(%sp)			# save k-factor across bindec()
7880*4882a593Smuzhiyun
7881*4882a593Smuzhiyun	lea		FP_SRC(%a6),%a0		# pass: ptr to input
7882*4882a593Smuzhiyun
7883*4882a593Smuzhiyun# bindec is currently scrambling FP_SRC for denorm inputs.
7884*4882a593Smuzhiyun# we'll have to change this, but for now, tough luck!!!
7885*4882a593Smuzhiyun	bsr.l		bindec			# convert xprec to packed
7886*4882a593Smuzhiyun
7887*4882a593Smuzhiyun#	andi.l		&0xcfff000f,FP_SCR0(%a6) # clear unused fields
7888*4882a593Smuzhiyun	andi.l		&0xcffff00f,FP_SCR0(%a6) # clear unused fields
7889*4882a593Smuzhiyun
7890*4882a593Smuzhiyun	mov.l	(%sp)+,%d0			# restore k-factor
7891*4882a593Smuzhiyun
# check whether the packed mantissa digits are all zero
7892*4882a593Smuzhiyun	tst.b		3+FP_SCR0_EX(%a6)	# first mantissa digit
7893*4882a593Smuzhiyun	bne.b		fout_pack_set
7894*4882a593Smuzhiyun	tst.l		FP_SCR0_HI(%a6)		# ms mantissa digits
7895*4882a593Smuzhiyun	bne.b		fout_pack_set
7896*4882a593Smuzhiyun	tst.l		FP_SCR0_LO(%a6)		# ls mantissa digits
7897*4882a593Smuzhiyun	bne.b		fout_pack_set
7898*4882a593Smuzhiyun
7899*4882a593Smuzhiyun# add the extra condition that only if the k-factor was zero, too, should
7900*4882a593Smuzhiyun# we zero the exponent
7901*4882a593Smuzhiyun	tst.l		%d0
7902*4882a593Smuzhiyun	bne.b		fout_pack_set
7903*4882a593Smuzhiyun# "mantissa" is all zero which means that the answer is zero. but, the '040
7904*4882a593Smuzhiyun# algorithm allows the exponent to be non-zero. the 881/2 do not. Therefore,
7905*4882a593Smuzhiyun# if the mantissa is zero, I will zero the exponent, too.
7906*4882a593Smuzhiyun# the question now is whether the exponents sign bit is allowed to be non-zero
7907*4882a593Smuzhiyun# for a zero, also...
7908*4882a593Smuzhiyun	andi.w		&0xf000,FP_SCR0(%a6)	# keep only sign/exp-sign bits
7909*4882a593Smuzhiyun
7910*4882a593Smuzhiyunfout_pack_set:
7911*4882a593Smuzhiyun
7912*4882a593Smuzhiyun	lea		FP_SCR0(%a6),%a0	# pass: src addr
7913*4882a593Smuzhiyun
7914*4882a593Smuzhiyunfout_pack_write:
7915*4882a593Smuzhiyun	mov.l		(%sp)+,%a1		# pass: dst addr
7916*4882a593Smuzhiyun	mov.l		&0xc,%d0		# pass: opsize is 12 bytes
7917*4882a593Smuzhiyun
7918*4882a593Smuzhiyun	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
7919*4882a593Smuzhiyun	beq.b		fout_pack_a7
7920*4882a593Smuzhiyun
7921*4882a593Smuzhiyun	bsr.l		_dmem_write		# write ext prec number to memory
7922*4882a593Smuzhiyun
7923*4882a593Smuzhiyun	tst.l		%d1			# did dstore fail?
7924*4882a593Smuzhiyun	bne.w		fout_ext_err		# yes
7925*4882a593Smuzhiyun
7926*4882a593Smuzhiyun	rts
7927*4882a593Smuzhiyun
7928*4882a593Smuzhiyun# we don't want to do the write if the exception occurred in supervisor mode
7929*4882a593Smuzhiyun# so _mem_write2() handles this for us.
7930*4882a593Smuzhiyunfout_pack_a7:
7931*4882a593Smuzhiyun	bsr.l		_mem_write2		# write ext prec number to memory
7932*4882a593Smuzhiyun
7933*4882a593Smuzhiyun	tst.l		%d1			# did dstore fail?
7934*4882a593Smuzhiyun	bne.w		fout_ext_err		# yes
7935*4882a593Smuzhiyun
7936*4882a593Smuzhiyun	rts
7937*4882a593Smuzhiyun
7938*4882a593Smuzhiyunfout_pack_not_norm:
7939*4882a593Smuzhiyun	cmpi.b		%d0,&DENORM		# is it a DENORM?
7940*4882a593Smuzhiyun	beq.w		fout_pack_norm		# yes
7941*4882a593Smuzhiyun	lea		FP_SRC(%a6),%a0
7942*4882a593Smuzhiyun	clr.w		2+FP_SRC_EX(%a6)	# clear middle word of exponent
7943*4882a593Smuzhiyun	cmpi.b		%d0,&SNAN		# is it an SNAN?
7944*4882a593Smuzhiyun	beq.b		fout_pack_snan		# yes
7945*4882a593Smuzhiyun	bra.b		fout_pack_write		# no
7946*4882a593Smuzhiyun
7947*4882a593Smuzhiyunfout_pack_snan:
7948*4882a593Smuzhiyun	ori.w		&snaniop2_mask,FPSR_EXCEPT(%a6) # set SNAN/AIOP
7949*4882a593Smuzhiyun	bset		&0x6,FP_SRC_HI(%a6)	# set snan bit
7950*4882a593Smuzhiyun	bra.b		fout_pack_write
7951*4882a593Smuzhiyun
7952*4882a593Smuzhiyun#########################################################################
7953*4882a593Smuzhiyun# XDEF ****************************************************************	#
7954*4882a593Smuzhiyun#	fmul(): emulates the fmul instruction				#
7955*4882a593Smuzhiyun#	fsmul(): emulates the fsmul instruction				#
7956*4882a593Smuzhiyun#	fdmul(): emulates the fdmul instruction				#
7957*4882a593Smuzhiyun#									#
7958*4882a593Smuzhiyun# XREF ****************************************************************	#
7959*4882a593Smuzhiyun#	scale_to_zero_src() - scale src exponent to zero		#
7960*4882a593Smuzhiyun#	scale_to_zero_dst() - scale dst exponent to zero		#
7961*4882a593Smuzhiyun#	unf_res() - return default underflow result			#
7962*4882a593Smuzhiyun#	ovf_res() - return default overflow result			#
7963*4882a593Smuzhiyun#	res_qnan() - return QNAN result					#
7964*4882a593Smuzhiyun#	res_snan() - return SNAN result					#
7965*4882a593Smuzhiyun#									#
7966*4882a593Smuzhiyun# INPUT ***************************************************************	#
7967*4882a593Smuzhiyun#	a0 = pointer to extended precision source operand		#
7968*4882a593Smuzhiyun#	a1 = pointer to extended precision destination operand		#
7969*4882a593Smuzhiyun#	d0  rnd prec,mode						#
7970*4882a593Smuzhiyun#									#
7971*4882a593Smuzhiyun# OUTPUT **************************************************************	#
7972*4882a593Smuzhiyun#	fp0 = result							#
7973*4882a593Smuzhiyun#	fp1 = EXOP (if exception occurred)				#
7974*4882a593Smuzhiyun#									#
7975*4882a593Smuzhiyun# ALGORITHM ***********************************************************	#
7976*4882a593Smuzhiyun#	Handle NANs, infinities, and zeroes as special cases. Divide	#
7977*4882a593Smuzhiyun# norms/denorms into ext/sgl/dbl precision.				#
7978*4882a593Smuzhiyun#	For norms/denorms, scale the exponents such that a multiply	#
7979*4882a593Smuzhiyun# instruction won't cause an exception. Use the regular fmul to		#
7980*4882a593Smuzhiyun# compute a result. Check if the regular operands would have taken	#
7981*4882a593Smuzhiyun# an exception. If so, return the default overflow/underflow result	#
7982*4882a593Smuzhiyun# and return the EXOP if exceptions are enabled. Else, scale the	#
7983*4882a593Smuzhiyun# result operand to the proper exponent.				#
7984*4882a593Smuzhiyun#									#
7985*4882a593Smuzhiyun#########################################################################
7986*4882a593Smuzhiyun
7987*4882a593Smuzhiyun	align		0x10
# scale-factor thresholds, indexed by rounding precision (ext/sgl/dbl).
# fmul compares its combined scale factor against these to predict
# whether the scaled-exponent multiply result will overflow/underflow.
7988*4882a593Smuzhiyuntbl_fmul_ovfl:
7989*4882a593Smuzhiyun	long		0x3fff - 0x7ffe		# ext_max
7990*4882a593Smuzhiyun	long		0x3fff - 0x407e		# sgl_max
7991*4882a593Smuzhiyun	long		0x3fff - 0x43fe	 	# dbl_max
7992*4882a593Smuzhiyuntbl_fmul_unfl:
7993*4882a593Smuzhiyun	long		0x3fff + 0x0001		# ext_unfl
7994*4882a593Smuzhiyun	long		0x3fff - 0x3f80		# sgl_unfl
7995*4882a593Smuzhiyun	long		0x3fff - 0x3c00		# dbl_unfl
7996*4882a593Smuzhiyun
# fsmul(): fmul with the rounding precision forced to single.
7997*4882a593Smuzhiyun	global		fsmul
7998*4882a593Smuzhiyunfsmul:
7999*4882a593Smuzhiyun	andi.b		&0x30,%d0		# clear rnd prec
8000*4882a593Smuzhiyun	ori.b		&s_mode*0x10,%d0	# insert sgl prec
8001*4882a593Smuzhiyun	bra.b		fmul
8002*4882a593Smuzhiyun
# fdmul(): fmul with the rounding precision forced to double.
8003*4882a593Smuzhiyun	global		fdmul
8004*4882a593Smuzhiyunfdmul:
8005*4882a593Smuzhiyun	andi.b		&0x30,%d0		# clear rnd prec
8006*4882a593Smuzhiyun	ori.b		&d_mode*0x10,%d0	# insert dbl prec
# falls through into fmul below
8007*4882a593Smuzhiyun
# fmul(): emulate fmul/fsmul/fdmul (see header above).
# Both operands are scaled to zero exponents so the hardware multiply
# cannot trap; the summed scale factor then predicts ovfl/unfl via the
# tbl_fmul_ovfl/tbl_fmul_unfl thresholds.
8008*4882a593Smuzhiyun	global		fmul
8009*4882a593Smuzhiyunfmul:
8010*4882a593Smuzhiyun	mov.l		%d0,L_SCR3(%a6)		# store rnd info
8011*4882a593Smuzhiyun
# build dispatch index d1 = (DTAG << 3) | STAG; zero means NORM x NORM
8012*4882a593Smuzhiyun	clr.w		%d1
8013*4882a593Smuzhiyun	mov.b		DTAG(%a6),%d1
8014*4882a593Smuzhiyun	lsl.b		&0x3,%d1
8015*4882a593Smuzhiyun	or.b		STAG(%a6),%d1		# combine src tags
8016*4882a593Smuzhiyun	bne.w		fmul_not_norm		# optimize on non-norm input
8017*4882a593Smuzhiyun
# copy both operands to the stack frame scratch areas before scaling
8018*4882a593Smuzhiyunfmul_norm:
8019*4882a593Smuzhiyun	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
8020*4882a593Smuzhiyun	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
8021*4882a593Smuzhiyun	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
8022*4882a593Smuzhiyun
8023*4882a593Smuzhiyun	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
8024*4882a593Smuzhiyun	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
8025*4882a593Smuzhiyun	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
8026*4882a593Smuzhiyun
8027*4882a593Smuzhiyun	bsr.l		scale_to_zero_src	# scale src exponent
8028*4882a593Smuzhiyun	mov.l		%d0,-(%sp)		# save scale factor 1
8029*4882a593Smuzhiyun
8030*4882a593Smuzhiyun	bsr.l		scale_to_zero_dst	# scale dst exponent
8031*4882a593Smuzhiyun
8032*4882a593Smuzhiyun	add.l		%d0,(%sp)		# SCALE_FACTOR = scale1 + scale2
8033*4882a593Smuzhiyun
# compare the scale factor with the per-precision thresholds to pick
# the normal / overflow / underflow handling path
8034*4882a593Smuzhiyun	mov.w		2+L_SCR3(%a6),%d1	# fetch precision
8035*4882a593Smuzhiyun	lsr.b		&0x6,%d1		# shift to lo bits
8036*4882a593Smuzhiyun	mov.l		(%sp)+,%d0		# load S.F.
8037*4882a593Smuzhiyun	cmp.l		%d0,(tbl_fmul_ovfl.w,%pc,%d1.w*4) # would result ovfl?
8038*4882a593Smuzhiyun	beq.w		fmul_may_ovfl		# result may rnd to overflow
8039*4882a593Smuzhiyun	blt.w		fmul_ovfl		# result will overflow
8040*4882a593Smuzhiyun
8041*4882a593Smuzhiyun	cmp.l		%d0,(tbl_fmul_unfl.w,%pc,%d1.w*4) # would result unfl?
8042*4882a593Smuzhiyun	beq.w		fmul_may_unfl		# result may rnd to no unfl
8043*4882a593Smuzhiyun	bgt.w		fmul_unfl		# result will underflow
8044*4882a593Smuzhiyun
8045*4882a593Smuzhiyun#
8046*4882a593Smuzhiyun# NORMAL:
8047*4882a593Smuzhiyun# - the result of the multiply operation will neither overflow nor underflow.
8048*4882a593Smuzhiyun# - do the multiply to the proper precision and rounding mode.
8049*4882a593Smuzhiyun# - scale the result exponent using the scale factor. if both operands were
8050*4882a593Smuzhiyun# normalized then we really don't need to go through this scaling. but for now,
8051*4882a593Smuzhiyun# this will do.
8052*4882a593Smuzhiyun#
# neither overflow nor underflow: multiply in the user's rounding
# precision/mode, then restore the true exponent from the scale factor.
8053*4882a593Smuzhiyunfmul_normal:
8054*4882a593Smuzhiyun	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
8055*4882a593Smuzhiyun
8056*4882a593Smuzhiyun	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
8057*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr		# clear FPSR
8058*4882a593Smuzhiyun
8059*4882a593Smuzhiyun	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
8060*4882a593Smuzhiyun
8061*4882a593Smuzhiyun	fmov.l		%fpsr,%d1		# save status
8062*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# clear FPCR
8063*4882a593Smuzhiyun
8064*4882a593Smuzhiyun	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
8065*4882a593Smuzhiyun
# common exit: un-scale the result's exponent by the scale factor in d0
# while preserving the sign bit, then return the result in fp0.
8066*4882a593Smuzhiyunfmul_normal_exit:
8067*4882a593Smuzhiyun	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
8068*4882a593Smuzhiyun	mov.l		%d2,-(%sp)		# save d2
8069*4882a593Smuzhiyun	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
8070*4882a593Smuzhiyun	mov.l		%d1,%d2			# make a copy
8071*4882a593Smuzhiyun	andi.l		&0x7fff,%d1		# strip sign
8072*4882a593Smuzhiyun	andi.w		&0x8000,%d2		# keep old sign
8073*4882a593Smuzhiyun	sub.l		%d0,%d1			# remove scale factor from exp
8074*4882a593Smuzhiyun	or.w		%d2,%d1			# concat old sign,new exp
8075*4882a593Smuzhiyun	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
8076*4882a593Smuzhiyun	mov.l		(%sp)+,%d2		# restore d2
8077*4882a593Smuzhiyun	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
8078*4882a593Smuzhiyun	rts
8079*4882a593Smuzhiyun
8080*4882a593Smuzhiyun#
8081*4882a593Smuzhiyun# OVERFLOW:
8082*4882a593Smuzhiyun# - the result of the multiply operation is an overflow.
8083*4882a593Smuzhiyun# - do the multiply to the proper precision and rounding mode in order to
8084*4882a593Smuzhiyun# set the inexact bits.
8085*4882a593Smuzhiyun# - calculate the default result and return it in fp0.
8086*4882a593Smuzhiyun# - if overflow or inexact is enabled, we need a multiply result rounded to
8087*4882a593Smuzhiyun# extended precision. if the original operation was extended, then we have this
8088*4882a593Smuzhiyun# result. if the original operation was single or double, we have to do another
8089*4882a593Smuzhiyun# multiply using extended precision and the correct rounding mode. the result
8090*4882a593Smuzhiyun# of this operation then has its exponent scaled by -0x6000 to create the
8091*4882a593Smuzhiyun# exceptional operand.
8092*4882a593Smuzhiyun#
# result definitely overflows: multiply anyway to pick up INEX/N, set
# the overflow accrued bits, then either return the default overflow
# result (traps disabled) or also build an EXOP (traps enabled).
8093*4882a593Smuzhiyunfmul_ovfl:
8094*4882a593Smuzhiyun	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
8095*4882a593Smuzhiyun
8096*4882a593Smuzhiyun	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
8097*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr		# clear FPSR
8098*4882a593Smuzhiyun
8099*4882a593Smuzhiyun	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
8100*4882a593Smuzhiyun
8101*4882a593Smuzhiyun	fmov.l		%fpsr,%d1		# save status
8102*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# clear FPCR
8103*4882a593Smuzhiyun
8104*4882a593Smuzhiyun	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
8105*4882a593Smuzhiyun
8106*4882a593Smuzhiyun# save setting this until now because this is where fmul_may_ovfl may jump in
8107*4882a593Smuzhiyunfmul_ovfl_tst:
8108*4882a593Smuzhiyun	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
8109*4882a593Smuzhiyun
8110*4882a593Smuzhiyun	mov.b		FPCR_ENABLE(%a6),%d1
8111*4882a593Smuzhiyun	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
8112*4882a593Smuzhiyun	bne.b		fmul_ovfl_ena		# yes
8113*4882a593Smuzhiyun
8114*4882a593Smuzhiyun# calculate the default result
8115*4882a593Smuzhiyunfmul_ovfl_dis:
8116*4882a593Smuzhiyun	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
8117*4882a593Smuzhiyun	sne		%d1			# set sign param accordingly
8118*4882a593Smuzhiyun	mov.l		L_SCR3(%a6),%d0		# pass rnd prec,mode
8119*4882a593Smuzhiyun	bsr.l		ovf_res			# calculate default result
8120*4882a593Smuzhiyun	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
8121*4882a593Smuzhiyun	fmovm.x		(%a0),&0x80		# return default result in fp0
8122*4882a593Smuzhiyun	rts
8123*4882a593Smuzhiyun
8124*4882a593Smuzhiyun#
8125*4882a593Smuzhiyun# OVFL is enabled; Create EXOP:
8126*4882a593Smuzhiyun# - if precision is extended, then we have the EXOP. simply bias the exponent
8127*4882a593Smuzhiyun# with an extra -0x6000. if the precision is single or double, we need to
8128*4882a593Smuzhiyun# calculate a result rounded to extended precision.
8129*4882a593Smuzhiyun#
# build the EXOP: an extended-precision result whose exponent has been
# unscaled and then biased down by 0x6000 (see comment block above).
8130*4882a593Smuzhiyunfmul_ovfl_ena:
8131*4882a593Smuzhiyun	mov.l		L_SCR3(%a6),%d1
8132*4882a593Smuzhiyun	andi.b		&0xc0,%d1		# test the rnd prec
8133*4882a593Smuzhiyun	bne.b		fmul_ovfl_ena_sd	# it's sgl or dbl
8134*4882a593Smuzhiyun
8135*4882a593Smuzhiyunfmul_ovfl_ena_cont:
8136*4882a593Smuzhiyun	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack
8137*4882a593Smuzhiyun
8138*4882a593Smuzhiyun	mov.l		%d2,-(%sp)		# save d2
8139*4882a593Smuzhiyun	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
8140*4882a593Smuzhiyun	mov.w		%d1,%d2			# make a copy
8141*4882a593Smuzhiyun	andi.l		&0x7fff,%d1		# strip sign
8142*4882a593Smuzhiyun	sub.l		%d0,%d1			# remove scale factor from exp
8143*4882a593Smuzhiyun	subi.l		&0x6000,%d1		# subtract bias
8144*4882a593Smuzhiyun	andi.w		&0x7fff,%d1		# clear sign bit
8145*4882a593Smuzhiyun	andi.w		&0x8000,%d2		# keep old sign
8146*4882a593Smuzhiyun	or.w		%d2,%d1			# concat old sign,new exp
8147*4882a593Smuzhiyun	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
8148*4882a593Smuzhiyun	mov.l		(%sp)+,%d2		# restore d2
8149*4882a593Smuzhiyun	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
8150*4882a593Smuzhiyun	bra.b		fmul_ovfl_dis
8151*4882a593Smuzhiyun
# rnd prec was sgl/dbl: redo the multiply rounded to extended (same rnd
# mode) so the EXOP carries the full-precision mantissa.
8152*4882a593Smuzhiyunfmul_ovfl_ena_sd:
8153*4882a593Smuzhiyun	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
8154*4882a593Smuzhiyun
8155*4882a593Smuzhiyun	mov.l		L_SCR3(%a6),%d1
8156*4882a593Smuzhiyun	andi.b		&0x30,%d1		# keep rnd mode only
8157*4882a593Smuzhiyun	fmov.l		%d1,%fpcr		# set FPCR
8158*4882a593Smuzhiyun
8159*4882a593Smuzhiyun	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
8160*4882a593Smuzhiyun
8161*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# clear FPCR
8162*4882a593Smuzhiyun	bra.b		fmul_ovfl_ena_cont
8163*4882a593Smuzhiyun
8164*4882a593Smuzhiyun#
8165*4882a593Smuzhiyun# may OVERFLOW:
8166*4882a593Smuzhiyun# - the result of the multiply operation MAY overflow.
8167*4882a593Smuzhiyun# - do the multiply to the proper precision and rounding mode in order to
8168*4882a593Smuzhiyun# set the inexact bits.
8169*4882a593Smuzhiyun# - calculate the default result and return it in fp0.
8170*4882a593Smuzhiyun#
# scale factor sits exactly on the overflow threshold: do the multiply,
# then decide by magnitude (operands were scaled to zero exponents, so
# |result| >= 2 here means the true result overflowed).
8171*4882a593Smuzhiyunfmul_may_ovfl:
8172*4882a593Smuzhiyun	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
8173*4882a593Smuzhiyun
8174*4882a593Smuzhiyun	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
8175*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr		# clear FPSR
8176*4882a593Smuzhiyun
8177*4882a593Smuzhiyun	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
8178*4882a593Smuzhiyun
8179*4882a593Smuzhiyun	fmov.l		%fpsr,%d1		# save status
8180*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# clear FPCR
8181*4882a593Smuzhiyun
8182*4882a593Smuzhiyun	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
8183*4882a593Smuzhiyun
8184*4882a593Smuzhiyun	fabs.x		%fp0,%fp1		# make a copy of result
8185*4882a593Smuzhiyun	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
8186*4882a593Smuzhiyun	fbge.w		fmul_ovfl_tst		# yes; overflow has occurred
8187*4882a593Smuzhiyun
8188*4882a593Smuzhiyun# no, it didn't overflow; we have correct result
8189*4882a593Smuzhiyun	bra.w		fmul_normal_exit
8190*4882a593Smuzhiyun
8191*4882a593Smuzhiyun#
8192*4882a593Smuzhiyun# UNDERFLOW:
8193*4882a593Smuzhiyun# - the result of the multiply operation is an underflow.
8194*4882a593Smuzhiyun# - do the multiply to the proper precision and rounding mode in order to
8195*4882a593Smuzhiyun# set the inexact bits.
8196*4882a593Smuzhiyun# - calculate the default result and return it in fp0.
8197*4882a593Smuzhiyun# - if overflow or inexact is enabled, we need a multiply result rounded to
8198*4882a593Smuzhiyun# extended precision. if the original operation was extended, then we have this
8199*4882a593Smuzhiyun# result. if the original operation was single or double, we have to do another
8200*4882a593Smuzhiyun# multiply using extended precision and the correct rounding mode. the result
8201*4882a593Smuzhiyun# of this operation then has its exponent scaled by -0x6000 to create the
8202*4882a593Smuzhiyun# exceptional operand.
8203*4882a593Smuzhiyun#
# result definitely underflows: multiply once in extended/RZ and let
# unf_res() compute the correctly rounded denormalized default result.
8204*4882a593Smuzhiyunfmul_unfl:
8205*4882a593Smuzhiyun	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
8206*4882a593Smuzhiyun
8207*4882a593Smuzhiyun# for fun, let's use only extended precision, round to zero. then, let
8208*4882a593Smuzhiyun# the unf_res() routine figure out all the rest.
8209*4882a593Smuzhiyun# will we get the correct answer.
8210*4882a593Smuzhiyun	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
8211*4882a593Smuzhiyun
8212*4882a593Smuzhiyun	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
8213*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr		# clear FPSR
8214*4882a593Smuzhiyun
8215*4882a593Smuzhiyun	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
8216*4882a593Smuzhiyun
8217*4882a593Smuzhiyun	fmov.l		%fpsr,%d1		# save status
8218*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# clear FPCR
8219*4882a593Smuzhiyun
8220*4882a593Smuzhiyun	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
8221*4882a593Smuzhiyun
8222*4882a593Smuzhiyun	mov.b		FPCR_ENABLE(%a6),%d1
8223*4882a593Smuzhiyun	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
8224*4882a593Smuzhiyun	bne.b		fmul_unfl_ena		# yes
8225*4882a593Smuzhiyun
# traps disabled: hand the scaled result to unf_res() for the default
8226*4882a593Smuzhiyunfmul_unfl_dis:
8227*4882a593Smuzhiyun	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
8228*4882a593Smuzhiyun
8229*4882a593Smuzhiyun	lea		FP_SCR0(%a6),%a0	# pass: result addr
8230*4882a593Smuzhiyun	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
8231*4882a593Smuzhiyun	bsr.l		unf_res			# calculate default result
8232*4882a593Smuzhiyun	or.b		%d0,FPSR_CC(%a6)	# unf_res2 may have set 'Z'
8233*4882a593Smuzhiyun	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
8234*4882a593Smuzhiyun	rts
8235*4882a593Smuzhiyun
8236*4882a593Smuzhiyun#
8237*4882a593Smuzhiyun# UNFL is enabled.
8238*4882a593Smuzhiyun#
# UNFL/INEX traps enabled: build the EXOP in fp1 — redo the multiply in
# extended precision with the user's rounding mode, unscale the exponent
# and bias it up by 0x6000 — then fall back for the default result.
8239*4882a593Smuzhiyunfmul_unfl_ena:
8240*4882a593Smuzhiyun	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op
8241*4882a593Smuzhiyun
8242*4882a593Smuzhiyun	mov.l		L_SCR3(%a6),%d1
8243*4882a593Smuzhiyun	andi.b		&0xc0,%d1		# is precision extended?
8244*4882a593Smuzhiyun	bne.b		fmul_unfl_ena_sd	# no, sgl or dbl
8245*4882a593Smuzhiyun
8246*4882a593Smuzhiyun# if the rnd mode is anything but RZ, then we have to re-do the above
8247*4882a593Smuzhiyun# multiplication because we used RZ for all.
8248*4882a593Smuzhiyun	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
8249*4882a593Smuzhiyun
8250*4882a593Smuzhiyunfmul_unfl_ena_cont:
8251*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr		# clear FPSR
8252*4882a593Smuzhiyun
8253*4882a593Smuzhiyun	fmul.x		FP_SCR0(%a6),%fp1	# execute multiply
8254*4882a593Smuzhiyun
8255*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# clear FPCR
8256*4882a593Smuzhiyun
8257*4882a593Smuzhiyun	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
8258*4882a593Smuzhiyun	mov.l		%d2,-(%sp)		# save d2
8259*4882a593Smuzhiyun	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
8260*4882a593Smuzhiyun	mov.l		%d1,%d2			# make a copy
8261*4882a593Smuzhiyun	andi.l		&0x7fff,%d1		# strip sign
8262*4882a593Smuzhiyun	andi.w		&0x8000,%d2		# keep old sign
8263*4882a593Smuzhiyun	sub.l		%d0,%d1			# remove scale factor from exp
8264*4882a593Smuzhiyun	addi.l		&0x6000,%d1		# add bias
8265*4882a593Smuzhiyun	andi.w		&0x7fff,%d1		# clear sign bit
8266*4882a593Smuzhiyun	or.w		%d2,%d1			# concat old sign,new exp
8267*4882a593Smuzhiyun	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
8268*4882a593Smuzhiyun	mov.l		(%sp)+,%d2		# restore d2
8269*4882a593Smuzhiyun	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
8270*4882a593Smuzhiyun	bra.w		fmul_unfl_dis
8271*4882a593Smuzhiyun
# rnd prec was sgl/dbl: EXOP must be rounded to extended, so use only
# the rounding mode bits for the re-multiply.
8272*4882a593Smuzhiyunfmul_unfl_ena_sd:
8273*4882a593Smuzhiyun	mov.l		L_SCR3(%a6),%d1
8274*4882a593Smuzhiyun	andi.b		&0x30,%d1		# use only rnd mode
8275*4882a593Smuzhiyun	fmov.l		%d1,%fpcr		# set FPCR
8276*4882a593Smuzhiyun
8277*4882a593Smuzhiyun	bra.b		fmul_unfl_ena_cont
8278*4882a593Smuzhiyun
8279*4882a593Smuzhiyun# MAY UNDERFLOW:
8280*4882a593Smuzhiyun# -use the correct rounding mode and precision. this code favors operations
8281*4882a593Smuzhiyun# that do not underflow.
# scale factor sits exactly on the underflow threshold: multiply with
# the user's prec/mode, then decide by magnitude; the |result| == 2
# boundary case is disambiguated by re-multiplying in RZ below.
8282*4882a593Smuzhiyunfmul_may_unfl:
8283*4882a593Smuzhiyun	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
8284*4882a593Smuzhiyun
8285*4882a593Smuzhiyun	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
8286*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr		# clear FPSR
8287*4882a593Smuzhiyun
8288*4882a593Smuzhiyun	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
8289*4882a593Smuzhiyun
8290*4882a593Smuzhiyun	fmov.l		%fpsr,%d1		# save status
8291*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# clear FPCR
8292*4882a593Smuzhiyun
8293*4882a593Smuzhiyun	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
8294*4882a593Smuzhiyun
8295*4882a593Smuzhiyun	fabs.x		%fp0,%fp1		# make a copy of result
8296*4882a593Smuzhiyun	fcmp.b		%fp1,&0x2		# is |result| > 2.b?
8297*4882a593Smuzhiyun	fbgt.w		fmul_normal_exit	# no; no underflow occurred
8298*4882a593Smuzhiyun	fblt.w		fmul_unfl		# yes; underflow occurred
8299*4882a593Smuzhiyun
8300*4882a593Smuzhiyun#
8301*4882a593Smuzhiyun# we still don't know if underflow occurred. result is ~ equal to 2. but,
8302*4882a593Smuzhiyun# we don't know if the result was an underflow that rounded up to a 2 or
8303*4882a593Smuzhiyun# a normalized number that rounded down to a 2. so, redo the entire operation
8304*4882a593Smuzhiyun# using RZ as the rounding mode to see what the pre-rounded result is.
8305*4882a593Smuzhiyun# this case should be relatively rare.
8306*4882a593Smuzhiyun#
8307*4882a593Smuzhiyun	fmovm.x		FP_SCR1(%a6),&0x40	# load dst operand
8308*4882a593Smuzhiyun
8309*4882a593Smuzhiyun	mov.l		L_SCR3(%a6),%d1
8310*4882a593Smuzhiyun	andi.b		&0xc0,%d1		# keep rnd prec
8311*4882a593Smuzhiyun	ori.b		&rz_mode*0x10,%d1	# insert RZ
8312*4882a593Smuzhiyun
8313*4882a593Smuzhiyun	fmov.l		%d1,%fpcr		# set FPCR
8314*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr		# clear FPSR
8315*4882a593Smuzhiyun
8316*4882a593Smuzhiyun	fmul.x		FP_SCR0(%a6),%fp1	# execute multiply
8317*4882a593Smuzhiyun
8318*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# clear FPCR
8319*4882a593Smuzhiyun	fabs.x		%fp1			# make absolute value
8320*4882a593Smuzhiyun	fcmp.b		%fp1,&0x2		# is |result| < 2.b?
8321*4882a593Smuzhiyun	fbge.w		fmul_normal_exit	# no; no underflow occurred
8322*4882a593Smuzhiyun	bra.w		fmul_unfl		# yes, underflow occurred
8323*4882a593Smuzhiyun
8324*4882a593Smuzhiyun################################################################################
8325*4882a593Smuzhiyun
8326*4882a593Smuzhiyun#
8327*4882a593Smuzhiyun# Multiply: inputs are not both normalized; what are they?
8328*4882a593Smuzhiyun#
8329*4882a593Smuzhiyunfmul_not_norm:
8330*4882a593Smuzhiyun	mov.w		(tbl_fmul_op.b,%pc,%d1.w*2),%d1
8331*4882a593Smuzhiyun	jmp		(tbl_fmul_op.b,%pc,%d1.w)
8332*4882a593Smuzhiyun
8333*4882a593Smuzhiyun	swbeg		&48
8334*4882a593Smuzhiyuntbl_fmul_op:
8335*4882a593Smuzhiyun	short		fmul_norm	- tbl_fmul_op # NORM x NORM
8336*4882a593Smuzhiyun	short		fmul_zero	- tbl_fmul_op # NORM x ZERO
8337*4882a593Smuzhiyun	short		fmul_inf_src	- tbl_fmul_op # NORM x INF
8338*4882a593Smuzhiyun	short		fmul_res_qnan	- tbl_fmul_op # NORM x QNAN
8339*4882a593Smuzhiyun	short		fmul_norm	- tbl_fmul_op # NORM x DENORM
8340*4882a593Smuzhiyun	short		fmul_res_snan	- tbl_fmul_op # NORM x SNAN
8341*4882a593Smuzhiyun	short		tbl_fmul_op	- tbl_fmul_op #
8342*4882a593Smuzhiyun	short		tbl_fmul_op	- tbl_fmul_op #
8343*4882a593Smuzhiyun
8344*4882a593Smuzhiyun	short		fmul_zero	- tbl_fmul_op # ZERO x NORM
8345*4882a593Smuzhiyun	short		fmul_zero	- tbl_fmul_op # ZERO x ZERO
8346*4882a593Smuzhiyun	short		fmul_res_operr	- tbl_fmul_op # ZERO x INF
8347*4882a593Smuzhiyun	short		fmul_res_qnan	- tbl_fmul_op # ZERO x QNAN
8348*4882a593Smuzhiyun	short		fmul_zero	- tbl_fmul_op # ZERO x DENORM
8349*4882a593Smuzhiyun	short		fmul_res_snan	- tbl_fmul_op # ZERO x SNAN
8350*4882a593Smuzhiyun	short		tbl_fmul_op	- tbl_fmul_op #
8351*4882a593Smuzhiyun	short		tbl_fmul_op	- tbl_fmul_op #
8352*4882a593Smuzhiyun
8353*4882a593Smuzhiyun	short		fmul_inf_dst	- tbl_fmul_op # INF x NORM
8354*4882a593Smuzhiyun	short		fmul_res_operr	- tbl_fmul_op # INF x ZERO
8355*4882a593Smuzhiyun	short		fmul_inf_dst	- tbl_fmul_op # INF x INF
8356*4882a593Smuzhiyun	short		fmul_res_qnan	- tbl_fmul_op # INF x QNAN
8357*4882a593Smuzhiyun	short		fmul_inf_dst	- tbl_fmul_op # INF x DENORM
8358*4882a593Smuzhiyun	short		fmul_res_snan	- tbl_fmul_op # INF x SNAN
8359*4882a593Smuzhiyun	short		tbl_fmul_op	- tbl_fmul_op #
8360*4882a593Smuzhiyun	short		tbl_fmul_op	- tbl_fmul_op #
8361*4882a593Smuzhiyun
8362*4882a593Smuzhiyun	short		fmul_res_qnan	- tbl_fmul_op # QNAN x NORM
8363*4882a593Smuzhiyun	short		fmul_res_qnan	- tbl_fmul_op # QNAN x ZERO
8364*4882a593Smuzhiyun	short		fmul_res_qnan	- tbl_fmul_op # QNAN x INF
8365*4882a593Smuzhiyun	short		fmul_res_qnan	- tbl_fmul_op # QNAN x QNAN
8366*4882a593Smuzhiyun	short		fmul_res_qnan	- tbl_fmul_op # QNAN x DENORM
8367*4882a593Smuzhiyun	short		fmul_res_snan	- tbl_fmul_op # QNAN x SNAN
8368*4882a593Smuzhiyun	short		tbl_fmul_op	- tbl_fmul_op #
8369*4882a593Smuzhiyun	short		tbl_fmul_op	- tbl_fmul_op #
8370*4882a593Smuzhiyun
8371*4882a593Smuzhiyun	short		fmul_norm	- tbl_fmul_op # NORM x NORM
8372*4882a593Smuzhiyun	short		fmul_zero	- tbl_fmul_op # NORM x ZERO
8373*4882a593Smuzhiyun	short		fmul_inf_src	- tbl_fmul_op # NORM x INF
8374*4882a593Smuzhiyun	short		fmul_res_qnan	- tbl_fmul_op # NORM x QNAN
8375*4882a593Smuzhiyun	short		fmul_norm	- tbl_fmul_op # NORM x DENORM
8376*4882a593Smuzhiyun	short		fmul_res_snan	- tbl_fmul_op # NORM x SNAN
8377*4882a593Smuzhiyun	short		tbl_fmul_op	- tbl_fmul_op #
8378*4882a593Smuzhiyun	short		tbl_fmul_op	- tbl_fmul_op #
8379*4882a593Smuzhiyun
8380*4882a593Smuzhiyun	short		fmul_res_snan	- tbl_fmul_op # SNAN x NORM
8381*4882a593Smuzhiyun	short		fmul_res_snan	- tbl_fmul_op # SNAN x ZERO
8382*4882a593Smuzhiyun	short		fmul_res_snan	- tbl_fmul_op # SNAN x INF
8383*4882a593Smuzhiyun	short		fmul_res_snan	- tbl_fmul_op # SNAN x QNAN
8384*4882a593Smuzhiyun	short		fmul_res_snan	- tbl_fmul_op # SNAN x DENORM
8385*4882a593Smuzhiyun	short		fmul_res_snan	- tbl_fmul_op # SNAN x SNAN
8386*4882a593Smuzhiyun	short		tbl_fmul_op	- tbl_fmul_op #
8387*4882a593Smuzhiyun	short		tbl_fmul_op	- tbl_fmul_op #
8388*4882a593Smuzhiyun
8389*4882a593Smuzhiyunfmul_res_operr:
8390*4882a593Smuzhiyun	bra.l		res_operr
8391*4882a593Smuzhiyunfmul_res_snan:
8392*4882a593Smuzhiyun	bra.l		res_snan
8393*4882a593Smuzhiyunfmul_res_qnan:
8394*4882a593Smuzhiyun	bra.l		res_qnan
8395*4882a593Smuzhiyun
8396*4882a593Smuzhiyun#
8397*4882a593Smuzhiyun# Multiply: (Zero x Zero) || (Zero x norm) || (Zero x denorm)
8398*4882a593Smuzhiyun#
8399*4882a593Smuzhiyun	global		fmul_zero		# global for fsglmul
8400*4882a593Smuzhiyunfmul_zero:
8401*4882a593Smuzhiyun	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
8402*4882a593Smuzhiyun	mov.b		DST_EX(%a1),%d1
8403*4882a593Smuzhiyun	eor.b		%d0,%d1
8404*4882a593Smuzhiyun	bpl.b		fmul_zero_p		# result ZERO is pos.
8405*4882a593Smuzhiyunfmul_zero_n:
8406*4882a593Smuzhiyun	fmov.s		&0x80000000,%fp0	# load -ZERO
8407*4882a593Smuzhiyun	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/N
8408*4882a593Smuzhiyun	rts
8409*4882a593Smuzhiyunfmul_zero_p:
8410*4882a593Smuzhiyun	fmov.s		&0x00000000,%fp0	# load +ZERO
8411*4882a593Smuzhiyun	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
8412*4882a593Smuzhiyun	rts
8413*4882a593Smuzhiyun
8414*4882a593Smuzhiyun#
8415*4882a593Smuzhiyun# Multiply: (inf x inf) || (inf x norm) || (inf x denorm)
8416*4882a593Smuzhiyun#
8417*4882a593Smuzhiyun# Note: The j-bit for an infinity is a don't-care. However, to be
8418*4882a593Smuzhiyun# strictly compatible w/ the 68881/882, we make sure to return an
8419*4882a593Smuzhiyun# INF w/ the j-bit set if the input INF j-bit was set. Destination
8420*4882a593Smuzhiyun# INFs take priority.
8421*4882a593Smuzhiyun#
8422*4882a593Smuzhiyun	global		fmul_inf_dst		# global for fsglmul
8423*4882a593Smuzhiyunfmul_inf_dst:
8424*4882a593Smuzhiyun	fmovm.x		DST(%a1),&0x80		# return INF result in fp0
8425*4882a593Smuzhiyun	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
8426*4882a593Smuzhiyun	mov.b		DST_EX(%a1),%d1
8427*4882a593Smuzhiyun	eor.b		%d0,%d1
8428*4882a593Smuzhiyun	bpl.b		fmul_inf_dst_p		# result INF is pos.
8429*4882a593Smuzhiyunfmul_inf_dst_n:
8430*4882a593Smuzhiyun	fabs.x		%fp0			# clear result sign
8431*4882a593Smuzhiyun	fneg.x		%fp0			# set result sign
8432*4882a593Smuzhiyun	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
8433*4882a593Smuzhiyun	rts
8434*4882a593Smuzhiyunfmul_inf_dst_p:
8435*4882a593Smuzhiyun	fabs.x		%fp0			# clear result sign
8436*4882a593Smuzhiyun	mov.b		&inf_bmask,FPSR_CC(%a6)	# set INF
8437*4882a593Smuzhiyun	rts
8438*4882a593Smuzhiyun
8439*4882a593Smuzhiyun	global		fmul_inf_src		# global for fsglmul
8440*4882a593Smuzhiyunfmul_inf_src:
8441*4882a593Smuzhiyun	fmovm.x		SRC(%a0),&0x80		# return INF result in fp0
8442*4882a593Smuzhiyun	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
8443*4882a593Smuzhiyun	mov.b		DST_EX(%a1),%d1
8444*4882a593Smuzhiyun	eor.b		%d0,%d1
8445*4882a593Smuzhiyun	bpl.b		fmul_inf_dst_p		# result INF is pos.
8446*4882a593Smuzhiyun	bra.b		fmul_inf_dst_n
8447*4882a593Smuzhiyun
8448*4882a593Smuzhiyun#########################################################################
8449*4882a593Smuzhiyun# XDEF ****************************************************************	#
8450*4882a593Smuzhiyun#	fin(): emulates the fmove instruction				#
8451*4882a593Smuzhiyun#	fsin(): emulates the fsmove instruction				#
8452*4882a593Smuzhiyun#	fdin(): emulates the fdmove instruction				#
8453*4882a593Smuzhiyun#									#
8454*4882a593Smuzhiyun# XREF ****************************************************************	#
8455*4882a593Smuzhiyun#	norm() - normalize mantissa for EXOP on denorm			#
8456*4882a593Smuzhiyun#	scale_to_zero_src() - scale src exponent to zero		#
8457*4882a593Smuzhiyun#	ovf_res() - return default overflow result			#
8458*4882a593Smuzhiyun#	unf_res() - return default underflow result			#
8459*4882a593Smuzhiyun#	res_qnan_1op() - return QNAN result				#
8460*4882a593Smuzhiyun#	res_snan_1op() - return SNAN result				#
8461*4882a593Smuzhiyun#									#
8462*4882a593Smuzhiyun# INPUT ***************************************************************	#
8463*4882a593Smuzhiyun#	a0 = pointer to extended precision source operand		#
8464*4882a593Smuzhiyun#	d0 = round prec/mode						#
8465*4882a593Smuzhiyun#									#
8466*4882a593Smuzhiyun# OUTPUT **************************************************************	#
8467*4882a593Smuzhiyun#	fp0 = result							#
8468*4882a593Smuzhiyun#	fp1 = EXOP (if exception occurred)				#
8469*4882a593Smuzhiyun#									#
8470*4882a593Smuzhiyun# ALGORITHM ***********************************************************	#
8471*4882a593Smuzhiyun#	Handle NANs, infinities, and zeroes as special cases. Divide	#
8472*4882a593Smuzhiyun# norms into extended, single, and double precision.			#
8473*4882a593Smuzhiyun#	Norms can be emulated w/ a regular fmove instruction. For	#
8474*4882a593Smuzhiyun# sgl/dbl, must scale exponent and perform an "fmove". Check to see	#
8475*4882a593Smuzhiyun# if the result would have overflowed/underflowed. If so, use unf_res()	#
8476*4882a593Smuzhiyun# or ovf_res() to return the default result. Also return EXOP if	#
8477*4882a593Smuzhiyun# exception is enabled. If no exception, return the default result.	#
8478*4882a593Smuzhiyun#	Unnorms don't pass through here.				#
8479*4882a593Smuzhiyun#									#
8480*4882a593Smuzhiyun#########################################################################
8481*4882a593Smuzhiyun
8482*4882a593Smuzhiyun	global		fsin
8483*4882a593Smuzhiyunfsin:
8484*4882a593Smuzhiyun	andi.b		&0x30,%d0		# clear rnd prec
8485*4882a593Smuzhiyun	ori.b		&s_mode*0x10,%d0	# insert sgl precision
8486*4882a593Smuzhiyun	bra.b		fin
8487*4882a593Smuzhiyun
8488*4882a593Smuzhiyun	global		fdin
8489*4882a593Smuzhiyunfdin:
8490*4882a593Smuzhiyun	andi.b		&0x30,%d0		# clear rnd prec
8491*4882a593Smuzhiyun	ori.b		&d_mode*0x10,%d0	# insert dbl precision
8492*4882a593Smuzhiyun
8493*4882a593Smuzhiyun	global		fin
8494*4882a593Smuzhiyunfin:
8495*4882a593Smuzhiyun	mov.l		%d0,L_SCR3(%a6)		# store rnd info
8496*4882a593Smuzhiyun
8497*4882a593Smuzhiyun	mov.b		STAG(%a6),%d1		# fetch src optype tag
8498*4882a593Smuzhiyun	bne.w		fin_not_norm		# optimize on non-norm input
8499*4882a593Smuzhiyun
8500*4882a593Smuzhiyun#
8501*4882a593Smuzhiyun# FP MOVE IN: NORMs and DENORMs ONLY!
8502*4882a593Smuzhiyun#
8503*4882a593Smuzhiyunfin_norm:
8504*4882a593Smuzhiyun	andi.b		&0xc0,%d0		# is precision extended?
8505*4882a593Smuzhiyun	bne.w		fin_not_ext		# no, so go handle dbl or sgl
8506*4882a593Smuzhiyun
8507*4882a593Smuzhiyun#
8508*4882a593Smuzhiyun# precision selected is extended. so...we cannot get an underflow
8509*4882a593Smuzhiyun# or overflow because of rounding to the correct precision. so...
8510*4882a593Smuzhiyun# skip the scaling and unscaling...
8511*4882a593Smuzhiyun#
8512*4882a593Smuzhiyun	tst.b		SRC_EX(%a0)		# is the operand negative?
8513*4882a593Smuzhiyun	bpl.b		fin_norm_done		# no
8514*4882a593Smuzhiyun	bset		&neg_bit,FPSR_CC(%a6)	# yes, so set 'N' ccode bit
8515*4882a593Smuzhiyunfin_norm_done:
8516*4882a593Smuzhiyun	fmovm.x		SRC(%a0),&0x80		# return result in fp0
8517*4882a593Smuzhiyun	rts
8518*4882a593Smuzhiyun
8519*4882a593Smuzhiyun#
8520*4882a593Smuzhiyun# for an extended precision DENORM, the UNFL exception bit is set
8521*4882a593Smuzhiyun# the accrued bit is NOT set in this instance(no inexactness!)
8522*4882a593Smuzhiyun#
8523*4882a593Smuzhiyunfin_denorm:
8524*4882a593Smuzhiyun	andi.b		&0xc0,%d0		# is precision extended?
8525*4882a593Smuzhiyun	bne.w		fin_not_ext		# no, so go handle dbl or sgl
8526*4882a593Smuzhiyun
8527*4882a593Smuzhiyun	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
8528*4882a593Smuzhiyun	tst.b		SRC_EX(%a0)		# is the operand negative?
8529*4882a593Smuzhiyun	bpl.b		fin_denorm_done		# no
8530*4882a593Smuzhiyun	bset		&neg_bit,FPSR_CC(%a6)	# yes, so set 'N' ccode bit
8531*4882a593Smuzhiyunfin_denorm_done:
8532*4882a593Smuzhiyun	fmovm.x		SRC(%a0),&0x80		# return result in fp0
8533*4882a593Smuzhiyun	btst		&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
8534*4882a593Smuzhiyun	bne.b		fin_denorm_unfl_ena	# yes
8535*4882a593Smuzhiyun	rts
8536*4882a593Smuzhiyun
8537*4882a593Smuzhiyun#
8538*4882a593Smuzhiyun# the input is an extended DENORM and underflow is enabled in the FPCR.
8539*4882a593Smuzhiyun# normalize the mantissa and add the bias of 0x6000 to the resulting negative
8540*4882a593Smuzhiyun# exponent and insert back into the operand.
8541*4882a593Smuzhiyun#
8542*4882a593Smuzhiyunfin_denorm_unfl_ena:
8543*4882a593Smuzhiyun	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
8544*4882a593Smuzhiyun	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
8545*4882a593Smuzhiyun	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
8546*4882a593Smuzhiyun	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
8547*4882a593Smuzhiyun	bsr.l		norm			# normalize result
8548*4882a593Smuzhiyun	neg.w		%d0			# new exponent = -(shft val)
8549*4882a593Smuzhiyun	addi.w		&0x6000,%d0		# add new bias to exponent
8550*4882a593Smuzhiyun	mov.w		FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
8551*4882a593Smuzhiyun	andi.w		&0x8000,%d1		# keep old sign
8552*4882a593Smuzhiyun	andi.w		&0x7fff,%d0		# clear sign position
8553*4882a593Smuzhiyun	or.w		%d1,%d0			# concat new exo,old sign
8554*4882a593Smuzhiyun	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
8555*4882a593Smuzhiyun	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
8556*4882a593Smuzhiyun	rts
8557*4882a593Smuzhiyun
8558*4882a593Smuzhiyun#
8559*4882a593Smuzhiyun# operand is to be rounded to single or double precision
8560*4882a593Smuzhiyun#
8561*4882a593Smuzhiyunfin_not_ext:
8562*4882a593Smuzhiyun	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
8563*4882a593Smuzhiyun	bne.b		fin_dbl
8564*4882a593Smuzhiyun
8565*4882a593Smuzhiyun#
8566*4882a593Smuzhiyun# operand is to be rounded to single precision
8567*4882a593Smuzhiyun#
8568*4882a593Smuzhiyunfin_sgl:
8569*4882a593Smuzhiyun	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
8570*4882a593Smuzhiyun	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
8571*4882a593Smuzhiyun	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
8572*4882a593Smuzhiyun	bsr.l		scale_to_zero_src	# calculate scale factor
8573*4882a593Smuzhiyun
8574*4882a593Smuzhiyun	cmpi.l		%d0,&0x3fff-0x3f80	# will move in underflow?
8575*4882a593Smuzhiyun	bge.w		fin_sd_unfl		# yes; go handle underflow
8576*4882a593Smuzhiyun	cmpi.l		%d0,&0x3fff-0x407e	# will move in overflow?
8577*4882a593Smuzhiyun	beq.w		fin_sd_may_ovfl		# maybe; go check
8578*4882a593Smuzhiyun	blt.w		fin_sd_ovfl		# yes; go handle overflow
8579*4882a593Smuzhiyun
8580*4882a593Smuzhiyun#
8581*4882a593Smuzhiyun# operand will NOT overflow or underflow when moved into the fp reg file
8582*4882a593Smuzhiyun#
8583*4882a593Smuzhiyunfin_sd_normal:
8584*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr		# clear FPSR
8585*4882a593Smuzhiyun	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
8586*4882a593Smuzhiyun
8587*4882a593Smuzhiyun	fmov.x		FP_SCR0(%a6),%fp0	# perform move
8588*4882a593Smuzhiyun
8589*4882a593Smuzhiyun	fmov.l		%fpsr,%d1		# save FPSR
8590*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# clear FPCR
8591*4882a593Smuzhiyun
8592*4882a593Smuzhiyun	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
8593*4882a593Smuzhiyun
8594*4882a593Smuzhiyunfin_sd_normal_exit:
8595*4882a593Smuzhiyun	mov.l		%d2,-(%sp)		# save d2
8596*4882a593Smuzhiyun	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
8597*4882a593Smuzhiyun	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
8598*4882a593Smuzhiyun	mov.w		%d1,%d2			# make a copy
8599*4882a593Smuzhiyun	andi.l		&0x7fff,%d1		# strip sign
8600*4882a593Smuzhiyun	sub.l		%d0,%d1			# add scale factor
8601*4882a593Smuzhiyun	andi.w		&0x8000,%d2		# keep old sign
8602*4882a593Smuzhiyun	or.w		%d1,%d2			# concat old sign,new exponent
8603*4882a593Smuzhiyun	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
8604*4882a593Smuzhiyun	mov.l		(%sp)+,%d2		# restore d2
8605*4882a593Smuzhiyun	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
8606*4882a593Smuzhiyun	rts
8607*4882a593Smuzhiyun
8608*4882a593Smuzhiyun#
8609*4882a593Smuzhiyun# operand is to be rounded to double precision
8610*4882a593Smuzhiyun#
8611*4882a593Smuzhiyunfin_dbl:
8612*4882a593Smuzhiyun	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
8613*4882a593Smuzhiyun	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
8614*4882a593Smuzhiyun	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
8615*4882a593Smuzhiyun	bsr.l		scale_to_zero_src	# calculate scale factor
8616*4882a593Smuzhiyun
8617*4882a593Smuzhiyun	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
8618*4882a593Smuzhiyun	bge.w		fin_sd_unfl		# yes; go handle underflow
8619*4882a593Smuzhiyun	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
8620*4882a593Smuzhiyun	beq.w		fin_sd_may_ovfl		# maybe; go check
8621*4882a593Smuzhiyun	blt.w		fin_sd_ovfl		# yes; go handle overflow
8622*4882a593Smuzhiyun	bra.w		fin_sd_normal		# no; ho handle normalized op
8623*4882a593Smuzhiyun
8624*4882a593Smuzhiyun#
8625*4882a593Smuzhiyun# operand WILL underflow when moved in to the fp register file
8626*4882a593Smuzhiyun#
8627*4882a593Smuzhiyunfin_sd_unfl:
8628*4882a593Smuzhiyun	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
8629*4882a593Smuzhiyun
8630*4882a593Smuzhiyun	tst.b		FP_SCR0_EX(%a6)		# is operand negative?
8631*4882a593Smuzhiyun	bpl.b		fin_sd_unfl_tst
8632*4882a593Smuzhiyun	bset		&neg_bit,FPSR_CC(%a6)	# set 'N' ccode bit
8633*4882a593Smuzhiyun
8634*4882a593Smuzhiyun# if underflow or inexact is enabled, then go calculate the EXOP first.
8635*4882a593Smuzhiyunfin_sd_unfl_tst:
8636*4882a593Smuzhiyun	mov.b		FPCR_ENABLE(%a6),%d1
8637*4882a593Smuzhiyun	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
8638*4882a593Smuzhiyun	bne.b		fin_sd_unfl_ena		# yes
8639*4882a593Smuzhiyun
8640*4882a593Smuzhiyunfin_sd_unfl_dis:
8641*4882a593Smuzhiyun	lea		FP_SCR0(%a6),%a0	# pass: result addr
8642*4882a593Smuzhiyun	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
8643*4882a593Smuzhiyun	bsr.l		unf_res			# calculate default result
8644*4882a593Smuzhiyun	or.b		%d0,FPSR_CC(%a6)	# unf_res may have set 'Z'
8645*4882a593Smuzhiyun	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
8646*4882a593Smuzhiyun	rts
8647*4882a593Smuzhiyun
8648*4882a593Smuzhiyun#
8649*4882a593Smuzhiyun# operand will underflow AND underflow or inexact is enabled.
8650*4882a593Smuzhiyun# Therefore, we must return the result rounded to extended precision.
8651*4882a593Smuzhiyun#
8652*4882a593Smuzhiyunfin_sd_unfl_ena:
8653*4882a593Smuzhiyun	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
8654*4882a593Smuzhiyun	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
8655*4882a593Smuzhiyun	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent
8656*4882a593Smuzhiyun
8657*4882a593Smuzhiyun	mov.l		%d2,-(%sp)		# save d2
8658*4882a593Smuzhiyun	mov.w		%d1,%d2			# make a copy
8659*4882a593Smuzhiyun	andi.l		&0x7fff,%d1		# strip sign
8660*4882a593Smuzhiyun	sub.l		%d0,%d1			# subtract scale factor
8661*4882a593Smuzhiyun	andi.w		&0x8000,%d2		# extract old sign
8662*4882a593Smuzhiyun	addi.l		&0x6000,%d1		# add new bias
8663*4882a593Smuzhiyun	andi.w		&0x7fff,%d1
8664*4882a593Smuzhiyun	or.w		%d1,%d2			# concat old sign,new exp
8665*4882a593Smuzhiyun	mov.w		%d2,FP_SCR1_EX(%a6)	# insert new exponent
8666*4882a593Smuzhiyun	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
8667*4882a593Smuzhiyun	mov.l		(%sp)+,%d2		# restore d2
8668*4882a593Smuzhiyun	bra.b		fin_sd_unfl_dis
8669*4882a593Smuzhiyun
8670*4882a593Smuzhiyun#
8671*4882a593Smuzhiyun# operand WILL overflow.
8672*4882a593Smuzhiyun#
8673*4882a593Smuzhiyunfin_sd_ovfl:
8674*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr		# clear FPSR
8675*4882a593Smuzhiyun	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
8676*4882a593Smuzhiyun
8677*4882a593Smuzhiyun	fmov.x		FP_SCR0(%a6),%fp0	# perform move
8678*4882a593Smuzhiyun
8679*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# clear FPCR
8680*4882a593Smuzhiyun	fmov.l		%fpsr,%d1		# save FPSR
8681*4882a593Smuzhiyun
8682*4882a593Smuzhiyun	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
8683*4882a593Smuzhiyun
8684*4882a593Smuzhiyunfin_sd_ovfl_tst:
8685*4882a593Smuzhiyun	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
8686*4882a593Smuzhiyun
8687*4882a593Smuzhiyun	mov.b		FPCR_ENABLE(%a6),%d1
8688*4882a593Smuzhiyun	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
8689*4882a593Smuzhiyun	bne.b		fin_sd_ovfl_ena		# yes
8690*4882a593Smuzhiyun
8691*4882a593Smuzhiyun#
8692*4882a593Smuzhiyun# OVFL is not enabled; therefore, we must create the default result by
8693*4882a593Smuzhiyun# calling ovf_res().
8694*4882a593Smuzhiyun#
8695*4882a593Smuzhiyunfin_sd_ovfl_dis:
8696*4882a593Smuzhiyun	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
8697*4882a593Smuzhiyun	sne		%d1			# set sign param accordingly
8698*4882a593Smuzhiyun	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
8699*4882a593Smuzhiyun	bsr.l		ovf_res			# calculate default result
8700*4882a593Smuzhiyun	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
8701*4882a593Smuzhiyun	fmovm.x		(%a0),&0x80		# return default result in fp0
8702*4882a593Smuzhiyun	rts
8703*4882a593Smuzhiyun
8704*4882a593Smuzhiyun#
8705*4882a593Smuzhiyun# OVFL is enabled.
8706*4882a593Smuzhiyun# the INEX2 bit has already been updated by the round to the correct precision.
8707*4882a593Smuzhiyun# now, round to extended(and don't alter the FPSR).
8708*4882a593Smuzhiyun#
8709*4882a593Smuzhiyunfin_sd_ovfl_ena:
8710*4882a593Smuzhiyun	mov.l		%d2,-(%sp)		# save d2
8711*4882a593Smuzhiyun	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
8712*4882a593Smuzhiyun	mov.l		%d1,%d2			# make a copy
8713*4882a593Smuzhiyun	andi.l		&0x7fff,%d1		# strip sign
8714*4882a593Smuzhiyun	andi.w		&0x8000,%d2		# keep old sign
8715*4882a593Smuzhiyun	sub.l		%d0,%d1			# add scale factor
8716*4882a593Smuzhiyun	sub.l		&0x6000,%d1		# subtract bias
8717*4882a593Smuzhiyun	andi.w		&0x7fff,%d1
8718*4882a593Smuzhiyun	or.w		%d2,%d1
8719*4882a593Smuzhiyun	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
8720*4882a593Smuzhiyun	mov.l		(%sp)+,%d2		# restore d2
8721*4882a593Smuzhiyun	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
8722*4882a593Smuzhiyun	bra.b		fin_sd_ovfl_dis
8723*4882a593Smuzhiyun
8724*4882a593Smuzhiyun#
8725*4882a593Smuzhiyun# the move in MAY overflow. so...
8726*4882a593Smuzhiyun#
8727*4882a593Smuzhiyunfin_sd_may_ovfl:
8728*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr		# clear FPSR
8729*4882a593Smuzhiyun	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
8730*4882a593Smuzhiyun
8731*4882a593Smuzhiyun	fmov.x		FP_SCR0(%a6),%fp0	# perform the move
8732*4882a593Smuzhiyun
8733*4882a593Smuzhiyun	fmov.l		%fpsr,%d1		# save status
8734*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# clear FPCR
8735*4882a593Smuzhiyun
8736*4882a593Smuzhiyun	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
8737*4882a593Smuzhiyun
8738*4882a593Smuzhiyun	fabs.x		%fp0,%fp1		# make a copy of result
8739*4882a593Smuzhiyun	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
8740*4882a593Smuzhiyun	fbge.w		fin_sd_ovfl_tst		# yes; overflow has occurred
8741*4882a593Smuzhiyun
8742*4882a593Smuzhiyun# no, it didn't overflow; we have correct result
8743*4882a593Smuzhiyun	bra.w		fin_sd_normal_exit
8744*4882a593Smuzhiyun
8745*4882a593Smuzhiyun##########################################################################
8746*4882a593Smuzhiyun
8747*4882a593Smuzhiyun#
8748*4882a593Smuzhiyun# operand is not a NORM: check its optype and branch accordingly
8749*4882a593Smuzhiyun#
8750*4882a593Smuzhiyunfin_not_norm:
8751*4882a593Smuzhiyun	cmpi.b		%d1,&DENORM		# weed out DENORM
8752*4882a593Smuzhiyun	beq.w		fin_denorm
8753*4882a593Smuzhiyun	cmpi.b		%d1,&SNAN		# weed out SNANs
8754*4882a593Smuzhiyun	beq.l		res_snan_1op
8755*4882a593Smuzhiyun	cmpi.b		%d1,&QNAN		# weed out QNANs
8756*4882a593Smuzhiyun	beq.l		res_qnan_1op
8757*4882a593Smuzhiyun
8758*4882a593Smuzhiyun#
8759*4882a593Smuzhiyun# do the fmove in; at this point, only possible ops are ZERO and INF.
8760*4882a593Smuzhiyun# use fmov to determine ccodes.
8761*4882a593Smuzhiyun# prec:mode should be zero at this point but it won't affect answer anyways.
8762*4882a593Smuzhiyun#
8763*4882a593Smuzhiyun	fmov.x		SRC(%a0),%fp0		# do fmove in
8764*4882a593Smuzhiyun	fmov.l		%fpsr,%d0		# no exceptions possible
8765*4882a593Smuzhiyun	rol.l		&0x8,%d0		# put ccodes in lo byte
8766*4882a593Smuzhiyun	mov.b		%d0,FPSR_CC(%a6)	# insert correct ccodes
8767*4882a593Smuzhiyun	rts
8768*4882a593Smuzhiyun
8769*4882a593Smuzhiyun#########################################################################
8770*4882a593Smuzhiyun# XDEF ****************************************************************	#
8771*4882a593Smuzhiyun#	fdiv(): emulates the fdiv instruction				#
8772*4882a593Smuzhiyun#	fsdiv(): emulates the fsdiv instruction				#
8773*4882a593Smuzhiyun#	fddiv(): emulates the fddiv instruction				#
8774*4882a593Smuzhiyun#									#
8775*4882a593Smuzhiyun# XREF ****************************************************************	#
8776*4882a593Smuzhiyun#	scale_to_zero_src() - scale src exponent to zero		#
8777*4882a593Smuzhiyun#	scale_to_zero_dst() - scale dst exponent to zero		#
8778*4882a593Smuzhiyun#	unf_res() - return default underflow result			#
8779*4882a593Smuzhiyun#	ovf_res() - return default overflow result			#
8780*4882a593Smuzhiyun#	res_qnan() - return QNAN result					#
8781*4882a593Smuzhiyun#	res_snan() - return SNAN result					#
8782*4882a593Smuzhiyun#									#
8783*4882a593Smuzhiyun# INPUT ***************************************************************	#
8784*4882a593Smuzhiyun#	a0 = pointer to extended precision source operand		#
8785*4882a593Smuzhiyun#	a1 = pointer to extended precision destination operand		#
8786*4882a593Smuzhiyun#	d0  rnd prec,mode						#
8787*4882a593Smuzhiyun#									#
8788*4882a593Smuzhiyun# OUTPUT **************************************************************	#
8789*4882a593Smuzhiyun#	fp0 = result							#
8790*4882a593Smuzhiyun#	fp1 = EXOP (if exception occurred)				#
8791*4882a593Smuzhiyun#									#
8792*4882a593Smuzhiyun# ALGORITHM ***********************************************************	#
8793*4882a593Smuzhiyun#	Handle NANs, infinities, and zeroes as special cases. Divide	#
8794*4882a593Smuzhiyun# norms/denorms into ext/sgl/dbl precision.				#
8795*4882a593Smuzhiyun#	For norms/denorms, scale the exponents such that a divide	#
8796*4882a593Smuzhiyun# instruction won't cause an exception. Use the regular fdiv to		#
8797*4882a593Smuzhiyun# compute a result. Check if the regular operands would have taken	#
8798*4882a593Smuzhiyun# an exception. If so, return the default overflow/underflow result	#
8799*4882a593Smuzhiyun# and return the EXOP if exceptions are enabled. Else, scale the	#
8800*4882a593Smuzhiyun# result operand to the proper exponent.				#
8801*4882a593Smuzhiyun#									#
8802*4882a593Smuzhiyun#########################################################################
8803*4882a593Smuzhiyun
8804*4882a593Smuzhiyun	align		0x10
8805*4882a593Smuzhiyuntbl_fdiv_unfl:
8806*4882a593Smuzhiyun	long		0x3fff - 0x0000		# ext_unfl
8807*4882a593Smuzhiyun	long		0x3fff - 0x3f81		# sgl_unfl
8808*4882a593Smuzhiyun	long		0x3fff - 0x3c01		# dbl_unfl
8809*4882a593Smuzhiyun
8810*4882a593Smuzhiyuntbl_fdiv_ovfl:
8811*4882a593Smuzhiyun	long		0x3fff - 0x7ffe		# ext overflow exponent
8812*4882a593Smuzhiyun	long		0x3fff - 0x407e		# sgl overflow exponent
8813*4882a593Smuzhiyun	long		0x3fff - 0x43fe		# dbl overflow exponent
8814*4882a593Smuzhiyun
8815*4882a593Smuzhiyun	global		fsdiv
8816*4882a593Smuzhiyunfsdiv:
8817*4882a593Smuzhiyun	andi.b		&0x30,%d0		# clear rnd prec
8818*4882a593Smuzhiyun	ori.b		&s_mode*0x10,%d0	# insert sgl prec
8819*4882a593Smuzhiyun	bra.b		fdiv
8820*4882a593Smuzhiyun
8821*4882a593Smuzhiyun	global		fddiv
8822*4882a593Smuzhiyunfddiv:
8823*4882a593Smuzhiyun	andi.b		&0x30,%d0		# clear rnd prec
8824*4882a593Smuzhiyun	ori.b		&d_mode*0x10,%d0	# insert dbl prec
8825*4882a593Smuzhiyun
8826*4882a593Smuzhiyun	global		fdiv
8827*4882a593Smuzhiyunfdiv:
8828*4882a593Smuzhiyun	mov.l		%d0,L_SCR3(%a6)		# store rnd info
8829*4882a593Smuzhiyun
8830*4882a593Smuzhiyun	clr.w		%d1
8831*4882a593Smuzhiyun	mov.b		DTAG(%a6),%d1
8832*4882a593Smuzhiyun	lsl.b		&0x3,%d1
8833*4882a593Smuzhiyun	or.b		STAG(%a6),%d1		# combine src tags
8834*4882a593Smuzhiyun
8835*4882a593Smuzhiyun	bne.w		fdiv_not_norm		# optimize on non-norm input
8836*4882a593Smuzhiyun
8837*4882a593Smuzhiyun#
8838*4882a593Smuzhiyun# DIVIDE: NORMs and DENORMs ONLY!
8839*4882a593Smuzhiyun#
8840*4882a593Smuzhiyunfdiv_norm:
8841*4882a593Smuzhiyun	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
8842*4882a593Smuzhiyun	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
8843*4882a593Smuzhiyun	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
8844*4882a593Smuzhiyun
8845*4882a593Smuzhiyun	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
8846*4882a593Smuzhiyun	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
8847*4882a593Smuzhiyun	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
8848*4882a593Smuzhiyun
8849*4882a593Smuzhiyun	bsr.l		scale_to_zero_src	# scale src exponent
8850*4882a593Smuzhiyun	mov.l		%d0,-(%sp)		# save scale factor 1
8851*4882a593Smuzhiyun
8852*4882a593Smuzhiyun	bsr.l		scale_to_zero_dst	# scale dst exponent
8853*4882a593Smuzhiyun
8854*4882a593Smuzhiyun	neg.l		(%sp)			# SCALE FACTOR = scale1 - scale2
8855*4882a593Smuzhiyun	add.l		%d0,(%sp)
8856*4882a593Smuzhiyun
8857*4882a593Smuzhiyun	mov.w		2+L_SCR3(%a6),%d1	# fetch precision
8858*4882a593Smuzhiyun	lsr.b		&0x6,%d1		# shift to lo bits
8859*4882a593Smuzhiyun	mov.l		(%sp)+,%d0		# load S.F.
8860*4882a593Smuzhiyun	cmp.l		%d0,(tbl_fdiv_ovfl.b,%pc,%d1.w*4) # will result overflow?
8861*4882a593Smuzhiyun	ble.w		fdiv_may_ovfl		# result will overflow
8862*4882a593Smuzhiyun
8863*4882a593Smuzhiyun	cmp.l		%d0,(tbl_fdiv_unfl.w,%pc,%d1.w*4) # will result underflow?
8864*4882a593Smuzhiyun	beq.w		fdiv_may_unfl		# maybe
8865*4882a593Smuzhiyun	bgt.w		fdiv_unfl		# yes; go handle underflow
8866*4882a593Smuzhiyun
8867*4882a593Smuzhiyunfdiv_normal:
8868*4882a593Smuzhiyun	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
8869*4882a593Smuzhiyun
8870*4882a593Smuzhiyun	fmov.l		L_SCR3(%a6),%fpcr	# save FPCR
8871*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr		# clear FPSR
8872*4882a593Smuzhiyun
8873*4882a593Smuzhiyun	fdiv.x		FP_SCR0(%a6),%fp0	# perform divide
8874*4882a593Smuzhiyun
8875*4882a593Smuzhiyun	fmov.l		%fpsr,%d1		# save FPSR
8876*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# clear FPCR
8877*4882a593Smuzhiyun
8878*4882a593Smuzhiyun	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
8879*4882a593Smuzhiyun
8880*4882a593Smuzhiyunfdiv_normal_exit:
8881*4882a593Smuzhiyun	fmovm.x		&0x80,FP_SCR0(%a6)	# store result on stack
8882*4882a593Smuzhiyun	mov.l		%d2,-(%sp)		# store d2
8883*4882a593Smuzhiyun	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
8884*4882a593Smuzhiyun	mov.l		%d1,%d2			# make a copy
8885*4882a593Smuzhiyun	andi.l		&0x7fff,%d1		# strip sign
8886*4882a593Smuzhiyun	andi.w		&0x8000,%d2		# keep old sign
8887*4882a593Smuzhiyun	sub.l		%d0,%d1			# add scale factor
8888*4882a593Smuzhiyun	or.w		%d2,%d1			# concat old sign,new exp
8889*4882a593Smuzhiyun	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
8890*4882a593Smuzhiyun	mov.l		(%sp)+,%d2		# restore d2
8891*4882a593Smuzhiyun	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
8892*4882a593Smuzhiyun	rts
8893*4882a593Smuzhiyun
8894*4882a593Smuzhiyuntbl_fdiv_ovfl2:
8895*4882a593Smuzhiyun	long		0x7fff
8896*4882a593Smuzhiyun	long		0x407f
8897*4882a593Smuzhiyun	long		0x43ff
8898*4882a593Smuzhiyun
8899*4882a593Smuzhiyunfdiv_no_ovfl:
8900*4882a593Smuzhiyun	mov.l		(%sp)+,%d0		# restore scale factor
8901*4882a593Smuzhiyun	bra.b		fdiv_normal_exit
8902*4882a593Smuzhiyun
8903*4882a593Smuzhiyunfdiv_may_ovfl:
8904*4882a593Smuzhiyun	mov.l		%d0,-(%sp)		# save scale factor
8905*4882a593Smuzhiyun
8906*4882a593Smuzhiyun	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
8907*4882a593Smuzhiyun
8908*4882a593Smuzhiyun	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
8909*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr		# set FPSR
8910*4882a593Smuzhiyun
8911*4882a593Smuzhiyun	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide
8912*4882a593Smuzhiyun
8913*4882a593Smuzhiyun	fmov.l		%fpsr,%d0
8914*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr
8915*4882a593Smuzhiyun
8916*4882a593Smuzhiyun	or.l		%d0,USER_FPSR(%a6)	# save INEX,N
8917*4882a593Smuzhiyun
8918*4882a593Smuzhiyun	fmovm.x		&0x01,-(%sp)		# save result to stack
8919*4882a593Smuzhiyun	mov.w		(%sp),%d0		# fetch new exponent
8920*4882a593Smuzhiyun	add.l		&0xc,%sp		# clear result from stack
8921*4882a593Smuzhiyun	andi.l		&0x7fff,%d0		# strip sign
8922*4882a593Smuzhiyun	sub.l		(%sp),%d0		# add scale factor
8923*4882a593Smuzhiyun	cmp.l		%d0,(tbl_fdiv_ovfl2.b,%pc,%d1.w*4)
8924*4882a593Smuzhiyun	blt.b		fdiv_no_ovfl
8925*4882a593Smuzhiyun	mov.l		(%sp)+,%d0
8926*4882a593Smuzhiyun
8927*4882a593Smuzhiyunfdiv_ovfl_tst:
8928*4882a593Smuzhiyun	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
8929*4882a593Smuzhiyun
8930*4882a593Smuzhiyun	mov.b		FPCR_ENABLE(%a6),%d1
8931*4882a593Smuzhiyun	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
8932*4882a593Smuzhiyun	bne.b		fdiv_ovfl_ena		# yes
8933*4882a593Smuzhiyun
8934*4882a593Smuzhiyunfdiv_ovfl_dis:
8935*4882a593Smuzhiyun	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
8936*4882a593Smuzhiyun	sne		%d1			# set sign param accordingly
8937*4882a593Smuzhiyun	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
8938*4882a593Smuzhiyun	bsr.l		ovf_res			# calculate default result
8939*4882a593Smuzhiyun	or.b		%d0,FPSR_CC(%a6)	# set INF if applicable
8940*4882a593Smuzhiyun	fmovm.x		(%a0),&0x80		# return default result in fp0
8941*4882a593Smuzhiyun	rts
8942*4882a593Smuzhiyun
8943*4882a593Smuzhiyunfdiv_ovfl_ena:
8944*4882a593Smuzhiyun	mov.l		L_SCR3(%a6),%d1
8945*4882a593Smuzhiyun	andi.b		&0xc0,%d1		# is precision extended?
8946*4882a593Smuzhiyun	bne.b		fdiv_ovfl_ena_sd	# no, do sgl or dbl
8947*4882a593Smuzhiyun
8948*4882a593Smuzhiyunfdiv_ovfl_ena_cont:
8949*4882a593Smuzhiyun	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack
8950*4882a593Smuzhiyun
8951*4882a593Smuzhiyun	mov.l		%d2,-(%sp)		# save d2
8952*4882a593Smuzhiyun	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
8953*4882a593Smuzhiyun	mov.w		%d1,%d2			# make a copy
8954*4882a593Smuzhiyun	andi.l		&0x7fff,%d1		# strip sign
8955*4882a593Smuzhiyun	sub.l		%d0,%d1			# add scale factor
8956*4882a593Smuzhiyun	subi.l		&0x6000,%d1		# subtract bias
8957*4882a593Smuzhiyun	andi.w		&0x7fff,%d1		# clear sign bit
8958*4882a593Smuzhiyun	andi.w		&0x8000,%d2		# keep old sign
8959*4882a593Smuzhiyun	or.w		%d2,%d1			# concat old sign,new exp
8960*4882a593Smuzhiyun	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
8961*4882a593Smuzhiyun	mov.l		(%sp)+,%d2		# restore d2
8962*4882a593Smuzhiyun	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
8963*4882a593Smuzhiyun	bra.b		fdiv_ovfl_dis
8964*4882a593Smuzhiyun
8965*4882a593Smuzhiyunfdiv_ovfl_ena_sd:
8966*4882a593Smuzhiyun	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
8967*4882a593Smuzhiyun
8968*4882a593Smuzhiyun	mov.l		L_SCR3(%a6),%d1
8969*4882a593Smuzhiyun	andi.b		&0x30,%d1		# keep rnd mode
8970*4882a593Smuzhiyun	fmov.l		%d1,%fpcr		# set FPCR
8971*4882a593Smuzhiyun
8972*4882a593Smuzhiyun	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide
8973*4882a593Smuzhiyun
8974*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# clear FPCR
8975*4882a593Smuzhiyun	bra.b		fdiv_ovfl_ena_cont
8976*4882a593Smuzhiyun
8977*4882a593Smuzhiyunfdiv_unfl:
8978*4882a593Smuzhiyun	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
8979*4882a593Smuzhiyun
8980*4882a593Smuzhiyun	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
8981*4882a593Smuzhiyun
8982*4882a593Smuzhiyun	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
8983*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr		# clear FPSR
8984*4882a593Smuzhiyun
8985*4882a593Smuzhiyun	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide
8986*4882a593Smuzhiyun
8987*4882a593Smuzhiyun	fmov.l		%fpsr,%d1		# save status
8988*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# clear FPCR
8989*4882a593Smuzhiyun
8990*4882a593Smuzhiyun	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
8991*4882a593Smuzhiyun
8992*4882a593Smuzhiyun	mov.b		FPCR_ENABLE(%a6),%d1
8993*4882a593Smuzhiyun	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
8994*4882a593Smuzhiyun	bne.b		fdiv_unfl_ena		# yes
8995*4882a593Smuzhiyun
8996*4882a593Smuzhiyunfdiv_unfl_dis:
8997*4882a593Smuzhiyun	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
8998*4882a593Smuzhiyun
8999*4882a593Smuzhiyun	lea		FP_SCR0(%a6),%a0	# pass: result addr
9000*4882a593Smuzhiyun	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
9001*4882a593Smuzhiyun	bsr.l		unf_res			# calculate default result
9002*4882a593Smuzhiyun	or.b		%d0,FPSR_CC(%a6)	# 'Z' may have been set
9003*4882a593Smuzhiyun	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
9004*4882a593Smuzhiyun	rts
9005*4882a593Smuzhiyun
9006*4882a593Smuzhiyun#
9007*4882a593Smuzhiyun# UNFL is enabled.
9008*4882a593Smuzhiyun#
9009*4882a593Smuzhiyunfdiv_unfl_ena:
9010*4882a593Smuzhiyun	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op
9011*4882a593Smuzhiyun
9012*4882a593Smuzhiyun	mov.l		L_SCR3(%a6),%d1
9013*4882a593Smuzhiyun	andi.b		&0xc0,%d1		# is precision extended?
9014*4882a593Smuzhiyun	bne.b		fdiv_unfl_ena_sd	# no, sgl or dbl
9015*4882a593Smuzhiyun
9016*4882a593Smuzhiyun	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
9017*4882a593Smuzhiyun
9018*4882a593Smuzhiyunfdiv_unfl_ena_cont:
9019*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr		# clear FPSR
9020*4882a593Smuzhiyun
9021*4882a593Smuzhiyun	fdiv.x		FP_SCR0(%a6),%fp1	# execute divide
9022*4882a593Smuzhiyun
9023*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# clear FPCR
9024*4882a593Smuzhiyun
9025*4882a593Smuzhiyun	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
9026*4882a593Smuzhiyun	mov.l		%d2,-(%sp)		# save d2
9027*4882a593Smuzhiyun	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
9028*4882a593Smuzhiyun	mov.l		%d1,%d2			# make a copy
9029*4882a593Smuzhiyun	andi.l		&0x7fff,%d1		# strip sign
9030*4882a593Smuzhiyun	andi.w		&0x8000,%d2		# keep old sign
9031*4882a593Smuzhiyun	sub.l		%d0,%d1			# add scale factoer
9032*4882a593Smuzhiyun	addi.l		&0x6000,%d1		# add bias
9033*4882a593Smuzhiyun	andi.w		&0x7fff,%d1
9034*4882a593Smuzhiyun	or.w		%d2,%d1			# concat old sign,new exp
9035*4882a593Smuzhiyun	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exp
9036*4882a593Smuzhiyun	mov.l		(%sp)+,%d2		# restore d2
9037*4882a593Smuzhiyun	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
9038*4882a593Smuzhiyun	bra.w		fdiv_unfl_dis
9039*4882a593Smuzhiyun
9040*4882a593Smuzhiyunfdiv_unfl_ena_sd:
9041*4882a593Smuzhiyun	mov.l		L_SCR3(%a6),%d1
9042*4882a593Smuzhiyun	andi.b		&0x30,%d1		# use only rnd mode
9043*4882a593Smuzhiyun	fmov.l		%d1,%fpcr		# set FPCR
9044*4882a593Smuzhiyun
9045*4882a593Smuzhiyun	bra.b		fdiv_unfl_ena_cont
9046*4882a593Smuzhiyun
9047*4882a593Smuzhiyun#
9048*4882a593Smuzhiyun# the divide operation MAY underflow:
9049*4882a593Smuzhiyun#
9050*4882a593Smuzhiyunfdiv_may_unfl:
9051*4882a593Smuzhiyun	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
9052*4882a593Smuzhiyun
9053*4882a593Smuzhiyun	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
9054*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr		# clear FPSR
9055*4882a593Smuzhiyun
9056*4882a593Smuzhiyun	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide
9057*4882a593Smuzhiyun
9058*4882a593Smuzhiyun	fmov.l		%fpsr,%d1		# save status
9059*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# clear FPCR
9060*4882a593Smuzhiyun
9061*4882a593Smuzhiyun	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
9062*4882a593Smuzhiyun
9063*4882a593Smuzhiyun	fabs.x		%fp0,%fp1		# make a copy of result
9064*4882a593Smuzhiyun	fcmp.b		%fp1,&0x1		# is |result| > 1.b?
9065*4882a593Smuzhiyun	fbgt.w		fdiv_normal_exit	# no; no underflow occurred
9066*4882a593Smuzhiyun	fblt.w		fdiv_unfl		# yes; underflow occurred
9067*4882a593Smuzhiyun
9068*4882a593Smuzhiyun#
9069*4882a593Smuzhiyun# we still don't know if underflow occurred. result is ~ equal to 1. but,
9070*4882a593Smuzhiyun# we don't know if the result was an underflow that rounded up to a 1
9071*4882a593Smuzhiyun# or a normalized number that rounded down to a 1. so, redo the entire
9072*4882a593Smuzhiyun# operation using RZ as the rounding mode to see what the pre-rounded
9073*4882a593Smuzhiyun# result is. this case should be relatively rare.
9074*4882a593Smuzhiyun#
9075*4882a593Smuzhiyun	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
9076*4882a593Smuzhiyun
9077*4882a593Smuzhiyun	mov.l		L_SCR3(%a6),%d1
9078*4882a593Smuzhiyun	andi.b		&0xc0,%d1		# keep rnd prec
9079*4882a593Smuzhiyun	ori.b		&rz_mode*0x10,%d1	# insert RZ
9080*4882a593Smuzhiyun
9081*4882a593Smuzhiyun	fmov.l		%d1,%fpcr		# set FPCR
9082*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr		# clear FPSR
9083*4882a593Smuzhiyun
9084*4882a593Smuzhiyun	fdiv.x		FP_SCR0(%a6),%fp1	# execute divide
9085*4882a593Smuzhiyun
9086*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# clear FPCR
9087*4882a593Smuzhiyun	fabs.x		%fp1			# make absolute value
9088*4882a593Smuzhiyun	fcmp.b		%fp1,&0x1		# is |result| < 1.b?
9089*4882a593Smuzhiyun	fbge.w		fdiv_normal_exit	# no; no underflow occurred
9090*4882a593Smuzhiyun	bra.w		fdiv_unfl		# yes; underflow occurred
9091*4882a593Smuzhiyun
9092*4882a593Smuzhiyun############################################################################
9093*4882a593Smuzhiyun
9094*4882a593Smuzhiyun#
9095*4882a593Smuzhiyun# Divide: inputs are not both normalized; what are they?
9096*4882a593Smuzhiyun#
9097*4882a593Smuzhiyunfdiv_not_norm:
9098*4882a593Smuzhiyun	mov.w		(tbl_fdiv_op.b,%pc,%d1.w*2),%d1
9099*4882a593Smuzhiyun	jmp		(tbl_fdiv_op.b,%pc,%d1.w*1)
9100*4882a593Smuzhiyun
9101*4882a593Smuzhiyun	swbeg		&48
9102*4882a593Smuzhiyuntbl_fdiv_op:
9103*4882a593Smuzhiyun	short		fdiv_norm	- tbl_fdiv_op # NORM / NORM
9104*4882a593Smuzhiyun	short		fdiv_inf_load	- tbl_fdiv_op # NORM / ZERO
9105*4882a593Smuzhiyun	short		fdiv_zero_load	- tbl_fdiv_op # NORM / INF
9106*4882a593Smuzhiyun	short		fdiv_res_qnan	- tbl_fdiv_op # NORM / QNAN
9107*4882a593Smuzhiyun	short		fdiv_norm	- tbl_fdiv_op # NORM / DENORM
9108*4882a593Smuzhiyun	short		fdiv_res_snan	- tbl_fdiv_op # NORM / SNAN
9109*4882a593Smuzhiyun	short		tbl_fdiv_op	- tbl_fdiv_op #
9110*4882a593Smuzhiyun	short		tbl_fdiv_op	- tbl_fdiv_op #
9111*4882a593Smuzhiyun
9112*4882a593Smuzhiyun	short		fdiv_zero_load	- tbl_fdiv_op # ZERO / NORM
9113*4882a593Smuzhiyun	short		fdiv_res_operr	- tbl_fdiv_op # ZERO / ZERO
9114*4882a593Smuzhiyun	short		fdiv_zero_load	- tbl_fdiv_op # ZERO / INF
9115*4882a593Smuzhiyun	short		fdiv_res_qnan	- tbl_fdiv_op # ZERO / QNAN
9116*4882a593Smuzhiyun	short		fdiv_zero_load	- tbl_fdiv_op # ZERO / DENORM
9117*4882a593Smuzhiyun	short		fdiv_res_snan	- tbl_fdiv_op # ZERO / SNAN
9118*4882a593Smuzhiyun	short		tbl_fdiv_op	- tbl_fdiv_op #
9119*4882a593Smuzhiyun	short		tbl_fdiv_op	- tbl_fdiv_op #
9120*4882a593Smuzhiyun
9121*4882a593Smuzhiyun	short		fdiv_inf_dst	- tbl_fdiv_op # INF / NORM
9122*4882a593Smuzhiyun	short		fdiv_inf_dst	- tbl_fdiv_op # INF / ZERO
9123*4882a593Smuzhiyun	short		fdiv_res_operr	- tbl_fdiv_op # INF / INF
9124*4882a593Smuzhiyun	short		fdiv_res_qnan	- tbl_fdiv_op # INF / QNAN
9125*4882a593Smuzhiyun	short		fdiv_inf_dst	- tbl_fdiv_op # INF / DENORM
9126*4882a593Smuzhiyun	short		fdiv_res_snan	- tbl_fdiv_op # INF / SNAN
9127*4882a593Smuzhiyun	short		tbl_fdiv_op	- tbl_fdiv_op #
9128*4882a593Smuzhiyun	short		tbl_fdiv_op	- tbl_fdiv_op #
9129*4882a593Smuzhiyun
9130*4882a593Smuzhiyun	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / NORM
9131*4882a593Smuzhiyun	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / ZERO
9132*4882a593Smuzhiyun	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / INF
9133*4882a593Smuzhiyun	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / QNAN
9134*4882a593Smuzhiyun	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / DENORM
9135*4882a593Smuzhiyun	short		fdiv_res_snan	- tbl_fdiv_op # QNAN / SNAN
9136*4882a593Smuzhiyun	short		tbl_fdiv_op	- tbl_fdiv_op #
9137*4882a593Smuzhiyun	short		tbl_fdiv_op	- tbl_fdiv_op #
9138*4882a593Smuzhiyun
9139*4882a593Smuzhiyun	short		fdiv_norm	- tbl_fdiv_op # DENORM / NORM
9140*4882a593Smuzhiyun	short		fdiv_inf_load	- tbl_fdiv_op # DENORM / ZERO
9141*4882a593Smuzhiyun	short		fdiv_zero_load	- tbl_fdiv_op # DENORM / INF
9142*4882a593Smuzhiyun	short		fdiv_res_qnan	- tbl_fdiv_op # DENORM / QNAN
9143*4882a593Smuzhiyun	short		fdiv_norm	- tbl_fdiv_op # DENORM / DENORM
9144*4882a593Smuzhiyun	short		fdiv_res_snan	- tbl_fdiv_op # DENORM / SNAN
9145*4882a593Smuzhiyun	short		tbl_fdiv_op	- tbl_fdiv_op #
9146*4882a593Smuzhiyun	short		tbl_fdiv_op	- tbl_fdiv_op #
9147*4882a593Smuzhiyun
9148*4882a593Smuzhiyun	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / NORM
9149*4882a593Smuzhiyun	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / ZERO
9150*4882a593Smuzhiyun	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / INF
9151*4882a593Smuzhiyun	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / QNAN
9152*4882a593Smuzhiyun	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / DENORM
9153*4882a593Smuzhiyun	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / SNAN
9154*4882a593Smuzhiyun	short		tbl_fdiv_op	- tbl_fdiv_op #
9155*4882a593Smuzhiyun	short		tbl_fdiv_op	- tbl_fdiv_op #
9156*4882a593Smuzhiyun
9157*4882a593Smuzhiyunfdiv_res_qnan:
9158*4882a593Smuzhiyun	bra.l		res_qnan
9159*4882a593Smuzhiyunfdiv_res_snan:
9160*4882a593Smuzhiyun	bra.l		res_snan
9161*4882a593Smuzhiyunfdiv_res_operr:
9162*4882a593Smuzhiyun	bra.l		res_operr
9163*4882a593Smuzhiyun
9164*4882a593Smuzhiyun	global		fdiv_zero_load		# global for fsgldiv
9165*4882a593Smuzhiyunfdiv_zero_load:
9166*4882a593Smuzhiyun	mov.b		SRC_EX(%a0),%d0		# result sign is exclusive
9167*4882a593Smuzhiyun	mov.b		DST_EX(%a1),%d1		# or of input signs.
9168*4882a593Smuzhiyun	eor.b		%d0,%d1
9169*4882a593Smuzhiyun	bpl.b		fdiv_zero_load_p	# result is positive
9170*4882a593Smuzhiyun	fmov.s		&0x80000000,%fp0	# load a -ZERO
9171*4882a593Smuzhiyun	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# set Z/N
9172*4882a593Smuzhiyun	rts
9173*4882a593Smuzhiyunfdiv_zero_load_p:
9174*4882a593Smuzhiyun	fmov.s		&0x00000000,%fp0	# load a +ZERO
9175*4882a593Smuzhiyun	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
9176*4882a593Smuzhiyun	rts
9177*4882a593Smuzhiyun
9178*4882a593Smuzhiyun#
9179*4882a593Smuzhiyun# The destination was In Range and the source was a ZERO. The result,
9180*4882a593Smuzhiyun# Therefore, is an INF w/ the proper sign.
9181*4882a593Smuzhiyun# So, determine the sign and return a new INF (w/ the j-bit cleared).
9182*4882a593Smuzhiyun#
9183*4882a593Smuzhiyun	global		fdiv_inf_load		# global for fsgldiv
9184*4882a593Smuzhiyunfdiv_inf_load:
9185*4882a593Smuzhiyun	ori.w		&dz_mask+adz_mask,2+USER_FPSR(%a6) # no; set DZ/ADZ
9186*4882a593Smuzhiyun	mov.b		SRC_EX(%a0),%d0		# load both signs
9187*4882a593Smuzhiyun	mov.b		DST_EX(%a1),%d1
9188*4882a593Smuzhiyun	eor.b		%d0,%d1
9189*4882a593Smuzhiyun	bpl.b		fdiv_inf_load_p		# result is positive
9190*4882a593Smuzhiyun	fmov.s		&0xff800000,%fp0	# make result -INF
9191*4882a593Smuzhiyun	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
9192*4882a593Smuzhiyun	rts
9193*4882a593Smuzhiyunfdiv_inf_load_p:
9194*4882a593Smuzhiyun	fmov.s		&0x7f800000,%fp0	# make result +INF
9195*4882a593Smuzhiyun	mov.b		&inf_bmask,FPSR_CC(%a6)	# set INF
9196*4882a593Smuzhiyun	rts
9197*4882a593Smuzhiyun
9198*4882a593Smuzhiyun#
9199*4882a593Smuzhiyun# The destination was an INF w/ an In Range or ZERO source, the result is
9200*4882a593Smuzhiyun# an INF w/ the proper sign.
9201*4882a593Smuzhiyun# The 68881/882 returns the destination INF w/ the new sign(if the j-bit of the
9202*4882a593Smuzhiyun# dst INF is set, then then j-bit of the result INF is also set).
9203*4882a593Smuzhiyun#
9204*4882a593Smuzhiyun	global		fdiv_inf_dst		# global for fsgldiv
9205*4882a593Smuzhiyunfdiv_inf_dst:
9206*4882a593Smuzhiyun	mov.b		DST_EX(%a1),%d0		# load both signs
9207*4882a593Smuzhiyun	mov.b		SRC_EX(%a0),%d1
9208*4882a593Smuzhiyun	eor.b		%d0,%d1
9209*4882a593Smuzhiyun	bpl.b		fdiv_inf_dst_p		# result is positive
9210*4882a593Smuzhiyun
9211*4882a593Smuzhiyun	fmovm.x		DST(%a1),&0x80		# return result in fp0
9212*4882a593Smuzhiyun	fabs.x		%fp0			# clear sign bit
9213*4882a593Smuzhiyun	fneg.x		%fp0			# set sign bit
9214*4882a593Smuzhiyun	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/NEG
9215*4882a593Smuzhiyun	rts
9216*4882a593Smuzhiyun
9217*4882a593Smuzhiyunfdiv_inf_dst_p:
9218*4882a593Smuzhiyun	fmovm.x		DST(%a1),&0x80		# return result in fp0
9219*4882a593Smuzhiyun	fabs.x		%fp0			# return positive INF
9220*4882a593Smuzhiyun	mov.b		&inf_bmask,FPSR_CC(%a6) # set INF
9221*4882a593Smuzhiyun	rts
9222*4882a593Smuzhiyun
9223*4882a593Smuzhiyun#########################################################################
9224*4882a593Smuzhiyun# XDEF ****************************************************************	#
9225*4882a593Smuzhiyun#	fneg(): emulates the fneg instruction				#
9226*4882a593Smuzhiyun#	fsneg(): emulates the fsneg instruction				#
9227*4882a593Smuzhiyun#	fdneg(): emulates the fdneg instruction				#
9228*4882a593Smuzhiyun#									#
9229*4882a593Smuzhiyun# XREF ****************************************************************	#
9230*4882a593Smuzhiyun#	norm() - normalize a denorm to provide EXOP			#
9231*4882a593Smuzhiyun#	scale_to_zero_src() - scale sgl/dbl source exponent		#
9232*4882a593Smuzhiyun#	ovf_res() - return default overflow result			#
9233*4882a593Smuzhiyun#	unf_res() - return default underflow result			#
9234*4882a593Smuzhiyun#	res_qnan_1op() - return QNAN result				#
9235*4882a593Smuzhiyun#	res_snan_1op() - return SNAN result				#
9236*4882a593Smuzhiyun#									#
9237*4882a593Smuzhiyun# INPUT ***************************************************************	#
9238*4882a593Smuzhiyun#	a0 = pointer to extended precision source operand		#
9239*4882a593Smuzhiyun#	d0 = rnd prec,mode						#
9240*4882a593Smuzhiyun#									#
9241*4882a593Smuzhiyun# OUTPUT **************************************************************	#
9242*4882a593Smuzhiyun#	fp0 = result							#
9243*4882a593Smuzhiyun#	fp1 = EXOP (if exception occurred)				#
9244*4882a593Smuzhiyun#									#
9245*4882a593Smuzhiyun# ALGORITHM ***********************************************************	#
9246*4882a593Smuzhiyun#	Handle NANs, zeroes, and infinities as special cases. Separate	#
9247*4882a593Smuzhiyun# norms/denorms into ext/sgl/dbl precisions. Extended precision can be	#
9248*4882a593Smuzhiyun# emulated by simply setting sign bit. Sgl/dbl operands must be scaled	#
9249*4882a593Smuzhiyun# and an actual fneg performed to see if overflow/underflow would have	#
9250*4882a593Smuzhiyun# occurred. If so, return default underflow/overflow result. Else,	#
9251*4882a593Smuzhiyun# scale the result exponent and return result. FPSR gets set based on	#
9252*4882a593Smuzhiyun# the result value.							#
9253*4882a593Smuzhiyun#									#
9254*4882a593Smuzhiyun#########################################################################
9255*4882a593Smuzhiyun
9256*4882a593Smuzhiyun	global		fsneg
9257*4882a593Smuzhiyunfsneg:
9258*4882a593Smuzhiyun	andi.b		&0x30,%d0		# clear rnd prec
9259*4882a593Smuzhiyun	ori.b		&s_mode*0x10,%d0	# insert sgl precision
9260*4882a593Smuzhiyun	bra.b		fneg
9261*4882a593Smuzhiyun
9262*4882a593Smuzhiyun	global		fdneg
9263*4882a593Smuzhiyunfdneg:
9264*4882a593Smuzhiyun	andi.b		&0x30,%d0		# clear rnd prec
9265*4882a593Smuzhiyun	ori.b		&d_mode*0x10,%d0	# insert dbl prec
9266*4882a593Smuzhiyun
9267*4882a593Smuzhiyun	global		fneg
9268*4882a593Smuzhiyunfneg:
9269*4882a593Smuzhiyun	mov.l		%d0,L_SCR3(%a6)		# store rnd info
9270*4882a593Smuzhiyun	mov.b		STAG(%a6),%d1
9271*4882a593Smuzhiyun	bne.w		fneg_not_norm		# optimize on non-norm input
9272*4882a593Smuzhiyun
9273*4882a593Smuzhiyun#
9274*4882a593Smuzhiyun# NEGATE SIGN : norms and denorms ONLY!
9275*4882a593Smuzhiyun#
9276*4882a593Smuzhiyunfneg_norm:
9277*4882a593Smuzhiyun	andi.b		&0xc0,%d0		# is precision extended?
9278*4882a593Smuzhiyun	bne.w		fneg_not_ext		# no; go handle sgl or dbl
9279*4882a593Smuzhiyun
9280*4882a593Smuzhiyun#
9281*4882a593Smuzhiyun# precision selected is extended. so...we can not get an underflow
9282*4882a593Smuzhiyun# or overflow because of rounding to the correct precision. so...
9283*4882a593Smuzhiyun# skip the scaling and unscaling...
9284*4882a593Smuzhiyun#
9285*4882a593Smuzhiyun	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
9286*4882a593Smuzhiyun	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
9287*4882a593Smuzhiyun	mov.w		SRC_EX(%a0),%d0
9288*4882a593Smuzhiyun	eori.w		&0x8000,%d0		# negate sign
9289*4882a593Smuzhiyun	bpl.b		fneg_norm_load		# sign is positive
9290*4882a593Smuzhiyun	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
9291*4882a593Smuzhiyunfneg_norm_load:
9292*4882a593Smuzhiyun	mov.w		%d0,FP_SCR0_EX(%a6)
9293*4882a593Smuzhiyun	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
9294*4882a593Smuzhiyun	rts
9295*4882a593Smuzhiyun
9296*4882a593Smuzhiyun#
9297*4882a593Smuzhiyun# for an extended precision DENORM, the UNFL exception bit is set
9298*4882a593Smuzhiyun# the accrued bit is NOT set in this instance(no inexactness!)
9299*4882a593Smuzhiyun#
9300*4882a593Smuzhiyunfneg_denorm:
9301*4882a593Smuzhiyun	andi.b		&0xc0,%d0		# is precision extended?
9302*4882a593Smuzhiyun	bne.b		fneg_not_ext		# no; go handle sgl or dbl
9303*4882a593Smuzhiyun
9304*4882a593Smuzhiyun	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
9305*4882a593Smuzhiyun
9306*4882a593Smuzhiyun	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
9307*4882a593Smuzhiyun	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
9308*4882a593Smuzhiyun	mov.w		SRC_EX(%a0),%d0
9309*4882a593Smuzhiyun	eori.w		&0x8000,%d0		# negate sign
9310*4882a593Smuzhiyun	bpl.b		fneg_denorm_done	# no
9311*4882a593Smuzhiyun	mov.b		&neg_bmask,FPSR_CC(%a6)	# yes, set 'N' ccode bit
9312*4882a593Smuzhiyunfneg_denorm_done:
9313*4882a593Smuzhiyun	mov.w		%d0,FP_SCR0_EX(%a6)
9314*4882a593Smuzhiyun	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
9315*4882a593Smuzhiyun
9316*4882a593Smuzhiyun	btst		&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
9317*4882a593Smuzhiyun	bne.b		fneg_ext_unfl_ena	# yes
9318*4882a593Smuzhiyun	rts
9319*4882a593Smuzhiyun
9320*4882a593Smuzhiyun#
9321*4882a593Smuzhiyun# the input is an extended DENORM and underflow is enabled in the FPCR.
9322*4882a593Smuzhiyun# normalize the mantissa and add the bias of 0x6000 to the resulting negative
9323*4882a593Smuzhiyun# exponent and insert back into the operand.
9324*4882a593Smuzhiyun#
9325*4882a593Smuzhiyunfneg_ext_unfl_ena:
9326*4882a593Smuzhiyun	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
9327*4882a593Smuzhiyun	bsr.l		norm			# normalize result
9328*4882a593Smuzhiyun	neg.w		%d0			# new exponent = -(shft val)
9329*4882a593Smuzhiyun	addi.w		&0x6000,%d0		# add new bias to exponent
9330*4882a593Smuzhiyun	mov.w		FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
9331*4882a593Smuzhiyun	andi.w		&0x8000,%d1		# keep old sign
9332*4882a593Smuzhiyun	andi.w		&0x7fff,%d0		# clear sign position
9333*4882a593Smuzhiyun	or.w		%d1,%d0			# concat old sign, new exponent
9334*4882a593Smuzhiyun	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
9335*4882a593Smuzhiyun	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
9336*4882a593Smuzhiyun	rts
9337*4882a593Smuzhiyun
9338*4882a593Smuzhiyun#
9339*4882a593Smuzhiyun# operand is either single or double
9340*4882a593Smuzhiyun#
9341*4882a593Smuzhiyunfneg_not_ext:
9342*4882a593Smuzhiyun	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
9343*4882a593Smuzhiyun	bne.b		fneg_dbl
9344*4882a593Smuzhiyun
9345*4882a593Smuzhiyun#
9346*4882a593Smuzhiyun# operand is to be rounded to single precision
9347*4882a593Smuzhiyun#
9348*4882a593Smuzhiyunfneg_sgl:
9349*4882a593Smuzhiyun	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
9350*4882a593Smuzhiyun	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
9351*4882a593Smuzhiyun	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
9352*4882a593Smuzhiyun	bsr.l		scale_to_zero_src	# calculate scale factor
9353*4882a593Smuzhiyun
9354*4882a593Smuzhiyun	cmpi.l		%d0,&0x3fff-0x3f80	# will move in underflow?
9355*4882a593Smuzhiyun	bge.w		fneg_sd_unfl		# yes; go handle underflow
9356*4882a593Smuzhiyun	cmpi.l		%d0,&0x3fff-0x407e	# will move in overflow?
9357*4882a593Smuzhiyun	beq.w		fneg_sd_may_ovfl	# maybe; go check
9358*4882a593Smuzhiyun	blt.w		fneg_sd_ovfl		# yes; go handle overflow
9359*4882a593Smuzhiyun
9360*4882a593Smuzhiyun#
9361*4882a593Smuzhiyun# operand will NOT overflow or underflow when moved in to the fp reg file
9362*4882a593Smuzhiyun#
9363*4882a593Smuzhiyunfneg_sd_normal:
9364*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr		# clear FPSR
9365*4882a593Smuzhiyun	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
9366*4882a593Smuzhiyun
9367*4882a593Smuzhiyun	fneg.x		FP_SCR0(%a6),%fp0	# perform negation
9368*4882a593Smuzhiyun
9369*4882a593Smuzhiyun	fmov.l		%fpsr,%d1		# save FPSR
9370*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# clear FPCR
9371*4882a593Smuzhiyun
9372*4882a593Smuzhiyun	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
9373*4882a593Smuzhiyun
9374*4882a593Smuzhiyunfneg_sd_normal_exit:
9375*4882a593Smuzhiyun	mov.l		%d2,-(%sp)		# save d2
9376*4882a593Smuzhiyun	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
9377*4882a593Smuzhiyun	mov.w		FP_SCR0_EX(%a6),%d1	# load sgn,exp
9378*4882a593Smuzhiyun	mov.w		%d1,%d2			# make a copy
9379*4882a593Smuzhiyun	andi.l		&0x7fff,%d1		# strip sign
9380*4882a593Smuzhiyun	sub.l		%d0,%d1			# add scale factor
9381*4882a593Smuzhiyun	andi.w		&0x8000,%d2		# keep old sign
9382*4882a593Smuzhiyun	or.w		%d1,%d2			# concat old sign,new exp
9383*4882a593Smuzhiyun	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
9384*4882a593Smuzhiyun	mov.l		(%sp)+,%d2		# restore d2
9385*4882a593Smuzhiyun	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
9386*4882a593Smuzhiyun	rts
9387*4882a593Smuzhiyun
9388*4882a593Smuzhiyun#
9389*4882a593Smuzhiyun# operand is to be rounded to double precision
9390*4882a593Smuzhiyun#
9391*4882a593Smuzhiyunfneg_dbl:
9392*4882a593Smuzhiyun	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
9393*4882a593Smuzhiyun	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
9394*4882a593Smuzhiyun	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
9395*4882a593Smuzhiyun	bsr.l		scale_to_zero_src	# calculate scale factor
9396*4882a593Smuzhiyun
9397*4882a593Smuzhiyun	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
9398*4882a593Smuzhiyun	bge.b		fneg_sd_unfl		# yes; go handle underflow
9399*4882a593Smuzhiyun	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
9400*4882a593Smuzhiyun	beq.w		fneg_sd_may_ovfl	# maybe; go check
9401*4882a593Smuzhiyun	blt.w		fneg_sd_ovfl		# yes; go handle overflow
9402*4882a593Smuzhiyun	bra.w		fneg_sd_normal		# no; ho handle normalized op
9403*4882a593Smuzhiyun
9404*4882a593Smuzhiyun#
9405*4882a593Smuzhiyun# operand WILL underflow when moved in to the fp register file
9406*4882a593Smuzhiyun#
9407*4882a593Smuzhiyunfneg_sd_unfl:
9408*4882a593Smuzhiyun	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
9409*4882a593Smuzhiyun
9410*4882a593Smuzhiyun	eori.b		&0x80,FP_SCR0_EX(%a6)	# negate sign
9411*4882a593Smuzhiyun	bpl.b		fneg_sd_unfl_tst
9412*4882a593Smuzhiyun	bset		&neg_bit,FPSR_CC(%a6)	# set 'N' ccode bit
9413*4882a593Smuzhiyun
9414*4882a593Smuzhiyun# if underflow or inexact is enabled, go calculate EXOP first.
9415*4882a593Smuzhiyunfneg_sd_unfl_tst:
9416*4882a593Smuzhiyun	mov.b		FPCR_ENABLE(%a6),%d1
9417*4882a593Smuzhiyun	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
9418*4882a593Smuzhiyun	bne.b		fneg_sd_unfl_ena	# yes
9419*4882a593Smuzhiyun
9420*4882a593Smuzhiyunfneg_sd_unfl_dis:
9421*4882a593Smuzhiyun	lea		FP_SCR0(%a6),%a0	# pass: result addr
9422*4882a593Smuzhiyun	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
9423*4882a593Smuzhiyun	bsr.l		unf_res			# calculate default result
9424*4882a593Smuzhiyun	or.b		%d0,FPSR_CC(%a6)	# unf_res may have set 'Z'
9425*4882a593Smuzhiyun	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
9426*4882a593Smuzhiyun	rts
9427*4882a593Smuzhiyun
9428*4882a593Smuzhiyun#
9429*4882a593Smuzhiyun# operand will underflow AND underflow is enabled.
9430*4882a593Smuzhiyun# Therefore, we must return the result rounded to extended precision.
9431*4882a593Smuzhiyun#
9432*4882a593Smuzhiyunfneg_sd_unfl_ena:
9433*4882a593Smuzhiyun	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
9434*4882a593Smuzhiyun	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
9435*4882a593Smuzhiyun	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent
9436*4882a593Smuzhiyun
9437*4882a593Smuzhiyun	mov.l		%d2,-(%sp)		# save d2
9438*4882a593Smuzhiyun	mov.l		%d1,%d2			# make a copy
9439*4882a593Smuzhiyun	andi.l		&0x7fff,%d1		# strip sign
9440*4882a593Smuzhiyun	andi.w		&0x8000,%d2		# keep old sign
9441*4882a593Smuzhiyun	sub.l		%d0,%d1			# subtract scale factor
9442*4882a593Smuzhiyun	addi.l		&0x6000,%d1		# add new bias
9443*4882a593Smuzhiyun	andi.w		&0x7fff,%d1
9444*4882a593Smuzhiyun	or.w		%d2,%d1			# concat new sign,new exp
9445*4882a593Smuzhiyun	mov.w		%d1,FP_SCR1_EX(%a6)	# insert new exp
9446*4882a593Smuzhiyun	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
9447*4882a593Smuzhiyun	mov.l		(%sp)+,%d2		# restore d2
9448*4882a593Smuzhiyun	bra.b		fneg_sd_unfl_dis
9449*4882a593Smuzhiyun
9450*4882a593Smuzhiyun#
9451*4882a593Smuzhiyun# operand WILL overflow.
9452*4882a593Smuzhiyun#
9453*4882a593Smuzhiyunfneg_sd_ovfl:
9454*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr		# clear FPSR
9455*4882a593Smuzhiyun	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
9456*4882a593Smuzhiyun
9457*4882a593Smuzhiyun	fneg.x		FP_SCR0(%a6),%fp0	# perform negation
9458*4882a593Smuzhiyun
9459*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# clear FPCR
9460*4882a593Smuzhiyun	fmov.l		%fpsr,%d1		# save FPSR
9461*4882a593Smuzhiyun
9462*4882a593Smuzhiyun	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
9463*4882a593Smuzhiyun
9464*4882a593Smuzhiyunfneg_sd_ovfl_tst:
9465*4882a593Smuzhiyun	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
9466*4882a593Smuzhiyun
9467*4882a593Smuzhiyun	mov.b		FPCR_ENABLE(%a6),%d1
9468*4882a593Smuzhiyun	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
9469*4882a593Smuzhiyun	bne.b		fneg_sd_ovfl_ena	# yes
9470*4882a593Smuzhiyun
9471*4882a593Smuzhiyun#
9472*4882a593Smuzhiyun# OVFL is not enabled; therefore, we must create the default result by
9473*4882a593Smuzhiyun# calling ovf_res().
9474*4882a593Smuzhiyun#
9475*4882a593Smuzhiyunfneg_sd_ovfl_dis:
9476*4882a593Smuzhiyun	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
9477*4882a593Smuzhiyun	sne		%d1			# set sign param accordingly
9478*4882a593Smuzhiyun	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
9479*4882a593Smuzhiyun	bsr.l		ovf_res			# calculate default result
9480*4882a593Smuzhiyun	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
9481*4882a593Smuzhiyun	fmovm.x		(%a0),&0x80		# return default result in fp0
9482*4882a593Smuzhiyun	rts
9483*4882a593Smuzhiyun
9484*4882a593Smuzhiyun#
9485*4882a593Smuzhiyun# OVFL is enabled.
9486*4882a593Smuzhiyun# the INEX2 bit has already been updated by the round to the correct precision.
9487*4882a593Smuzhiyun# now, round to extended(and don't alter the FPSR).
9488*4882a593Smuzhiyun#
9489*4882a593Smuzhiyunfneg_sd_ovfl_ena:
9490*4882a593Smuzhiyun	mov.l		%d2,-(%sp)		# save d2
9491*4882a593Smuzhiyun	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
9492*4882a593Smuzhiyun	mov.l		%d1,%d2			# make a copy
9493*4882a593Smuzhiyun	andi.l		&0x7fff,%d1		# strip sign
9494*4882a593Smuzhiyun	andi.w		&0x8000,%d2		# keep old sign
9495*4882a593Smuzhiyun	sub.l		%d0,%d1			# add scale factor
9496*4882a593Smuzhiyun	subi.l		&0x6000,%d1		# subtract bias
9497*4882a593Smuzhiyun	andi.w		&0x7fff,%d1
9498*4882a593Smuzhiyun	or.w		%d2,%d1			# concat sign,exp
9499*4882a593Smuzhiyun	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
9500*4882a593Smuzhiyun	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
9501*4882a593Smuzhiyun	mov.l		(%sp)+,%d2		# restore d2
9502*4882a593Smuzhiyun	bra.b		fneg_sd_ovfl_dis
9503*4882a593Smuzhiyun
9504*4882a593Smuzhiyun#
9505*4882a593Smuzhiyun# the move in MAY underflow. so...
9506*4882a593Smuzhiyun#
9507*4882a593Smuzhiyunfneg_sd_may_ovfl:
9508*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr		# clear FPSR
9509*4882a593Smuzhiyun	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
9510*4882a593Smuzhiyun
9511*4882a593Smuzhiyun	fneg.x		FP_SCR0(%a6),%fp0	# perform negation
9512*4882a593Smuzhiyun
9513*4882a593Smuzhiyun	fmov.l		%fpsr,%d1		# save status
9514*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# clear FPCR
9515*4882a593Smuzhiyun
9516*4882a593Smuzhiyun	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
9517*4882a593Smuzhiyun
9518*4882a593Smuzhiyun	fabs.x		%fp0,%fp1		# make a copy of result
9519*4882a593Smuzhiyun	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
9520*4882a593Smuzhiyun	fbge.w		fneg_sd_ovfl_tst	# yes; overflow has occurred
9521*4882a593Smuzhiyun
9522*4882a593Smuzhiyun# no, it didn't overflow; we have correct result
9523*4882a593Smuzhiyun	bra.w		fneg_sd_normal_exit
9524*4882a593Smuzhiyun
9525*4882a593Smuzhiyun##########################################################################
9526*4882a593Smuzhiyun
9527*4882a593Smuzhiyun#
9528*4882a593Smuzhiyun# input is not normalized; what is it?
9529*4882a593Smuzhiyun#
9530*4882a593Smuzhiyunfneg_not_norm:
9531*4882a593Smuzhiyun	cmpi.b		%d1,&DENORM		# weed out DENORM
9532*4882a593Smuzhiyun	beq.w		fneg_denorm
9533*4882a593Smuzhiyun	cmpi.b		%d1,&SNAN		# weed out SNAN
9534*4882a593Smuzhiyun	beq.l		res_snan_1op
9535*4882a593Smuzhiyun	cmpi.b		%d1,&QNAN		# weed out QNAN
9536*4882a593Smuzhiyun	beq.l		res_qnan_1op
9537*4882a593Smuzhiyun
9538*4882a593Smuzhiyun#
9539*4882a593Smuzhiyun# do the fneg; at this point, only possible ops are ZERO and INF.
9540*4882a593Smuzhiyun# use fneg to determine ccodes.
9541*4882a593Smuzhiyun# prec:mode should be zero at this point but it won't affect answer anyways.
9542*4882a593Smuzhiyun#
9543*4882a593Smuzhiyun	fneg.x		SRC_EX(%a0),%fp0	# do fneg
9544*4882a593Smuzhiyun	fmov.l		%fpsr,%d0
9545*4882a593Smuzhiyun	rol.l		&0x8,%d0		# put ccodes in lo byte
9546*4882a593Smuzhiyun	mov.b		%d0,FPSR_CC(%a6)	# insert correct ccodes
9547*4882a593Smuzhiyun	rts
9548*4882a593Smuzhiyun
9549*4882a593Smuzhiyun#########################################################################
9550*4882a593Smuzhiyun# XDEF ****************************************************************	#
9551*4882a593Smuzhiyun#	ftst(): emulates the ftest instruction				#
9552*4882a593Smuzhiyun#									#
9553*4882a593Smuzhiyun# XREF ****************************************************************	#
9554*4882a593Smuzhiyun#	res{s,q}nan_1op() - set NAN result for monadic instruction	#
9555*4882a593Smuzhiyun#									#
9556*4882a593Smuzhiyun# INPUT ***************************************************************	#
9557*4882a593Smuzhiyun#	a0 = pointer to extended precision source operand		#
9558*4882a593Smuzhiyun#									#
9559*4882a593Smuzhiyun# OUTPUT **************************************************************	#
9560*4882a593Smuzhiyun#	none								#
9561*4882a593Smuzhiyun#									#
9562*4882a593Smuzhiyun# ALGORITHM ***********************************************************	#
9563*4882a593Smuzhiyun#	Check the source operand tag (STAG) and set the FPCR according	#
9564*4882a593Smuzhiyun# to the operand type and sign.						#
9565*4882a593Smuzhiyun#									#
9566*4882a593Smuzhiyun#########################################################################
9567*4882a593Smuzhiyun
9568*4882a593Smuzhiyun	global		ftst
9569*4882a593Smuzhiyunftst:
9570*4882a593Smuzhiyun	mov.b		STAG(%a6),%d1
9571*4882a593Smuzhiyun	bne.b		ftst_not_norm		# optimize on non-norm input
9572*4882a593Smuzhiyun
9573*4882a593Smuzhiyun#
9574*4882a593Smuzhiyun# Norm:
9575*4882a593Smuzhiyun#
9576*4882a593Smuzhiyunftst_norm:
9577*4882a593Smuzhiyun	tst.b		SRC_EX(%a0)		# is operand negative?
9578*4882a593Smuzhiyun	bmi.b		ftst_norm_m		# yes
9579*4882a593Smuzhiyun	rts
9580*4882a593Smuzhiyunftst_norm_m:
9581*4882a593Smuzhiyun	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
9582*4882a593Smuzhiyun	rts
9583*4882a593Smuzhiyun
9584*4882a593Smuzhiyun#
9585*4882a593Smuzhiyun# input is not normalized; what is it?
9586*4882a593Smuzhiyun#
9587*4882a593Smuzhiyunftst_not_norm:
9588*4882a593Smuzhiyun	cmpi.b		%d1,&ZERO		# weed out ZERO
9589*4882a593Smuzhiyun	beq.b		ftst_zero
9590*4882a593Smuzhiyun	cmpi.b		%d1,&INF		# weed out INF
9591*4882a593Smuzhiyun	beq.b		ftst_inf
9592*4882a593Smuzhiyun	cmpi.b		%d1,&SNAN		# weed out SNAN
9593*4882a593Smuzhiyun	beq.l		res_snan_1op
9594*4882a593Smuzhiyun	cmpi.b		%d1,&QNAN		# weed out QNAN
9595*4882a593Smuzhiyun	beq.l		res_qnan_1op
9596*4882a593Smuzhiyun
9597*4882a593Smuzhiyun#
9598*4882a593Smuzhiyun# Denorm:
9599*4882a593Smuzhiyun#
9600*4882a593Smuzhiyunftst_denorm:
9601*4882a593Smuzhiyun	tst.b		SRC_EX(%a0)		# is operand negative?
9602*4882a593Smuzhiyun	bmi.b		ftst_denorm_m		# yes
9603*4882a593Smuzhiyun	rts
9604*4882a593Smuzhiyunftst_denorm_m:
9605*4882a593Smuzhiyun	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
9606*4882a593Smuzhiyun	rts
9607*4882a593Smuzhiyun
9608*4882a593Smuzhiyun#
9609*4882a593Smuzhiyun# Infinity:
9610*4882a593Smuzhiyun#
9611*4882a593Smuzhiyunftst_inf:
9612*4882a593Smuzhiyun	tst.b		SRC_EX(%a0)		# is operand negative?
9613*4882a593Smuzhiyun	bmi.b		ftst_inf_m		# yes
9614*4882a593Smuzhiyunftst_inf_p:
9615*4882a593Smuzhiyun	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
9616*4882a593Smuzhiyun	rts
9617*4882a593Smuzhiyunftst_inf_m:
9618*4882a593Smuzhiyun	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'I','N' ccode bits
9619*4882a593Smuzhiyun	rts
9620*4882a593Smuzhiyun
9621*4882a593Smuzhiyun#
9622*4882a593Smuzhiyun# Zero:
9623*4882a593Smuzhiyun#
9624*4882a593Smuzhiyunftst_zero:
9625*4882a593Smuzhiyun	tst.b		SRC_EX(%a0)		# is operand negative?
9626*4882a593Smuzhiyun	bmi.b		ftst_zero_m		# yes
9627*4882a593Smuzhiyunftst_zero_p:
9628*4882a593Smuzhiyun	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
9629*4882a593Smuzhiyun	rts
9630*4882a593Smuzhiyunftst_zero_m:
9631*4882a593Smuzhiyun	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# set 'Z','N' ccode bits
9632*4882a593Smuzhiyun	rts
9633*4882a593Smuzhiyun
9634*4882a593Smuzhiyun#########################################################################
9635*4882a593Smuzhiyun# XDEF ****************************************************************	#
9636*4882a593Smuzhiyun#	fint(): emulates the fint instruction				#
9637*4882a593Smuzhiyun#									#
9638*4882a593Smuzhiyun# XREF ****************************************************************	#
9639*4882a593Smuzhiyun#	res_{s,q}nan_1op() - set NAN result for monadic operation	#
9640*4882a593Smuzhiyun#									#
9641*4882a593Smuzhiyun# INPUT ***************************************************************	#
9642*4882a593Smuzhiyun#	a0 = pointer to extended precision source operand		#
9643*4882a593Smuzhiyun#	d0 = round precision/mode					#
9644*4882a593Smuzhiyun#									#
9645*4882a593Smuzhiyun# OUTPUT **************************************************************	#
9646*4882a593Smuzhiyun#	fp0 = result							#
9647*4882a593Smuzhiyun#									#
9648*4882a593Smuzhiyun# ALGORITHM ***********************************************************	#
9649*4882a593Smuzhiyun#	Separate according to operand type. Unnorms don't pass through	#
9650*4882a593Smuzhiyun# here. For norms, load the rounding mode/prec, execute a "fint", then	#
9651*4882a593Smuzhiyun# store the resulting FPSR bits.					#
9652*4882a593Smuzhiyun#	For denorms, force the j-bit to a one and do the same as for	#
9653*4882a593Smuzhiyun# norms. Denorms are so low that the answer will either be a zero or a	#
9654*4882a593Smuzhiyun# one.									#
9655*4882a593Smuzhiyun#	For zeroes/infs/NANs, return the same while setting the FPSR	#
9656*4882a593Smuzhiyun# as appropriate.							#
9657*4882a593Smuzhiyun#									#
9658*4882a593Smuzhiyun#########################################################################
9659*4882a593Smuzhiyun
9660*4882a593Smuzhiyun	global		fint
9661*4882a593Smuzhiyunfint:
9662*4882a593Smuzhiyun	mov.b		STAG(%a6),%d1
9663*4882a593Smuzhiyun	bne.b		fint_not_norm		# optimize on non-norm input
9664*4882a593Smuzhiyun
9665*4882a593Smuzhiyun#
9666*4882a593Smuzhiyun# Norm:
9667*4882a593Smuzhiyun#
9668*4882a593Smuzhiyunfint_norm:
9669*4882a593Smuzhiyun	andi.b		&0x30,%d0		# set prec = ext
9670*4882a593Smuzhiyun
9671*4882a593Smuzhiyun	fmov.l		%d0,%fpcr		# set FPCR
9672*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr		# clear FPSR
9673*4882a593Smuzhiyun
9674*4882a593Smuzhiyun	fint.x		SRC(%a0),%fp0		# execute fint
9675*4882a593Smuzhiyun
9676*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# clear FPCR
9677*4882a593Smuzhiyun	fmov.l		%fpsr,%d0		# save FPSR
9678*4882a593Smuzhiyun	or.l		%d0,USER_FPSR(%a6)	# set exception bits
9679*4882a593Smuzhiyun
9680*4882a593Smuzhiyun	rts
9681*4882a593Smuzhiyun
9682*4882a593Smuzhiyun#
9683*4882a593Smuzhiyun# input is not normalized; what is it?
9684*4882a593Smuzhiyun#
9685*4882a593Smuzhiyunfint_not_norm:
9686*4882a593Smuzhiyun	cmpi.b		%d1,&ZERO		# weed out ZERO
9687*4882a593Smuzhiyun	beq.b		fint_zero
9688*4882a593Smuzhiyun	cmpi.b		%d1,&INF		# weed out INF
9689*4882a593Smuzhiyun	beq.b		fint_inf
9690*4882a593Smuzhiyun	cmpi.b		%d1,&DENORM		# weed out DENORM
9691*4882a593Smuzhiyun	beq.b		fint_denorm
9692*4882a593Smuzhiyun	cmpi.b		%d1,&SNAN		# weed out SNAN
9693*4882a593Smuzhiyun	beq.l		res_snan_1op
9694*4882a593Smuzhiyun	bra.l		res_qnan_1op		# weed out QNAN
9695*4882a593Smuzhiyun
9696*4882a593Smuzhiyun#
9697*4882a593Smuzhiyun# Denorm:
9698*4882a593Smuzhiyun#
9699*4882a593Smuzhiyun# for DENORMs, the result will be either (+/-)ZERO or (+/-)1.
9700*4882a593Smuzhiyun# also, the INEX2 and AINEX exception bits will be set.
9701*4882a593Smuzhiyun# so, we could either set these manually or force the DENORM
9702*4882a593Smuzhiyun# to a very small NORM and ship it to the NORM routine.
9703*4882a593Smuzhiyun# I do the latter.
9704*4882a593Smuzhiyun#
9705*4882a593Smuzhiyunfint_denorm:
9706*4882a593Smuzhiyun	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
9707*4882a593Smuzhiyun	mov.b		&0x80,FP_SCR0_HI(%a6)	# force DENORM ==> small NORM
9708*4882a593Smuzhiyun	lea		FP_SCR0(%a6),%a0
9709*4882a593Smuzhiyun	bra.b		fint_norm
9710*4882a593Smuzhiyun
9711*4882a593Smuzhiyun#
9712*4882a593Smuzhiyun# Zero:
9713*4882a593Smuzhiyun#
9714*4882a593Smuzhiyunfint_zero:
9715*4882a593Smuzhiyun	tst.b		SRC_EX(%a0)		# is ZERO negative?
9716*4882a593Smuzhiyun	bmi.b		fint_zero_m		# yes
9717*4882a593Smuzhiyunfint_zero_p:
9718*4882a593Smuzhiyun	fmov.s		&0x00000000,%fp0	# return +ZERO in fp0
9719*4882a593Smuzhiyun	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
9720*4882a593Smuzhiyun	rts
9721*4882a593Smuzhiyunfint_zero_m:
9722*4882a593Smuzhiyun	fmov.s		&0x80000000,%fp0	# return -ZERO in fp0
9723*4882a593Smuzhiyun	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
9724*4882a593Smuzhiyun	rts
9725*4882a593Smuzhiyun
9726*4882a593Smuzhiyun#
9727*4882a593Smuzhiyun# Infinity:
9728*4882a593Smuzhiyun#
9729*4882a593Smuzhiyunfint_inf:
9730*4882a593Smuzhiyun	fmovm.x		SRC(%a0),&0x80		# return result in fp0
9731*4882a593Smuzhiyun	tst.b		SRC_EX(%a0)		# is INF negative?
9732*4882a593Smuzhiyun	bmi.b		fint_inf_m		# yes
9733*4882a593Smuzhiyunfint_inf_p:
9734*4882a593Smuzhiyun	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
9735*4882a593Smuzhiyun	rts
9736*4882a593Smuzhiyunfint_inf_m:
9737*4882a593Smuzhiyun	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
9738*4882a593Smuzhiyun	rts
9739*4882a593Smuzhiyun
9740*4882a593Smuzhiyun#########################################################################
9741*4882a593Smuzhiyun# XDEF ****************************************************************	#
9742*4882a593Smuzhiyun#	fintrz(): emulates the fintrz instruction			#
9743*4882a593Smuzhiyun#									#
9744*4882a593Smuzhiyun# XREF ****************************************************************	#
9745*4882a593Smuzhiyun#	res_{s,q}nan_1op() - set NAN result for monadic operation	#
9746*4882a593Smuzhiyun#									#
9747*4882a593Smuzhiyun# INPUT ***************************************************************	#
9748*4882a593Smuzhiyun#	a0 = pointer to extended precision source operand		#
9749*4882a593Smuzhiyun#	d0 = round precision/mode					#
9750*4882a593Smuzhiyun#									#
9751*4882a593Smuzhiyun# OUTPUT **************************************************************	#
9752*4882a593Smuzhiyun#	fp0 = result							#
9753*4882a593Smuzhiyun#									#
9754*4882a593Smuzhiyun# ALGORITHM ***********************************************************	#
9755*4882a593Smuzhiyun#	Separate according to operand type. Unnorms don't pass through	#
9756*4882a593Smuzhiyun# here. For norms, load the rounding mode/prec, execute a "fintrz",	#
9757*4882a593Smuzhiyun# then store the resulting FPSR bits.					#
9758*4882a593Smuzhiyun#	For denorms, force the j-bit to a one and do the same as for	#
9759*4882a593Smuzhiyun# norms. Denorms are so low that the answer will either be a zero or a	#
9760*4882a593Smuzhiyun# one.									#
9761*4882a593Smuzhiyun#	For zeroes/infs/NANs, return the same while setting the FPSR	#
9762*4882a593Smuzhiyun# as appropriate.							#
9763*4882a593Smuzhiyun#									#
9764*4882a593Smuzhiyun#########################################################################
9765*4882a593Smuzhiyun
9766*4882a593Smuzhiyun	global		fintrz
9767*4882a593Smuzhiyunfintrz:
9768*4882a593Smuzhiyun	mov.b		STAG(%a6),%d1
9769*4882a593Smuzhiyun	bne.b		fintrz_not_norm		# optimize on non-norm input
9770*4882a593Smuzhiyun
9771*4882a593Smuzhiyun#
9772*4882a593Smuzhiyun# Norm:
9773*4882a593Smuzhiyun#
9774*4882a593Smuzhiyunfintrz_norm:
9775*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr		# clear FPSR
9776*4882a593Smuzhiyun
9777*4882a593Smuzhiyun	fintrz.x	SRC(%a0),%fp0		# execute fintrz
9778*4882a593Smuzhiyun
9779*4882a593Smuzhiyun	fmov.l		%fpsr,%d0		# save FPSR
9780*4882a593Smuzhiyun	or.l		%d0,USER_FPSR(%a6)	# set exception bits
9781*4882a593Smuzhiyun
9782*4882a593Smuzhiyun	rts
9783*4882a593Smuzhiyun
9784*4882a593Smuzhiyun#
9785*4882a593Smuzhiyun# input is not normalized; what is it?
9786*4882a593Smuzhiyun#
9787*4882a593Smuzhiyunfintrz_not_norm:
9788*4882a593Smuzhiyun	cmpi.b		%d1,&ZERO		# weed out ZERO
9789*4882a593Smuzhiyun	beq.b		fintrz_zero
9790*4882a593Smuzhiyun	cmpi.b		%d1,&INF		# weed out INF
9791*4882a593Smuzhiyun	beq.b		fintrz_inf
9792*4882a593Smuzhiyun	cmpi.b		%d1,&DENORM		# weed out DENORM
9793*4882a593Smuzhiyun	beq.b		fintrz_denorm
9794*4882a593Smuzhiyun	cmpi.b		%d1,&SNAN		# weed out SNAN
9795*4882a593Smuzhiyun	beq.l		res_snan_1op
9796*4882a593Smuzhiyun	bra.l		res_qnan_1op		# weed out QNAN
9797*4882a593Smuzhiyun
9798*4882a593Smuzhiyun#
9799*4882a593Smuzhiyun# Denorm:
9800*4882a593Smuzhiyun#
9801*4882a593Smuzhiyun# for DENORMs, the result will be (+/-)ZERO.
9802*4882a593Smuzhiyun# also, the INEX2 and AINEX exception bits will be set.
9803*4882a593Smuzhiyun# so, we could either set these manually or force the DENORM
9804*4882a593Smuzhiyun# to a very small NORM and ship it to the NORM routine.
9805*4882a593Smuzhiyun# I do the latter.
9806*4882a593Smuzhiyun#
9807*4882a593Smuzhiyunfintrz_denorm:
9808*4882a593Smuzhiyun	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
9809*4882a593Smuzhiyun	mov.b		&0x80,FP_SCR0_HI(%a6)	# force DENORM ==> small NORM
9810*4882a593Smuzhiyun	lea		FP_SCR0(%a6),%a0
9811*4882a593Smuzhiyun	bra.b		fintrz_norm
9812*4882a593Smuzhiyun
9813*4882a593Smuzhiyun#
9814*4882a593Smuzhiyun# Zero:
9815*4882a593Smuzhiyun#
9816*4882a593Smuzhiyunfintrz_zero:
9817*4882a593Smuzhiyun	tst.b		SRC_EX(%a0)		# is ZERO negative?
9818*4882a593Smuzhiyun	bmi.b		fintrz_zero_m		# yes
9819*4882a593Smuzhiyunfintrz_zero_p:
9820*4882a593Smuzhiyun	fmov.s		&0x00000000,%fp0	# return +ZERO in fp0
9821*4882a593Smuzhiyun	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
9822*4882a593Smuzhiyun	rts
9823*4882a593Smuzhiyunfintrz_zero_m:
9824*4882a593Smuzhiyun	fmov.s		&0x80000000,%fp0	# return -ZERO in fp0
9825*4882a593Smuzhiyun	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
9826*4882a593Smuzhiyun	rts
9827*4882a593Smuzhiyun
9828*4882a593Smuzhiyun#
9829*4882a593Smuzhiyun# Infinity:
9830*4882a593Smuzhiyun#
9831*4882a593Smuzhiyunfintrz_inf:
9832*4882a593Smuzhiyun	fmovm.x		SRC(%a0),&0x80		# return result in fp0
9833*4882a593Smuzhiyun	tst.b		SRC_EX(%a0)		# is INF negative?
9834*4882a593Smuzhiyun	bmi.b		fintrz_inf_m		# yes
9835*4882a593Smuzhiyunfintrz_inf_p:
9836*4882a593Smuzhiyun	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
9837*4882a593Smuzhiyun	rts
9838*4882a593Smuzhiyunfintrz_inf_m:
9839*4882a593Smuzhiyun	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
9840*4882a593Smuzhiyun	rts
9841*4882a593Smuzhiyun
9842*4882a593Smuzhiyun#########################################################################
9843*4882a593Smuzhiyun# XDEF ****************************************************************	#
9844*4882a593Smuzhiyun#	fabs():  emulates the fabs instruction				#
9845*4882a593Smuzhiyun#	fsabs(): emulates the fsabs instruction				#
9846*4882a593Smuzhiyun#	fdabs(): emulates the fdabs instruction				#
9847*4882a593Smuzhiyun#									#
9848*4882a593Smuzhiyun# XREF **************************************************************** #
9849*4882a593Smuzhiyun#	norm() - normalize denorm mantissa to provide EXOP		#
9850*4882a593Smuzhiyun#	scale_to_zero_src() - make exponent. = 0; get scale factor	#
9851*4882a593Smuzhiyun#	unf_res() - calculate underflow result				#
9852*4882a593Smuzhiyun#	ovf_res() - calculate overflow result				#
9853*4882a593Smuzhiyun#	res_{s,q}nan_1op() - set NAN result for monadic operation	#
9854*4882a593Smuzhiyun#									#
9855*4882a593Smuzhiyun# INPUT *************************************************************** #
9856*4882a593Smuzhiyun#	a0 = pointer to extended precision source operand		#
9857*4882a593Smuzhiyun#	d0 = rnd precision/mode						#
9858*4882a593Smuzhiyun#									#
9859*4882a593Smuzhiyun# OUTPUT ************************************************************** #
9860*4882a593Smuzhiyun#	fp0 = result							#
9861*4882a593Smuzhiyun#	fp1 = EXOP (if exception occurred)				#
9862*4882a593Smuzhiyun#									#
9863*4882a593Smuzhiyun# ALGORITHM ***********************************************************	#
9864*4882a593Smuzhiyun#	Handle NANs, infinities, and zeroes as special cases. Divide	#
9865*4882a593Smuzhiyun# norms into extended, single, and double precision.			#
9866*4882a593Smuzhiyun#	Simply clear sign for extended precision norm. Ext prec denorm	#
9867*4882a593Smuzhiyun# gets an EXOP created for it since it's an underflow.			#
9868*4882a593Smuzhiyun#	Double and single precision can overflow and underflow. First,	#
9869*4882a593Smuzhiyun# scale the operand such that the exponent is zero. Perform an "fabs"	#
9870*4882a593Smuzhiyun# using the correct rnd mode/prec. Check to see if the original		#
9871*4882a593Smuzhiyun# exponent would take an exception. If so, use unf_res() or ovf_res()	#
9872*4882a593Smuzhiyun# to calculate the default result. Also, create the EXOP for the	#
9873*4882a593Smuzhiyun# exceptional case. If no exception should occur, insert the correct	#
9874*4882a593Smuzhiyun# result exponent and return.						#
9875*4882a593Smuzhiyun#	Unnorms don't pass through here.				#
9876*4882a593Smuzhiyun#									#
9877*4882a593Smuzhiyun#########################################################################
9878*4882a593Smuzhiyun
9879*4882a593Smuzhiyun	global		fsabs
9880*4882a593Smuzhiyunfsabs:
9881*4882a593Smuzhiyun	andi.b		&0x30,%d0		# clear rnd prec
9882*4882a593Smuzhiyun	ori.b		&s_mode*0x10,%d0	# insert sgl precision
9883*4882a593Smuzhiyun	bra.b		fabs
9884*4882a593Smuzhiyun
9885*4882a593Smuzhiyun	global		fdabs
9886*4882a593Smuzhiyunfdabs:
9887*4882a593Smuzhiyun	andi.b		&0x30,%d0		# clear rnd prec
9888*4882a593Smuzhiyun	ori.b		&d_mode*0x10,%d0	# insert dbl precision
9889*4882a593Smuzhiyun
9890*4882a593Smuzhiyun	global		fabs
9891*4882a593Smuzhiyunfabs:
9892*4882a593Smuzhiyun	mov.l		%d0,L_SCR3(%a6)		# store rnd info
9893*4882a593Smuzhiyun	mov.b		STAG(%a6),%d1
9894*4882a593Smuzhiyun	bne.w		fabs_not_norm		# optimize on non-norm input
9895*4882a593Smuzhiyun
9896*4882a593Smuzhiyun#
9897*4882a593Smuzhiyun# ABSOLUTE VALUE: norms and denorms ONLY!
9898*4882a593Smuzhiyun#
9899*4882a593Smuzhiyunfabs_norm:
9900*4882a593Smuzhiyun	andi.b		&0xc0,%d0		# is precision extended?
9901*4882a593Smuzhiyun	bne.b		fabs_not_ext		# no; go handle sgl or dbl
9902*4882a593Smuzhiyun
9903*4882a593Smuzhiyun#
9904*4882a593Smuzhiyun# precision selected is extended. so...we can not get an underflow
9905*4882a593Smuzhiyun# or overflow because of rounding to the correct precision. so...
9906*4882a593Smuzhiyun# skip the scaling and unscaling...
9907*4882a593Smuzhiyun#
9908*4882a593Smuzhiyun	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
9909*4882a593Smuzhiyun	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
9910*4882a593Smuzhiyun	mov.w		SRC_EX(%a0),%d1
9911*4882a593Smuzhiyun	bclr		&15,%d1			# force absolute value
9912*4882a593Smuzhiyun	mov.w		%d1,FP_SCR0_EX(%a6)	# insert exponent
9913*4882a593Smuzhiyun	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
9914*4882a593Smuzhiyun	rts
9915*4882a593Smuzhiyun
9916*4882a593Smuzhiyun#
9917*4882a593Smuzhiyun# for an extended precision DENORM, the UNFL exception bit is set
9918*4882a593Smuzhiyun# the accrued bit is NOT set in this instance(no inexactness!)
9919*4882a593Smuzhiyun#
9920*4882a593Smuzhiyunfabs_denorm:
9921*4882a593Smuzhiyun	andi.b		&0xc0,%d0		# is precision extended?
9922*4882a593Smuzhiyun	bne.b		fabs_not_ext		# no
9923*4882a593Smuzhiyun
9924*4882a593Smuzhiyun	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
9925*4882a593Smuzhiyun
9926*4882a593Smuzhiyun	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
9927*4882a593Smuzhiyun	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
9928*4882a593Smuzhiyun	mov.w		SRC_EX(%a0),%d0
9929*4882a593Smuzhiyun	bclr		&15,%d0			# clear sign
9930*4882a593Smuzhiyun	mov.w		%d0,FP_SCR0_EX(%a6)	# insert exponent
9931*4882a593Smuzhiyun
9932*4882a593Smuzhiyun	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
9933*4882a593Smuzhiyun
9934*4882a593Smuzhiyun	btst		&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
9935*4882a593Smuzhiyun	bne.b		fabs_ext_unfl_ena
9936*4882a593Smuzhiyun	rts
9937*4882a593Smuzhiyun
9938*4882a593Smuzhiyun#
9939*4882a593Smuzhiyun# the input is an extended DENORM and underflow is enabled in the FPCR.
9940*4882a593Smuzhiyun# normalize the mantissa and add the bias of 0x6000 to the resulting negative
9941*4882a593Smuzhiyun# exponent and insert back into the operand.
9942*4882a593Smuzhiyun#
9943*4882a593Smuzhiyunfabs_ext_unfl_ena:
9944*4882a593Smuzhiyun	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
9945*4882a593Smuzhiyun	bsr.l		norm			# normalize result
9946*4882a593Smuzhiyun	neg.w		%d0			# new exponent = -(shft val)
9947*4882a593Smuzhiyun	addi.w		&0x6000,%d0		# add new bias to exponent
9948*4882a593Smuzhiyun	mov.w		FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
9949*4882a593Smuzhiyun	andi.w		&0x8000,%d1		# keep old sign
9950*4882a593Smuzhiyun	andi.w		&0x7fff,%d0		# clear sign position
9951*4882a593Smuzhiyun	or.w		%d1,%d0			# concat old sign, new exponent
9952*4882a593Smuzhiyun	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
9953*4882a593Smuzhiyun	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
9954*4882a593Smuzhiyun	rts
9955*4882a593Smuzhiyun
9956*4882a593Smuzhiyun#
9957*4882a593Smuzhiyun# operand is either single or double
9958*4882a593Smuzhiyun#
9959*4882a593Smuzhiyunfabs_not_ext:
9960*4882a593Smuzhiyun	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
9961*4882a593Smuzhiyun	bne.b		fabs_dbl
9962*4882a593Smuzhiyun
9963*4882a593Smuzhiyun#
9964*4882a593Smuzhiyun# operand is to be rounded to single precision
9965*4882a593Smuzhiyun#
9966*4882a593Smuzhiyunfabs_sgl:
9967*4882a593Smuzhiyun	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
9968*4882a593Smuzhiyun	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
9969*4882a593Smuzhiyun	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
9970*4882a593Smuzhiyun	bsr.l		scale_to_zero_src	# calculate scale factor
9971*4882a593Smuzhiyun
9972*4882a593Smuzhiyun	cmpi.l		%d0,&0x3fff-0x3f80	# will move in underflow?
9973*4882a593Smuzhiyun	bge.w		fabs_sd_unfl		# yes; go handle underflow
9974*4882a593Smuzhiyun	cmpi.l		%d0,&0x3fff-0x407e	# will move in overflow?
9975*4882a593Smuzhiyun	beq.w		fabs_sd_may_ovfl	# maybe; go check
9976*4882a593Smuzhiyun	blt.w		fabs_sd_ovfl		# yes; go handle overflow
9977*4882a593Smuzhiyun
9978*4882a593Smuzhiyun#
9979*4882a593Smuzhiyun# operand will NOT overflow or underflow when moved in to the fp reg file
9980*4882a593Smuzhiyun#
9981*4882a593Smuzhiyunfabs_sd_normal:
9982*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr		# clear FPSR
9983*4882a593Smuzhiyun	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
9984*4882a593Smuzhiyun
9985*4882a593Smuzhiyun	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute
9986*4882a593Smuzhiyun
9987*4882a593Smuzhiyun	fmov.l		%fpsr,%d1		# save FPSR
9988*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# clear FPCR
9989*4882a593Smuzhiyun
9990*4882a593Smuzhiyun	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
9991*4882a593Smuzhiyun
9992*4882a593Smuzhiyunfabs_sd_normal_exit:
9993*4882a593Smuzhiyun	mov.l		%d2,-(%sp)		# save d2
9994*4882a593Smuzhiyun	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
9995*4882a593Smuzhiyun	mov.w		FP_SCR0_EX(%a6),%d1	# load sgn,exp
9996*4882a593Smuzhiyun	mov.l		%d1,%d2			# make a copy
9997*4882a593Smuzhiyun	andi.l		&0x7fff,%d1		# strip sign
9998*4882a593Smuzhiyun	sub.l		%d0,%d1			# add scale factor
9999*4882a593Smuzhiyun	andi.w		&0x8000,%d2		# keep old sign
10000*4882a593Smuzhiyun	or.w		%d1,%d2			# concat old sign,new exp
10001*4882a593Smuzhiyun	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
10002*4882a593Smuzhiyun	mov.l		(%sp)+,%d2		# restore d2
10003*4882a593Smuzhiyun	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
10004*4882a593Smuzhiyun	rts
10005*4882a593Smuzhiyun
10006*4882a593Smuzhiyun#
10007*4882a593Smuzhiyun# operand is to be rounded to double precision
10008*4882a593Smuzhiyun#
10009*4882a593Smuzhiyunfabs_dbl:
10010*4882a593Smuzhiyun	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
10011*4882a593Smuzhiyun	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
10012*4882a593Smuzhiyun	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
10013*4882a593Smuzhiyun	bsr.l		scale_to_zero_src	# calculate scale factor
10014*4882a593Smuzhiyun
10015*4882a593Smuzhiyun	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
10016*4882a593Smuzhiyun	bge.b		fabs_sd_unfl		# yes; go handle underflow
10017*4882a593Smuzhiyun	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
10018*4882a593Smuzhiyun	beq.w		fabs_sd_may_ovfl	# maybe; go check
10019*4882a593Smuzhiyun	blt.w		fabs_sd_ovfl		# yes; go handle overflow
10020*4882a593Smuzhiyun	bra.w		fabs_sd_normal		# no; ho handle normalized op
10021*4882a593Smuzhiyun
10022*4882a593Smuzhiyun#
10023*4882a593Smuzhiyun# operand WILL underflow when moved in to the fp register file
10024*4882a593Smuzhiyun#
10025*4882a593Smuzhiyunfabs_sd_unfl:
10026*4882a593Smuzhiyun	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
10027*4882a593Smuzhiyun
10028*4882a593Smuzhiyun	bclr		&0x7,FP_SCR0_EX(%a6)	# force absolute value
10029*4882a593Smuzhiyun
10030*4882a593Smuzhiyun# if underflow or inexact is enabled, go calculate EXOP first.
10031*4882a593Smuzhiyun	mov.b		FPCR_ENABLE(%a6),%d1
10032*4882a593Smuzhiyun	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
10033*4882a593Smuzhiyun	bne.b		fabs_sd_unfl_ena	# yes
10034*4882a593Smuzhiyun
10035*4882a593Smuzhiyunfabs_sd_unfl_dis:
10036*4882a593Smuzhiyun	lea		FP_SCR0(%a6),%a0	# pass: result addr
10037*4882a593Smuzhiyun	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
10038*4882a593Smuzhiyun	bsr.l		unf_res			# calculate default result
10039*4882a593Smuzhiyun	or.b		%d0,FPSR_CC(%a6)	# set possible 'Z' ccode
10040*4882a593Smuzhiyun	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
10041*4882a593Smuzhiyun	rts
10042*4882a593Smuzhiyun
10043*4882a593Smuzhiyun#
10044*4882a593Smuzhiyun# operand will underflow AND underflow is enabled.
10045*4882a593Smuzhiyun# Therefore, we must return the result rounded to extended precision.
10046*4882a593Smuzhiyun#
10047*4882a593Smuzhiyunfabs_sd_unfl_ena:
10048*4882a593Smuzhiyun	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
10049*4882a593Smuzhiyun	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
10050*4882a593Smuzhiyun	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent
10051*4882a593Smuzhiyun
10052*4882a593Smuzhiyun	mov.l		%d2,-(%sp)		# save d2
10053*4882a593Smuzhiyun	mov.l		%d1,%d2			# make a copy
10054*4882a593Smuzhiyun	andi.l		&0x7fff,%d1		# strip sign
10055*4882a593Smuzhiyun	andi.w		&0x8000,%d2		# keep old sign
10056*4882a593Smuzhiyun	sub.l		%d0,%d1			# subtract scale factor
10057*4882a593Smuzhiyun	addi.l		&0x6000,%d1		# add new bias
10058*4882a593Smuzhiyun	andi.w		&0x7fff,%d1
10059*4882a593Smuzhiyun	or.w		%d2,%d1			# concat new sign,new exp
10060*4882a593Smuzhiyun	mov.w		%d1,FP_SCR1_EX(%a6)	# insert new exp
10061*4882a593Smuzhiyun	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
10062*4882a593Smuzhiyun	mov.l		(%sp)+,%d2		# restore d2
10063*4882a593Smuzhiyun	bra.b		fabs_sd_unfl_dis
10064*4882a593Smuzhiyun
10065*4882a593Smuzhiyun#
10066*4882a593Smuzhiyun# operand WILL overflow.
10067*4882a593Smuzhiyun#
10068*4882a593Smuzhiyunfabs_sd_ovfl:
10069*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr		# clear FPSR
10070*4882a593Smuzhiyun	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
10071*4882a593Smuzhiyun
10072*4882a593Smuzhiyun	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute
10073*4882a593Smuzhiyun
10074*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# clear FPCR
10075*4882a593Smuzhiyun	fmov.l		%fpsr,%d1		# save FPSR
10076*4882a593Smuzhiyun
10077*4882a593Smuzhiyun	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
10078*4882a593Smuzhiyun
10079*4882a593Smuzhiyunfabs_sd_ovfl_tst:
10080*4882a593Smuzhiyun	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
10081*4882a593Smuzhiyun
10082*4882a593Smuzhiyun	mov.b		FPCR_ENABLE(%a6),%d1
10083*4882a593Smuzhiyun	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
10084*4882a593Smuzhiyun	bne.b		fabs_sd_ovfl_ena	# yes
10085*4882a593Smuzhiyun
10086*4882a593Smuzhiyun#
10087*4882a593Smuzhiyun# OVFL is not enabled; therefore, we must create the default result by
10088*4882a593Smuzhiyun# calling ovf_res().
10089*4882a593Smuzhiyun#
10090*4882a593Smuzhiyunfabs_sd_ovfl_dis:
10091*4882a593Smuzhiyun	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
10092*4882a593Smuzhiyun	sne		%d1			# set sign param accordingly
10093*4882a593Smuzhiyun	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
10094*4882a593Smuzhiyun	bsr.l		ovf_res			# calculate default result
10095*4882a593Smuzhiyun	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
10096*4882a593Smuzhiyun	fmovm.x		(%a0),&0x80		# return default result in fp0
10097*4882a593Smuzhiyun	rts
10098*4882a593Smuzhiyun
10099*4882a593Smuzhiyun#
10100*4882a593Smuzhiyun# OVFL is enabled.
10101*4882a593Smuzhiyun# the INEX2 bit has already been updated by the round to the correct precision.
10102*4882a593Smuzhiyun# now, round to extended(and don't alter the FPSR).
10103*4882a593Smuzhiyun#
10104*4882a593Smuzhiyunfabs_sd_ovfl_ena:
10105*4882a593Smuzhiyun	mov.l		%d2,-(%sp)		# save d2
10106*4882a593Smuzhiyun	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
10107*4882a593Smuzhiyun	mov.l		%d1,%d2			# make a copy
10108*4882a593Smuzhiyun	andi.l		&0x7fff,%d1		# strip sign
10109*4882a593Smuzhiyun	andi.w		&0x8000,%d2		# keep old sign
10110*4882a593Smuzhiyun	sub.l		%d0,%d1			# add scale factor
10111*4882a593Smuzhiyun	subi.l		&0x6000,%d1		# subtract bias
10112*4882a593Smuzhiyun	andi.w		&0x7fff,%d1
10113*4882a593Smuzhiyun	or.w		%d2,%d1			# concat sign,exp
10114*4882a593Smuzhiyun	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
10115*4882a593Smuzhiyun	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
10116*4882a593Smuzhiyun	mov.l		(%sp)+,%d2		# restore d2
10117*4882a593Smuzhiyun	bra.b		fabs_sd_ovfl_dis
10118*4882a593Smuzhiyun
10119*4882a593Smuzhiyun#
10120*4882a593Smuzhiyun# the move in MAY underflow. so...
10121*4882a593Smuzhiyun#
10122*4882a593Smuzhiyunfabs_sd_may_ovfl:
10123*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr		# clear FPSR
10124*4882a593Smuzhiyun	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
10125*4882a593Smuzhiyun
10126*4882a593Smuzhiyun	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute
10127*4882a593Smuzhiyun
10128*4882a593Smuzhiyun	fmov.l		%fpsr,%d1		# save status
10129*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# clear FPCR
10130*4882a593Smuzhiyun
10131*4882a593Smuzhiyun	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
10132*4882a593Smuzhiyun
10133*4882a593Smuzhiyun	fabs.x		%fp0,%fp1		# make a copy of result
10134*4882a593Smuzhiyun	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
10135*4882a593Smuzhiyun	fbge.w		fabs_sd_ovfl_tst	# yes; overflow has occurred
10136*4882a593Smuzhiyun
10137*4882a593Smuzhiyun# no, it didn't overflow; we have correct result
10138*4882a593Smuzhiyun	bra.w		fabs_sd_normal_exit
10139*4882a593Smuzhiyun
10140*4882a593Smuzhiyun##########################################################################
10141*4882a593Smuzhiyun
10142*4882a593Smuzhiyun#
10143*4882a593Smuzhiyun# input is not normalized; what is it?
10144*4882a593Smuzhiyun#
10145*4882a593Smuzhiyunfabs_not_norm:
10146*4882a593Smuzhiyun	cmpi.b		%d1,&DENORM		# weed out DENORM
10147*4882a593Smuzhiyun	beq.w		fabs_denorm
10148*4882a593Smuzhiyun	cmpi.b		%d1,&SNAN		# weed out SNAN
10149*4882a593Smuzhiyun	beq.l		res_snan_1op
10150*4882a593Smuzhiyun	cmpi.b		%d1,&QNAN		# weed out QNAN
10151*4882a593Smuzhiyun	beq.l		res_qnan_1op
10152*4882a593Smuzhiyun
10153*4882a593Smuzhiyun	fabs.x		SRC(%a0),%fp0		# force absolute value
10154*4882a593Smuzhiyun
10155*4882a593Smuzhiyun	cmpi.b		%d1,&INF		# weed out INF
10156*4882a593Smuzhiyun	beq.b		fabs_inf
10157*4882a593Smuzhiyunfabs_zero:
10158*4882a593Smuzhiyun	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
10159*4882a593Smuzhiyun	rts
10160*4882a593Smuzhiyunfabs_inf:
10161*4882a593Smuzhiyun	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
10162*4882a593Smuzhiyun	rts
10163*4882a593Smuzhiyun
10164*4882a593Smuzhiyun#########################################################################
10165*4882a593Smuzhiyun# XDEF ****************************************************************	#
10166*4882a593Smuzhiyun#	fcmp(): fp compare op routine					#
10167*4882a593Smuzhiyun#									#
10168*4882a593Smuzhiyun# XREF ****************************************************************	#
10169*4882a593Smuzhiyun#	res_qnan() - return QNAN result					#
10170*4882a593Smuzhiyun#	res_snan() - return SNAN result					#
10171*4882a593Smuzhiyun#									#
10172*4882a593Smuzhiyun# INPUT ***************************************************************	#
10173*4882a593Smuzhiyun#	a0 = pointer to extended precision source operand		#
10174*4882a593Smuzhiyun#	a1 = pointer to extended precision destination operand		#
10175*4882a593Smuzhiyun#	d0 = round prec/mode						#
10176*4882a593Smuzhiyun#									#
10177*4882a593Smuzhiyun# OUTPUT ************************************************************** #
10178*4882a593Smuzhiyun#	None								#
10179*4882a593Smuzhiyun#									#
10180*4882a593Smuzhiyun# ALGORITHM ***********************************************************	#
10181*4882a593Smuzhiyun#	Handle NANs and denorms as special cases. For everything else,	#
10182*4882a593Smuzhiyun# just use the actual fcmp instruction to produce the correct condition	#
10183*4882a593Smuzhiyun# codes.								#
10184*4882a593Smuzhiyun#									#
10185*4882a593Smuzhiyun#########################################################################
10186*4882a593Smuzhiyun
10187*4882a593Smuzhiyun	global		fcmp
10188*4882a593Smuzhiyunfcmp:
10189*4882a593Smuzhiyun	clr.w		%d1
10190*4882a593Smuzhiyun	mov.b		DTAG(%a6),%d1
10191*4882a593Smuzhiyun	lsl.b		&0x3,%d1
10192*4882a593Smuzhiyun	or.b		STAG(%a6),%d1
10193*4882a593Smuzhiyun	bne.b		fcmp_not_norm		# optimize on non-norm input
10194*4882a593Smuzhiyun
10195*4882a593Smuzhiyun#
10196*4882a593Smuzhiyun# COMPARE FP OPs : NORMs, ZEROs, INFs, and "corrected" DENORMs
10197*4882a593Smuzhiyun#
10198*4882a593Smuzhiyunfcmp_norm:
10199*4882a593Smuzhiyun	fmovm.x		DST(%a1),&0x80		# load dst op
10200*4882a593Smuzhiyun
10201*4882a593Smuzhiyun	fcmp.x		%fp0,SRC(%a0)		# do compare
10202*4882a593Smuzhiyun
10203*4882a593Smuzhiyun	fmov.l		%fpsr,%d0		# save FPSR
10204*4882a593Smuzhiyun	rol.l		&0x8,%d0		# extract ccode bits
10205*4882a593Smuzhiyun	mov.b		%d0,FPSR_CC(%a6)	# set ccode bits(no exc bits are set)
10206*4882a593Smuzhiyun
10207*4882a593Smuzhiyun	rts
10208*4882a593Smuzhiyun
10209*4882a593Smuzhiyun#
10210*4882a593Smuzhiyun# fcmp: inputs are not both normalized; what are they?
10211*4882a593Smuzhiyun#
10212*4882a593Smuzhiyunfcmp_not_norm:
10213*4882a593Smuzhiyun	mov.w		(tbl_fcmp_op.b,%pc,%d1.w*2),%d1
10214*4882a593Smuzhiyun	jmp		(tbl_fcmp_op.b,%pc,%d1.w*1)
10215*4882a593Smuzhiyun
10216*4882a593Smuzhiyun	swbeg		&48
10217*4882a593Smuzhiyuntbl_fcmp_op:
10218*4882a593Smuzhiyun	short		fcmp_norm	- tbl_fcmp_op # NORM - NORM
10219*4882a593Smuzhiyun	short		fcmp_norm	- tbl_fcmp_op # NORM - ZERO
10220*4882a593Smuzhiyun	short		fcmp_norm	- tbl_fcmp_op # NORM - INF
10221*4882a593Smuzhiyun	short		fcmp_res_qnan	- tbl_fcmp_op # NORM - QNAN
10222*4882a593Smuzhiyun	short		fcmp_nrm_dnrm	- tbl_fcmp_op # NORM - DENORM
10223*4882a593Smuzhiyun	short		fcmp_res_snan	- tbl_fcmp_op # NORM - SNAN
10224*4882a593Smuzhiyun	short		tbl_fcmp_op	- tbl_fcmp_op #
10225*4882a593Smuzhiyun	short		tbl_fcmp_op	- tbl_fcmp_op #
10226*4882a593Smuzhiyun
10227*4882a593Smuzhiyun	short		fcmp_norm	- tbl_fcmp_op # ZERO - NORM
10228*4882a593Smuzhiyun	short		fcmp_norm	- tbl_fcmp_op # ZERO - ZERO
10229*4882a593Smuzhiyun	short		fcmp_norm	- tbl_fcmp_op # ZERO - INF
10230*4882a593Smuzhiyun	short		fcmp_res_qnan	- tbl_fcmp_op # ZERO - QNAN
10231*4882a593Smuzhiyun	short		fcmp_dnrm_s	- tbl_fcmp_op # ZERO - DENORM
10232*4882a593Smuzhiyun	short		fcmp_res_snan	- tbl_fcmp_op # ZERO - SNAN
10233*4882a593Smuzhiyun	short		tbl_fcmp_op	- tbl_fcmp_op #
10234*4882a593Smuzhiyun	short		tbl_fcmp_op	- tbl_fcmp_op #
10235*4882a593Smuzhiyun
10236*4882a593Smuzhiyun	short		fcmp_norm	- tbl_fcmp_op # INF - NORM
10237*4882a593Smuzhiyun	short		fcmp_norm	- tbl_fcmp_op # INF - ZERO
10238*4882a593Smuzhiyun	short		fcmp_norm	- tbl_fcmp_op # INF - INF
10239*4882a593Smuzhiyun	short		fcmp_res_qnan	- tbl_fcmp_op # INF - QNAN
10240*4882a593Smuzhiyun	short		fcmp_dnrm_s	- tbl_fcmp_op # INF - DENORM
10241*4882a593Smuzhiyun	short		fcmp_res_snan	- tbl_fcmp_op # INF - SNAN
10242*4882a593Smuzhiyun	short		tbl_fcmp_op	- tbl_fcmp_op #
10243*4882a593Smuzhiyun	short		tbl_fcmp_op	- tbl_fcmp_op #
10244*4882a593Smuzhiyun
10245*4882a593Smuzhiyun	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - NORM
10246*4882a593Smuzhiyun	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - ZERO
10247*4882a593Smuzhiyun	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - INF
10248*4882a593Smuzhiyun	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - QNAN
10249*4882a593Smuzhiyun	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - DENORM
10250*4882a593Smuzhiyun	short		fcmp_res_snan	- tbl_fcmp_op # QNAN - SNAN
10251*4882a593Smuzhiyun	short		tbl_fcmp_op	- tbl_fcmp_op #
10252*4882a593Smuzhiyun	short		tbl_fcmp_op	- tbl_fcmp_op #
10253*4882a593Smuzhiyun
10254*4882a593Smuzhiyun	short		fcmp_dnrm_nrm	- tbl_fcmp_op # DENORM - NORM
10255*4882a593Smuzhiyun	short		fcmp_dnrm_d	- tbl_fcmp_op # DENORM - ZERO
10256*4882a593Smuzhiyun	short		fcmp_dnrm_d	- tbl_fcmp_op # DENORM - INF
10257*4882a593Smuzhiyun	short		fcmp_res_qnan	- tbl_fcmp_op # DENORM - QNAN
10258*4882a593Smuzhiyun	short		fcmp_dnrm_sd	- tbl_fcmp_op # DENORM - DENORM
10259*4882a593Smuzhiyun	short		fcmp_res_snan	- tbl_fcmp_op # DENORM - SNAN
10260*4882a593Smuzhiyun	short		tbl_fcmp_op	- tbl_fcmp_op #
10261*4882a593Smuzhiyun	short		tbl_fcmp_op	- tbl_fcmp_op #
10262*4882a593Smuzhiyun
10263*4882a593Smuzhiyun	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - NORM
10264*4882a593Smuzhiyun	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - ZERO
10265*4882a593Smuzhiyun	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - INF
10266*4882a593Smuzhiyun	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - QNAN
10267*4882a593Smuzhiyun	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - DENORM
10268*4882a593Smuzhiyun	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - SNAN
10269*4882a593Smuzhiyun	short		tbl_fcmp_op	- tbl_fcmp_op #
10270*4882a593Smuzhiyun	short		tbl_fcmp_op	- tbl_fcmp_op #
10271*4882a593Smuzhiyun
10272*4882a593Smuzhiyun# unlike all other functions for QNAN and SNAN, fcmp does NOT set the
10273*4882a593Smuzhiyun# 'N' bit for a negative QNAN or SNAN input so we must squelch it here.
10274*4882a593Smuzhiyunfcmp_res_qnan:
10275*4882a593Smuzhiyun	bsr.l		res_qnan
10276*4882a593Smuzhiyun	andi.b		&0xf7,FPSR_CC(%a6)
10277*4882a593Smuzhiyun	rts
10278*4882a593Smuzhiyunfcmp_res_snan:
10279*4882a593Smuzhiyun	bsr.l		res_snan
10280*4882a593Smuzhiyun	andi.b		&0xf7,FPSR_CC(%a6)
10281*4882a593Smuzhiyun	rts
10282*4882a593Smuzhiyun
10283*4882a593Smuzhiyun#
10284*4882a593Smuzhiyun# DENORMs are a little more difficult.
10285*4882a593Smuzhiyun# If you have a 2 DENORMs, then you can just force the j-bit to a one
10286*4882a593Smuzhiyun# and use the fcmp_norm routine.
10287*4882a593Smuzhiyun# If you have a DENORM and an INF or ZERO, just force the DENORM's j-bit to a one
10288*4882a593Smuzhiyun# and use the fcmp_norm routine.
10289*4882a593Smuzhiyun# If you have a DENORM and a NORM with opposite signs, then use fcmp_norm, also.
10290*4882a593Smuzhiyun# But with a DENORM and a NORM of the same sign, the neg bit is set if the
10291*4882a593Smuzhiyun# (1) signs are (+) and the DENORM is the dst or
10292*4882a593Smuzhiyun# (2) signs are (-) and the DENORM is the src
10293*4882a593Smuzhiyun#
10294*4882a593Smuzhiyun
10295*4882a593Smuzhiyunfcmp_dnrm_s:
10296*4882a593Smuzhiyun	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
10297*4882a593Smuzhiyun	mov.l		SRC_HI(%a0),%d0
10298*4882a593Smuzhiyun	bset		&31,%d0			# DENORM src; make into small norm
10299*4882a593Smuzhiyun	mov.l		%d0,FP_SCR0_HI(%a6)
10300*4882a593Smuzhiyun	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
10301*4882a593Smuzhiyun	lea		FP_SCR0(%a6),%a0
10302*4882a593Smuzhiyun	bra.w		fcmp_norm
10303*4882a593Smuzhiyun
10304*4882a593Smuzhiyunfcmp_dnrm_d:
10305*4882a593Smuzhiyun	mov.l		DST_EX(%a1),FP_SCR0_EX(%a6)
10306*4882a593Smuzhiyun	mov.l		DST_HI(%a1),%d0
10307*4882a593Smuzhiyun	bset		&31,%d0			# DENORM src; make into small norm
10308*4882a593Smuzhiyun	mov.l		%d0,FP_SCR0_HI(%a6)
10309*4882a593Smuzhiyun	mov.l		DST_LO(%a1),FP_SCR0_LO(%a6)
10310*4882a593Smuzhiyun	lea		FP_SCR0(%a6),%a1
10311*4882a593Smuzhiyun	bra.w		fcmp_norm
10312*4882a593Smuzhiyun
10313*4882a593Smuzhiyunfcmp_dnrm_sd:
10314*4882a593Smuzhiyun	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
10315*4882a593Smuzhiyun	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
10316*4882a593Smuzhiyun	mov.l		DST_HI(%a1),%d0
10317*4882a593Smuzhiyun	bset		&31,%d0			# DENORM dst; make into small norm
10318*4882a593Smuzhiyun	mov.l		%d0,FP_SCR1_HI(%a6)
10319*4882a593Smuzhiyun	mov.l		SRC_HI(%a0),%d0
10320*4882a593Smuzhiyun	bset		&31,%d0			# DENORM dst; make into small norm
10321*4882a593Smuzhiyun	mov.l		%d0,FP_SCR0_HI(%a6)
10322*4882a593Smuzhiyun	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
10323*4882a593Smuzhiyun	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
10324*4882a593Smuzhiyun	lea		FP_SCR1(%a6),%a1
10325*4882a593Smuzhiyun	lea		FP_SCR0(%a6),%a0
10326*4882a593Smuzhiyun	bra.w		fcmp_norm
10327*4882a593Smuzhiyun
10328*4882a593Smuzhiyunfcmp_nrm_dnrm:
10329*4882a593Smuzhiyun	mov.b		SRC_EX(%a0),%d0		# determine if like signs
10330*4882a593Smuzhiyun	mov.b		DST_EX(%a1),%d1
10331*4882a593Smuzhiyun	eor.b		%d0,%d1
10332*4882a593Smuzhiyun	bmi.w		fcmp_dnrm_s
10333*4882a593Smuzhiyun
10334*4882a593Smuzhiyun# signs are the same, so must determine the answer ourselves.
10335*4882a593Smuzhiyun	tst.b		%d0			# is src op negative?
10336*4882a593Smuzhiyun	bmi.b		fcmp_nrm_dnrm_m		# yes
10337*4882a593Smuzhiyun	rts
10338*4882a593Smuzhiyunfcmp_nrm_dnrm_m:
10339*4882a593Smuzhiyun	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
10340*4882a593Smuzhiyun	rts
10341*4882a593Smuzhiyun
10342*4882a593Smuzhiyunfcmp_dnrm_nrm:
10343*4882a593Smuzhiyun	mov.b		SRC_EX(%a0),%d0		# determine if like signs
10344*4882a593Smuzhiyun	mov.b		DST_EX(%a1),%d1
10345*4882a593Smuzhiyun	eor.b		%d0,%d1
10346*4882a593Smuzhiyun	bmi.w		fcmp_dnrm_d
10347*4882a593Smuzhiyun
10348*4882a593Smuzhiyun# signs are the same, so must determine the answer ourselves.
10349*4882a593Smuzhiyun	tst.b		%d0			# is src op negative?
10350*4882a593Smuzhiyun	bpl.b		fcmp_dnrm_nrm_m		# no
10351*4882a593Smuzhiyun	rts
10352*4882a593Smuzhiyunfcmp_dnrm_nrm_m:
10353*4882a593Smuzhiyun	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
10354*4882a593Smuzhiyun	rts
10355*4882a593Smuzhiyun
10356*4882a593Smuzhiyun#########################################################################
10357*4882a593Smuzhiyun# XDEF ****************************************************************	#
10358*4882a593Smuzhiyun#	fsglmul(): emulates the fsglmul instruction			#
10359*4882a593Smuzhiyun#									#
10360*4882a593Smuzhiyun# XREF ****************************************************************	#
10361*4882a593Smuzhiyun#	scale_to_zero_src() - scale src exponent to zero		#
10362*4882a593Smuzhiyun#	scale_to_zero_dst() - scale dst exponent to zero		#
10363*4882a593Smuzhiyun#	unf_res4() - return default underflow result for sglop		#
10364*4882a593Smuzhiyun#	ovf_res() - return default overflow result			#
10365*4882a593Smuzhiyun#	res_qnan() - return QNAN result					#
10366*4882a593Smuzhiyun#	res_snan() - return SNAN result					#
10367*4882a593Smuzhiyun#									#
10368*4882a593Smuzhiyun# INPUT ***************************************************************	#
10369*4882a593Smuzhiyun#	a0 = pointer to extended precision source operand		#
10370*4882a593Smuzhiyun#	a1 = pointer to extended precision destination operand		#
10371*4882a593Smuzhiyun#	d0  rnd prec,mode						#
10372*4882a593Smuzhiyun#									#
10373*4882a593Smuzhiyun# OUTPUT **************************************************************	#
10374*4882a593Smuzhiyun#	fp0 = result							#
10375*4882a593Smuzhiyun#	fp1 = EXOP (if exception occurred)				#
10376*4882a593Smuzhiyun#									#
10377*4882a593Smuzhiyun# ALGORITHM ***********************************************************	#
10378*4882a593Smuzhiyun#	Handle NANs, infinities, and zeroes as special cases. Divide	#
10379*4882a593Smuzhiyun# norms/denorms into ext/sgl/dbl precision.				#
10380*4882a593Smuzhiyun#	For norms/denorms, scale the exponents such that a multiply	#
10381*4882a593Smuzhiyun# instruction won't cause an exception. Use the regular fsglmul to	#
10382*4882a593Smuzhiyun# compute a result. Check if the regular operands would have taken	#
10383*4882a593Smuzhiyun# an exception. If so, return the default overflow/underflow result	#
10384*4882a593Smuzhiyun# and return the EXOP if exceptions are enabled. Else, scale the	#
10385*4882a593Smuzhiyun# result operand to the proper exponent.				#
10386*4882a593Smuzhiyun#									#
10387*4882a593Smuzhiyun#########################################################################
10388*4882a593Smuzhiyun
10389*4882a593Smuzhiyun	global		fsglmul
10390*4882a593Smuzhiyunfsglmul:
10391*4882a593Smuzhiyun	mov.l		%d0,L_SCR3(%a6)		# store rnd info
10392*4882a593Smuzhiyun
10393*4882a593Smuzhiyun	clr.w		%d1
10394*4882a593Smuzhiyun	mov.b		DTAG(%a6),%d1
10395*4882a593Smuzhiyun	lsl.b		&0x3,%d1
10396*4882a593Smuzhiyun	or.b		STAG(%a6),%d1
10397*4882a593Smuzhiyun
10398*4882a593Smuzhiyun	bne.w		fsglmul_not_norm	# optimize on non-norm input
10399*4882a593Smuzhiyun
10400*4882a593Smuzhiyunfsglmul_norm:
10401*4882a593Smuzhiyun	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
10402*4882a593Smuzhiyun	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
10403*4882a593Smuzhiyun	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
10404*4882a593Smuzhiyun
10405*4882a593Smuzhiyun	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
10406*4882a593Smuzhiyun	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
10407*4882a593Smuzhiyun	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
10408*4882a593Smuzhiyun
10409*4882a593Smuzhiyun	bsr.l		scale_to_zero_src	# scale exponent
10410*4882a593Smuzhiyun	mov.l		%d0,-(%sp)		# save scale factor 1
10411*4882a593Smuzhiyun
10412*4882a593Smuzhiyun	bsr.l		scale_to_zero_dst	# scale dst exponent
10413*4882a593Smuzhiyun
10414*4882a593Smuzhiyun	add.l		(%sp)+,%d0		# SCALE_FACTOR = scale1 + scale2
10415*4882a593Smuzhiyun
10416*4882a593Smuzhiyun	cmpi.l		%d0,&0x3fff-0x7ffe	# would result ovfl?
10417*4882a593Smuzhiyun	beq.w		fsglmul_may_ovfl	# result may rnd to overflow
10418*4882a593Smuzhiyun	blt.w		fsglmul_ovfl		# result will overflow
10419*4882a593Smuzhiyun
10420*4882a593Smuzhiyun	cmpi.l		%d0,&0x3fff+0x0001	# would result unfl?
10421*4882a593Smuzhiyun	beq.w		fsglmul_may_unfl	# result may rnd to no unfl
10422*4882a593Smuzhiyun	bgt.w		fsglmul_unfl		# result will underflow
10423*4882a593Smuzhiyun
10424*4882a593Smuzhiyunfsglmul_normal:
10425*4882a593Smuzhiyun	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
10426*4882a593Smuzhiyun
10427*4882a593Smuzhiyun	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
10428*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr		# clear FPSR
10429*4882a593Smuzhiyun
10430*4882a593Smuzhiyun	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply
10431*4882a593Smuzhiyun
10432*4882a593Smuzhiyun	fmov.l		%fpsr,%d1		# save status
10433*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# clear FPCR
10434*4882a593Smuzhiyun
10435*4882a593Smuzhiyun	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
10436*4882a593Smuzhiyun
10437*4882a593Smuzhiyunfsglmul_normal_exit:
10438*4882a593Smuzhiyun	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
10439*4882a593Smuzhiyun	mov.l		%d2,-(%sp)		# save d2
10440*4882a593Smuzhiyun	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
10441*4882a593Smuzhiyun	mov.l		%d1,%d2			# make a copy
10442*4882a593Smuzhiyun	andi.l		&0x7fff,%d1		# strip sign
10443*4882a593Smuzhiyun	andi.w		&0x8000,%d2		# keep old sign
10444*4882a593Smuzhiyun	sub.l		%d0,%d1			# add scale factor
10445*4882a593Smuzhiyun	or.w		%d2,%d1			# concat old sign,new exp
10446*4882a593Smuzhiyun	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
10447*4882a593Smuzhiyun	mov.l		(%sp)+,%d2		# restore d2
10448*4882a593Smuzhiyun	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
10449*4882a593Smuzhiyun	rts
10450*4882a593Smuzhiyun
10451*4882a593Smuzhiyunfsglmul_ovfl:
10452*4882a593Smuzhiyun	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
10453*4882a593Smuzhiyun
10454*4882a593Smuzhiyun	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
10455*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr		# clear FPSR
10456*4882a593Smuzhiyun
10457*4882a593Smuzhiyun	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply
10458*4882a593Smuzhiyun
10459*4882a593Smuzhiyun	fmov.l		%fpsr,%d1		# save status
10460*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# clear FPCR
10461*4882a593Smuzhiyun
10462*4882a593Smuzhiyun	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
10463*4882a593Smuzhiyun
10464*4882a593Smuzhiyunfsglmul_ovfl_tst:
10465*4882a593Smuzhiyun
10466*4882a593Smuzhiyun# save setting this until now because this is where fsglmul_may_ovfl may jump in
10467*4882a593Smuzhiyun	or.l		&ovfl_inx_mask, USER_FPSR(%a6) # set ovfl/aovfl/ainex
10468*4882a593Smuzhiyun
10469*4882a593Smuzhiyun	mov.b		FPCR_ENABLE(%a6),%d1
10470*4882a593Smuzhiyun	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
10471*4882a593Smuzhiyun	bne.b		fsglmul_ovfl_ena	# yes
10472*4882a593Smuzhiyun
10473*4882a593Smuzhiyunfsglmul_ovfl_dis:
10474*4882a593Smuzhiyun	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
10475*4882a593Smuzhiyun	sne		%d1			# set sign param accordingly
10476*4882a593Smuzhiyun	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
10477*4882a593Smuzhiyun	andi.b		&0x30,%d0		# force prec = ext
10478*4882a593Smuzhiyun	bsr.l		ovf_res			# calculate default result
10479*4882a593Smuzhiyun	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
10480*4882a593Smuzhiyun	fmovm.x		(%a0),&0x80		# return default result in fp0
10481*4882a593Smuzhiyun	rts
10482*4882a593Smuzhiyun
10483*4882a593Smuzhiyunfsglmul_ovfl_ena:
10484*4882a593Smuzhiyun	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack
10485*4882a593Smuzhiyun
10486*4882a593Smuzhiyun	mov.l		%d2,-(%sp)		# save d2
10487*4882a593Smuzhiyun	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
10488*4882a593Smuzhiyun	mov.l		%d1,%d2			# make a copy
10489*4882a593Smuzhiyun	andi.l		&0x7fff,%d1		# strip sign
10490*4882a593Smuzhiyun	sub.l		%d0,%d1			# add scale factor
10491*4882a593Smuzhiyun	subi.l		&0x6000,%d1		# subtract bias
10492*4882a593Smuzhiyun	andi.w		&0x7fff,%d1
10493*4882a593Smuzhiyun	andi.w		&0x8000,%d2		# keep old sign
10494*4882a593Smuzhiyun	or.w		%d2,%d1			# concat old sign,new exp
10495*4882a593Smuzhiyun	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
10496*4882a593Smuzhiyun	mov.l		(%sp)+,%d2		# restore d2
10497*4882a593Smuzhiyun	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
10498*4882a593Smuzhiyun	bra.b		fsglmul_ovfl_dis
10499*4882a593Smuzhiyun
10500*4882a593Smuzhiyunfsglmul_may_ovfl:
10501*4882a593Smuzhiyun	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
10502*4882a593Smuzhiyun
10503*4882a593Smuzhiyun	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
10504*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr		# clear FPSR
10505*4882a593Smuzhiyun
10506*4882a593Smuzhiyun	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply
10507*4882a593Smuzhiyun
10508*4882a593Smuzhiyun	fmov.l		%fpsr,%d1		# save status
10509*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# clear FPCR
10510*4882a593Smuzhiyun
10511*4882a593Smuzhiyun	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
10512*4882a593Smuzhiyun
10513*4882a593Smuzhiyun	fabs.x		%fp0,%fp1		# make a copy of result
10514*4882a593Smuzhiyun	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
10515*4882a593Smuzhiyun	fbge.w		fsglmul_ovfl_tst	# yes; overflow has occurred
10516*4882a593Smuzhiyun
10517*4882a593Smuzhiyun# no, it didn't overflow; we have correct result
10518*4882a593Smuzhiyun	bra.w		fsglmul_normal_exit
10519*4882a593Smuzhiyun
10520*4882a593Smuzhiyunfsglmul_unfl:
10521*4882a593Smuzhiyun	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
10522*4882a593Smuzhiyun
10523*4882a593Smuzhiyun	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
10524*4882a593Smuzhiyun
10525*4882a593Smuzhiyun	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
10526*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr		# clear FPSR
10527*4882a593Smuzhiyun
10528*4882a593Smuzhiyun	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply
10529*4882a593Smuzhiyun
10530*4882a593Smuzhiyun	fmov.l		%fpsr,%d1		# save status
10531*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# clear FPCR
10532*4882a593Smuzhiyun
10533*4882a593Smuzhiyun	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
10534*4882a593Smuzhiyun
10535*4882a593Smuzhiyun	mov.b		FPCR_ENABLE(%a6),%d1
10536*4882a593Smuzhiyun	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
10537*4882a593Smuzhiyun	bne.b		fsglmul_unfl_ena	# yes
10538*4882a593Smuzhiyun
10539*4882a593Smuzhiyunfsglmul_unfl_dis:
10540*4882a593Smuzhiyun	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
10541*4882a593Smuzhiyun
10542*4882a593Smuzhiyun	lea		FP_SCR0(%a6),%a0	# pass: result addr
10543*4882a593Smuzhiyun	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
10544*4882a593Smuzhiyun	bsr.l		unf_res4		# calculate default result
10545*4882a593Smuzhiyun	or.b		%d0,FPSR_CC(%a6)	# 'Z' bit may have been set
10546*4882a593Smuzhiyun	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
10547*4882a593Smuzhiyun	rts
10548*4882a593Smuzhiyun
10549*4882a593Smuzhiyun#
10550*4882a593Smuzhiyun# UNFL is enabled.
10551*4882a593Smuzhiyun#
10552*4882a593Smuzhiyunfsglmul_unfl_ena:
10553*4882a593Smuzhiyun	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op
10554*4882a593Smuzhiyun
10555*4882a593Smuzhiyun	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
10556*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr		# clear FPSR
10557*4882a593Smuzhiyun
10558*4882a593Smuzhiyun	fsglmul.x	FP_SCR0(%a6),%fp1	# execute sgl multiply
10559*4882a593Smuzhiyun
10560*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# clear FPCR
10561*4882a593Smuzhiyun
10562*4882a593Smuzhiyun	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
10563*4882a593Smuzhiyun	mov.l		%d2,-(%sp)		# save d2
10564*4882a593Smuzhiyun	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
10565*4882a593Smuzhiyun	mov.l		%d1,%d2			# make a copy
10566*4882a593Smuzhiyun	andi.l		&0x7fff,%d1		# strip sign
10567*4882a593Smuzhiyun	andi.w		&0x8000,%d2		# keep old sign
10568*4882a593Smuzhiyun	sub.l		%d0,%d1			# add scale factor
10569*4882a593Smuzhiyun	addi.l		&0x6000,%d1		# add bias
10570*4882a593Smuzhiyun	andi.w		&0x7fff,%d1
10571*4882a593Smuzhiyun	or.w		%d2,%d1			# concat old sign,new exp
10572*4882a593Smuzhiyun	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
10573*4882a593Smuzhiyun	mov.l		(%sp)+,%d2		# restore d2
10574*4882a593Smuzhiyun	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
10575*4882a593Smuzhiyun	bra.w		fsglmul_unfl_dis
10576*4882a593Smuzhiyun
10577*4882a593Smuzhiyunfsglmul_may_unfl:
10578*4882a593Smuzhiyun	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
10579*4882a593Smuzhiyun
10580*4882a593Smuzhiyun	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
10581*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr		# clear FPSR
10582*4882a593Smuzhiyun
10583*4882a593Smuzhiyun	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply
10584*4882a593Smuzhiyun
10585*4882a593Smuzhiyun	fmov.l		%fpsr,%d1		# save status
10586*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# clear FPCR
10587*4882a593Smuzhiyun
10588*4882a593Smuzhiyun	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
10589*4882a593Smuzhiyun
10590*4882a593Smuzhiyun	fabs.x		%fp0,%fp1		# make a copy of result
10591*4882a593Smuzhiyun	fcmp.b		%fp1,&0x2		# is |result| > 2.b?
10592*4882a593Smuzhiyun	fbgt.w		fsglmul_normal_exit	# no; no underflow occurred
10593*4882a593Smuzhiyun	fblt.w		fsglmul_unfl		# yes; underflow occurred
10594*4882a593Smuzhiyun
10595*4882a593Smuzhiyun#
10596*4882a593Smuzhiyun# we still don't know if underflow occurred. result is ~ equal to 2. but,
10597*4882a593Smuzhiyun# we don't know if the result was an underflow that rounded up to a 2 or
10598*4882a593Smuzhiyun# a normalized number that rounded down to a 2. so, redo the entire operation
10599*4882a593Smuzhiyun# using RZ as the rounding mode to see what the pre-rounded result is.
10600*4882a593Smuzhiyun# this case should be relatively rare.
10601*4882a593Smuzhiyun#
10602*4882a593Smuzhiyun	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
10603*4882a593Smuzhiyun
10604*4882a593Smuzhiyun	mov.l		L_SCR3(%a6),%d1
10605*4882a593Smuzhiyun	andi.b		&0xc0,%d1		# keep rnd prec
10606*4882a593Smuzhiyun	ori.b		&rz_mode*0x10,%d1	# insert RZ
10607*4882a593Smuzhiyun
10608*4882a593Smuzhiyun	fmov.l		%d1,%fpcr		# set FPCR
10609*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr		# clear FPSR
10610*4882a593Smuzhiyun
10611*4882a593Smuzhiyun	fsglmul.x	FP_SCR0(%a6),%fp1	# execute sgl multiply
10612*4882a593Smuzhiyun
10613*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# clear FPCR
10614*4882a593Smuzhiyun	fabs.x		%fp1			# make absolute value
10615*4882a593Smuzhiyun	fcmp.b		%fp1,&0x2		# is |result| < 2.b?
10616*4882a593Smuzhiyun	fbge.w		fsglmul_normal_exit	# no; no underflow occurred
10617*4882a593Smuzhiyun	bra.w		fsglmul_unfl		# yes, underflow occurred
10618*4882a593Smuzhiyun
10619*4882a593Smuzhiyun##############################################################################
10620*4882a593Smuzhiyun
10621*4882a593Smuzhiyun#
10622*4882a593Smuzhiyun# Single Precision Multiply: inputs are not both normalized; what are they?
10623*4882a593Smuzhiyun#
10624*4882a593Smuzhiyunfsglmul_not_norm:
10625*4882a593Smuzhiyun	mov.w		(tbl_fsglmul_op.b,%pc,%d1.w*2),%d1
10626*4882a593Smuzhiyun	jmp		(tbl_fsglmul_op.b,%pc,%d1.w*1)
10627*4882a593Smuzhiyun
10628*4882a593Smuzhiyun	swbeg		&48
10629*4882a593Smuzhiyuntbl_fsglmul_op:
10630*4882a593Smuzhiyun	short		fsglmul_norm		- tbl_fsglmul_op # NORM x NORM
10631*4882a593Smuzhiyun	short		fsglmul_zero		- tbl_fsglmul_op # NORM x ZERO
10632*4882a593Smuzhiyun	short		fsglmul_inf_src		- tbl_fsglmul_op # NORM x INF
10633*4882a593Smuzhiyun	short		fsglmul_res_qnan	- tbl_fsglmul_op # NORM x QNAN
10634*4882a593Smuzhiyun	short		fsglmul_norm		- tbl_fsglmul_op # NORM x DENORM
10635*4882a593Smuzhiyun	short		fsglmul_res_snan	- tbl_fsglmul_op # NORM x SNAN
10636*4882a593Smuzhiyun	short		tbl_fsglmul_op		- tbl_fsglmul_op #
10637*4882a593Smuzhiyun	short		tbl_fsglmul_op		- tbl_fsglmul_op #
10638*4882a593Smuzhiyun
10639*4882a593Smuzhiyun	short		fsglmul_zero		- tbl_fsglmul_op # ZERO x NORM
10640*4882a593Smuzhiyun	short		fsglmul_zero		- tbl_fsglmul_op # ZERO x ZERO
10641*4882a593Smuzhiyun	short		fsglmul_res_operr	- tbl_fsglmul_op # ZERO x INF
10642*4882a593Smuzhiyun	short		fsglmul_res_qnan	- tbl_fsglmul_op # ZERO x QNAN
10643*4882a593Smuzhiyun	short		fsglmul_zero		- tbl_fsglmul_op # ZERO x DENORM
10644*4882a593Smuzhiyun	short		fsglmul_res_snan	- tbl_fsglmul_op # ZERO x SNAN
10645*4882a593Smuzhiyun	short		tbl_fsglmul_op		- tbl_fsglmul_op #
10646*4882a593Smuzhiyun	short		tbl_fsglmul_op		- tbl_fsglmul_op #
10647*4882a593Smuzhiyun
10648*4882a593Smuzhiyun	short		fsglmul_inf_dst		- tbl_fsglmul_op # INF x NORM
10649*4882a593Smuzhiyun	short		fsglmul_res_operr	- tbl_fsglmul_op # INF x ZERO
10650*4882a593Smuzhiyun	short		fsglmul_inf_dst		- tbl_fsglmul_op # INF x INF
10651*4882a593Smuzhiyun	short		fsglmul_res_qnan	- tbl_fsglmul_op # INF x QNAN
10652*4882a593Smuzhiyun	short		fsglmul_inf_dst		- tbl_fsglmul_op # INF x DENORM
10653*4882a593Smuzhiyun	short		fsglmul_res_snan	- tbl_fsglmul_op # INF x SNAN
10654*4882a593Smuzhiyun	short		tbl_fsglmul_op		- tbl_fsglmul_op #
10655*4882a593Smuzhiyun	short		tbl_fsglmul_op		- tbl_fsglmul_op #
10656*4882a593Smuzhiyun
10657*4882a593Smuzhiyun	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x NORM
10658*4882a593Smuzhiyun	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x ZERO
10659*4882a593Smuzhiyun	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x INF
10660*4882a593Smuzhiyun	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x QNAN
10661*4882a593Smuzhiyun	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x DENORM
10662*4882a593Smuzhiyun	short		fsglmul_res_snan	- tbl_fsglmul_op # QNAN x SNAN
10663*4882a593Smuzhiyun	short		tbl_fsglmul_op		- tbl_fsglmul_op #
10664*4882a593Smuzhiyun	short		tbl_fsglmul_op		- tbl_fsglmul_op #
10665*4882a593Smuzhiyun
10666*4882a593Smuzhiyun	short		fsglmul_norm		- tbl_fsglmul_op # NORM x NORM
10667*4882a593Smuzhiyun	short		fsglmul_zero		- tbl_fsglmul_op # NORM x ZERO
10668*4882a593Smuzhiyun	short		fsglmul_inf_src		- tbl_fsglmul_op # NORM x INF
10669*4882a593Smuzhiyun	short		fsglmul_res_qnan	- tbl_fsglmul_op # NORM x QNAN
10670*4882a593Smuzhiyun	short		fsglmul_norm		- tbl_fsglmul_op # NORM x DENORM
10671*4882a593Smuzhiyun	short		fsglmul_res_snan	- tbl_fsglmul_op # NORM x SNAN
10672*4882a593Smuzhiyun	short		tbl_fsglmul_op		- tbl_fsglmul_op #
10673*4882a593Smuzhiyun	short		tbl_fsglmul_op		- tbl_fsglmul_op #
10674*4882a593Smuzhiyun
10675*4882a593Smuzhiyun	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x NORM
10676*4882a593Smuzhiyun	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x ZERO
10677*4882a593Smuzhiyun	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x INF
10678*4882a593Smuzhiyun	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x QNAN
10679*4882a593Smuzhiyun	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x DENORM
10680*4882a593Smuzhiyun	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x SNAN
10681*4882a593Smuzhiyun	short		tbl_fsglmul_op		- tbl_fsglmul_op #
10682*4882a593Smuzhiyun	short		tbl_fsglmul_op		- tbl_fsglmul_op #
10683*4882a593Smuzhiyun
10684*4882a593Smuzhiyunfsglmul_res_operr:
10685*4882a593Smuzhiyun	bra.l		res_operr
10686*4882a593Smuzhiyunfsglmul_res_snan:
10687*4882a593Smuzhiyun	bra.l		res_snan
10688*4882a593Smuzhiyunfsglmul_res_qnan:
10689*4882a593Smuzhiyun	bra.l		res_qnan
10690*4882a593Smuzhiyunfsglmul_zero:
10691*4882a593Smuzhiyun	bra.l		fmul_zero
10692*4882a593Smuzhiyunfsglmul_inf_src:
10693*4882a593Smuzhiyun	bra.l		fmul_inf_src
10694*4882a593Smuzhiyunfsglmul_inf_dst:
10695*4882a593Smuzhiyun	bra.l		fmul_inf_dst
10696*4882a593Smuzhiyun
10697*4882a593Smuzhiyun#########################################################################
10698*4882a593Smuzhiyun# XDEF ****************************************************************	#
10699*4882a593Smuzhiyun#	fsgldiv(): emulates the fsgldiv instruction			#
10700*4882a593Smuzhiyun#									#
10701*4882a593Smuzhiyun# XREF ****************************************************************	#
10702*4882a593Smuzhiyun#	scale_to_zero_src() - scale src exponent to zero		#
10703*4882a593Smuzhiyun#	scale_to_zero_dst() - scale dst exponent to zero		#
10704*4882a593Smuzhiyun#	unf_res4() - return default underflow result for sglop		#
10705*4882a593Smuzhiyun#	ovf_res() - return default overflow result			#
10706*4882a593Smuzhiyun#	res_qnan() - return QNAN result					#
10707*4882a593Smuzhiyun#	res_snan() - return SNAN result					#
10708*4882a593Smuzhiyun#									#
10709*4882a593Smuzhiyun# INPUT ***************************************************************	#
10710*4882a593Smuzhiyun#	a0 = pointer to extended precision source operand		#
10711*4882a593Smuzhiyun#	a1 = pointer to extended precision destination operand		#
10712*4882a593Smuzhiyun#	d0  rnd prec,mode						#
10713*4882a593Smuzhiyun#									#
10714*4882a593Smuzhiyun# OUTPUT **************************************************************	#
10715*4882a593Smuzhiyun#	fp0 = result							#
10716*4882a593Smuzhiyun#	fp1 = EXOP (if exception occurred)				#
10717*4882a593Smuzhiyun#									#
10718*4882a593Smuzhiyun# ALGORITHM ***********************************************************	#
10719*4882a593Smuzhiyun#	Handle NANs, infinities, and zeroes as special cases. Divide	#
10720*4882a593Smuzhiyun# norms/denorms into ext/sgl/dbl precision.				#
10721*4882a593Smuzhiyun#	For norms/denorms, scale the exponents such that a divide	#
10722*4882a593Smuzhiyun# instruction won't cause an exception. Use the regular fsgldiv to	#
10723*4882a593Smuzhiyun# compute a result. Check if the regular operands would have taken	#
10724*4882a593Smuzhiyun# an exception. If so, return the default overflow/underflow result	#
10725*4882a593Smuzhiyun# and return the EXOP if exceptions are enabled. Else, scale the	#
10726*4882a593Smuzhiyun# result operand to the proper exponent.				#
10727*4882a593Smuzhiyun#									#
10728*4882a593Smuzhiyun#########################################################################
10729*4882a593Smuzhiyun
10730*4882a593Smuzhiyun	global		fsgldiv
10731*4882a593Smuzhiyunfsgldiv:
10732*4882a593Smuzhiyun	mov.l		%d0,L_SCR3(%a6)		# store rnd info
10733*4882a593Smuzhiyun
10734*4882a593Smuzhiyun	clr.w		%d1
10735*4882a593Smuzhiyun	mov.b		DTAG(%a6),%d1
10736*4882a593Smuzhiyun	lsl.b		&0x3,%d1
10737*4882a593Smuzhiyun	or.b		STAG(%a6),%d1		# combine src tags
10738*4882a593Smuzhiyun
10739*4882a593Smuzhiyun	bne.w		fsgldiv_not_norm	# optimize on non-norm input
10740*4882a593Smuzhiyun
10741*4882a593Smuzhiyun#
10742*4882a593Smuzhiyun# DIVIDE: NORMs and DENORMs ONLY!
10743*4882a593Smuzhiyun#
10744*4882a593Smuzhiyunfsgldiv_norm:
10745*4882a593Smuzhiyun	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
10746*4882a593Smuzhiyun	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
10747*4882a593Smuzhiyun	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
10748*4882a593Smuzhiyun
10749*4882a593Smuzhiyun	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
10750*4882a593Smuzhiyun	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
10751*4882a593Smuzhiyun	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
10752*4882a593Smuzhiyun
10753*4882a593Smuzhiyun	bsr.l		scale_to_zero_src	# calculate scale factor 1
10754*4882a593Smuzhiyun	mov.l		%d0,-(%sp)		# save scale factor 1
10755*4882a593Smuzhiyun
10756*4882a593Smuzhiyun	bsr.l		scale_to_zero_dst	# calculate scale factor 2
10757*4882a593Smuzhiyun
10758*4882a593Smuzhiyun	neg.l		(%sp)			# S.F. = scale1 - scale2
10759*4882a593Smuzhiyun	add.l		%d0,(%sp)
10760*4882a593Smuzhiyun
10761*4882a593Smuzhiyun	mov.w		2+L_SCR3(%a6),%d1	# fetch precision,mode
10762*4882a593Smuzhiyun	lsr.b		&0x6,%d1
10763*4882a593Smuzhiyun	mov.l		(%sp)+,%d0
10764*4882a593Smuzhiyun	cmpi.l		%d0,&0x3fff-0x7ffe
10765*4882a593Smuzhiyun	ble.w		fsgldiv_may_ovfl
10766*4882a593Smuzhiyun
10767*4882a593Smuzhiyun	cmpi.l		%d0,&0x3fff-0x0000	# will result underflow?
10768*4882a593Smuzhiyun	beq.w		fsgldiv_may_unfl	# maybe
10769*4882a593Smuzhiyun	bgt.w		fsgldiv_unfl		# yes; go handle underflow
10770*4882a593Smuzhiyun
10771*4882a593Smuzhiyunfsgldiv_normal:
10772*4882a593Smuzhiyun	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
10773*4882a593Smuzhiyun
10774*4882a593Smuzhiyun	fmov.l		L_SCR3(%a6),%fpcr	# save FPCR
10775*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr		# clear FPSR
10776*4882a593Smuzhiyun
10777*4882a593Smuzhiyun	fsgldiv.x	FP_SCR0(%a6),%fp0	# perform sgl divide
10778*4882a593Smuzhiyun
10779*4882a593Smuzhiyun	fmov.l		%fpsr,%d1		# save FPSR
10780*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# clear FPCR
10781*4882a593Smuzhiyun
10782*4882a593Smuzhiyun	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
10783*4882a593Smuzhiyun
10784*4882a593Smuzhiyunfsgldiv_normal_exit:
10785*4882a593Smuzhiyun	fmovm.x		&0x80,FP_SCR0(%a6)	# store result on stack
10786*4882a593Smuzhiyun	mov.l		%d2,-(%sp)		# save d2
10787*4882a593Smuzhiyun	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
10788*4882a593Smuzhiyun	mov.l		%d1,%d2			# make a copy
10789*4882a593Smuzhiyun	andi.l		&0x7fff,%d1		# strip sign
10790*4882a593Smuzhiyun	andi.w		&0x8000,%d2		# keep old sign
10791*4882a593Smuzhiyun	sub.l		%d0,%d1			# add scale factor
10792*4882a593Smuzhiyun	or.w		%d2,%d1			# concat old sign,new exp
10793*4882a593Smuzhiyun	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
10794*4882a593Smuzhiyun	mov.l		(%sp)+,%d2		# restore d2
10795*4882a593Smuzhiyun	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
10796*4882a593Smuzhiyun	rts
10797*4882a593Smuzhiyun
10798*4882a593Smuzhiyunfsgldiv_may_ovfl:
10799*4882a593Smuzhiyun	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
10800*4882a593Smuzhiyun
10801*4882a593Smuzhiyun	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
10802*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr		# set FPSR
10803*4882a593Smuzhiyun
10804*4882a593Smuzhiyun	fsgldiv.x	FP_SCR0(%a6),%fp0	# execute divide
10805*4882a593Smuzhiyun
10806*4882a593Smuzhiyun	fmov.l		%fpsr,%d1
10807*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr
10808*4882a593Smuzhiyun
10809*4882a593Smuzhiyun	or.l		%d1,USER_FPSR(%a6)	# save INEX,N
10810*4882a593Smuzhiyun
10811*4882a593Smuzhiyun	fmovm.x		&0x01,-(%sp)		# save result to stack
10812*4882a593Smuzhiyun	mov.w		(%sp),%d1		# fetch new exponent
10813*4882a593Smuzhiyun	add.l		&0xc,%sp		# clear result
10814*4882a593Smuzhiyun	andi.l		&0x7fff,%d1		# strip sign
10815*4882a593Smuzhiyun	sub.l		%d0,%d1			# add scale factor
10816*4882a593Smuzhiyun	cmp.l		%d1,&0x7fff		# did divide overflow?
10817*4882a593Smuzhiyun	blt.b		fsgldiv_normal_exit
10818*4882a593Smuzhiyun
10819*4882a593Smuzhiyunfsgldiv_ovfl_tst:
10820*4882a593Smuzhiyun	or.w		&ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
10821*4882a593Smuzhiyun
10822*4882a593Smuzhiyun	mov.b		FPCR_ENABLE(%a6),%d1
10823*4882a593Smuzhiyun	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
10824*4882a593Smuzhiyun	bne.b		fsgldiv_ovfl_ena	# yes
10825*4882a593Smuzhiyun
10826*4882a593Smuzhiyunfsgldiv_ovfl_dis:
10827*4882a593Smuzhiyun	btst		&neg_bit,FPSR_CC(%a6)	# is result negative
10828*4882a593Smuzhiyun	sne		%d1			# set sign param accordingly
10829*4882a593Smuzhiyun	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
10830*4882a593Smuzhiyun	andi.b		&0x30,%d0		# kill precision
10831*4882a593Smuzhiyun	bsr.l		ovf_res			# calculate default result
10832*4882a593Smuzhiyun	or.b		%d0,FPSR_CC(%a6)	# set INF if applicable
10833*4882a593Smuzhiyun	fmovm.x		(%a0),&0x80		# return default result in fp0
10834*4882a593Smuzhiyun	rts
10835*4882a593Smuzhiyun
10836*4882a593Smuzhiyunfsgldiv_ovfl_ena:
10837*4882a593Smuzhiyun	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack
10838*4882a593Smuzhiyun
10839*4882a593Smuzhiyun	mov.l		%d2,-(%sp)		# save d2
10840*4882a593Smuzhiyun	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
10841*4882a593Smuzhiyun	mov.l		%d1,%d2			# make a copy
10842*4882a593Smuzhiyun	andi.l		&0x7fff,%d1		# strip sign
10843*4882a593Smuzhiyun	andi.w		&0x8000,%d2		# keep old sign
10844*4882a593Smuzhiyun	sub.l		%d0,%d1			# add scale factor
10845*4882a593Smuzhiyun	subi.l		&0x6000,%d1		# subtract new bias
10846*4882a593Smuzhiyun	andi.w		&0x7fff,%d1		# clear ms bit
10847*4882a593Smuzhiyun	or.w		%d2,%d1			# concat old sign,new exp
10848*4882a593Smuzhiyun	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
10849*4882a593Smuzhiyun	mov.l		(%sp)+,%d2		# restore d2
10850*4882a593Smuzhiyun	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
10851*4882a593Smuzhiyun	bra.b		fsgldiv_ovfl_dis
10852*4882a593Smuzhiyun
10853*4882a593Smuzhiyunfsgldiv_unfl:
10854*4882a593Smuzhiyun	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
10855*4882a593Smuzhiyun
10856*4882a593Smuzhiyun	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
10857*4882a593Smuzhiyun
10858*4882a593Smuzhiyun	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
10859*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr		# clear FPSR
10860*4882a593Smuzhiyun
10861*4882a593Smuzhiyun	fsgldiv.x	FP_SCR0(%a6),%fp0	# execute sgl divide
10862*4882a593Smuzhiyun
10863*4882a593Smuzhiyun	fmov.l		%fpsr,%d1		# save status
10864*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# clear FPCR
10865*4882a593Smuzhiyun
10866*4882a593Smuzhiyun	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
10867*4882a593Smuzhiyun
10868*4882a593Smuzhiyun	mov.b		FPCR_ENABLE(%a6),%d1
10869*4882a593Smuzhiyun	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
10870*4882a593Smuzhiyun	bne.b		fsgldiv_unfl_ena	# yes
10871*4882a593Smuzhiyun
10872*4882a593Smuzhiyunfsgldiv_unfl_dis:
10873*4882a593Smuzhiyun	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
10874*4882a593Smuzhiyun
10875*4882a593Smuzhiyun	lea		FP_SCR0(%a6),%a0	# pass: result addr
10876*4882a593Smuzhiyun	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
10877*4882a593Smuzhiyun	bsr.l		unf_res4		# calculate default result
10878*4882a593Smuzhiyun	or.b		%d0,FPSR_CC(%a6)	# 'Z' bit may have been set
10879*4882a593Smuzhiyun	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
10880*4882a593Smuzhiyun	rts
10881*4882a593Smuzhiyun
10882*4882a593Smuzhiyun#
10883*4882a593Smuzhiyun# UNFL is enabled.
10884*4882a593Smuzhiyun#
10885*4882a593Smuzhiyunfsgldiv_unfl_ena:
10886*4882a593Smuzhiyun	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op
10887*4882a593Smuzhiyun
10888*4882a593Smuzhiyun	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
10889*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr		# clear FPSR
10890*4882a593Smuzhiyun
10891*4882a593Smuzhiyun	fsgldiv.x	FP_SCR0(%a6),%fp1	# execute sgl divide
10892*4882a593Smuzhiyun
10893*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# clear FPCR
10894*4882a593Smuzhiyun
10895*4882a593Smuzhiyun	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
10896*4882a593Smuzhiyun	mov.l		%d2,-(%sp)		# save d2
10897*4882a593Smuzhiyun	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
10898*4882a593Smuzhiyun	mov.l		%d1,%d2			# make a copy
10899*4882a593Smuzhiyun	andi.l		&0x7fff,%d1		# strip sign
10900*4882a593Smuzhiyun	andi.w		&0x8000,%d2		# keep old sign
10901*4882a593Smuzhiyun	sub.l		%d0,%d1			# add scale factor
10902*4882a593Smuzhiyun	addi.l		&0x6000,%d1		# add bias
10903*4882a593Smuzhiyun	andi.w		&0x7fff,%d1		# clear top bit
10904*4882a593Smuzhiyun	or.w		%d2,%d1			# concat old sign, new exp
10905*4882a593Smuzhiyun	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
10906*4882a593Smuzhiyun	mov.l		(%sp)+,%d2		# restore d2
10907*4882a593Smuzhiyun	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
10908*4882a593Smuzhiyun	bra.b		fsgldiv_unfl_dis
10909*4882a593Smuzhiyun
10910*4882a593Smuzhiyun#
10911*4882a593Smuzhiyun# the divide operation MAY underflow:
10912*4882a593Smuzhiyun#
10913*4882a593Smuzhiyunfsgldiv_may_unfl:
10914*4882a593Smuzhiyun	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
10915*4882a593Smuzhiyun
10916*4882a593Smuzhiyun	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
10917*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr		# clear FPSR
10918*4882a593Smuzhiyun
10919*4882a593Smuzhiyun	fsgldiv.x	FP_SCR0(%a6),%fp0	# execute sgl divide
10920*4882a593Smuzhiyun
10921*4882a593Smuzhiyun	fmov.l		%fpsr,%d1		# save status
10922*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# clear FPCR
10923*4882a593Smuzhiyun
10924*4882a593Smuzhiyun	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
10925*4882a593Smuzhiyun
10926*4882a593Smuzhiyun	fabs.x		%fp0,%fp1		# make a copy of result
10927*4882a593Smuzhiyun	fcmp.b		%fp1,&0x1		# is |result| > 1.b?
10928*4882a593Smuzhiyun	fbgt.w		fsgldiv_normal_exit	# no; no underflow occurred
10929*4882a593Smuzhiyun	fblt.w		fsgldiv_unfl		# yes; underflow occurred
10930*4882a593Smuzhiyun
10931*4882a593Smuzhiyun#
10932*4882a593Smuzhiyun# we still don't know if underflow occurred. result is ~ equal to 1. but,
10933*4882a593Smuzhiyun# we don't know if the result was an underflow that rounded up to a 1
10934*4882a593Smuzhiyun# or a normalized number that rounded down to a 1. so, redo the entire
10935*4882a593Smuzhiyun# operation using RZ as the rounding mode to see what the pre-rounded
10936*4882a593Smuzhiyun# result is. this case should be relatively rare.
10937*4882a593Smuzhiyun#
10938*4882a593Smuzhiyun	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into %fp1
10939*4882a593Smuzhiyun
10940*4882a593Smuzhiyun	clr.l		%d1			# clear scratch register
10941*4882a593Smuzhiyun	ori.b		&rz_mode*0x10,%d1	# force RZ rnd mode
10942*4882a593Smuzhiyun
10943*4882a593Smuzhiyun	fmov.l		%d1,%fpcr		# set FPCR
10944*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr		# clear FPSR
10945*4882a593Smuzhiyun
10946*4882a593Smuzhiyun	fsgldiv.x	FP_SCR0(%a6),%fp1	# execute sgl divide
10947*4882a593Smuzhiyun
10948*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# clear FPCR
10949*4882a593Smuzhiyun	fabs.x		%fp1			# make absolute value
10950*4882a593Smuzhiyun	fcmp.b		%fp1,&0x1		# is |result| < 1.b?
10951*4882a593Smuzhiyun	fbge.w		fsgldiv_normal_exit	# no; no underflow occurred
10952*4882a593Smuzhiyun	bra.w		fsgldiv_unfl		# yes; underflow occurred
10953*4882a593Smuzhiyun
10954*4882a593Smuzhiyun############################################################################
10955*4882a593Smuzhiyun
10956*4882a593Smuzhiyun#
10957*4882a593Smuzhiyun# Divide: inputs are not both normalized; what are they?
10958*4882a593Smuzhiyun#
10959*4882a593Smuzhiyunfsgldiv_not_norm:
10960*4882a593Smuzhiyun	mov.w		(tbl_fsgldiv_op.b,%pc,%d1.w*2),%d1
10961*4882a593Smuzhiyun	jmp		(tbl_fsgldiv_op.b,%pc,%d1.w*1)
10962*4882a593Smuzhiyun
10963*4882a593Smuzhiyun	swbeg		&48
10964*4882a593Smuzhiyuntbl_fsgldiv_op:
10965*4882a593Smuzhiyun	short		fsgldiv_norm		- tbl_fsgldiv_op # NORM / NORM
10966*4882a593Smuzhiyun	short		fsgldiv_inf_load	- tbl_fsgldiv_op # NORM / ZERO
10967*4882a593Smuzhiyun	short		fsgldiv_zero_load	- tbl_fsgldiv_op # NORM / INF
10968*4882a593Smuzhiyun	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # NORM / QNAN
10969*4882a593Smuzhiyun	short		fsgldiv_norm		- tbl_fsgldiv_op # NORM / DENORM
10970*4882a593Smuzhiyun	short		fsgldiv_res_snan	- tbl_fsgldiv_op # NORM / SNAN
10971*4882a593Smuzhiyun	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
10972*4882a593Smuzhiyun	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
10973*4882a593Smuzhiyun
10974*4882a593Smuzhiyun	short		fsgldiv_zero_load	- tbl_fsgldiv_op # ZERO / NORM
10975*4882a593Smuzhiyun	short		fsgldiv_res_operr	- tbl_fsgldiv_op # ZERO / ZERO
10976*4882a593Smuzhiyun	short		fsgldiv_zero_load	- tbl_fsgldiv_op # ZERO / INF
10977*4882a593Smuzhiyun	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # ZERO / QNAN
10978*4882a593Smuzhiyun	short		fsgldiv_zero_load	- tbl_fsgldiv_op # ZERO / DENORM
10979*4882a593Smuzhiyun	short		fsgldiv_res_snan	- tbl_fsgldiv_op # ZERO / SNAN
10980*4882a593Smuzhiyun	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
10981*4882a593Smuzhiyun	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
10982*4882a593Smuzhiyun
10983*4882a593Smuzhiyun	short		fsgldiv_inf_dst		- tbl_fsgldiv_op # INF / NORM
10984*4882a593Smuzhiyun	short		fsgldiv_inf_dst		- tbl_fsgldiv_op # INF / ZERO
10985*4882a593Smuzhiyun	short		fsgldiv_res_operr	- tbl_fsgldiv_op # INF / INF
10986*4882a593Smuzhiyun	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # INF / QNAN
10987*4882a593Smuzhiyun	short		fsgldiv_inf_dst		- tbl_fsgldiv_op # INF / DENORM
10988*4882a593Smuzhiyun	short		fsgldiv_res_snan	- tbl_fsgldiv_op # INF / SNAN
10989*4882a593Smuzhiyun	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
10990*4882a593Smuzhiyun	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
10991*4882a593Smuzhiyun
10992*4882a593Smuzhiyun	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / NORM
10993*4882a593Smuzhiyun	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / ZERO
10994*4882a593Smuzhiyun	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / INF
10995*4882a593Smuzhiyun	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / QNAN
10996*4882a593Smuzhiyun	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / DENORM
10997*4882a593Smuzhiyun	short		fsgldiv_res_snan	- tbl_fsgldiv_op # QNAN / SNAN
10998*4882a593Smuzhiyun	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
10999*4882a593Smuzhiyun	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
11000*4882a593Smuzhiyun
11001*4882a593Smuzhiyun	short		fsgldiv_norm		- tbl_fsgldiv_op # DENORM / NORM
11002*4882a593Smuzhiyun	short		fsgldiv_inf_load	- tbl_fsgldiv_op # DENORM / ZERO
11003*4882a593Smuzhiyun	short		fsgldiv_zero_load	- tbl_fsgldiv_op # DENORM / INF
11004*4882a593Smuzhiyun	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # DENORM / QNAN
11005*4882a593Smuzhiyun	short		fsgldiv_norm		- tbl_fsgldiv_op # DENORM / DENORM
11006*4882a593Smuzhiyun	short		fsgldiv_res_snan	- tbl_fsgldiv_op # DENORM / SNAN
11007*4882a593Smuzhiyun	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
11008*4882a593Smuzhiyun	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
11009*4882a593Smuzhiyun
11010*4882a593Smuzhiyun	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / NORM
11011*4882a593Smuzhiyun	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / ZERO
11012*4882a593Smuzhiyun	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / INF
11013*4882a593Smuzhiyun	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / QNAN
11014*4882a593Smuzhiyun	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / DENORM
11015*4882a593Smuzhiyun	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / SNAN
11016*4882a593Smuzhiyun	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
11017*4882a593Smuzhiyun	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
11018*4882a593Smuzhiyun
11019*4882a593Smuzhiyunfsgldiv_res_qnan:
11020*4882a593Smuzhiyun	bra.l		res_qnan
11021*4882a593Smuzhiyunfsgldiv_res_snan:
11022*4882a593Smuzhiyun	bra.l		res_snan
11023*4882a593Smuzhiyunfsgldiv_res_operr:
11024*4882a593Smuzhiyun	bra.l		res_operr
11025*4882a593Smuzhiyunfsgldiv_inf_load:
11026*4882a593Smuzhiyun	bra.l		fdiv_inf_load
11027*4882a593Smuzhiyunfsgldiv_zero_load:
11028*4882a593Smuzhiyun	bra.l		fdiv_zero_load
11029*4882a593Smuzhiyunfsgldiv_inf_dst:
11030*4882a593Smuzhiyun	bra.l		fdiv_inf_dst
11031*4882a593Smuzhiyun
11032*4882a593Smuzhiyun#########################################################################
11033*4882a593Smuzhiyun# XDEF ****************************************************************	#
11034*4882a593Smuzhiyun#	fadd(): emulates the fadd instruction				#
11035*4882a593Smuzhiyun#	fsadd(): emulates the fadd instruction				#
11036*4882a593Smuzhiyun#	fdadd(): emulates the fdadd instruction				#
11037*4882a593Smuzhiyun#									#
11038*4882a593Smuzhiyun# XREF ****************************************************************	#
11039*4882a593Smuzhiyun#	addsub_scaler2() - scale the operands so they won't take exc	#
11040*4882a593Smuzhiyun#	ovf_res() - return default overflow result			#
11041*4882a593Smuzhiyun#	unf_res() - return default underflow result			#
11042*4882a593Smuzhiyun#	res_qnan() - set QNAN result					#
11043*4882a593Smuzhiyun#	res_snan() - set SNAN result					#
11044*4882a593Smuzhiyun#	res_operr() - set OPERR result					#
11045*4882a593Smuzhiyun#	scale_to_zero_src() - set src operand exponent equal to zero	#
11046*4882a593Smuzhiyun#	scale_to_zero_dst() - set dst operand exponent equal to zero	#
11047*4882a593Smuzhiyun#									#
11048*4882a593Smuzhiyun# INPUT ***************************************************************	#
11049*4882a593Smuzhiyun#	a0 = pointer to extended precision source operand		#
11050*4882a593Smuzhiyun#	a1 = pointer to extended precision destination operand		#
11051*4882a593Smuzhiyun#									#
11052*4882a593Smuzhiyun# OUTPUT **************************************************************	#
11053*4882a593Smuzhiyun#	fp0 = result							#
11054*4882a593Smuzhiyun#	fp1 = EXOP (if exception occurred)				#
11055*4882a593Smuzhiyun#									#
11056*4882a593Smuzhiyun# ALGORITHM ***********************************************************	#
11057*4882a593Smuzhiyun#	Handle NANs, infinities, and zeroes as special cases. Divide	#
11058*4882a593Smuzhiyun# norms into extended, single, and double precision.			#
11059*4882a593Smuzhiyun#	Do addition after scaling exponents such that exception won't	#
11060*4882a593Smuzhiyun# occur. Then, check result exponent to see if exception would have	#
11061*4882a593Smuzhiyun# occurred. If so, return default result and maybe EXOP. Else, insert	#
11062*4882a593Smuzhiyun# the correct result exponent and return. Set FPSR bits as appropriate.	#
11063*4882a593Smuzhiyun#									#
11064*4882a593Smuzhiyun#########################################################################
11065*4882a593Smuzhiyun
11066*4882a593Smuzhiyun	global		fsadd
11067*4882a593Smuzhiyunfsadd:
11068*4882a593Smuzhiyun	andi.b		&0x30,%d0		# clear rnd prec
11069*4882a593Smuzhiyun	ori.b		&s_mode*0x10,%d0	# insert sgl prec
11070*4882a593Smuzhiyun	bra.b		fadd
11071*4882a593Smuzhiyun
11072*4882a593Smuzhiyun	global		fdadd
11073*4882a593Smuzhiyunfdadd:
11074*4882a593Smuzhiyun	andi.b		&0x30,%d0		# clear rnd prec
11075*4882a593Smuzhiyun	ori.b		&d_mode*0x10,%d0	# insert dbl prec
11076*4882a593Smuzhiyun
11077*4882a593Smuzhiyun	global		fadd
11078*4882a593Smuzhiyunfadd:
11079*4882a593Smuzhiyun	mov.l		%d0,L_SCR3(%a6)		# store rnd info
11080*4882a593Smuzhiyun
11081*4882a593Smuzhiyun	clr.w		%d1
11082*4882a593Smuzhiyun	mov.b		DTAG(%a6),%d1
11083*4882a593Smuzhiyun	lsl.b		&0x3,%d1
11084*4882a593Smuzhiyun	or.b		STAG(%a6),%d1		# combine src tags
11085*4882a593Smuzhiyun
11086*4882a593Smuzhiyun	bne.w		fadd_not_norm		# optimize on non-norm input
11087*4882a593Smuzhiyun
11088*4882a593Smuzhiyun#
11089*4882a593Smuzhiyun# ADD: norms and denorms
11090*4882a593Smuzhiyun#
11091*4882a593Smuzhiyunfadd_norm:
11092*4882a593Smuzhiyun	bsr.l		addsub_scaler2		# scale exponents
11093*4882a593Smuzhiyun
11094*4882a593Smuzhiyunfadd_zero_entry:
11095*4882a593Smuzhiyun	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
11096*4882a593Smuzhiyun
11097*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr		# clear FPSR
11098*4882a593Smuzhiyun	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
11099*4882a593Smuzhiyun
11100*4882a593Smuzhiyun	fadd.x		FP_SCR0(%a6),%fp0	# execute add
11101*4882a593Smuzhiyun
11102*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# clear FPCR
11103*4882a593Smuzhiyun	fmov.l		%fpsr,%d1		# fetch INEX2,N,Z
11104*4882a593Smuzhiyun
11105*4882a593Smuzhiyun	or.l		%d1,USER_FPSR(%a6)	# save exc and ccode bits
11106*4882a593Smuzhiyun
11107*4882a593Smuzhiyun	fbeq.w		fadd_zero_exit		# if result is zero, end now
11108*4882a593Smuzhiyun
11109*4882a593Smuzhiyun	mov.l		%d2,-(%sp)		# save d2
11110*4882a593Smuzhiyun
11111*4882a593Smuzhiyun	fmovm.x		&0x01,-(%sp)		# save result to stack
11112*4882a593Smuzhiyun
11113*4882a593Smuzhiyun	mov.w		2+L_SCR3(%a6),%d1
11114*4882a593Smuzhiyun	lsr.b		&0x6,%d1
11115*4882a593Smuzhiyun
11116*4882a593Smuzhiyun	mov.w		(%sp),%d2		# fetch new sign, exp
11117*4882a593Smuzhiyun	andi.l		&0x7fff,%d2		# strip sign
11118*4882a593Smuzhiyun	sub.l		%d0,%d2			# add scale factor
11119*4882a593Smuzhiyun
11120*4882a593Smuzhiyun	cmp.l		%d2,(tbl_fadd_ovfl.b,%pc,%d1.w*4) # is it an overflow?
11121*4882a593Smuzhiyun	bge.b		fadd_ovfl		# yes
11122*4882a593Smuzhiyun
11123*4882a593Smuzhiyun	cmp.l		%d2,(tbl_fadd_unfl.b,%pc,%d1.w*4) # is it an underflow?
11124*4882a593Smuzhiyun	blt.w		fadd_unfl		# yes
11125*4882a593Smuzhiyun	beq.w		fadd_may_unfl		# maybe; go find out
11126*4882a593Smuzhiyun
11127*4882a593Smuzhiyunfadd_normal:
11128*4882a593Smuzhiyun	mov.w		(%sp),%d1
11129*4882a593Smuzhiyun	andi.w		&0x8000,%d1		# keep sign
11130*4882a593Smuzhiyun	or.w		%d2,%d1			# concat sign,new exp
11131*4882a593Smuzhiyun	mov.w		%d1,(%sp)		# insert new exponent
11132*4882a593Smuzhiyun
11133*4882a593Smuzhiyun	fmovm.x		(%sp)+,&0x80		# return result in fp0
11134*4882a593Smuzhiyun
11135*4882a593Smuzhiyun	mov.l		(%sp)+,%d2		# restore d2
11136*4882a593Smuzhiyun	rts
11137*4882a593Smuzhiyun
11138*4882a593Smuzhiyunfadd_zero_exit:
11139*4882a593Smuzhiyun#	fmov.s		&0x00000000,%fp0	# return zero in fp0
11140*4882a593Smuzhiyun	rts
11141*4882a593Smuzhiyun
11142*4882a593Smuzhiyuntbl_fadd_ovfl:
11143*4882a593Smuzhiyun	long		0x7fff			# ext ovfl
11144*4882a593Smuzhiyun	long		0x407f			# sgl ovfl
11145*4882a593Smuzhiyun	long		0x43ff			# dbl ovfl
11146*4882a593Smuzhiyun
11147*4882a593Smuzhiyuntbl_fadd_unfl:
11148*4882a593Smuzhiyun	long	        0x0000			# ext unfl
11149*4882a593Smuzhiyun	long		0x3f81			# sgl unfl
11150*4882a593Smuzhiyun	long		0x3c01			# dbl unfl
11151*4882a593Smuzhiyun
11152*4882a593Smuzhiyunfadd_ovfl:
11153*4882a593Smuzhiyun	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
11154*4882a593Smuzhiyun
11155*4882a593Smuzhiyun	mov.b		FPCR_ENABLE(%a6),%d1
11156*4882a593Smuzhiyun	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
11157*4882a593Smuzhiyun	bne.b		fadd_ovfl_ena		# yes
11158*4882a593Smuzhiyun
11159*4882a593Smuzhiyun	add.l		&0xc,%sp
11160*4882a593Smuzhiyunfadd_ovfl_dis:
11161*4882a593Smuzhiyun	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
11162*4882a593Smuzhiyun	sne		%d1			# set sign param accordingly
11163*4882a593Smuzhiyun	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
11164*4882a593Smuzhiyun	bsr.l		ovf_res			# calculate default result
11165*4882a593Smuzhiyun	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
11166*4882a593Smuzhiyun	fmovm.x		(%a0),&0x80		# return default result in fp0
11167*4882a593Smuzhiyun	mov.l		(%sp)+,%d2		# restore d2
11168*4882a593Smuzhiyun	rts
11169*4882a593Smuzhiyun
11170*4882a593Smuzhiyunfadd_ovfl_ena:
11171*4882a593Smuzhiyun	mov.b		L_SCR3(%a6),%d1
11172*4882a593Smuzhiyun	andi.b		&0xc0,%d1		# is precision extended?
11173*4882a593Smuzhiyun	bne.b		fadd_ovfl_ena_sd	# no; prec = sgl or dbl
11174*4882a593Smuzhiyun
11175*4882a593Smuzhiyunfadd_ovfl_ena_cont:
11176*4882a593Smuzhiyun	mov.w		(%sp),%d1
11177*4882a593Smuzhiyun	andi.w		&0x8000,%d1		# keep sign
11178*4882a593Smuzhiyun	subi.l		&0x6000,%d2		# add extra bias
11179*4882a593Smuzhiyun	andi.w		&0x7fff,%d2
11180*4882a593Smuzhiyun	or.w		%d2,%d1			# concat sign,new exp
11181*4882a593Smuzhiyun	mov.w		%d1,(%sp)		# insert new exponent
11182*4882a593Smuzhiyun
11183*4882a593Smuzhiyun	fmovm.x		(%sp)+,&0x40		# return EXOP in fp1
11184*4882a593Smuzhiyun	bra.b		fadd_ovfl_dis
11185*4882a593Smuzhiyun
11186*4882a593Smuzhiyunfadd_ovfl_ena_sd:
11187*4882a593Smuzhiyun	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
11188*4882a593Smuzhiyun
11189*4882a593Smuzhiyun	mov.l		L_SCR3(%a6),%d1
11190*4882a593Smuzhiyun	andi.b		&0x30,%d1		# keep rnd mode
11191*4882a593Smuzhiyun	fmov.l		%d1,%fpcr		# set FPCR
11192*4882a593Smuzhiyun
11193*4882a593Smuzhiyun	fadd.x		FP_SCR0(%a6),%fp0	# execute add
11194*4882a593Smuzhiyun
11195*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# clear FPCR
11196*4882a593Smuzhiyun
11197*4882a593Smuzhiyun	add.l		&0xc,%sp
11198*4882a593Smuzhiyun	fmovm.x		&0x01,-(%sp)
11199*4882a593Smuzhiyun	bra.b		fadd_ovfl_ena_cont
11200*4882a593Smuzhiyun
11201*4882a593Smuzhiyunfadd_unfl:
11202*4882a593Smuzhiyun	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
11203*4882a593Smuzhiyun
11204*4882a593Smuzhiyun	add.l		&0xc,%sp
11205*4882a593Smuzhiyun
11206*4882a593Smuzhiyun	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
11207*4882a593Smuzhiyun
11208*4882a593Smuzhiyun	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
11209*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr		# clear FPSR
11210*4882a593Smuzhiyun
11211*4882a593Smuzhiyun	fadd.x		FP_SCR0(%a6),%fp0	# execute add
11212*4882a593Smuzhiyun
11213*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# clear FPCR
11214*4882a593Smuzhiyun	fmov.l		%fpsr,%d1		# save status
11215*4882a593Smuzhiyun
11216*4882a593Smuzhiyun	or.l		%d1,USER_FPSR(%a6)	# save INEX,N
11217*4882a593Smuzhiyun
11218*4882a593Smuzhiyun	mov.b		FPCR_ENABLE(%a6),%d1
11219*4882a593Smuzhiyun	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
11220*4882a593Smuzhiyun	bne.b		fadd_unfl_ena		# yes
11221*4882a593Smuzhiyun
11222*4882a593Smuzhiyunfadd_unfl_dis:
11223*4882a593Smuzhiyun	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
11224*4882a593Smuzhiyun
11225*4882a593Smuzhiyun	lea		FP_SCR0(%a6),%a0	# pass: result addr
11226*4882a593Smuzhiyun	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
11227*4882a593Smuzhiyun	bsr.l		unf_res			# calculate default result
11228*4882a593Smuzhiyun	or.b		%d0,FPSR_CC(%a6)	# 'Z' bit may have been set
11229*4882a593Smuzhiyun	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
11230*4882a593Smuzhiyun	mov.l		(%sp)+,%d2		# restore d2
11231*4882a593Smuzhiyun	rts
11232*4882a593Smuzhiyun
11233*4882a593Smuzhiyunfadd_unfl_ena:
11234*4882a593Smuzhiyun	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op
11235*4882a593Smuzhiyun
11236*4882a593Smuzhiyun	mov.l		L_SCR3(%a6),%d1
11237*4882a593Smuzhiyun	andi.b		&0xc0,%d1		# is precision extended?
11238*4882a593Smuzhiyun	bne.b		fadd_unfl_ena_sd	# no; sgl or dbl
11239*4882a593Smuzhiyun
11240*4882a593Smuzhiyun	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
11241*4882a593Smuzhiyun
11242*4882a593Smuzhiyunfadd_unfl_ena_cont:
11243*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr		# clear FPSR
11244*4882a593Smuzhiyun
11245*4882a593Smuzhiyun	fadd.x		FP_SCR0(%a6),%fp1	# execute multiply
11246*4882a593Smuzhiyun
11247*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# clear FPCR
11248*4882a593Smuzhiyun
11249*4882a593Smuzhiyun	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
11250*4882a593Smuzhiyun	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
11251*4882a593Smuzhiyun	mov.l		%d1,%d2			# make a copy
11252*4882a593Smuzhiyun	andi.l		&0x7fff,%d1		# strip sign
11253*4882a593Smuzhiyun	andi.w		&0x8000,%d2		# keep old sign
11254*4882a593Smuzhiyun	sub.l		%d0,%d1			# add scale factor
11255*4882a593Smuzhiyun	addi.l		&0x6000,%d1		# add new bias
11256*4882a593Smuzhiyun	andi.w		&0x7fff,%d1		# clear top bit
11257*4882a593Smuzhiyun	or.w		%d2,%d1			# concat sign,new exp
11258*4882a593Smuzhiyun	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
11259*4882a593Smuzhiyun	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
11260*4882a593Smuzhiyun	bra.w		fadd_unfl_dis
11261*4882a593Smuzhiyun
11262*4882a593Smuzhiyunfadd_unfl_ena_sd:
11263*4882a593Smuzhiyun	mov.l		L_SCR3(%a6),%d1
11264*4882a593Smuzhiyun	andi.b		&0x30,%d1		# use only rnd mode
11265*4882a593Smuzhiyun	fmov.l		%d1,%fpcr		# set FPCR
11266*4882a593Smuzhiyun
11267*4882a593Smuzhiyun	bra.b		fadd_unfl_ena_cont
11268*4882a593Smuzhiyun
11269*4882a593Smuzhiyun#
11270*4882a593Smuzhiyun# result is equal to the smallest normalized number in the selected precision
11271*4882a593Smuzhiyun# if the precision is extended, this result could not have come from an
11272*4882a593Smuzhiyun# underflow that rounded up.
11273*4882a593Smuzhiyun#
11274*4882a593Smuzhiyunfadd_may_unfl:
11275*4882a593Smuzhiyun	mov.l		L_SCR3(%a6),%d1
11276*4882a593Smuzhiyun	andi.b		&0xc0,%d1
11277*4882a593Smuzhiyun	beq.w		fadd_normal		# yes; no underflow occurred
11278*4882a593Smuzhiyun
11279*4882a593Smuzhiyun	mov.l		0x4(%sp),%d1		# extract hi(man)
11280*4882a593Smuzhiyun	cmpi.l		%d1,&0x80000000		# is hi(man) = 0x80000000?
11281*4882a593Smuzhiyun	bne.w		fadd_normal		# no; no underflow occurred
11282*4882a593Smuzhiyun
11283*4882a593Smuzhiyun	tst.l		0x8(%sp)		# is lo(man) = 0x0?
11284*4882a593Smuzhiyun	bne.w		fadd_normal		# no; no underflow occurred
11285*4882a593Smuzhiyun
11286*4882a593Smuzhiyun	btst		&inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
11287*4882a593Smuzhiyun	beq.w		fadd_normal		# no; no underflow occurred
11288*4882a593Smuzhiyun
11289*4882a593Smuzhiyun#
11290*4882a593Smuzhiyun# ok, so now the result has a exponent equal to the smallest normalized
11291*4882a593Smuzhiyun# exponent for the selected precision. also, the mantissa is equal to
11292*4882a593Smuzhiyun# 0x8000000000000000 and this mantissa is the result of rounding non-zero
11293*4882a593Smuzhiyun# g,r,s.
11294*4882a593Smuzhiyun# now, we must determine whether the pre-rounded result was an underflow
11295*4882a593Smuzhiyun# rounded "up" or a normalized number rounded "down".
11296*4882a593Smuzhiyun# so, we do this be re-executing the add using RZ as the rounding mode and
11297*4882a593Smuzhiyun# seeing if the new result is smaller or equal to the current result.
11298*4882a593Smuzhiyun#
11299*4882a593Smuzhiyun	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
11300*4882a593Smuzhiyun
11301*4882a593Smuzhiyun	mov.l		L_SCR3(%a6),%d1
11302*4882a593Smuzhiyun	andi.b		&0xc0,%d1		# keep rnd prec
11303*4882a593Smuzhiyun	ori.b		&rz_mode*0x10,%d1	# insert rnd mode
11304*4882a593Smuzhiyun	fmov.l		%d1,%fpcr		# set FPCR
11305*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr		# clear FPSR
11306*4882a593Smuzhiyun
11307*4882a593Smuzhiyun	fadd.x		FP_SCR0(%a6),%fp1	# execute add
11308*4882a593Smuzhiyun
11309*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# clear FPCR
11310*4882a593Smuzhiyun
11311*4882a593Smuzhiyun	fabs.x		%fp0			# compare absolute values
11312*4882a593Smuzhiyun	fabs.x		%fp1
11313*4882a593Smuzhiyun	fcmp.x		%fp0,%fp1		# is first result > second?
11314*4882a593Smuzhiyun
11315*4882a593Smuzhiyun	fbgt.w		fadd_unfl		# yes; it's an underflow
11316*4882a593Smuzhiyun	bra.w		fadd_normal		# no; it's not an underflow
11317*4882a593Smuzhiyun
11318*4882a593Smuzhiyun##########################################################################
11319*4882a593Smuzhiyun
11320*4882a593Smuzhiyun#
11321*4882a593Smuzhiyun# Add: inputs are not both normalized; what are they?
11322*4882a593Smuzhiyun#
11323*4882a593Smuzhiyunfadd_not_norm:
11324*4882a593Smuzhiyun	mov.w		(tbl_fadd_op.b,%pc,%d1.w*2),%d1
11325*4882a593Smuzhiyun	jmp		(tbl_fadd_op.b,%pc,%d1.w*1)
11326*4882a593Smuzhiyun
11327*4882a593Smuzhiyun	swbeg		&48
11328*4882a593Smuzhiyuntbl_fadd_op:
11329*4882a593Smuzhiyun	short		fadd_norm	- tbl_fadd_op # NORM + NORM
11330*4882a593Smuzhiyun	short		fadd_zero_src	- tbl_fadd_op # NORM + ZERO
11331*4882a593Smuzhiyun	short		fadd_inf_src	- tbl_fadd_op # NORM + INF
11332*4882a593Smuzhiyun	short		fadd_res_qnan	- tbl_fadd_op # NORM + QNAN
11333*4882a593Smuzhiyun	short		fadd_norm	- tbl_fadd_op # NORM + DENORM
11334*4882a593Smuzhiyun	short		fadd_res_snan	- tbl_fadd_op # NORM + SNAN
11335*4882a593Smuzhiyun	short		tbl_fadd_op	- tbl_fadd_op #
11336*4882a593Smuzhiyun	short		tbl_fadd_op	- tbl_fadd_op #
11337*4882a593Smuzhiyun
11338*4882a593Smuzhiyun	short		fadd_zero_dst	- tbl_fadd_op # ZERO + NORM
11339*4882a593Smuzhiyun	short		fadd_zero_2	- tbl_fadd_op # ZERO + ZERO
11340*4882a593Smuzhiyun	short		fadd_inf_src	- tbl_fadd_op # ZERO + INF
11341*4882a593Smuzhiyun	short		fadd_res_qnan	- tbl_fadd_op # NORM + QNAN
11342*4882a593Smuzhiyun	short		fadd_zero_dst	- tbl_fadd_op # ZERO + DENORM
11343*4882a593Smuzhiyun	short		fadd_res_snan	- tbl_fadd_op # NORM + SNAN
11344*4882a593Smuzhiyun	short		tbl_fadd_op	- tbl_fadd_op #
11345*4882a593Smuzhiyun	short		tbl_fadd_op	- tbl_fadd_op #
11346*4882a593Smuzhiyun
11347*4882a593Smuzhiyun	short		fadd_inf_dst	- tbl_fadd_op # INF + NORM
11348*4882a593Smuzhiyun	short		fadd_inf_dst	- tbl_fadd_op # INF + ZERO
11349*4882a593Smuzhiyun	short		fadd_inf_2	- tbl_fadd_op # INF + INF
11350*4882a593Smuzhiyun	short		fadd_res_qnan	- tbl_fadd_op # NORM + QNAN
11351*4882a593Smuzhiyun	short		fadd_inf_dst	- tbl_fadd_op # INF + DENORM
11352*4882a593Smuzhiyun	short		fadd_res_snan	- tbl_fadd_op # NORM + SNAN
11353*4882a593Smuzhiyun	short		tbl_fadd_op	- tbl_fadd_op #
11354*4882a593Smuzhiyun	short		tbl_fadd_op	- tbl_fadd_op #
11355*4882a593Smuzhiyun
11356*4882a593Smuzhiyun	short		fadd_res_qnan	- tbl_fadd_op # QNAN + NORM
11357*4882a593Smuzhiyun	short		fadd_res_qnan	- tbl_fadd_op # QNAN + ZERO
11358*4882a593Smuzhiyun	short		fadd_res_qnan	- tbl_fadd_op # QNAN + INF
11359*4882a593Smuzhiyun	short		fadd_res_qnan	- tbl_fadd_op # QNAN + QNAN
11360*4882a593Smuzhiyun	short		fadd_res_qnan	- tbl_fadd_op # QNAN + DENORM
11361*4882a593Smuzhiyun	short		fadd_res_snan	- tbl_fadd_op # QNAN + SNAN
11362*4882a593Smuzhiyun	short		tbl_fadd_op	- tbl_fadd_op #
11363*4882a593Smuzhiyun	short		tbl_fadd_op	- tbl_fadd_op #
11364*4882a593Smuzhiyun
11365*4882a593Smuzhiyun	short		fadd_norm	- tbl_fadd_op # DENORM + NORM
11366*4882a593Smuzhiyun	short		fadd_zero_src	- tbl_fadd_op # DENORM + ZERO
11367*4882a593Smuzhiyun	short		fadd_inf_src	- tbl_fadd_op # DENORM + INF
11368*4882a593Smuzhiyun	short		fadd_res_qnan	- tbl_fadd_op # NORM + QNAN
11369*4882a593Smuzhiyun	short		fadd_norm	- tbl_fadd_op # DENORM + DENORM
11370*4882a593Smuzhiyun	short		fadd_res_snan	- tbl_fadd_op # NORM + SNAN
11371*4882a593Smuzhiyun	short		tbl_fadd_op	- tbl_fadd_op #
11372*4882a593Smuzhiyun	short		tbl_fadd_op	- tbl_fadd_op #
11373*4882a593Smuzhiyun
11374*4882a593Smuzhiyun	short		fadd_res_snan	- tbl_fadd_op # SNAN + NORM
11375*4882a593Smuzhiyun	short		fadd_res_snan	- tbl_fadd_op # SNAN + ZERO
11376*4882a593Smuzhiyun	short		fadd_res_snan	- tbl_fadd_op # SNAN + INF
11377*4882a593Smuzhiyun	short		fadd_res_snan	- tbl_fadd_op # SNAN + QNAN
11378*4882a593Smuzhiyun	short		fadd_res_snan	- tbl_fadd_op # SNAN + DENORM
11379*4882a593Smuzhiyun	short		fadd_res_snan	- tbl_fadd_op # SNAN + SNAN
11380*4882a593Smuzhiyun	short		tbl_fadd_op	- tbl_fadd_op #
11381*4882a593Smuzhiyun	short		tbl_fadd_op	- tbl_fadd_op #
11382*4882a593Smuzhiyun
11383*4882a593Smuzhiyunfadd_res_qnan:
11384*4882a593Smuzhiyun	bra.l		res_qnan
11385*4882a593Smuzhiyunfadd_res_snan:
11386*4882a593Smuzhiyun	bra.l		res_snan
11387*4882a593Smuzhiyun
11388*4882a593Smuzhiyun#
11389*4882a593Smuzhiyun# both operands are ZEROes
11390*4882a593Smuzhiyun#
11391*4882a593Smuzhiyunfadd_zero_2:
11392*4882a593Smuzhiyun	mov.b		SRC_EX(%a0),%d0		# are the signs opposite
11393*4882a593Smuzhiyun	mov.b		DST_EX(%a1),%d1
11394*4882a593Smuzhiyun	eor.b		%d0,%d1
11395*4882a593Smuzhiyun	bmi.w		fadd_zero_2_chk_rm	# weed out (-ZERO)+(+ZERO)
11396*4882a593Smuzhiyun
11397*4882a593Smuzhiyun# the signs are the same. so determine whether they are positive or negative
11398*4882a593Smuzhiyun# and return the appropriately signed zero.
11399*4882a593Smuzhiyun	tst.b		%d0			# are ZEROes positive or negative?
11400*4882a593Smuzhiyun	bmi.b		fadd_zero_rm		# negative
11401*4882a593Smuzhiyun	fmov.s		&0x00000000,%fp0	# return +ZERO
11402*4882a593Smuzhiyun	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
11403*4882a593Smuzhiyun	rts
11404*4882a593Smuzhiyun
11405*4882a593Smuzhiyun#
11406*4882a593Smuzhiyun# the ZEROes have opposite signs:
11407*4882a593Smuzhiyun# - Therefore, we return +ZERO if the rounding modes are RN,RZ, or RP.
11408*4882a593Smuzhiyun# - -ZERO is returned in the case of RM.
11409*4882a593Smuzhiyun#
11410*4882a593Smuzhiyunfadd_zero_2_chk_rm:
11411*4882a593Smuzhiyun	mov.b		3+L_SCR3(%a6),%d1
11412*4882a593Smuzhiyun	andi.b		&0x30,%d1		# extract rnd mode
11413*4882a593Smuzhiyun	cmpi.b		%d1,&rm_mode*0x10	# is rnd mode == RM?
11414*4882a593Smuzhiyun	beq.b		fadd_zero_rm		# yes
11415*4882a593Smuzhiyun	fmov.s		&0x00000000,%fp0	# return +ZERO
11416*4882a593Smuzhiyun	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
11417*4882a593Smuzhiyun	rts
11418*4882a593Smuzhiyun
11419*4882a593Smuzhiyunfadd_zero_rm:
11420*4882a593Smuzhiyun	fmov.s		&0x80000000,%fp0	# return -ZERO
11421*4882a593Smuzhiyun	mov.b		&neg_bmask+z_bmask,FPSR_CC(%a6) # set NEG/Z
11422*4882a593Smuzhiyun	rts
11423*4882a593Smuzhiyun
11424*4882a593Smuzhiyun#
11425*4882a593Smuzhiyun# one operand is a ZERO and the other is a DENORM or NORM. scale
11426*4882a593Smuzhiyun# the DENORM or NORM and jump to the regular fadd routine.
11427*4882a593Smuzhiyun#
11428*4882a593Smuzhiyunfadd_zero_dst:
11429*4882a593Smuzhiyun	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
11430*4882a593Smuzhiyun	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
11431*4882a593Smuzhiyun	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
11432*4882a593Smuzhiyun	bsr.l		scale_to_zero_src	# scale the operand
11433*4882a593Smuzhiyun	clr.w		FP_SCR1_EX(%a6)
11434*4882a593Smuzhiyun	clr.l		FP_SCR1_HI(%a6)
11435*4882a593Smuzhiyun	clr.l		FP_SCR1_LO(%a6)
11436*4882a593Smuzhiyun	bra.w		fadd_zero_entry		# go execute fadd
11437*4882a593Smuzhiyun
11438*4882a593Smuzhiyunfadd_zero_src:
11439*4882a593Smuzhiyun	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
11440*4882a593Smuzhiyun	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
11441*4882a593Smuzhiyun	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
11442*4882a593Smuzhiyun	bsr.l		scale_to_zero_dst	# scale the operand
11443*4882a593Smuzhiyun	clr.w		FP_SCR0_EX(%a6)
11444*4882a593Smuzhiyun	clr.l		FP_SCR0_HI(%a6)
11445*4882a593Smuzhiyun	clr.l		FP_SCR0_LO(%a6)
11446*4882a593Smuzhiyun	bra.w		fadd_zero_entry		# go execute fadd
11447*4882a593Smuzhiyun
11448*4882a593Smuzhiyun#
11449*4882a593Smuzhiyun# both operands are INFs. an OPERR will result if the INFs have
11450*4882a593Smuzhiyun# different signs. else, an INF of the same sign is returned
11451*4882a593Smuzhiyun#
11452*4882a593Smuzhiyunfadd_inf_2:
11453*4882a593Smuzhiyun	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
11454*4882a593Smuzhiyun	mov.b		DST_EX(%a1),%d1
11455*4882a593Smuzhiyun	eor.b		%d1,%d0
11456*4882a593Smuzhiyun	bmi.l		res_operr		# weed out (-INF)+(+INF)
11457*4882a593Smuzhiyun
11458*4882a593Smuzhiyun# ok, so it's not an OPERR. but, we do have to remember to return the
11459*4882a593Smuzhiyun# src INF since that's where the 881/882 gets the j-bit from...
11460*4882a593Smuzhiyun
11461*4882a593Smuzhiyun#
11462*4882a593Smuzhiyun# operands are INF and one of {ZERO, INF, DENORM, NORM}
11463*4882a593Smuzhiyun#
11464*4882a593Smuzhiyunfadd_inf_src:
11465*4882a593Smuzhiyun	fmovm.x		SRC(%a0),&0x80		# return src INF
11466*4882a593Smuzhiyun	tst.b		SRC_EX(%a0)		# is INF positive?
11467*4882a593Smuzhiyun	bpl.b		fadd_inf_done		# yes; we're done
11468*4882a593Smuzhiyun	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
11469*4882a593Smuzhiyun	rts
11470*4882a593Smuzhiyun
11471*4882a593Smuzhiyun#
11472*4882a593Smuzhiyun# operands are INF and one of {ZERO, INF, DENORM, NORM}
11473*4882a593Smuzhiyun#
11474*4882a593Smuzhiyunfadd_inf_dst:
11475*4882a593Smuzhiyun	fmovm.x		DST(%a1),&0x80		# return dst INF
11476*4882a593Smuzhiyun	tst.b		DST_EX(%a1)		# is INF positive?
11477*4882a593Smuzhiyun	bpl.b		fadd_inf_done		# yes; we're done
11478*4882a593Smuzhiyun	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
11479*4882a593Smuzhiyun	rts
11480*4882a593Smuzhiyun
11481*4882a593Smuzhiyunfadd_inf_done:
11482*4882a593Smuzhiyun	mov.b		&inf_bmask,FPSR_CC(%a6) # set INF
11483*4882a593Smuzhiyun	rts
11484*4882a593Smuzhiyun
11485*4882a593Smuzhiyun#########################################################################
11486*4882a593Smuzhiyun# XDEF ****************************************************************	#
11487*4882a593Smuzhiyun#	fsub(): emulates the fsub instruction				#
11488*4882a593Smuzhiyun#	fssub(): emulates the fssub instruction				#
11489*4882a593Smuzhiyun#	fdsub(): emulates the fdsub instruction				#
11490*4882a593Smuzhiyun#									#
11491*4882a593Smuzhiyun# XREF ****************************************************************	#
11492*4882a593Smuzhiyun#	addsub_scaler2() - scale the operands so they won't take exc	#
11493*4882a593Smuzhiyun#	ovf_res() - return default overflow result			#
11494*4882a593Smuzhiyun#	unf_res() - return default underflow result			#
11495*4882a593Smuzhiyun#	res_qnan() - set QNAN result					#
11496*4882a593Smuzhiyun#	res_snan() - set SNAN result					#
11497*4882a593Smuzhiyun#	res_operr() - set OPERR result					#
11498*4882a593Smuzhiyun#	scale_to_zero_src() - set src operand exponent equal to zero	#
11499*4882a593Smuzhiyun#	scale_to_zero_dst() - set dst operand exponent equal to zero	#
11500*4882a593Smuzhiyun#									#
11501*4882a593Smuzhiyun# INPUT ***************************************************************	#
11502*4882a593Smuzhiyun#	a0 = pointer to extended precision source operand		#
11503*4882a593Smuzhiyun#	a1 = pointer to extended precision destination operand		#
11504*4882a593Smuzhiyun#									#
11505*4882a593Smuzhiyun# OUTPUT **************************************************************	#
11506*4882a593Smuzhiyun#	fp0 = result							#
11507*4882a593Smuzhiyun#	fp1 = EXOP (if exception occurred)				#
11508*4882a593Smuzhiyun#									#
11509*4882a593Smuzhiyun# ALGORITHM ***********************************************************	#
11510*4882a593Smuzhiyun#	Handle NANs, infinities, and zeroes as special cases. Divide	#
11511*4882a593Smuzhiyun# norms into extended, single, and double precision.			#
11512*4882a593Smuzhiyun#	Do subtraction after scaling exponents such that exception won't#
11513*4882a593Smuzhiyun# occur. Then, check result exponent to see if exception would have	#
11514*4882a593Smuzhiyun# occurred. If so, return default result and maybe EXOP. Else, insert	#
11515*4882a593Smuzhiyun# the correct result exponent and return. Set FPSR bits as appropriate.	#
11516*4882a593Smuzhiyun#									#
11517*4882a593Smuzhiyun#########################################################################
11518*4882a593Smuzhiyun
11519*4882a593Smuzhiyun	global		fssub
11520*4882a593Smuzhiyunfssub:
11521*4882a593Smuzhiyun	andi.b		&0x30,%d0		# clear rnd prec
11522*4882a593Smuzhiyun	ori.b		&s_mode*0x10,%d0	# insert sgl prec
11523*4882a593Smuzhiyun	bra.b		fsub
11524*4882a593Smuzhiyun
11525*4882a593Smuzhiyun	global		fdsub
11526*4882a593Smuzhiyunfdsub:
11527*4882a593Smuzhiyun	andi.b		&0x30,%d0		# clear rnd prec
11528*4882a593Smuzhiyun	ori.b		&d_mode*0x10,%d0	# insert dbl prec
11529*4882a593Smuzhiyun
11530*4882a593Smuzhiyun	global		fsub
11531*4882a593Smuzhiyunfsub:
11532*4882a593Smuzhiyun	mov.l		%d0,L_SCR3(%a6)		# store rnd info
11533*4882a593Smuzhiyun
11534*4882a593Smuzhiyun	clr.w		%d1
11535*4882a593Smuzhiyun	mov.b		DTAG(%a6),%d1
11536*4882a593Smuzhiyun	lsl.b		&0x3,%d1
11537*4882a593Smuzhiyun	or.b		STAG(%a6),%d1		# combine src tags
11538*4882a593Smuzhiyun
11539*4882a593Smuzhiyun	bne.w		fsub_not_norm		# optimize on non-norm input
11540*4882a593Smuzhiyun
11541*4882a593Smuzhiyun#
11542*4882a593Smuzhiyun# SUB: norms and denorms
11543*4882a593Smuzhiyun#
11544*4882a593Smuzhiyunfsub_norm:
11545*4882a593Smuzhiyun	bsr.l		addsub_scaler2		# scale exponents
11546*4882a593Smuzhiyun
11547*4882a593Smuzhiyunfsub_zero_entry:
11548*4882a593Smuzhiyun	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
11549*4882a593Smuzhiyun
11550*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr		# clear FPSR
11551*4882a593Smuzhiyun	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
11552*4882a593Smuzhiyun
11553*4882a593Smuzhiyun	fsub.x		FP_SCR0(%a6),%fp0	# execute subtract
11554*4882a593Smuzhiyun
11555*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# clear FPCR
11556*4882a593Smuzhiyun	fmov.l		%fpsr,%d1		# fetch INEX2, N, Z
11557*4882a593Smuzhiyun
11558*4882a593Smuzhiyun	or.l		%d1,USER_FPSR(%a6)	# save exc and ccode bits
11559*4882a593Smuzhiyun
11560*4882a593Smuzhiyun	fbeq.w		fsub_zero_exit		# if result zero, end now
11561*4882a593Smuzhiyun
11562*4882a593Smuzhiyun	mov.l		%d2,-(%sp)		# save d2
11563*4882a593Smuzhiyun
11564*4882a593Smuzhiyun	fmovm.x		&0x01,-(%sp)		# save result to stack
11565*4882a593Smuzhiyun
11566*4882a593Smuzhiyun	mov.w		2+L_SCR3(%a6),%d1
11567*4882a593Smuzhiyun	lsr.b		&0x6,%d1
11568*4882a593Smuzhiyun
11569*4882a593Smuzhiyun	mov.w		(%sp),%d2		# fetch new exponent
11570*4882a593Smuzhiyun	andi.l		&0x7fff,%d2		# strip sign
11571*4882a593Smuzhiyun	sub.l		%d0,%d2			# add scale factor
11572*4882a593Smuzhiyun
11573*4882a593Smuzhiyun	cmp.l		%d2,(tbl_fsub_ovfl.b,%pc,%d1.w*4) # is it an overflow?
11574*4882a593Smuzhiyun	bge.b		fsub_ovfl		# yes
11575*4882a593Smuzhiyun
11576*4882a593Smuzhiyun	cmp.l		%d2,(tbl_fsub_unfl.b,%pc,%d1.w*4) # is it an underflow?
11577*4882a593Smuzhiyun	blt.w		fsub_unfl		# yes
11578*4882a593Smuzhiyun	beq.w		fsub_may_unfl		# maybe; go find out
11579*4882a593Smuzhiyun
11580*4882a593Smuzhiyunfsub_normal:
11581*4882a593Smuzhiyun	mov.w		(%sp),%d1
11582*4882a593Smuzhiyun	andi.w		&0x8000,%d1		# keep sign
11583*4882a593Smuzhiyun	or.w		%d2,%d1			# insert new exponent
11584*4882a593Smuzhiyun	mov.w		%d1,(%sp)		# insert new exponent
11585*4882a593Smuzhiyun
11586*4882a593Smuzhiyun	fmovm.x		(%sp)+,&0x80		# return result in fp0
11587*4882a593Smuzhiyun
11588*4882a593Smuzhiyun	mov.l		(%sp)+,%d2		# restore d2
11589*4882a593Smuzhiyun	rts
11590*4882a593Smuzhiyun
11591*4882a593Smuzhiyunfsub_zero_exit:
11592*4882a593Smuzhiyun#	fmov.s		&0x00000000,%fp0	# return zero in fp0
11593*4882a593Smuzhiyun	rts
11594*4882a593Smuzhiyun
11595*4882a593Smuzhiyuntbl_fsub_ovfl:
11596*4882a593Smuzhiyun	long		0x7fff			# ext ovfl
11597*4882a593Smuzhiyun	long		0x407f			# sgl ovfl
11598*4882a593Smuzhiyun	long		0x43ff			# dbl ovfl
11599*4882a593Smuzhiyun
11600*4882a593Smuzhiyuntbl_fsub_unfl:
11601*4882a593Smuzhiyun	long	        0x0000			# ext unfl
11602*4882a593Smuzhiyun	long		0x3f81			# sgl unfl
11603*4882a593Smuzhiyun	long		0x3c01			# dbl unfl
11604*4882a593Smuzhiyun
11605*4882a593Smuzhiyunfsub_ovfl:
11606*4882a593Smuzhiyun	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
11607*4882a593Smuzhiyun
11608*4882a593Smuzhiyun	mov.b		FPCR_ENABLE(%a6),%d1
11609*4882a593Smuzhiyun	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
11610*4882a593Smuzhiyun	bne.b		fsub_ovfl_ena		# yes
11611*4882a593Smuzhiyun
11612*4882a593Smuzhiyun	add.l		&0xc,%sp
11613*4882a593Smuzhiyunfsub_ovfl_dis:
11614*4882a593Smuzhiyun	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
11615*4882a593Smuzhiyun	sne		%d1			# set sign param accordingly
11616*4882a593Smuzhiyun	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
11617*4882a593Smuzhiyun	bsr.l		ovf_res			# calculate default result
11618*4882a593Smuzhiyun	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
11619*4882a593Smuzhiyun	fmovm.x		(%a0),&0x80		# return default result in fp0
11620*4882a593Smuzhiyun	mov.l		(%sp)+,%d2		# restore d2
11621*4882a593Smuzhiyun	rts
11622*4882a593Smuzhiyun
11623*4882a593Smuzhiyunfsub_ovfl_ena:
11624*4882a593Smuzhiyun	mov.b		L_SCR3(%a6),%d1
11625*4882a593Smuzhiyun	andi.b		&0xc0,%d1		# is precision extended?
11626*4882a593Smuzhiyun	bne.b		fsub_ovfl_ena_sd	# no
11627*4882a593Smuzhiyun
11628*4882a593Smuzhiyunfsub_ovfl_ena_cont:
11629*4882a593Smuzhiyun	mov.w		(%sp),%d1		# fetch {sgn,exp}
11630*4882a593Smuzhiyun	andi.w		&0x8000,%d1		# keep sign
11631*4882a593Smuzhiyun	subi.l		&0x6000,%d2		# subtract new bias
11632*4882a593Smuzhiyun	andi.w		&0x7fff,%d2		# clear top bit
11633*4882a593Smuzhiyun	or.w		%d2,%d1			# concat sign,exp
11634*4882a593Smuzhiyun	mov.w		%d1,(%sp)		# insert new exponent
11635*4882a593Smuzhiyun
11636*4882a593Smuzhiyun	fmovm.x		(%sp)+,&0x40		# return EXOP in fp1
11637*4882a593Smuzhiyun	bra.b		fsub_ovfl_dis
11638*4882a593Smuzhiyun
11639*4882a593Smuzhiyunfsub_ovfl_ena_sd:
11640*4882a593Smuzhiyun	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
11641*4882a593Smuzhiyun
11642*4882a593Smuzhiyun	mov.l		L_SCR3(%a6),%d1
11643*4882a593Smuzhiyun	andi.b		&0x30,%d1		# clear rnd prec
11644*4882a593Smuzhiyun	fmov.l		%d1,%fpcr		# set FPCR
11645*4882a593Smuzhiyun
11646*4882a593Smuzhiyun	fsub.x		FP_SCR0(%a6),%fp0	# execute subtract
11647*4882a593Smuzhiyun
11648*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# clear FPCR
11649*4882a593Smuzhiyun
11650*4882a593Smuzhiyun	add.l		&0xc,%sp
11651*4882a593Smuzhiyun	fmovm.x		&0x01,-(%sp)
11652*4882a593Smuzhiyun	bra.b		fsub_ovfl_ena_cont
11653*4882a593Smuzhiyun
11654*4882a593Smuzhiyunfsub_unfl:
11655*4882a593Smuzhiyun	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
11656*4882a593Smuzhiyun
11657*4882a593Smuzhiyun	add.l		&0xc,%sp
11658*4882a593Smuzhiyun
11659*4882a593Smuzhiyun	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
11660*4882a593Smuzhiyun
11661*4882a593Smuzhiyun	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
11662*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr		# clear FPSR
11663*4882a593Smuzhiyun
11664*4882a593Smuzhiyun	fsub.x		FP_SCR0(%a6),%fp0	# execute subtract
11665*4882a593Smuzhiyun
11666*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# clear FPCR
11667*4882a593Smuzhiyun	fmov.l		%fpsr,%d1		# save status
11668*4882a593Smuzhiyun
11669*4882a593Smuzhiyun	or.l		%d1,USER_FPSR(%a6)
11670*4882a593Smuzhiyun
11671*4882a593Smuzhiyun	mov.b		FPCR_ENABLE(%a6),%d1
11672*4882a593Smuzhiyun	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
11673*4882a593Smuzhiyun	bne.b		fsub_unfl_ena		# yes
11674*4882a593Smuzhiyun
11675*4882a593Smuzhiyunfsub_unfl_dis:
11676*4882a593Smuzhiyun	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
11677*4882a593Smuzhiyun
11678*4882a593Smuzhiyun	lea		FP_SCR0(%a6),%a0	# pass: result addr
11679*4882a593Smuzhiyun	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
11680*4882a593Smuzhiyun	bsr.l		unf_res			# calculate default result
11681*4882a593Smuzhiyun	or.b		%d0,FPSR_CC(%a6)	# 'Z' may have been set
11682*4882a593Smuzhiyun	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
11683*4882a593Smuzhiyun	mov.l		(%sp)+,%d2		# restore d2
11684*4882a593Smuzhiyun	rts
11685*4882a593Smuzhiyun
11686*4882a593Smuzhiyunfsub_unfl_ena:
11687*4882a593Smuzhiyun	fmovm.x		FP_SCR1(%a6),&0x40
11688*4882a593Smuzhiyun
11689*4882a593Smuzhiyun	mov.l		L_SCR3(%a6),%d1
11690*4882a593Smuzhiyun	andi.b		&0xc0,%d1		# is precision extended?
11691*4882a593Smuzhiyun	bne.b		fsub_unfl_ena_sd	# no
11692*4882a593Smuzhiyun
11693*4882a593Smuzhiyun	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
11694*4882a593Smuzhiyun
11695*4882a593Smuzhiyunfsub_unfl_ena_cont:
11696*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr		# clear FPSR
11697*4882a593Smuzhiyun
11698*4882a593Smuzhiyun	fsub.x		FP_SCR0(%a6),%fp1	# execute subtract
11699*4882a593Smuzhiyun
11700*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# clear FPCR
11701*4882a593Smuzhiyun
11702*4882a593Smuzhiyun	fmovm.x		&0x40,FP_SCR0(%a6)	# store result to stack
11703*4882a593Smuzhiyun	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
11704*4882a593Smuzhiyun	mov.l		%d1,%d2			# make a copy
11705*4882a593Smuzhiyun	andi.l		&0x7fff,%d1		# strip sign
11706*4882a593Smuzhiyun	andi.w		&0x8000,%d2		# keep old sign
11707*4882a593Smuzhiyun	sub.l		%d0,%d1			# add scale factor
11708*4882a593Smuzhiyun	addi.l		&0x6000,%d1		# subtract new bias
11709*4882a593Smuzhiyun	andi.w		&0x7fff,%d1		# clear top bit
11710*4882a593Smuzhiyun	or.w		%d2,%d1			# concat sgn,exp
11711*4882a593Smuzhiyun	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
11712*4882a593Smuzhiyun	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
11713*4882a593Smuzhiyun	bra.w		fsub_unfl_dis
11714*4882a593Smuzhiyun
11715*4882a593Smuzhiyunfsub_unfl_ena_sd:
11716*4882a593Smuzhiyun	mov.l		L_SCR3(%a6),%d1
11717*4882a593Smuzhiyun	andi.b		&0x30,%d1		# clear rnd prec
11718*4882a593Smuzhiyun	fmov.l		%d1,%fpcr		# set FPCR
11719*4882a593Smuzhiyun
11720*4882a593Smuzhiyun	bra.b		fsub_unfl_ena_cont
11721*4882a593Smuzhiyun
11722*4882a593Smuzhiyun#
11723*4882a593Smuzhiyun# result is equal to the smallest normalized number in the selected precision
11724*4882a593Smuzhiyun# if the precision is extended, this result could not have come from an
11725*4882a593Smuzhiyun# underflow that rounded up.
11726*4882a593Smuzhiyun#
11727*4882a593Smuzhiyunfsub_may_unfl:
11728*4882a593Smuzhiyun	mov.l		L_SCR3(%a6),%d1
11729*4882a593Smuzhiyun	andi.b		&0xc0,%d1		# fetch rnd prec
11730*4882a593Smuzhiyun	beq.w		fsub_normal		# yes; no underflow occurred
11731*4882a593Smuzhiyun
11732*4882a593Smuzhiyun	mov.l		0x4(%sp),%d1
11733*4882a593Smuzhiyun	cmpi.l		%d1,&0x80000000		# is hi(man) = 0x80000000?
11734*4882a593Smuzhiyun	bne.w		fsub_normal		# no; no underflow occurred
11735*4882a593Smuzhiyun
11736*4882a593Smuzhiyun	tst.l		0x8(%sp)		# is lo(man) = 0x0?
11737*4882a593Smuzhiyun	bne.w		fsub_normal		# no; no underflow occurred
11738*4882a593Smuzhiyun
11739*4882a593Smuzhiyun	btst		&inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
11740*4882a593Smuzhiyun	beq.w		fsub_normal		# no; no underflow occurred
11741*4882a593Smuzhiyun
11742*4882a593Smuzhiyun#
11743*4882a593Smuzhiyun# ok, so now the result has a exponent equal to the smallest normalized
11744*4882a593Smuzhiyun# exponent for the selected precision. also, the mantissa is equal to
11745*4882a593Smuzhiyun# 0x8000000000000000 and this mantissa is the result of rounding non-zero
11746*4882a593Smuzhiyun# g,r,s.
11747*4882a593Smuzhiyun# now, we must determine whether the pre-rounded result was an underflow
11748*4882a593Smuzhiyun# rounded "up" or a normalized number rounded "down".
11749*4882a593Smuzhiyun# so, we do this be re-executing the add using RZ as the rounding mode and
11750*4882a593Smuzhiyun# seeing if the new result is smaller or equal to the current result.
11751*4882a593Smuzhiyun#
11752*4882a593Smuzhiyun	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
11753*4882a593Smuzhiyun
11754*4882a593Smuzhiyun	mov.l		L_SCR3(%a6),%d1
11755*4882a593Smuzhiyun	andi.b		&0xc0,%d1		# keep rnd prec
11756*4882a593Smuzhiyun	ori.b		&rz_mode*0x10,%d1	# insert rnd mode
11757*4882a593Smuzhiyun	fmov.l		%d1,%fpcr		# set FPCR
11758*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr		# clear FPSR
11759*4882a593Smuzhiyun
11760*4882a593Smuzhiyun	fsub.x		FP_SCR0(%a6),%fp1	# execute subtract
11761*4882a593Smuzhiyun
11762*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# clear FPCR
11763*4882a593Smuzhiyun
11764*4882a593Smuzhiyun	fabs.x		%fp0			# compare absolute values
11765*4882a593Smuzhiyun	fabs.x		%fp1
11766*4882a593Smuzhiyun	fcmp.x		%fp0,%fp1		# is first result > second?
11767*4882a593Smuzhiyun
11768*4882a593Smuzhiyun	fbgt.w		fsub_unfl		# yes; it's an underflow
11769*4882a593Smuzhiyun	bra.w		fsub_normal		# no; it's not an underflow
11770*4882a593Smuzhiyun
11771*4882a593Smuzhiyun##########################################################################
11772*4882a593Smuzhiyun
11773*4882a593Smuzhiyun#
11774*4882a593Smuzhiyun# Sub: inputs are not both normalized; what are they?
11775*4882a593Smuzhiyun#
11776*4882a593Smuzhiyunfsub_not_norm:
11777*4882a593Smuzhiyun	mov.w		(tbl_fsub_op.b,%pc,%d1.w*2),%d1
11778*4882a593Smuzhiyun	jmp		(tbl_fsub_op.b,%pc,%d1.w*1)
11779*4882a593Smuzhiyun
11780*4882a593Smuzhiyun	swbeg		&48
11781*4882a593Smuzhiyuntbl_fsub_op:
11782*4882a593Smuzhiyun	short		fsub_norm	- tbl_fsub_op # NORM - NORM
11783*4882a593Smuzhiyun	short		fsub_zero_src	- tbl_fsub_op # NORM - ZERO
11784*4882a593Smuzhiyun	short		fsub_inf_src	- tbl_fsub_op # NORM - INF
11785*4882a593Smuzhiyun	short		fsub_res_qnan	- tbl_fsub_op # NORM - QNAN
11786*4882a593Smuzhiyun	short		fsub_norm	- tbl_fsub_op # NORM - DENORM
11787*4882a593Smuzhiyun	short		fsub_res_snan	- tbl_fsub_op # NORM - SNAN
11788*4882a593Smuzhiyun	short		tbl_fsub_op	- tbl_fsub_op #
11789*4882a593Smuzhiyun	short		tbl_fsub_op	- tbl_fsub_op #
11790*4882a593Smuzhiyun
11791*4882a593Smuzhiyun	short		fsub_zero_dst	- tbl_fsub_op # ZERO - NORM
11792*4882a593Smuzhiyun	short		fsub_zero_2	- tbl_fsub_op # ZERO - ZERO
11793*4882a593Smuzhiyun	short		fsub_inf_src	- tbl_fsub_op # ZERO - INF
11794*4882a593Smuzhiyun	short		fsub_res_qnan	- tbl_fsub_op # NORM - QNAN
11795*4882a593Smuzhiyun	short		fsub_zero_dst	- tbl_fsub_op # ZERO - DENORM
11796*4882a593Smuzhiyun	short		fsub_res_snan	- tbl_fsub_op # NORM - SNAN
11797*4882a593Smuzhiyun	short		tbl_fsub_op	- tbl_fsub_op #
11798*4882a593Smuzhiyun	short		tbl_fsub_op	- tbl_fsub_op #
11799*4882a593Smuzhiyun
11800*4882a593Smuzhiyun	short		fsub_inf_dst	- tbl_fsub_op # INF - NORM
11801*4882a593Smuzhiyun	short		fsub_inf_dst	- tbl_fsub_op # INF - ZERO
11802*4882a593Smuzhiyun	short		fsub_inf_2	- tbl_fsub_op # INF - INF
11803*4882a593Smuzhiyun	short		fsub_res_qnan	- tbl_fsub_op # NORM - QNAN
11804*4882a593Smuzhiyun	short		fsub_inf_dst	- tbl_fsub_op # INF - DENORM
11805*4882a593Smuzhiyun	short		fsub_res_snan	- tbl_fsub_op # NORM - SNAN
11806*4882a593Smuzhiyun	short		tbl_fsub_op	- tbl_fsub_op #
11807*4882a593Smuzhiyun	short		tbl_fsub_op	- tbl_fsub_op #
11808*4882a593Smuzhiyun
11809*4882a593Smuzhiyun	short		fsub_res_qnan	- tbl_fsub_op # QNAN - NORM
11810*4882a593Smuzhiyun	short		fsub_res_qnan	- tbl_fsub_op # QNAN - ZERO
11811*4882a593Smuzhiyun	short		fsub_res_qnan	- tbl_fsub_op # QNAN - INF
11812*4882a593Smuzhiyun	short		fsub_res_qnan	- tbl_fsub_op # QNAN - QNAN
11813*4882a593Smuzhiyun	short		fsub_res_qnan	- tbl_fsub_op # QNAN - DENORM
11814*4882a593Smuzhiyun	short		fsub_res_snan	- tbl_fsub_op # QNAN - SNAN
11815*4882a593Smuzhiyun	short		tbl_fsub_op	- tbl_fsub_op #
11816*4882a593Smuzhiyun	short		tbl_fsub_op	- tbl_fsub_op #
11817*4882a593Smuzhiyun
11818*4882a593Smuzhiyun	short		fsub_norm	- tbl_fsub_op # DENORM - NORM
11819*4882a593Smuzhiyun	short		fsub_zero_src	- tbl_fsub_op # DENORM - ZERO
11820*4882a593Smuzhiyun	short		fsub_inf_src	- tbl_fsub_op # DENORM - INF
11821*4882a593Smuzhiyun	short		fsub_res_qnan	- tbl_fsub_op # NORM - QNAN
11822*4882a593Smuzhiyun	short		fsub_norm	- tbl_fsub_op # DENORM - DENORM
11823*4882a593Smuzhiyun	short		fsub_res_snan	- tbl_fsub_op # NORM - SNAN
11824*4882a593Smuzhiyun	short		tbl_fsub_op	- tbl_fsub_op #
11825*4882a593Smuzhiyun	short		tbl_fsub_op	- tbl_fsub_op #
11826*4882a593Smuzhiyun
11827*4882a593Smuzhiyun	short		fsub_res_snan	- tbl_fsub_op # SNAN - NORM
11828*4882a593Smuzhiyun	short		fsub_res_snan	- tbl_fsub_op # SNAN - ZERO
11829*4882a593Smuzhiyun	short		fsub_res_snan	- tbl_fsub_op # SNAN - INF
11830*4882a593Smuzhiyun	short		fsub_res_snan	- tbl_fsub_op # SNAN - QNAN
11831*4882a593Smuzhiyun	short		fsub_res_snan	- tbl_fsub_op # SNAN - DENORM
11832*4882a593Smuzhiyun	short		fsub_res_snan	- tbl_fsub_op # SNAN - SNAN
11833*4882a593Smuzhiyun	short		tbl_fsub_op	- tbl_fsub_op #
11834*4882a593Smuzhiyun	short		tbl_fsub_op	- tbl_fsub_op #
11835*4882a593Smuzhiyun
11836*4882a593Smuzhiyunfsub_res_qnan:
11837*4882a593Smuzhiyun	bra.l		res_qnan
11838*4882a593Smuzhiyunfsub_res_snan:
11839*4882a593Smuzhiyun	bra.l		res_snan
11840*4882a593Smuzhiyun
11841*4882a593Smuzhiyun#
11842*4882a593Smuzhiyun# both operands are ZEROes
11843*4882a593Smuzhiyun#
11844*4882a593Smuzhiyunfsub_zero_2:
11845*4882a593Smuzhiyun	mov.b		SRC_EX(%a0),%d0
11846*4882a593Smuzhiyun	mov.b		DST_EX(%a1),%d1
11847*4882a593Smuzhiyun	eor.b		%d1,%d0
11848*4882a593Smuzhiyun	bpl.b		fsub_zero_2_chk_rm
11849*4882a593Smuzhiyun
11850*4882a593Smuzhiyun# the signs are opposite, so, return a ZERO w/ the sign of the dst ZERO
11851*4882a593Smuzhiyun	tst.b		%d0			# is dst negative?
11852*4882a593Smuzhiyun	bmi.b		fsub_zero_2_rm		# yes
11853*4882a593Smuzhiyun	fmov.s		&0x00000000,%fp0	# no; return +ZERO
11854*4882a593Smuzhiyun	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
11855*4882a593Smuzhiyun	rts
11856*4882a593Smuzhiyun
11857*4882a593Smuzhiyun#
11858*4882a593Smuzhiyun# the ZEROes have the same signs:
11859*4882a593Smuzhiyun# - Therefore, we return +ZERO if the rounding mode is RN,RZ, or RP
11860*4882a593Smuzhiyun# - -ZERO is returned in the case of RM.
11861*4882a593Smuzhiyun#
11862*4882a593Smuzhiyunfsub_zero_2_chk_rm:
11863*4882a593Smuzhiyun	mov.b		3+L_SCR3(%a6),%d1
11864*4882a593Smuzhiyun	andi.b		&0x30,%d1		# extract rnd mode
11865*4882a593Smuzhiyun	cmpi.b		%d1,&rm_mode*0x10	# is rnd mode = RM?
11866*4882a593Smuzhiyun	beq.b		fsub_zero_2_rm		# yes
11867*4882a593Smuzhiyun	fmov.s		&0x00000000,%fp0	# no; return +ZERO
11868*4882a593Smuzhiyun	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
11869*4882a593Smuzhiyun	rts
11870*4882a593Smuzhiyun
11871*4882a593Smuzhiyunfsub_zero_2_rm:
11872*4882a593Smuzhiyun	fmov.s		&0x80000000,%fp0	# return -ZERO
11873*4882a593Smuzhiyun	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# set Z/NEG
11874*4882a593Smuzhiyun	rts
11875*4882a593Smuzhiyun
11876*4882a593Smuzhiyun#
11877*4882a593Smuzhiyun# one operand is a ZERO and the other is a DENORM or a NORM.
11878*4882a593Smuzhiyun# scale the DENORM or NORM and jump to the regular fsub routine.
11879*4882a593Smuzhiyun#
11880*4882a593Smuzhiyunfsub_zero_dst:
11881*4882a593Smuzhiyun	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
11882*4882a593Smuzhiyun	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
11883*4882a593Smuzhiyun	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
11884*4882a593Smuzhiyun	bsr.l		scale_to_zero_src	# scale the operand
11885*4882a593Smuzhiyun	clr.w		FP_SCR1_EX(%a6)
11886*4882a593Smuzhiyun	clr.l		FP_SCR1_HI(%a6)
11887*4882a593Smuzhiyun	clr.l		FP_SCR1_LO(%a6)
11888*4882a593Smuzhiyun	bra.w		fsub_zero_entry		# go execute fsub
11889*4882a593Smuzhiyun
11890*4882a593Smuzhiyunfsub_zero_src:
11891*4882a593Smuzhiyun	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
11892*4882a593Smuzhiyun	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
11893*4882a593Smuzhiyun	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
11894*4882a593Smuzhiyun	bsr.l		scale_to_zero_dst	# scale the operand
11895*4882a593Smuzhiyun	clr.w		FP_SCR0_EX(%a6)
11896*4882a593Smuzhiyun	clr.l		FP_SCR0_HI(%a6)
11897*4882a593Smuzhiyun	clr.l		FP_SCR0_LO(%a6)
11898*4882a593Smuzhiyun	bra.w		fsub_zero_entry		# go execute fsub
11899*4882a593Smuzhiyun
11900*4882a593Smuzhiyun#
11901*4882a593Smuzhiyun# both operands are INFs. an OPERR will result if the INFs have the
11902*4882a593Smuzhiyun# same signs. else,
11903*4882a593Smuzhiyun#
11904*4882a593Smuzhiyunfsub_inf_2:
11905*4882a593Smuzhiyun	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
11906*4882a593Smuzhiyun	mov.b		DST_EX(%a1),%d1
11907*4882a593Smuzhiyun	eor.b		%d1,%d0
11908*4882a593Smuzhiyun	bpl.l		res_operr		# weed out (-INF)+(+INF)
11909*4882a593Smuzhiyun
11910*4882a593Smuzhiyun# ok, so it's not an OPERR. but we do have to remember to return
11911*4882a593Smuzhiyun# the src INF since that's where the 881/882 gets the j-bit.
11912*4882a593Smuzhiyun
11913*4882a593Smuzhiyunfsub_inf_src:
11914*4882a593Smuzhiyun	fmovm.x		SRC(%a0),&0x80		# return src INF
11915*4882a593Smuzhiyun	fneg.x		%fp0			# invert sign
11916*4882a593Smuzhiyun	fbge.w		fsub_inf_done		# sign is now positive
11917*4882a593Smuzhiyun	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
11918*4882a593Smuzhiyun	rts
11919*4882a593Smuzhiyun
11920*4882a593Smuzhiyunfsub_inf_dst:
11921*4882a593Smuzhiyun	fmovm.x		DST(%a1),&0x80		# return dst INF
11922*4882a593Smuzhiyun	tst.b		DST_EX(%a1)		# is INF negative?
11923*4882a593Smuzhiyun	bpl.b		fsub_inf_done		# no
11924*4882a593Smuzhiyun	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
11925*4882a593Smuzhiyun	rts
11926*4882a593Smuzhiyun
11927*4882a593Smuzhiyunfsub_inf_done:
11928*4882a593Smuzhiyun	mov.b		&inf_bmask,FPSR_CC(%a6)	# set INF
11929*4882a593Smuzhiyun	rts
11930*4882a593Smuzhiyun
11931*4882a593Smuzhiyun#########################################################################
11932*4882a593Smuzhiyun# XDEF ****************************************************************	#
11933*4882a593Smuzhiyun#	fsqrt(): emulates the fsqrt instruction				#
11934*4882a593Smuzhiyun#	fssqrt(): emulates the fssqrt instruction			#
11935*4882a593Smuzhiyun#	fdsqrt(): emulates the fdsqrt instruction			#
11936*4882a593Smuzhiyun#									#
11937*4882a593Smuzhiyun# XREF ****************************************************************	#
11938*4882a593Smuzhiyun#	scale_sqrt() - scale the source operand				#
11939*4882a593Smuzhiyun#	unf_res() - return default underflow result			#
11940*4882a593Smuzhiyun#	ovf_res() - return default overflow result			#
11941*4882a593Smuzhiyun#	res_qnan_1op() - return QNAN result				#
11942*4882a593Smuzhiyun#	res_snan_1op() - return SNAN result				#
11943*4882a593Smuzhiyun#									#
11944*4882a593Smuzhiyun# INPUT ***************************************************************	#
11945*4882a593Smuzhiyun#	a0 = pointer to extended precision source operand		#
11946*4882a593Smuzhiyun#	d0  rnd prec,mode						#
11947*4882a593Smuzhiyun#									#
11948*4882a593Smuzhiyun# OUTPUT **************************************************************	#
11949*4882a593Smuzhiyun#	fp0 = result							#
11950*4882a593Smuzhiyun#	fp1 = EXOP (if exception occurred)				#
11951*4882a593Smuzhiyun#									#
11952*4882a593Smuzhiyun# ALGORITHM ***********************************************************	#
11953*4882a593Smuzhiyun#	Handle NANs, infinities, and zeroes as special cases. Divide	#
11954*4882a593Smuzhiyun# norms/denorms into ext/sgl/dbl precision.				#
11955*4882a593Smuzhiyun#	For norms/denorms, scale the exponents such that a sqrt		#
11956*4882a593Smuzhiyun# instruction won't cause an exception. Use the regular fsqrt to	#
11957*4882a593Smuzhiyun# compute a result. Check if the regular operands would have taken	#
11958*4882a593Smuzhiyun# an exception. If so, return the default overflow/underflow result	#
11959*4882a593Smuzhiyun# and return the EXOP if exceptions are enabled. Else, scale the	#
11960*4882a593Smuzhiyun# result operand to the proper exponent.				#
11961*4882a593Smuzhiyun#									#
11962*4882a593Smuzhiyun#########################################################################
11963*4882a593Smuzhiyun
11964*4882a593Smuzhiyun	global		fssqrt
11965*4882a593Smuzhiyunfssqrt:
11966*4882a593Smuzhiyun	andi.b		&0x30,%d0		# clear rnd prec
11967*4882a593Smuzhiyun	ori.b		&s_mode*0x10,%d0	# insert sgl precision
11968*4882a593Smuzhiyun	bra.b		fsqrt
11969*4882a593Smuzhiyun
11970*4882a593Smuzhiyun	global		fdsqrt
11971*4882a593Smuzhiyunfdsqrt:
11972*4882a593Smuzhiyun	andi.b		&0x30,%d0		# clear rnd prec
11973*4882a593Smuzhiyun	ori.b		&d_mode*0x10,%d0	# insert dbl precision
11974*4882a593Smuzhiyun
11975*4882a593Smuzhiyun	global		fsqrt
11976*4882a593Smuzhiyunfsqrt:
11977*4882a593Smuzhiyun	mov.l		%d0,L_SCR3(%a6)		# store rnd info
11978*4882a593Smuzhiyun	clr.w		%d1
11979*4882a593Smuzhiyun	mov.b		STAG(%a6),%d1
11980*4882a593Smuzhiyun	bne.w		fsqrt_not_norm		# optimize on non-norm input
11981*4882a593Smuzhiyun
11982*4882a593Smuzhiyun#
11983*4882a593Smuzhiyun# SQUARE ROOT: norms and denorms ONLY!
11984*4882a593Smuzhiyun#
11985*4882a593Smuzhiyunfsqrt_norm:
11986*4882a593Smuzhiyun	tst.b		SRC_EX(%a0)		# is operand negative?
11987*4882a593Smuzhiyun	bmi.l		res_operr		# yes
11988*4882a593Smuzhiyun
11989*4882a593Smuzhiyun	andi.b		&0xc0,%d0		# is precision extended?
11990*4882a593Smuzhiyun	bne.b		fsqrt_not_ext		# no; go handle sgl or dbl
11991*4882a593Smuzhiyun
11992*4882a593Smuzhiyun	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
11993*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr		# clear FPSR
11994*4882a593Smuzhiyun
11995*4882a593Smuzhiyun	fsqrt.x		(%a0),%fp0		# execute square root
11996*4882a593Smuzhiyun
11997*4882a593Smuzhiyun	fmov.l		%fpsr,%d1
11998*4882a593Smuzhiyun	or.l		%d1,USER_FPSR(%a6)	# set N,INEX
11999*4882a593Smuzhiyun
12000*4882a593Smuzhiyun	rts
12001*4882a593Smuzhiyun
12002*4882a593Smuzhiyunfsqrt_denorm:
12003*4882a593Smuzhiyun	tst.b		SRC_EX(%a0)		# is operand negative?
12004*4882a593Smuzhiyun	bmi.l		res_operr		# yes
12005*4882a593Smuzhiyun
12006*4882a593Smuzhiyun	andi.b		&0xc0,%d0		# is precision extended?
12007*4882a593Smuzhiyun	bne.b		fsqrt_not_ext		# no; go handle sgl or dbl
12008*4882a593Smuzhiyun
12009*4882a593Smuzhiyun	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
12010*4882a593Smuzhiyun	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
12011*4882a593Smuzhiyun	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
12012*4882a593Smuzhiyun
12013*4882a593Smuzhiyun	bsr.l		scale_sqrt		# calculate scale factor
12014*4882a593Smuzhiyun
12015*4882a593Smuzhiyun	bra.w		fsqrt_sd_normal
12016*4882a593Smuzhiyun
12017*4882a593Smuzhiyun#
12018*4882a593Smuzhiyun# operand is either single or double
12019*4882a593Smuzhiyun#
12020*4882a593Smuzhiyunfsqrt_not_ext:
12021*4882a593Smuzhiyun	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
12022*4882a593Smuzhiyun	bne.w		fsqrt_dbl
12023*4882a593Smuzhiyun
12024*4882a593Smuzhiyun#
12025*4882a593Smuzhiyun# operand is to be rounded to single precision
12026*4882a593Smuzhiyun#
12027*4882a593Smuzhiyunfsqrt_sgl:
12028*4882a593Smuzhiyun	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
12029*4882a593Smuzhiyun	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
12030*4882a593Smuzhiyun	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
12031*4882a593Smuzhiyun
12032*4882a593Smuzhiyun	bsr.l		scale_sqrt		# calculate scale factor
12033*4882a593Smuzhiyun
12034*4882a593Smuzhiyun	cmpi.l		%d0,&0x3fff-0x3f81	# will move in underflow?
12035*4882a593Smuzhiyun	beq.w		fsqrt_sd_may_unfl
12036*4882a593Smuzhiyun	bgt.w		fsqrt_sd_unfl		# yes; go handle underflow
12037*4882a593Smuzhiyun	cmpi.l		%d0,&0x3fff-0x407f	# will move in overflow?
12038*4882a593Smuzhiyun	beq.w		fsqrt_sd_may_ovfl	# maybe; go check
12039*4882a593Smuzhiyun	blt.w		fsqrt_sd_ovfl		# yes; go handle overflow
12040*4882a593Smuzhiyun
12041*4882a593Smuzhiyun#
12042*4882a593Smuzhiyun# operand will NOT overflow or underflow when moved in to the fp reg file
12043*4882a593Smuzhiyun#
12044*4882a593Smuzhiyunfsqrt_sd_normal:
12045*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr		# clear FPSR
12046*4882a593Smuzhiyun	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
12047*4882a593Smuzhiyun
12048*4882a593Smuzhiyun	fsqrt.x		FP_SCR0(%a6),%fp0	# perform absolute
12049*4882a593Smuzhiyun
12050*4882a593Smuzhiyun	fmov.l		%fpsr,%d1		# save FPSR
12051*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# clear FPCR
12052*4882a593Smuzhiyun
12053*4882a593Smuzhiyun	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
12054*4882a593Smuzhiyun
12055*4882a593Smuzhiyunfsqrt_sd_normal_exit:
12056*4882a593Smuzhiyun	mov.l		%d2,-(%sp)		# save d2
12057*4882a593Smuzhiyun	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
12058*4882a593Smuzhiyun	mov.w		FP_SCR0_EX(%a6),%d1	# load sgn,exp
12059*4882a593Smuzhiyun	mov.l		%d1,%d2			# make a copy
12060*4882a593Smuzhiyun	andi.l		&0x7fff,%d1		# strip sign
12061*4882a593Smuzhiyun	sub.l		%d0,%d1			# add scale factor
12062*4882a593Smuzhiyun	andi.w		&0x8000,%d2		# keep old sign
12063*4882a593Smuzhiyun	or.w		%d1,%d2			# concat old sign,new exp
12064*4882a593Smuzhiyun	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
12065*4882a593Smuzhiyun	mov.l		(%sp)+,%d2		# restore d2
12066*4882a593Smuzhiyun	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
12067*4882a593Smuzhiyun	rts
12068*4882a593Smuzhiyun
12069*4882a593Smuzhiyun#
12070*4882a593Smuzhiyun# operand is to be rounded to double precision
12071*4882a593Smuzhiyun#
12072*4882a593Smuzhiyunfsqrt_dbl:
12073*4882a593Smuzhiyun	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
12074*4882a593Smuzhiyun	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
12075*4882a593Smuzhiyun	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
12076*4882a593Smuzhiyun
12077*4882a593Smuzhiyun	bsr.l		scale_sqrt		# calculate scale factor
12078*4882a593Smuzhiyun
12079*4882a593Smuzhiyun	cmpi.l		%d0,&0x3fff-0x3c01	# will move in underflow?
12080*4882a593Smuzhiyun	beq.w		fsqrt_sd_may_unfl
12081*4882a593Smuzhiyun	bgt.b		fsqrt_sd_unfl		# yes; go handle underflow
12082*4882a593Smuzhiyun	cmpi.l		%d0,&0x3fff-0x43ff	# will move in overflow?
12083*4882a593Smuzhiyun	beq.w		fsqrt_sd_may_ovfl	# maybe; go check
12084*4882a593Smuzhiyun	blt.w		fsqrt_sd_ovfl		# yes; go handle overflow
12085*4882a593Smuzhiyun	bra.w		fsqrt_sd_normal		# no; ho handle normalized op
12086*4882a593Smuzhiyun
12087*4882a593Smuzhiyun# we're on the line here and the distinguising characteristic is whether
12088*4882a593Smuzhiyun# the exponent is 3fff or 3ffe. if it's 3ffe, then it's a safe number
12089*4882a593Smuzhiyun# elsewise fall through to underflow.
12090*4882a593Smuzhiyunfsqrt_sd_may_unfl:
12091*4882a593Smuzhiyun	btst		&0x0,1+FP_SCR0_EX(%a6)	# is exponent 0x3fff?
12092*4882a593Smuzhiyun	bne.w		fsqrt_sd_normal		# yes, so no underflow
12093*4882a593Smuzhiyun
12094*4882a593Smuzhiyun#
12095*4882a593Smuzhiyun# operand WILL underflow when moved in to the fp register file
12096*4882a593Smuzhiyun#
12097*4882a593Smuzhiyunfsqrt_sd_unfl:
12098*4882a593Smuzhiyun	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
12099*4882a593Smuzhiyun
12100*4882a593Smuzhiyun	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
12101*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr		# clear FPSR
12102*4882a593Smuzhiyun
12103*4882a593Smuzhiyun	fsqrt.x		FP_SCR0(%a6),%fp0	# execute square root
12104*4882a593Smuzhiyun
12105*4882a593Smuzhiyun	fmov.l		%fpsr,%d1		# save status
12106*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# clear FPCR
12107*4882a593Smuzhiyun
12108*4882a593Smuzhiyun	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
12109*4882a593Smuzhiyun
12110*4882a593Smuzhiyun# if underflow or inexact is enabled, go calculate EXOP first.
12111*4882a593Smuzhiyun	mov.b		FPCR_ENABLE(%a6),%d1
12112*4882a593Smuzhiyun	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
12113*4882a593Smuzhiyun	bne.b		fsqrt_sd_unfl_ena	# yes
12114*4882a593Smuzhiyun
12115*4882a593Smuzhiyunfsqrt_sd_unfl_dis:
12116*4882a593Smuzhiyun	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
12117*4882a593Smuzhiyun
12118*4882a593Smuzhiyun	lea		FP_SCR0(%a6),%a0	# pass: result addr
12119*4882a593Smuzhiyun	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
12120*4882a593Smuzhiyun	bsr.l		unf_res			# calculate default result
12121*4882a593Smuzhiyun	or.b		%d0,FPSR_CC(%a6)	# set possible 'Z' ccode
12122*4882a593Smuzhiyun	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
12123*4882a593Smuzhiyun	rts
12124*4882a593Smuzhiyun
12125*4882a593Smuzhiyun#
12126*4882a593Smuzhiyun# operand will underflow AND underflow is enabled.
12127*4882a593Smuzhiyun# Therefore, we must return the result rounded to extended precision.
12128*4882a593Smuzhiyun#
12129*4882a593Smuzhiyunfsqrt_sd_unfl_ena:
12130*4882a593Smuzhiyun	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
12131*4882a593Smuzhiyun	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
12132*4882a593Smuzhiyun	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent
12133*4882a593Smuzhiyun
12134*4882a593Smuzhiyun	mov.l		%d2,-(%sp)		# save d2
12135*4882a593Smuzhiyun	mov.l		%d1,%d2			# make a copy
12136*4882a593Smuzhiyun	andi.l		&0x7fff,%d1		# strip sign
12137*4882a593Smuzhiyun	andi.w		&0x8000,%d2		# keep old sign
12138*4882a593Smuzhiyun	sub.l		%d0,%d1			# subtract scale factor
12139*4882a593Smuzhiyun	addi.l		&0x6000,%d1		# add new bias
12140*4882a593Smuzhiyun	andi.w		&0x7fff,%d1
12141*4882a593Smuzhiyun	or.w		%d2,%d1			# concat new sign,new exp
12142*4882a593Smuzhiyun	mov.w		%d1,FP_SCR1_EX(%a6)	# insert new exp
12143*4882a593Smuzhiyun	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
12144*4882a593Smuzhiyun	mov.l		(%sp)+,%d2		# restore d2
12145*4882a593Smuzhiyun	bra.b		fsqrt_sd_unfl_dis
12146*4882a593Smuzhiyun
12147*4882a593Smuzhiyun#
12148*4882a593Smuzhiyun# operand WILL overflow.
12149*4882a593Smuzhiyun#
12150*4882a593Smuzhiyunfsqrt_sd_ovfl:
12151*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr		# clear FPSR
12152*4882a593Smuzhiyun	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
12153*4882a593Smuzhiyun
12154*4882a593Smuzhiyun	fsqrt.x		FP_SCR0(%a6),%fp0	# perform square root
12155*4882a593Smuzhiyun
12156*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# clear FPCR
12157*4882a593Smuzhiyun	fmov.l		%fpsr,%d1		# save FPSR
12158*4882a593Smuzhiyun
12159*4882a593Smuzhiyun	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
12160*4882a593Smuzhiyun
12161*4882a593Smuzhiyunfsqrt_sd_ovfl_tst:
12162*4882a593Smuzhiyun	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
12163*4882a593Smuzhiyun
12164*4882a593Smuzhiyun	mov.b		FPCR_ENABLE(%a6),%d1
12165*4882a593Smuzhiyun	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
12166*4882a593Smuzhiyun	bne.b		fsqrt_sd_ovfl_ena	# yes
12167*4882a593Smuzhiyun
12168*4882a593Smuzhiyun#
12169*4882a593Smuzhiyun# OVFL is not enabled; therefore, we must create the default result by
12170*4882a593Smuzhiyun# calling ovf_res().
12171*4882a593Smuzhiyun#
12172*4882a593Smuzhiyunfsqrt_sd_ovfl_dis:
12173*4882a593Smuzhiyun	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
12174*4882a593Smuzhiyun	sne		%d1			# set sign param accordingly
12175*4882a593Smuzhiyun	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
12176*4882a593Smuzhiyun	bsr.l		ovf_res			# calculate default result
12177*4882a593Smuzhiyun	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
12178*4882a593Smuzhiyun	fmovm.x		(%a0),&0x80		# return default result in fp0
12179*4882a593Smuzhiyun	rts
12180*4882a593Smuzhiyun
12181*4882a593Smuzhiyun#
12182*4882a593Smuzhiyun# OVFL is enabled.
12183*4882a593Smuzhiyun# the INEX2 bit has already been updated by the round to the correct precision.
12184*4882a593Smuzhiyun# now, round to extended(and don't alter the FPSR).
12185*4882a593Smuzhiyun#
12186*4882a593Smuzhiyunfsqrt_sd_ovfl_ena:
12187*4882a593Smuzhiyun	mov.l		%d2,-(%sp)		# save d2
12188*4882a593Smuzhiyun	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
12189*4882a593Smuzhiyun	mov.l		%d1,%d2			# make a copy
12190*4882a593Smuzhiyun	andi.l		&0x7fff,%d1		# strip sign
12191*4882a593Smuzhiyun	andi.w		&0x8000,%d2		# keep old sign
12192*4882a593Smuzhiyun	sub.l		%d0,%d1			# add scale factor
12193*4882a593Smuzhiyun	subi.l		&0x6000,%d1		# subtract bias
12194*4882a593Smuzhiyun	andi.w		&0x7fff,%d1
12195*4882a593Smuzhiyun	or.w		%d2,%d1			# concat sign,exp
12196*4882a593Smuzhiyun	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
12197*4882a593Smuzhiyun	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
12198*4882a593Smuzhiyun	mov.l		(%sp)+,%d2		# restore d2
12199*4882a593Smuzhiyun	bra.b		fsqrt_sd_ovfl_dis
12200*4882a593Smuzhiyun
12201*4882a593Smuzhiyun#
12202*4882a593Smuzhiyun# the move in MAY underflow. so...
12203*4882a593Smuzhiyun#
12204*4882a593Smuzhiyunfsqrt_sd_may_ovfl:
12205*4882a593Smuzhiyun	btst		&0x0,1+FP_SCR0_EX(%a6)	# is exponent 0x3fff?
12206*4882a593Smuzhiyun	bne.w		fsqrt_sd_ovfl		# yes, so overflow
12207*4882a593Smuzhiyun
12208*4882a593Smuzhiyun	fmov.l		&0x0,%fpsr		# clear FPSR
12209*4882a593Smuzhiyun	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
12210*4882a593Smuzhiyun
12211*4882a593Smuzhiyun	fsqrt.x		FP_SCR0(%a6),%fp0	# perform absolute
12212*4882a593Smuzhiyun
12213*4882a593Smuzhiyun	fmov.l		%fpsr,%d1		# save status
12214*4882a593Smuzhiyun	fmov.l		&0x0,%fpcr		# clear FPCR
12215*4882a593Smuzhiyun
12216*4882a593Smuzhiyun	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
12217*4882a593Smuzhiyun
12218*4882a593Smuzhiyun	fmov.x		%fp0,%fp1		# make a copy of result
12219*4882a593Smuzhiyun	fcmp.b		%fp1,&0x1		# is |result| >= 1.b?
12220*4882a593Smuzhiyun	fbge.w		fsqrt_sd_ovfl_tst	# yes; overflow has occurred
12221*4882a593Smuzhiyun
12222*4882a593Smuzhiyun# no, it didn't overflow; we have correct result
12223*4882a593Smuzhiyun	bra.w		fsqrt_sd_normal_exit
12224*4882a593Smuzhiyun
12225*4882a593Smuzhiyun##########################################################################
12226*4882a593Smuzhiyun
12227*4882a593Smuzhiyun#
12228*4882a593Smuzhiyun# input is not normalized; what is it?
12229*4882a593Smuzhiyun#
12230*4882a593Smuzhiyunfsqrt_not_norm:
12231*4882a593Smuzhiyun	cmpi.b		%d1,&DENORM		# weed out DENORM
12232*4882a593Smuzhiyun	beq.w		fsqrt_denorm
12233*4882a593Smuzhiyun	cmpi.b		%d1,&ZERO		# weed out ZERO
12234*4882a593Smuzhiyun	beq.b		fsqrt_zero
12235*4882a593Smuzhiyun	cmpi.b		%d1,&INF		# weed out INF
12236*4882a593Smuzhiyun	beq.b		fsqrt_inf
12237*4882a593Smuzhiyun	cmpi.b		%d1,&SNAN		# weed out SNAN
12238*4882a593Smuzhiyun	beq.l		res_snan_1op
12239*4882a593Smuzhiyun	bra.l		res_qnan_1op
12240*4882a593Smuzhiyun
12241*4882a593Smuzhiyun#
12242*4882a593Smuzhiyun#	fsqrt(+0) = +0
12243*4882a593Smuzhiyun#	fsqrt(-0) = -0
12244*4882a593Smuzhiyun#	fsqrt(+INF) = +INF
12245*4882a593Smuzhiyun#	fsqrt(-INF) = OPERR
12246*4882a593Smuzhiyun#
12247*4882a593Smuzhiyunfsqrt_zero:
12248*4882a593Smuzhiyun	tst.b		SRC_EX(%a0)		# is ZERO positive or negative?
12249*4882a593Smuzhiyun	bmi.b		fsqrt_zero_m		# negative
12250*4882a593Smuzhiyunfsqrt_zero_p:
12251*4882a593Smuzhiyun	fmov.s		&0x00000000,%fp0	# return +ZERO
12252*4882a593Smuzhiyun	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
12253*4882a593Smuzhiyun	rts
12254*4882a593Smuzhiyunfsqrt_zero_m:
12255*4882a593Smuzhiyun	fmov.s		&0x80000000,%fp0	# return -ZERO
12256*4882a593Smuzhiyun	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# set 'Z','N' ccode bits
12257*4882a593Smuzhiyun	rts
12258*4882a593Smuzhiyun
12259*4882a593Smuzhiyunfsqrt_inf:
12260*4882a593Smuzhiyun	tst.b		SRC_EX(%a0)		# is INF positive or negative?
12261*4882a593Smuzhiyun	bmi.l		res_operr		# negative
12262*4882a593Smuzhiyunfsqrt_inf_p:
12263*4882a593Smuzhiyun	fmovm.x		SRC(%a0),&0x80		# return +INF in fp0
12264*4882a593Smuzhiyun	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
12265*4882a593Smuzhiyun	rts
12266*4882a593Smuzhiyun
12267*4882a593Smuzhiyun#########################################################################
12268*4882a593Smuzhiyun# XDEF ****************************************************************	#
12269*4882a593Smuzhiyun#	fetch_dreg(): fetch register according to index in d1		#
12270*4882a593Smuzhiyun#									#
12271*4882a593Smuzhiyun# XREF ****************************************************************	#
12272*4882a593Smuzhiyun#	None								#
12273*4882a593Smuzhiyun#									#
12274*4882a593Smuzhiyun# INPUT ***************************************************************	#
12275*4882a593Smuzhiyun#	d1 = index of register to fetch from				#
12276*4882a593Smuzhiyun#									#
12277*4882a593Smuzhiyun# OUTPUT **************************************************************	#
12278*4882a593Smuzhiyun#	d0 = value of register fetched					#
12279*4882a593Smuzhiyun#									#
12280*4882a593Smuzhiyun# ALGORITHM ***********************************************************	#
12281*4882a593Smuzhiyun#	According to the index value in d1 which can range from zero	#
12282*4882a593Smuzhiyun# to fifteen, load the corresponding register file value (where		#
12283*4882a593Smuzhiyun# address register indexes start at 8). D0/D1/A0/A1/A6/A7 are on the	#
12284*4882a593Smuzhiyun# stack. The rest should still be in their original places.		#
12285*4882a593Smuzhiyun#									#
12286*4882a593Smuzhiyun#########################################################################
12287*4882a593Smuzhiyun
12288*4882a593Smuzhiyun# this routine leaves d1 intact for subsequent store_dreg calls.
12289*4882a593Smuzhiyun	global		fetch_dreg
12290*4882a593Smuzhiyunfetch_dreg:
12291*4882a593Smuzhiyun	mov.w		(tbl_fdreg.b,%pc,%d1.w*2),%d0
12292*4882a593Smuzhiyun	jmp		(tbl_fdreg.b,%pc,%d0.w*1)
12293*4882a593Smuzhiyun
12294*4882a593Smuzhiyuntbl_fdreg:
12295*4882a593Smuzhiyun	short		fdreg0 - tbl_fdreg
12296*4882a593Smuzhiyun	short		fdreg1 - tbl_fdreg
12297*4882a593Smuzhiyun	short		fdreg2 - tbl_fdreg
12298*4882a593Smuzhiyun	short		fdreg3 - tbl_fdreg
12299*4882a593Smuzhiyun	short		fdreg4 - tbl_fdreg
12300*4882a593Smuzhiyun	short		fdreg5 - tbl_fdreg
12301*4882a593Smuzhiyun	short		fdreg6 - tbl_fdreg
12302*4882a593Smuzhiyun	short		fdreg7 - tbl_fdreg
12303*4882a593Smuzhiyun	short		fdreg8 - tbl_fdreg
12304*4882a593Smuzhiyun	short		fdreg9 - tbl_fdreg
12305*4882a593Smuzhiyun	short		fdrega - tbl_fdreg
12306*4882a593Smuzhiyun	short		fdregb - tbl_fdreg
12307*4882a593Smuzhiyun	short		fdregc - tbl_fdreg
12308*4882a593Smuzhiyun	short		fdregd - tbl_fdreg
12309*4882a593Smuzhiyun	short		fdrege - tbl_fdreg
12310*4882a593Smuzhiyun	short		fdregf - tbl_fdreg
12311*4882a593Smuzhiyun
12312*4882a593Smuzhiyunfdreg0:
12313*4882a593Smuzhiyun	mov.l		EXC_DREGS+0x0(%a6),%d0
12314*4882a593Smuzhiyun	rts
12315*4882a593Smuzhiyunfdreg1:
12316*4882a593Smuzhiyun	mov.l		EXC_DREGS+0x4(%a6),%d0
12317*4882a593Smuzhiyun	rts
12318*4882a593Smuzhiyunfdreg2:
12319*4882a593Smuzhiyun	mov.l		%d2,%d0
12320*4882a593Smuzhiyun	rts
12321*4882a593Smuzhiyunfdreg3:
12322*4882a593Smuzhiyun	mov.l		%d3,%d0
12323*4882a593Smuzhiyun	rts
12324*4882a593Smuzhiyunfdreg4:
12325*4882a593Smuzhiyun	mov.l		%d4,%d0
12326*4882a593Smuzhiyun	rts
12327*4882a593Smuzhiyunfdreg5:
12328*4882a593Smuzhiyun	mov.l		%d5,%d0
12329*4882a593Smuzhiyun	rts
12330*4882a593Smuzhiyunfdreg6:
12331*4882a593Smuzhiyun	mov.l		%d6,%d0
12332*4882a593Smuzhiyun	rts
12333*4882a593Smuzhiyunfdreg7:
12334*4882a593Smuzhiyun	mov.l		%d7,%d0
12335*4882a593Smuzhiyun	rts
12336*4882a593Smuzhiyunfdreg8:
12337*4882a593Smuzhiyun	mov.l		EXC_DREGS+0x8(%a6),%d0
12338*4882a593Smuzhiyun	rts
12339*4882a593Smuzhiyunfdreg9:
12340*4882a593Smuzhiyun	mov.l		EXC_DREGS+0xc(%a6),%d0
12341*4882a593Smuzhiyun	rts
12342*4882a593Smuzhiyunfdrega:
12343*4882a593Smuzhiyun	mov.l		%a2,%d0
12344*4882a593Smuzhiyun	rts
12345*4882a593Smuzhiyunfdregb:
12346*4882a593Smuzhiyun	mov.l		%a3,%d0
12347*4882a593Smuzhiyun	rts
12348*4882a593Smuzhiyunfdregc:
12349*4882a593Smuzhiyun	mov.l		%a4,%d0
12350*4882a593Smuzhiyun	rts
12351*4882a593Smuzhiyunfdregd:
12352*4882a593Smuzhiyun	mov.l		%a5,%d0
12353*4882a593Smuzhiyun	rts
12354*4882a593Smuzhiyunfdrege:
12355*4882a593Smuzhiyun	mov.l		(%a6),%d0
12356*4882a593Smuzhiyun	rts
12357*4882a593Smuzhiyunfdregf:
12358*4882a593Smuzhiyun	mov.l		EXC_A7(%a6),%d0
12359*4882a593Smuzhiyun	rts
12360*4882a593Smuzhiyun
12361*4882a593Smuzhiyun#########################################################################
12362*4882a593Smuzhiyun# XDEF ****************************************************************	#
12363*4882a593Smuzhiyun#	store_dreg_l(): store longword to data register specified by d1	#
12364*4882a593Smuzhiyun#									#
12365*4882a593Smuzhiyun# XREF ****************************************************************	#
12366*4882a593Smuzhiyun#	None								#
12367*4882a593Smuzhiyun#									#
12368*4882a593Smuzhiyun# INPUT ***************************************************************	#
12369*4882a593Smuzhiyun#	d0 = longowrd value to store					#
12370*4882a593Smuzhiyun#	d1 = index of register to fetch from				#
12371*4882a593Smuzhiyun#									#
12372*4882a593Smuzhiyun# OUTPUT **************************************************************	#
12373*4882a593Smuzhiyun#	(data register is updated)					#
12374*4882a593Smuzhiyun#									#
12375*4882a593Smuzhiyun# ALGORITHM ***********************************************************	#
12376*4882a593Smuzhiyun#	According to the index value in d1, store the longword value	#
12377*4882a593Smuzhiyun# in d0 to the corresponding data register. D0/D1 are on the stack	#
12378*4882a593Smuzhiyun# while the rest are in their initial places.				#
12379*4882a593Smuzhiyun#									#
12380*4882a593Smuzhiyun#########################################################################
12381*4882a593Smuzhiyun
12382*4882a593Smuzhiyun	global		store_dreg_l
12383*4882a593Smuzhiyunstore_dreg_l:
12384*4882a593Smuzhiyun	mov.w		(tbl_sdregl.b,%pc,%d1.w*2),%d1
12385*4882a593Smuzhiyun	jmp		(tbl_sdregl.b,%pc,%d1.w*1)
12386*4882a593Smuzhiyun
12387*4882a593Smuzhiyuntbl_sdregl:
12388*4882a593Smuzhiyun	short		sdregl0 - tbl_sdregl
12389*4882a593Smuzhiyun	short		sdregl1 - tbl_sdregl
12390*4882a593Smuzhiyun	short		sdregl2 - tbl_sdregl
12391*4882a593Smuzhiyun	short		sdregl3 - tbl_sdregl
12392*4882a593Smuzhiyun	short		sdregl4 - tbl_sdregl
12393*4882a593Smuzhiyun	short		sdregl5 - tbl_sdregl
12394*4882a593Smuzhiyun	short		sdregl6 - tbl_sdregl
12395*4882a593Smuzhiyun	short		sdregl7 - tbl_sdregl
12396*4882a593Smuzhiyun
12397*4882a593Smuzhiyunsdregl0:
12398*4882a593Smuzhiyun	mov.l		%d0,EXC_DREGS+0x0(%a6)
12399*4882a593Smuzhiyun	rts
12400*4882a593Smuzhiyunsdregl1:
12401*4882a593Smuzhiyun	mov.l		%d0,EXC_DREGS+0x4(%a6)
12402*4882a593Smuzhiyun	rts
12403*4882a593Smuzhiyunsdregl2:
12404*4882a593Smuzhiyun	mov.l		%d0,%d2
12405*4882a593Smuzhiyun	rts
12406*4882a593Smuzhiyunsdregl3:
12407*4882a593Smuzhiyun	mov.l		%d0,%d3
12408*4882a593Smuzhiyun	rts
12409*4882a593Smuzhiyunsdregl4:
12410*4882a593Smuzhiyun	mov.l		%d0,%d4
12411*4882a593Smuzhiyun	rts
12412*4882a593Smuzhiyunsdregl5:
12413*4882a593Smuzhiyun	mov.l		%d0,%d5
12414*4882a593Smuzhiyun	rts
12415*4882a593Smuzhiyunsdregl6:
12416*4882a593Smuzhiyun	mov.l		%d0,%d6
12417*4882a593Smuzhiyun	rts
12418*4882a593Smuzhiyunsdregl7:
12419*4882a593Smuzhiyun	mov.l		%d0,%d7
12420*4882a593Smuzhiyun	rts
12421*4882a593Smuzhiyun
12422*4882a593Smuzhiyun#########################################################################
12423*4882a593Smuzhiyun# XDEF ****************************************************************	#
12424*4882a593Smuzhiyun#	store_dreg_w(): store word to data register specified by d1	#
12425*4882a593Smuzhiyun#									#
12426*4882a593Smuzhiyun# XREF ****************************************************************	#
12427*4882a593Smuzhiyun#	None								#
12428*4882a593Smuzhiyun#									#
12429*4882a593Smuzhiyun# INPUT ***************************************************************	#
12430*4882a593Smuzhiyun#	d0 = word value to store					#
12431*4882a593Smuzhiyun#	d1 = index of register to fetch from				#
12432*4882a593Smuzhiyun#									#
12433*4882a593Smuzhiyun# OUTPUT **************************************************************	#
12434*4882a593Smuzhiyun#	(data register is updated)					#
12435*4882a593Smuzhiyun#									#
12436*4882a593Smuzhiyun# ALGORITHM ***********************************************************	#
12437*4882a593Smuzhiyun#	According to the index value in d1, store the word value	#
12438*4882a593Smuzhiyun# in d0 to the corresponding data register. D0/D1 are on the stack	#
12439*4882a593Smuzhiyun# while the rest are in their initial places.				#
12440*4882a593Smuzhiyun#									#
12441*4882a593Smuzhiyun#########################################################################
12442*4882a593Smuzhiyun
12443*4882a593Smuzhiyun	global		store_dreg_w
12444*4882a593Smuzhiyunstore_dreg_w:
12445*4882a593Smuzhiyun	mov.w		(tbl_sdregw.b,%pc,%d1.w*2),%d1
12446*4882a593Smuzhiyun	jmp		(tbl_sdregw.b,%pc,%d1.w*1)
12447*4882a593Smuzhiyun
12448*4882a593Smuzhiyuntbl_sdregw:
12449*4882a593Smuzhiyun	short		sdregw0 - tbl_sdregw
12450*4882a593Smuzhiyun	short		sdregw1 - tbl_sdregw
12451*4882a593Smuzhiyun	short		sdregw2 - tbl_sdregw
12452*4882a593Smuzhiyun	short		sdregw3 - tbl_sdregw
12453*4882a593Smuzhiyun	short		sdregw4 - tbl_sdregw
12454*4882a593Smuzhiyun	short		sdregw5 - tbl_sdregw
12455*4882a593Smuzhiyun	short		sdregw6 - tbl_sdregw
12456*4882a593Smuzhiyun	short		sdregw7 - tbl_sdregw
12457*4882a593Smuzhiyun
12458*4882a593Smuzhiyunsdregw0:
12459*4882a593Smuzhiyun	mov.w		%d0,2+EXC_DREGS+0x0(%a6)
12460*4882a593Smuzhiyun	rts
12461*4882a593Smuzhiyunsdregw1:
12462*4882a593Smuzhiyun	mov.w		%d0,2+EXC_DREGS+0x4(%a6)
12463*4882a593Smuzhiyun	rts
12464*4882a593Smuzhiyunsdregw2:
12465*4882a593Smuzhiyun	mov.w		%d0,%d2
12466*4882a593Smuzhiyun	rts
12467*4882a593Smuzhiyunsdregw3:
12468*4882a593Smuzhiyun	mov.w		%d0,%d3
12469*4882a593Smuzhiyun	rts
12470*4882a593Smuzhiyunsdregw4:
12471*4882a593Smuzhiyun	mov.w		%d0,%d4
12472*4882a593Smuzhiyun	rts
12473*4882a593Smuzhiyunsdregw5:
12474*4882a593Smuzhiyun	mov.w		%d0,%d5
12475*4882a593Smuzhiyun	rts
12476*4882a593Smuzhiyunsdregw6:
12477*4882a593Smuzhiyun	mov.w		%d0,%d6
12478*4882a593Smuzhiyun	rts
12479*4882a593Smuzhiyunsdregw7:
12480*4882a593Smuzhiyun	mov.w		%d0,%d7
12481*4882a593Smuzhiyun	rts
12482*4882a593Smuzhiyun
12483*4882a593Smuzhiyun#########################################################################
12484*4882a593Smuzhiyun# XDEF ****************************************************************	#
12485*4882a593Smuzhiyun#	store_dreg_b(): store byte to data register specified by d1	#
12486*4882a593Smuzhiyun#									#
12487*4882a593Smuzhiyun# XREF ****************************************************************	#
12488*4882a593Smuzhiyun#	None								#
12489*4882a593Smuzhiyun#									#
12490*4882a593Smuzhiyun# INPUT ***************************************************************	#
12491*4882a593Smuzhiyun#	d0 = byte value to store					#
12492*4882a593Smuzhiyun#	d1 = index of register to fetch from				#
12493*4882a593Smuzhiyun#									#
12494*4882a593Smuzhiyun# OUTPUT **************************************************************	#
12495*4882a593Smuzhiyun#	(data register is updated)					#
12496*4882a593Smuzhiyun#									#
12497*4882a593Smuzhiyun# ALGORITHM ***********************************************************	#
12498*4882a593Smuzhiyun#	According to the index value in d1, store the byte value	#
12499*4882a593Smuzhiyun# in d0 to the corresponding data register. D0/D1 are on the stack	#
12500*4882a593Smuzhiyun# while the rest are in their initial places.				#
12501*4882a593Smuzhiyun#									#
12502*4882a593Smuzhiyun#########################################################################
12503*4882a593Smuzhiyun
12504*4882a593Smuzhiyun	global		store_dreg_b
12505*4882a593Smuzhiyunstore_dreg_b:
12506*4882a593Smuzhiyun	mov.w		(tbl_sdregb.b,%pc,%d1.w*2),%d1
12507*4882a593Smuzhiyun	jmp		(tbl_sdregb.b,%pc,%d1.w*1)
12508*4882a593Smuzhiyun
12509*4882a593Smuzhiyuntbl_sdregb:
12510*4882a593Smuzhiyun	short		sdregb0 - tbl_sdregb
12511*4882a593Smuzhiyun	short		sdregb1 - tbl_sdregb
12512*4882a593Smuzhiyun	short		sdregb2 - tbl_sdregb
12513*4882a593Smuzhiyun	short		sdregb3 - tbl_sdregb
12514*4882a593Smuzhiyun	short		sdregb4 - tbl_sdregb
12515*4882a593Smuzhiyun	short		sdregb5 - tbl_sdregb
12516*4882a593Smuzhiyun	short		sdregb6 - tbl_sdregb
12517*4882a593Smuzhiyun	short		sdregb7 - tbl_sdregb
12518*4882a593Smuzhiyun
12519*4882a593Smuzhiyunsdregb0:
12520*4882a593Smuzhiyun	mov.b		%d0,3+EXC_DREGS+0x0(%a6)
12521*4882a593Smuzhiyun	rts
12522*4882a593Smuzhiyunsdregb1:
12523*4882a593Smuzhiyun	mov.b		%d0,3+EXC_DREGS+0x4(%a6)
12524*4882a593Smuzhiyun	rts
12525*4882a593Smuzhiyunsdregb2:
12526*4882a593Smuzhiyun	mov.b		%d0,%d2
12527*4882a593Smuzhiyun	rts
12528*4882a593Smuzhiyunsdregb3:
12529*4882a593Smuzhiyun	mov.b		%d0,%d3
12530*4882a593Smuzhiyun	rts
12531*4882a593Smuzhiyunsdregb4:
12532*4882a593Smuzhiyun	mov.b		%d0,%d4
12533*4882a593Smuzhiyun	rts
12534*4882a593Smuzhiyunsdregb5:
12535*4882a593Smuzhiyun	mov.b		%d0,%d5
12536*4882a593Smuzhiyun	rts
12537*4882a593Smuzhiyunsdregb6:
12538*4882a593Smuzhiyun	mov.b		%d0,%d6
12539*4882a593Smuzhiyun	rts
12540*4882a593Smuzhiyunsdregb7:
12541*4882a593Smuzhiyun	mov.b		%d0,%d7
12542*4882a593Smuzhiyun	rts
12543*4882a593Smuzhiyun
12544*4882a593Smuzhiyun#########################################################################
12545*4882a593Smuzhiyun# XDEF ****************************************************************	#
12546*4882a593Smuzhiyun#	inc_areg(): increment an address register by the value in d0	#
12547*4882a593Smuzhiyun#									#
12548*4882a593Smuzhiyun# XREF ****************************************************************	#
12549*4882a593Smuzhiyun#	None								#
12550*4882a593Smuzhiyun#									#
12551*4882a593Smuzhiyun# INPUT ***************************************************************	#
12552*4882a593Smuzhiyun#	d0 = amount to increment by					#
12553*4882a593Smuzhiyun#	d1 = index of address register to increment			#
12554*4882a593Smuzhiyun#									#
12555*4882a593Smuzhiyun# OUTPUT **************************************************************	#
12556*4882a593Smuzhiyun#	(address register is updated)					#
12557*4882a593Smuzhiyun#									#
12558*4882a593Smuzhiyun# ALGORITHM ***********************************************************	#
12559*4882a593Smuzhiyun#	Typically used for an instruction w/ a post-increment <ea>,	#
12560*4882a593Smuzhiyun# this routine adds the increment value in d0 to the address register	#
12561*4882a593Smuzhiyun# specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside	#
12562*4882a593Smuzhiyun# in their original places.						#
12563*4882a593Smuzhiyun#	For a7, if the increment amount is one, then we have to		#
12564*4882a593Smuzhiyun# increment by two. For any a7 update, set the mia7_flag so that if	#
12565*4882a593Smuzhiyun# an access error exception occurs later in emulation, this address	#
12566*4882a593Smuzhiyun# register update can be undone.					#
12567*4882a593Smuzhiyun#									#
12568*4882a593Smuzhiyun#########################################################################
12569*4882a593Smuzhiyun
12570*4882a593Smuzhiyun	global		inc_areg
12571*4882a593Smuzhiyuninc_areg:
12572*4882a593Smuzhiyun	mov.w		(tbl_iareg.b,%pc,%d1.w*2),%d1
12573*4882a593Smuzhiyun	jmp		(tbl_iareg.b,%pc,%d1.w*1)
12574*4882a593Smuzhiyun
12575*4882a593Smuzhiyuntbl_iareg:
12576*4882a593Smuzhiyun	short		iareg0 - tbl_iareg
12577*4882a593Smuzhiyun	short		iareg1 - tbl_iareg
12578*4882a593Smuzhiyun	short		iareg2 - tbl_iareg
12579*4882a593Smuzhiyun	short		iareg3 - tbl_iareg
12580*4882a593Smuzhiyun	short		iareg4 - tbl_iareg
12581*4882a593Smuzhiyun	short		iareg5 - tbl_iareg
12582*4882a593Smuzhiyun	short		iareg6 - tbl_iareg
12583*4882a593Smuzhiyun	short		iareg7 - tbl_iareg
12584*4882a593Smuzhiyun
12585*4882a593Smuzhiyuniareg0:	add.l		%d0,EXC_DREGS+0x8(%a6)
12586*4882a593Smuzhiyun	rts
12587*4882a593Smuzhiyuniareg1:	add.l		%d0,EXC_DREGS+0xc(%a6)
12588*4882a593Smuzhiyun	rts
12589*4882a593Smuzhiyuniareg2:	add.l		%d0,%a2
12590*4882a593Smuzhiyun	rts
12591*4882a593Smuzhiyuniareg3:	add.l		%d0,%a3
12592*4882a593Smuzhiyun	rts
12593*4882a593Smuzhiyuniareg4:	add.l		%d0,%a4
12594*4882a593Smuzhiyun	rts
12595*4882a593Smuzhiyuniareg5:	add.l		%d0,%a5
12596*4882a593Smuzhiyun	rts
12597*4882a593Smuzhiyuniareg6:	add.l		%d0,(%a6)
12598*4882a593Smuzhiyun	rts
12599*4882a593Smuzhiyuniareg7:	mov.b		&mia7_flg,SPCOND_FLG(%a6)
12600*4882a593Smuzhiyun	cmpi.b		%d0,&0x1
12601*4882a593Smuzhiyun	beq.b		iareg7b
12602*4882a593Smuzhiyun	add.l		%d0,EXC_A7(%a6)
12603*4882a593Smuzhiyun	rts
12604*4882a593Smuzhiyuniareg7b:
12605*4882a593Smuzhiyun	addq.l		&0x2,EXC_A7(%a6)
12606*4882a593Smuzhiyun	rts
12607*4882a593Smuzhiyun
12608*4882a593Smuzhiyun#########################################################################
12609*4882a593Smuzhiyun# XDEF ****************************************************************	#
12610*4882a593Smuzhiyun#	dec_areg(): decrement an address register by the value in d0	#
12611*4882a593Smuzhiyun#									#
12612*4882a593Smuzhiyun# XREF ****************************************************************	#
12613*4882a593Smuzhiyun#	None								#
12614*4882a593Smuzhiyun#									#
12615*4882a593Smuzhiyun# INPUT ***************************************************************	#
12616*4882a593Smuzhiyun#	d0 = amount to decrement by					#
12617*4882a593Smuzhiyun#	d1 = index of address register to decrement			#
12618*4882a593Smuzhiyun#									#
12619*4882a593Smuzhiyun# OUTPUT **************************************************************	#
12620*4882a593Smuzhiyun#	(address register is updated)					#
12621*4882a593Smuzhiyun#									#
12622*4882a593Smuzhiyun# ALGORITHM ***********************************************************	#
12623*4882a593Smuzhiyun#	Typically used for an instruction w/ a pre-decrement <ea>,	#
12624*4882a593Smuzhiyun# this routine subtracts the decrement value in d0 from the address	#
12625*4882a593Smuzhiyun# register specified by d1. A0/A1/A6/A7 reside on the stack. The rest	#
12626*4882a593Smuzhiyun# reside in their original places.					#
12627*4882a593Smuzhiyun#	For a7, if the decrement amount is one, then we have to		#
12628*4882a593Smuzhiyun# decrement by two. For any a7 update, set the mda7_flag so that if	#
12629*4882a593Smuzhiyun# an access error exception occurs later in emulation, this address	#
12630*4882a593Smuzhiyun# register update can be undone.					#
12631*4882a593Smuzhiyun#									#
12632*4882a593Smuzhiyun#########################################################################
12633*4882a593Smuzhiyun
12634*4882a593Smuzhiyun	global		dec_areg
12635*4882a593Smuzhiyundec_areg:
12636*4882a593Smuzhiyun	mov.w		(tbl_dareg.b,%pc,%d1.w*2),%d1	# fetch handler offset for An (d1 = index)
12637*4882a593Smuzhiyun	jmp		(tbl_dareg.b,%pc,%d1.w*1)	# dispatch to daregN below
12638*4882a593Smuzhiyun
# table of pc-relative word offsets, one entry per address register
12639*4882a593Smuzhiyuntbl_dareg:
12640*4882a593Smuzhiyun	short		dareg0 - tbl_dareg
12641*4882a593Smuzhiyun	short		dareg1 - tbl_dareg
12642*4882a593Smuzhiyun	short		dareg2 - tbl_dareg
12643*4882a593Smuzhiyun	short		dareg3 - tbl_dareg
12644*4882a593Smuzhiyun	short		dareg4 - tbl_dareg
12645*4882a593Smuzhiyun	short		dareg5 - tbl_dareg
12646*4882a593Smuzhiyun	short		dareg6 - tbl_dareg
12647*4882a593Smuzhiyun	short		dareg7 - tbl_dareg
12648*4882a593Smuzhiyun
12649*4882a593Smuzhiyundareg0:	sub.l		%d0,EXC_DREGS+0x8(%a6)	# a0 is saved in the stack frame
12650*4882a593Smuzhiyun	rts
12651*4882a593Smuzhiyundareg1:	sub.l		%d0,EXC_DREGS+0xc(%a6)	# a1 is saved in the stack frame
12652*4882a593Smuzhiyun	rts
12653*4882a593Smuzhiyundareg2:	sub.l		%d0,%a2			# a2 is live; update directly
12654*4882a593Smuzhiyun	rts
12655*4882a593Smuzhiyundareg3:	sub.l		%d0,%a3			# a3 is live; update directly
12656*4882a593Smuzhiyun	rts
12657*4882a593Smuzhiyundareg4:	sub.l		%d0,%a4			# a4 is live; update directly
12658*4882a593Smuzhiyun	rts
12659*4882a593Smuzhiyundareg5:	sub.l		%d0,%a5			# a5 is live; update directly
12660*4882a593Smuzhiyun	rts
12661*4882a593Smuzhiyundareg6:	sub.l		%d0,(%a6)		# saved a6 sits at (%a6)
12662*4882a593Smuzhiyun	rts
12663*4882a593Smuzhiyundareg7:	mov.b		&mda7_flg,SPCOND_FLG(%a6)	# flag a7 update so it can be undone on access error
12664*4882a593Smuzhiyun	cmpi.b		%d0,&0x1		# byte-sized -(An) through a7?
12665*4882a593Smuzhiyun	beq.b		dareg7b			# yes; move by 2 to keep sp word-aligned
12666*4882a593Smuzhiyun	sub.l		%d0,EXC_A7(%a6)		# a7 is saved in the stack frame
12667*4882a593Smuzhiyun	rts
12668*4882a593Smuzhiyundareg7b:
12669*4882a593Smuzhiyun	subq.l		&0x2,EXC_A7(%a6)	# byte decrement of a7 is really 2
12670*4882a593Smuzhiyun	rts
12671*4882a593Smuzhiyun
12672*4882a593Smuzhiyun##############################################################################
12673*4882a593Smuzhiyun
12674*4882a593Smuzhiyun#########################################################################
12675*4882a593Smuzhiyun# XDEF ****************************************************************	#
12676*4882a593Smuzhiyun#	load_fpn1(): load FP register value into FP_SRC(a6).		#
12677*4882a593Smuzhiyun#									#
12678*4882a593Smuzhiyun# XREF ****************************************************************	#
12679*4882a593Smuzhiyun#	None								#
12680*4882a593Smuzhiyun#									#
12681*4882a593Smuzhiyun# INPUT ***************************************************************	#
12682*4882a593Smuzhiyun#	d0 = index of FP register to load				#
12683*4882a593Smuzhiyun#									#
12684*4882a593Smuzhiyun# OUTPUT **************************************************************	#
12685*4882a593Smuzhiyun#	FP_SRC(a6) = value loaded from FP register file			#
12686*4882a593Smuzhiyun#									#
12687*4882a593Smuzhiyun# ALGORITHM ***********************************************************	#
12688*4882a593Smuzhiyun#	Using the index in d0, load FP_SRC(a6) with a number from the	#
12689*4882a593Smuzhiyun# FP register file.							#
12690*4882a593Smuzhiyun#									#
12691*4882a593Smuzhiyun#########################################################################
12692*4882a593Smuzhiyun
12693*4882a593Smuzhiyun	global		load_fpn1
12694*4882a593Smuzhiyunload_fpn1:
12695*4882a593Smuzhiyun	mov.w		(tbl_load_fpn1.b,%pc,%d0.w*2), %d0	# fetch handler offset for fpN (d0 = index)
12696*4882a593Smuzhiyun	jmp		(tbl_load_fpn1.b,%pc,%d0.w*1)	# dispatch to per-register loader
12697*4882a593Smuzhiyun
# table of pc-relative word offsets, one entry per FP register
12698*4882a593Smuzhiyuntbl_load_fpn1:
12699*4882a593Smuzhiyun	short		load_fpn1_0 - tbl_load_fpn1
12700*4882a593Smuzhiyun	short		load_fpn1_1 - tbl_load_fpn1
12701*4882a593Smuzhiyun	short		load_fpn1_2 - tbl_load_fpn1
12702*4882a593Smuzhiyun	short		load_fpn1_3 - tbl_load_fpn1
12703*4882a593Smuzhiyun	short		load_fpn1_4 - tbl_load_fpn1
12704*4882a593Smuzhiyun	short		load_fpn1_5 - tbl_load_fpn1
12705*4882a593Smuzhiyun	short		load_fpn1_6 - tbl_load_fpn1
12706*4882a593Smuzhiyun	short		load_fpn1_7 - tbl_load_fpn1
12707*4882a593Smuzhiyun
# fp0/fp1 images live in the exception stack frame (EXC_FP0/EXC_FP1);
# copy all 12 bytes (extended precision) to FP_SRC. Each handler also
# leaves a0 pointing at FP_SRC for the caller.
12708*4882a593Smuzhiyunload_fpn1_0:
12709*4882a593Smuzhiyun	mov.l		0+EXC_FP0(%a6), 0+FP_SRC(%a6)
12710*4882a593Smuzhiyun	mov.l		4+EXC_FP0(%a6), 4+FP_SRC(%a6)
12711*4882a593Smuzhiyun	mov.l		8+EXC_FP0(%a6), 8+FP_SRC(%a6)
12712*4882a593Smuzhiyun	lea		FP_SRC(%a6), %a0	# return ptr to src operand
12713*4882a593Smuzhiyun	rts
12714*4882a593Smuzhiyunload_fpn1_1:
12715*4882a593Smuzhiyun	mov.l		0+EXC_FP1(%a6), 0+FP_SRC(%a6)
12716*4882a593Smuzhiyun	mov.l		4+EXC_FP1(%a6), 4+FP_SRC(%a6)
12717*4882a593Smuzhiyun	mov.l		8+EXC_FP1(%a6), 8+FP_SRC(%a6)
12718*4882a593Smuzhiyun	lea		FP_SRC(%a6), %a0	# return ptr to src operand
12719*4882a593Smuzhiyun	rts
# fp2-fp7 are still live in the FPU; dump the requested one to FP_SRC.
12720*4882a593Smuzhiyunload_fpn1_2:
12721*4882a593Smuzhiyun	fmovm.x		&0x20, FP_SRC(%a6)	# dump fp2
12722*4882a593Smuzhiyun	lea		FP_SRC(%a6), %a0	# return ptr to src operand
12723*4882a593Smuzhiyun	rts
12724*4882a593Smuzhiyunload_fpn1_3:
12725*4882a593Smuzhiyun	fmovm.x		&0x10, FP_SRC(%a6)	# dump fp3
12726*4882a593Smuzhiyun	lea		FP_SRC(%a6), %a0	# return ptr to src operand
12727*4882a593Smuzhiyun	rts
12728*4882a593Smuzhiyunload_fpn1_4:
12729*4882a593Smuzhiyun	fmovm.x		&0x08, FP_SRC(%a6)	# dump fp4
12730*4882a593Smuzhiyun	lea		FP_SRC(%a6), %a0	# return ptr to src operand
12731*4882a593Smuzhiyun	rts
12732*4882a593Smuzhiyunload_fpn1_5:
12733*4882a593Smuzhiyun	fmovm.x		&0x04, FP_SRC(%a6)	# dump fp5
12734*4882a593Smuzhiyun	lea		FP_SRC(%a6), %a0	# return ptr to src operand
12735*4882a593Smuzhiyun	rts
12736*4882a593Smuzhiyunload_fpn1_6:
12737*4882a593Smuzhiyun	fmovm.x		&0x02, FP_SRC(%a6)	# dump fp6
12738*4882a593Smuzhiyun	lea		FP_SRC(%a6), %a0	# return ptr to src operand
12739*4882a593Smuzhiyun	rts
12740*4882a593Smuzhiyunload_fpn1_7:
12741*4882a593Smuzhiyun	fmovm.x		&0x01, FP_SRC(%a6)	# dump fp7
12742*4882a593Smuzhiyun	lea		FP_SRC(%a6), %a0	# return ptr to src operand
12743*4882a593Smuzhiyun	rts
12744*4882a593Smuzhiyun
12745*4882a593Smuzhiyun#############################################################################
12746*4882a593Smuzhiyun
12747*4882a593Smuzhiyun#########################################################################
12748*4882a593Smuzhiyun# XDEF ****************************************************************	#
12749*4882a593Smuzhiyun#	load_fpn2(): load FP register value into FP_DST(a6).		#
12750*4882a593Smuzhiyun#									#
12751*4882a593Smuzhiyun# XREF ****************************************************************	#
12752*4882a593Smuzhiyun#	None								#
12753*4882a593Smuzhiyun#									#
12754*4882a593Smuzhiyun# INPUT ***************************************************************	#
12755*4882a593Smuzhiyun#	d0 = index of FP register to load				#
12756*4882a593Smuzhiyun#									#
12757*4882a593Smuzhiyun# OUTPUT **************************************************************	#
12758*4882a593Smuzhiyun#	FP_DST(a6) = value loaded from FP register file			#
12759*4882a593Smuzhiyun#									#
12760*4882a593Smuzhiyun# ALGORITHM ***********************************************************	#
12761*4882a593Smuzhiyun#	Using the index in d0, load FP_DST(a6) with a number from the	#
12762*4882a593Smuzhiyun# FP register file.							#
12763*4882a593Smuzhiyun#									#
12764*4882a593Smuzhiyun#########################################################################
12765*4882a593Smuzhiyun
12766*4882a593Smuzhiyun	global		load_fpn2
12767*4882a593Smuzhiyunload_fpn2:
12768*4882a593Smuzhiyun	mov.w		(tbl_load_fpn2.b,%pc,%d0.w*2), %d0	# fetch handler offset for fpN (d0 = index)
12769*4882a593Smuzhiyun	jmp		(tbl_load_fpn2.b,%pc,%d0.w*1)	# dispatch to per-register loader
12770*4882a593Smuzhiyun
# table of pc-relative word offsets, one entry per FP register
12771*4882a593Smuzhiyuntbl_load_fpn2:
12772*4882a593Smuzhiyun	short		load_fpn2_0 - tbl_load_fpn2
12773*4882a593Smuzhiyun	short		load_fpn2_1 - tbl_load_fpn2
12774*4882a593Smuzhiyun	short		load_fpn2_2 - tbl_load_fpn2
12775*4882a593Smuzhiyun	short		load_fpn2_3 - tbl_load_fpn2
12776*4882a593Smuzhiyun	short		load_fpn2_4 - tbl_load_fpn2
12777*4882a593Smuzhiyun	short		load_fpn2_5 - tbl_load_fpn2
12778*4882a593Smuzhiyun	short		load_fpn2_6 - tbl_load_fpn2
12779*4882a593Smuzhiyun	short		load_fpn2_7 - tbl_load_fpn2
12780*4882a593Smuzhiyun
# fp0/fp1 images live in the exception stack frame (EXC_FP0/EXC_FP1);
# copy all 12 bytes (extended precision) to FP_DST. Each handler also
# leaves a0 pointing at FP_DST for the caller.
12781*4882a593Smuzhiyunload_fpn2_0:
12782*4882a593Smuzhiyun	mov.l		0+EXC_FP0(%a6), 0+FP_DST(%a6)
12783*4882a593Smuzhiyun	mov.l		4+EXC_FP0(%a6), 4+FP_DST(%a6)
12784*4882a593Smuzhiyun	mov.l		8+EXC_FP0(%a6), 8+FP_DST(%a6)
12785*4882a593Smuzhiyun	lea		FP_DST(%a6), %a0	# return ptr to dst operand
12786*4882a593Smuzhiyun	rts
12787*4882a593Smuzhiyunload_fpn2_1:
12788*4882a593Smuzhiyun	mov.l		0+EXC_FP1(%a6), 0+FP_DST(%a6)
12789*4882a593Smuzhiyun	mov.l		4+EXC_FP1(%a6), 4+FP_DST(%a6)
12790*4882a593Smuzhiyun	mov.l		8+EXC_FP1(%a6), 8+FP_DST(%a6)
12791*4882a593Smuzhiyun	lea		FP_DST(%a6), %a0	# return ptr to dst operand
12792*4882a593Smuzhiyun	rts
# fp2-fp7 are still live in the FPU; dump the requested one to FP_DST.
12793*4882a593Smuzhiyunload_fpn2_2:
12794*4882a593Smuzhiyun	fmovm.x		&0x20, FP_DST(%a6)	# dump fp2
12795*4882a593Smuzhiyun	lea		FP_DST(%a6), %a0	# return ptr to dst operand
12796*4882a593Smuzhiyun	rts
12797*4882a593Smuzhiyunload_fpn2_3:
12798*4882a593Smuzhiyun	fmovm.x		&0x10, FP_DST(%a6)	# dump fp3
12799*4882a593Smuzhiyun	lea		FP_DST(%a6), %a0	# return ptr to dst operand
12800*4882a593Smuzhiyun	rts
12801*4882a593Smuzhiyunload_fpn2_4:
12802*4882a593Smuzhiyun	fmovm.x		&0x08, FP_DST(%a6)	# dump fp4
12803*4882a593Smuzhiyun	lea		FP_DST(%a6), %a0	# return ptr to dst operand
12804*4882a593Smuzhiyun	rts
12805*4882a593Smuzhiyunload_fpn2_5:
12806*4882a593Smuzhiyun	fmovm.x		&0x04, FP_DST(%a6)	# dump fp5
12807*4882a593Smuzhiyun	lea		FP_DST(%a6), %a0	# return ptr to dst operand
12808*4882a593Smuzhiyun	rts
12809*4882a593Smuzhiyunload_fpn2_6:
12810*4882a593Smuzhiyun	fmovm.x		&0x02, FP_DST(%a6)	# dump fp6
12811*4882a593Smuzhiyun	lea		FP_DST(%a6), %a0	# return ptr to dst operand
12812*4882a593Smuzhiyun	rts
12813*4882a593Smuzhiyunload_fpn2_7:
12814*4882a593Smuzhiyun	fmovm.x		&0x01, FP_DST(%a6)	# dump fp7
12815*4882a593Smuzhiyun	lea		FP_DST(%a6), %a0	# return ptr to dst operand
12816*4882a593Smuzhiyun	rts
12817*4882a593Smuzhiyun
12818*4882a593Smuzhiyun#############################################################################
12819*4882a593Smuzhiyun
12820*4882a593Smuzhiyun#########################################################################
12821*4882a593Smuzhiyun# XDEF ****************************************************************	#
12822*4882a593Smuzhiyun#	store_fpreg(): store an fp value to the fpreg designated d0.	#
12823*4882a593Smuzhiyun#									#
12824*4882a593Smuzhiyun# XREF ****************************************************************	#
12825*4882a593Smuzhiyun#	None								#
12826*4882a593Smuzhiyun#									#
12827*4882a593Smuzhiyun# INPUT ***************************************************************	#
12828*4882a593Smuzhiyun#	fp0 = extended precision value to store				#
12829*4882a593Smuzhiyun#	d0  = index of floating-point register				#
12830*4882a593Smuzhiyun#									#
12831*4882a593Smuzhiyun# OUTPUT **************************************************************	#
12832*4882a593Smuzhiyun#	None								#
12833*4882a593Smuzhiyun#									#
12834*4882a593Smuzhiyun# ALGORITHM ***********************************************************	#
12835*4882a593Smuzhiyun#	Store the value in fp0 to the FP register designated by the	#
12836*4882a593Smuzhiyun# value in d0. The FP number can be DENORM or SNAN so we have to be	#
12837*4882a593Smuzhiyun# careful that we don't take an exception here.				#
12838*4882a593Smuzhiyun#									#
12839*4882a593Smuzhiyun#########################################################################
12840*4882a593Smuzhiyun
12841*4882a593Smuzhiyun	global		store_fpreg
12842*4882a593Smuzhiyunstore_fpreg:
12843*4882a593Smuzhiyun	mov.w		(tbl_store_fpreg.b,%pc,%d0.w*2), %d0	# fetch handler offset for fpN (d0 = index)
12844*4882a593Smuzhiyun	jmp		(tbl_store_fpreg.b,%pc,%d0.w*1)	# dispatch to per-register store
12845*4882a593Smuzhiyun
# table of pc-relative word offsets, one entry per FP register
12846*4882a593Smuzhiyuntbl_store_fpreg:
12847*4882a593Smuzhiyun	short		store_fpreg_0 - tbl_store_fpreg
12848*4882a593Smuzhiyun	short		store_fpreg_1 - tbl_store_fpreg
12849*4882a593Smuzhiyun	short		store_fpreg_2 - tbl_store_fpreg
12850*4882a593Smuzhiyun	short		store_fpreg_3 - tbl_store_fpreg
12851*4882a593Smuzhiyun	short		store_fpreg_4 - tbl_store_fpreg
12852*4882a593Smuzhiyun	short		store_fpreg_5 - tbl_store_fpreg
12853*4882a593Smuzhiyun	short		store_fpreg_6 - tbl_store_fpreg
12854*4882a593Smuzhiyun	short		store_fpreg_7 - tbl_store_fpreg
12855*4882a593Smuzhiyun
# fp0/fp1 are kept as images in the exception stack frame, so just
# overwrite the image. fmovm.x is used throughout because it does not
# take an exception, keeping DENORM/SNAN operands safe (see header).
12856*4882a593Smuzhiyunstore_fpreg_0:
12857*4882a593Smuzhiyun	fmovm.x		&0x80, EXC_FP0(%a6)	# store fp0 image
12858*4882a593Smuzhiyun	rts
12859*4882a593Smuzhiyunstore_fpreg_1:
12860*4882a593Smuzhiyun	fmovm.x		&0x80, EXC_FP1(%a6)	# store fp1 image
12861*4882a593Smuzhiyun	rts
# fp2-fp7 are live in the FPU; bounce fp0 through the stack to load
# the target register without trapping.
12862*4882a593Smuzhiyunstore_fpreg_2:
12863*4882a593Smuzhiyun	fmovm.x		&0x01, -(%sp)		# push fp0 to the stack
12864*4882a593Smuzhiyun	fmovm.x		(%sp)+, &0x20		# pop it into fp2
12865*4882a593Smuzhiyun	rts
12866*4882a593Smuzhiyunstore_fpreg_3:
12867*4882a593Smuzhiyun	fmovm.x		&0x01, -(%sp)		# push fp0 to the stack
12868*4882a593Smuzhiyun	fmovm.x		(%sp)+, &0x10		# pop it into fp3
12869*4882a593Smuzhiyun	rts
12870*4882a593Smuzhiyunstore_fpreg_4:
12871*4882a593Smuzhiyun	fmovm.x		&0x01, -(%sp)		# push fp0 to the stack
12872*4882a593Smuzhiyun	fmovm.x		(%sp)+, &0x08		# pop it into fp4
12873*4882a593Smuzhiyun	rts
12874*4882a593Smuzhiyunstore_fpreg_5:
12875*4882a593Smuzhiyun	fmovm.x		&0x01, -(%sp)		# push fp0 to the stack
12876*4882a593Smuzhiyun	fmovm.x		(%sp)+, &0x04		# pop it into fp5
12877*4882a593Smuzhiyun	rts
12878*4882a593Smuzhiyunstore_fpreg_6:
12879*4882a593Smuzhiyun	fmovm.x		&0x01, -(%sp)		# push fp0 to the stack
12880*4882a593Smuzhiyun	fmovm.x		(%sp)+, &0x02		# pop it into fp6
12881*4882a593Smuzhiyun	rts
12882*4882a593Smuzhiyunstore_fpreg_7:
12883*4882a593Smuzhiyun	fmovm.x		&0x01, -(%sp)		# push fp0 to the stack
12884*4882a593Smuzhiyun	fmovm.x		(%sp)+, &0x01		# pop it into fp7
12885*4882a593Smuzhiyun	rts
12886*4882a593Smuzhiyun
12887*4882a593Smuzhiyun#########################################################################
12888*4882a593Smuzhiyun# XDEF ****************************************************************	#
12889*4882a593Smuzhiyun#	get_packed(): fetch a packed operand from memory and then	#
12890*4882a593Smuzhiyun#		      convert it to a floating-point binary number.	#
12891*4882a593Smuzhiyun#									#
12892*4882a593Smuzhiyun# XREF ****************************************************************	#
12893*4882a593Smuzhiyun#	_dcalc_ea() - calculate the correct <ea>			#
12894*4882a593Smuzhiyun#	_mem_read() - fetch the packed operand from memory		#
12895*4882a593Smuzhiyun#	facc_in_x() - the fetch failed so jump to special exit code	#
12896*4882a593Smuzhiyun#	decbin()    - convert packed to binary extended precision	#
12897*4882a593Smuzhiyun#									#
12898*4882a593Smuzhiyun# INPUT ***************************************************************	#
12899*4882a593Smuzhiyun#	None								#
12900*4882a593Smuzhiyun#									#
12901*4882a593Smuzhiyun# OUTPUT **************************************************************	#
12902*4882a593Smuzhiyun#	If no failure on _mem_read():					#
12903*4882a593Smuzhiyun#	FP_SRC(a6) = packed operand now as a binary FP number		#
12904*4882a593Smuzhiyun#									#
12905*4882a593Smuzhiyun# ALGORITHM ***********************************************************	#
12906*4882a593Smuzhiyun#	Get the correct <ea> which is the value on the exception stack	#
12907*4882a593Smuzhiyun# frame w/ maybe a correction factor if the <ea> is -(an) or (an)+.	#
12908*4882a593Smuzhiyun# Then, fetch the operand from memory. If the fetch fails, exit		#
12909*4882a593Smuzhiyun# through facc_in_x().							#
12910*4882a593Smuzhiyun#	If the packed operand is a ZERO,NAN, or INF, convert it to	#
12911*4882a593Smuzhiyun# its binary representation here. Else, call decbin() which will	#
12912*4882a593Smuzhiyun# convert the packed value to an extended precision binary value.	#
12913*4882a593Smuzhiyun#									#
12914*4882a593Smuzhiyun#########################################################################
12915*4882a593Smuzhiyun
12916*4882a593Smuzhiyun# the stacked <ea> for packed is correct except for -(An).
12917*4882a593Smuzhiyun# the base reg must be updated for both -(An) and (An)+.
12918*4882a593Smuzhiyun	global		get_packed
12919*4882a593Smuzhiyunget_packed:
12920*4882a593Smuzhiyun	mov.l		&0xc,%d0		# packed is 12 bytes
12921*4882a593Smuzhiyun	bsr.l		_dcalc_ea		# fetch <ea>; correct An
12922*4882a593Smuzhiyun
12923*4882a593Smuzhiyun	lea		FP_SRC(%a6),%a1		# pass: ptr to super dst
12924*4882a593Smuzhiyun	mov.l		&0xc,%d0		# pass: 12 bytes
12925*4882a593Smuzhiyun	bsr.l		_dmem_read		# read packed operand
12926*4882a593Smuzhiyun
12927*4882a593Smuzhiyun	tst.l		%d1			# did dfetch fail?
12928*4882a593Smuzhiyun	bne.l		facc_in_x		# yes; special access-error exit
12929*4882a593Smuzhiyun
12930*4882a593Smuzhiyun# The packed operand is an INF or a NAN if the exponent field is all ones.
12931*4882a593Smuzhiyun	bfextu		FP_SRC(%a6){&1:&15},%d0	# get 15-bit exp (skip sign bit)
12932*4882a593Smuzhiyun	cmpi.w		%d0,&0x7fff		# INF or NAN?
12933*4882a593Smuzhiyun	bne.b		gp_try_zero		# no
12934*4882a593Smuzhiyun	rts					# operand is an INF or NAN
12935*4882a593Smuzhiyun
12936*4882a593Smuzhiyun# The packed operand is a zero if the mantissa is all zero, else it's
12937*4882a593Smuzhiyun# a normal packed op.
12938*4882a593Smuzhiyungp_try_zero:
12939*4882a593Smuzhiyun	mov.b		3+FP_SRC(%a6),%d0	# get byte 4 (holds integer digit)
12940*4882a593Smuzhiyun	andi.b		&0x0f,%d0		# clear all but last nybble
12941*4882a593Smuzhiyun	bne.b		gp_not_spec		# not a zero
12942*4882a593Smuzhiyun	tst.l		FP_SRC_HI(%a6)		# is lw 2 zero?
12943*4882a593Smuzhiyun	bne.b		gp_not_spec		# not a zero
12944*4882a593Smuzhiyun	tst.l		FP_SRC_LO(%a6)		# is lw 3 zero?
12945*4882a593Smuzhiyun	bne.b		gp_not_spec		# not a zero
12946*4882a593Smuzhiyun	rts					# operand is a ZERO
12947*4882a593Smuzhiyungp_not_spec:
12948*4882a593Smuzhiyun	lea		FP_SRC(%a6),%a0		# pass: ptr to packed op
12949*4882a593Smuzhiyun	bsr.l		decbin			# convert to extended
12950*4882a593Smuzhiyun	fmovm.x		&0x80,FP_SRC(%a6)	# make this the srcop
12951*4882a593Smuzhiyun	rts
12952*4882a593Smuzhiyun
12953*4882a593Smuzhiyun#########################################################################
12954*4882a593Smuzhiyun# decbin(): Converts normalized packed bcd value pointed to by register	#
12955*4882a593Smuzhiyun#	    a0 to extended-precision value in fp0.			#
12956*4882a593Smuzhiyun#									#
12957*4882a593Smuzhiyun# INPUT ***************************************************************	#
12958*4882a593Smuzhiyun#	a0 = pointer to normalized packed bcd value			#
12959*4882a593Smuzhiyun#									#
12960*4882a593Smuzhiyun# OUTPUT **************************************************************	#
12961*4882a593Smuzhiyun#	fp0 = exact fp representation of the packed bcd value.		#
12962*4882a593Smuzhiyun#									#
12963*4882a593Smuzhiyun# ALGORITHM ***********************************************************	#
12964*4882a593Smuzhiyun#	Expected is a normal bcd (i.e. non-exceptional; all inf, zero,	#
12965*4882a593Smuzhiyun#	and NaN operands are dispatched without entering this routine)	#
12966*4882a593Smuzhiyun#	value in 68881/882 format at location (a0).			#
12967*4882a593Smuzhiyun#									#
12968*4882a593Smuzhiyun#	A1. Convert the bcd exponent to binary by successive adds and	#
12969*4882a593Smuzhiyun#	muls. Set the sign according to SE. Subtract 16 to compensate	#
12970*4882a593Smuzhiyun#	for the mantissa which is to be interpreted as 17 integer	#
12971*4882a593Smuzhiyun#	digits, rather than 1 integer and 16 fraction digits.		#
12972*4882a593Smuzhiyun#	Note: this operation can never overflow.			#
12973*4882a593Smuzhiyun#									#
12974*4882a593Smuzhiyun#	A2. Convert the bcd mantissa to binary by successive		#
12975*4882a593Smuzhiyun#	adds and muls in FP0. Set the sign according to SM.		#
12976*4882a593Smuzhiyun#	The mantissa digits will be converted with the decimal point	#
12977*4882a593Smuzhiyun#	assumed following the least-significant digit.			#
12978*4882a593Smuzhiyun#	Note: this operation can never overflow.			#
12979*4882a593Smuzhiyun#									#
12980*4882a593Smuzhiyun#	A3. Count the number of leading/trailing zeros in the		#
12981*4882a593Smuzhiyun#	bcd string.  If SE is positive, count the leading zeros;	#
12982*4882a593Smuzhiyun#	if negative, count the trailing zeros.  Set the adjusted	#
12983*4882a593Smuzhiyun#	exponent equal to the exponent from A1 and the zero count	#
12984*4882a593Smuzhiyun#	added if SM = 1 and subtracted if SM = 0.  Scale the		#
12985*4882a593Smuzhiyun#	mantissa the equivalent of forcing in the bcd value:		#
12986*4882a593Smuzhiyun#									#
12987*4882a593Smuzhiyun#	SM = 0	a non-zero digit in the integer position		#
12988*4882a593Smuzhiyun#	SM = 1	a non-zero digit in Mant0, lsd of the fraction		#
12989*4882a593Smuzhiyun#									#
12990*4882a593Smuzhiyun#	this will insure that any value, regardless of its		#
12991*4882a593Smuzhiyun#	representation (ex. 0.1E2, 1E1, 10E0, 100E-1), is converted	#
12992*4882a593Smuzhiyun#	consistently.							#
12993*4882a593Smuzhiyun#									#
12994*4882a593Smuzhiyun#	A4. Calculate the factor 10^exp in FP1 using a table of		#
12995*4882a593Smuzhiyun#	10^(2^n) values.  To reduce the error in forming factors	#
12996*4882a593Smuzhiyun#	greater than 10^27, a directed rounding scheme is used with	#
12997*4882a593Smuzhiyun#	tables rounded to RN, RM, and RP, according to the table	#
12998*4882a593Smuzhiyun#	in the comments of the pwrten section.				#
12999*4882a593Smuzhiyun#									#
13000*4882a593Smuzhiyun#	A5. Form the final binary number by scaling the mantissa by	#
13001*4882a593Smuzhiyun#	the exponent factor.  This is done by multiplying the		#
13002*4882a593Smuzhiyun#	mantissa in FP0 by the factor in FP1 if the adjusted		#
13003*4882a593Smuzhiyun#	exponent sign is positive, and dividing FP0 by FP1 if		#
13004*4882a593Smuzhiyun#	it is negative.							#
13005*4882a593Smuzhiyun#									#
13006*4882a593Smuzhiyun#	Clean up and return. Check if the final mul or div was inexact.	#
13007*4882a593Smuzhiyun#	If so, set INEX1 in USER_FPSR.					#
13008*4882a593Smuzhiyun#									#
13009*4882a593Smuzhiyun#########################################################################
13010*4882a593Smuzhiyun
13011*4882a593Smuzhiyun#
13012*4882a593Smuzhiyun#	PTENRN, PTENRM, and PTENRP are arrays of powers of 10 rounded
13013*4882a593Smuzhiyun#	to nearest, minus, and plus, respectively.  The tables include
13014*4882a593Smuzhiyun#	10**{1,2,4,8,16,32,64,128,256,512,1024,2048,4096}.  No rounding
13015*4882a593Smuzhiyun#	is required until the power is greater than 27, however, all
13016*4882a593Smuzhiyun#	tables include the first 5 for ease of indexing.
13017*4882a593Smuzhiyun#
# RTABLE: rounding translation table used by the pwrten code (use site
# is below this view) -- NOTE(review): presumably indexed by the user's
# rounding precision/mode to select the RN/RM/RP-rounded power-of-ten
# table described in the comment block above; confirm at the use site.
13018*4882a593SmuzhiyunRTABLE:
13019*4882a593Smuzhiyun	byte		0,0,0,0
13020*4882a593Smuzhiyun	byte		2,3,2,3
13021*4882a593Smuzhiyun	byte		2,3,3,2
13022*4882a593Smuzhiyun	byte		3,2,2,3
13023*4882a593Smuzhiyun
13024*4882a593Smuzhiyun	set		FNIBS,7			# dbf count: 8 digit nibbles per mantissa lword (see loadlw)
13025*4882a593Smuzhiyun	set		FSTRT,0			# bit offset of first digit within a mantissa lword
13026*4882a593Smuzhiyun
13027*4882a593Smuzhiyun	set		ESTRT,4			# bit offset of first exponent digit in lword 1 (see calc_e)
13028*4882a593Smuzhiyun	set		EDIGITS,2		# dbf count: 3 exponent digits
13029*4882a593Smuzhiyun
13030*4882a593Smuzhiyun	global		decbin
13031*4882a593Smuzhiyundecbin:
13032*4882a593Smuzhiyun	mov.l		0x0(%a0),FP_SCR0_EX(%a6) # make a copy of input
13033*4882a593Smuzhiyun	mov.l		0x4(%a0),FP_SCR0_HI(%a6) # so we don't alter it
13034*4882a593Smuzhiyun	mov.l		0x8(%a0),FP_SCR0_LO(%a6)
13035*4882a593Smuzhiyun
13036*4882a593Smuzhiyun	lea		FP_SCR0(%a6),%a0
13037*4882a593Smuzhiyun
13038*4882a593Smuzhiyun	movm.l		&0x3c00,-(%sp)		# save d2-d5
13039*4882a593Smuzhiyun	fmovm.x		&0x1,-(%sp)		# save fp1
13040*4882a593Smuzhiyun#
13041*4882a593Smuzhiyun# Calculate exponent:
13042*4882a593Smuzhiyun#  1. Copy bcd value in memory for use as a working copy.
13043*4882a593Smuzhiyun#  2. Calculate absolute value of exponent in d1 by mul and add.
13044*4882a593Smuzhiyun#  3. Correct for exponent sign.
13045*4882a593Smuzhiyun#  4. Subtract 16 to compensate for interpreting the mant as all integer digits.
13046*4882a593Smuzhiyun#     (i.e., all digits assumed left of the decimal point.)
13047*4882a593Smuzhiyun#
13048*4882a593Smuzhiyun# Register usage:
13049*4882a593Smuzhiyun#
13050*4882a593Smuzhiyun#  calc_e:
13051*4882a593Smuzhiyun#	(*)  d0: temp digit storage
13052*4882a593Smuzhiyun#	(*)  d1: accumulator for binary exponent
13053*4882a593Smuzhiyun#	(*)  d2: digit count
13054*4882a593Smuzhiyun#	(*)  d3: offset pointer
13055*4882a593Smuzhiyun#	( )  d4: first word of bcd
13056*4882a593Smuzhiyun#	( )  a0: pointer to working bcd value
13057*4882a593Smuzhiyun#	( )  a6: pointer to original bcd value
13058*4882a593Smuzhiyun#	(*)  FP_SCR1: working copy of original bcd value
13059*4882a593Smuzhiyun#	(*)  L_SCR1: copy of original exponent word
13060*4882a593Smuzhiyun#
13061*4882a593Smuzhiyuncalc_e:
13062*4882a593Smuzhiyun	mov.l		&EDIGITS,%d2		# # of nibbles (digits) in fraction part
13063*4882a593Smuzhiyun	mov.l		&ESTRT,%d3		# counter to pick up digits
13064*4882a593Smuzhiyun	mov.l		(%a0),%d4		# get first word of bcd
13065*4882a593Smuzhiyun	clr.l		%d1			# zero d1 for accumulator
13066*4882a593Smuzhiyune_gd:
13067*4882a593Smuzhiyun	mulu.l		&0xa,%d1		# mul partial product by one digit place
13068*4882a593Smuzhiyun	bfextu		%d4{%d3:&4},%d0		# get the digit and zero extend into d0
13069*4882a593Smuzhiyun	add.l		%d0,%d1			# d1 = d1 + d0
13070*4882a593Smuzhiyun	addq.b		&4,%d3			# advance d3 to the next digit
13071*4882a593Smuzhiyun	dbf.w		%d2,e_gd		# if we have used all 3 digits, exit loop
13072*4882a593Smuzhiyun	btst		&30,%d4			# get SE
13073*4882a593Smuzhiyun	beq.b		e_pos			# don't negate if pos
13074*4882a593Smuzhiyun	neg.l		%d1			# negate before subtracting
13075*4882a593Smuzhiyune_pos:
13076*4882a593Smuzhiyun	sub.l		&16,%d1			# sub to compensate for shift of mant
13077*4882a593Smuzhiyun	bge.b		e_save			# if still pos, do not neg
13078*4882a593Smuzhiyun	neg.l		%d1			# now negative, make pos and set SE
13079*4882a593Smuzhiyun	or.l		&0x40000000,%d4		# set SE in d4,
13080*4882a593Smuzhiyun	or.l		&0x40000000,(%a0)	# and in working bcd
13081*4882a593Smuzhiyune_save:
13082*4882a593Smuzhiyun	mov.l		%d1,-(%sp)		# save exp on stack
13083*4882a593Smuzhiyun#
13084*4882a593Smuzhiyun#
13085*4882a593Smuzhiyun# Calculate mantissa:
13086*4882a593Smuzhiyun#  1. Calculate absolute value of mantissa in fp0 by mul and add.
13087*4882a593Smuzhiyun#  2. Correct for mantissa sign.
13088*4882a593Smuzhiyun#     (i.e., all digits assumed left of the decimal point.)
13089*4882a593Smuzhiyun#
13090*4882a593Smuzhiyun# Register usage:
13091*4882a593Smuzhiyun#
13092*4882a593Smuzhiyun#  calc_m:
13093*4882a593Smuzhiyun#	(*)  d0: temp digit storage
13094*4882a593Smuzhiyun#	(*)  d1: lword counter
13095*4882a593Smuzhiyun#	(*)  d2: digit count
13096*4882a593Smuzhiyun#	(*)  d3: offset pointer
13097*4882a593Smuzhiyun#	( )  d4: words 2 and 3 of bcd
13098*4882a593Smuzhiyun#	( )  a0: pointer to working bcd value
13099*4882a593Smuzhiyun#	( )  a6: pointer to original bcd value
13100*4882a593Smuzhiyun#	(*) fp0: mantissa accumulator
13101*4882a593Smuzhiyun#	( )  FP_SCR1: working copy of original bcd value
13102*4882a593Smuzhiyun#	( )  L_SCR1: copy of original exponent word
13103*4882a593Smuzhiyun#
13104*4882a593Smuzhiyuncalc_m:
13105*4882a593Smuzhiyun	mov.l		&1,%d1			# word counter, init to 1
13106*4882a593Smuzhiyun	fmov.s		&0x00000000,%fp0	# accumulator
13107*4882a593Smuzhiyun#
13108*4882a593Smuzhiyun#
13109*4882a593Smuzhiyun#  Since the packed number has a long word between the first & second parts,
13110*4882a593Smuzhiyun#  get the integer digit then skip down & get the rest of the
13111*4882a593Smuzhiyun#  mantissa.  We will unroll the loop once.
13112*4882a593Smuzhiyun#
13113*4882a593Smuzhiyun	bfextu		(%a0){&28:&4},%d0	# integer part is ls digit in long word
13114*4882a593Smuzhiyun	fadd.b		%d0,%fp0		# add digit to sum in fp0
13115*4882a593Smuzhiyun#
13116*4882a593Smuzhiyun#
13117*4882a593Smuzhiyun#  Get the rest of the mantissa.
13118*4882a593Smuzhiyun#
13119*4882a593Smuzhiyunloadlw:
13120*4882a593Smuzhiyun	mov.l		(%a0,%d1.L*4),%d4	# load mantissa longword into d4
13121*4882a593Smuzhiyun	mov.l		&FSTRT,%d3		# counter to pick up digits
13122*4882a593Smuzhiyun	mov.l		&FNIBS,%d2		# reset number of digits per a0 ptr
13123*4882a593Smuzhiyunmd2b:
13124*4882a593Smuzhiyun	fmul.s		&0x41200000,%fp0	# fp0 = fp0 * 10
13125*4882a593Smuzhiyun	bfextu		%d4{%d3:&4},%d0		# get the digit and zero extend
13126*4882a593Smuzhiyun	fadd.b		%d0,%fp0		# fp0 = fp0 + digit
13127*4882a593Smuzhiyun#
13128*4882a593Smuzhiyun#
13129*4882a593Smuzhiyun#  If all the digits (8) in that long word have been converted (d2=0),
13130*4882a593Smuzhiyun#  then inc d1 (=2) to point to the next long word and reset d3 to 0
13131*4882a593Smuzhiyun#  to initialize the digit offset, and set d2 to 7 for the digit count;
13132*4882a593Smuzhiyun#  else continue with this long word.
13133*4882a593Smuzhiyun#
13134*4882a593Smuzhiyun	addq.b		&4,%d3			# advance d3 to the next digit
13135*4882a593Smuzhiyun	dbf.w		%d2,md2b		# check for last digit in this lw
13136*4882a593Smuzhiyunnextlw:
13137*4882a593Smuzhiyun	addq.l		&1,%d1			# inc lw pointer in mantissa
13138*4882a593Smuzhiyun	cmp.l		%d1,&2			# test for last lw
13139*4882a593Smuzhiyun	ble.b		loadlw			# if not, get last one
13140*4882a593Smuzhiyun#
13141*4882a593Smuzhiyun#  Check the sign of the mant and make the value in fp0 the same sign.
13142*4882a593Smuzhiyun#
13143*4882a593Smuzhiyunm_sign:
13144*4882a593Smuzhiyun	btst		&31,(%a0)		# test sign of the mantissa
13145*4882a593Smuzhiyun	beq.b		ap_st_z			# if clear, go to append/strip zeros
13146*4882a593Smuzhiyun	fneg.x		%fp0			# if set, negate fp0
13147*4882a593Smuzhiyun#
13148*4882a593Smuzhiyun# Append/strip zeros:
13149*4882a593Smuzhiyun#
13150*4882a593Smuzhiyun#  For adjusted exponents which have an absolute value greater than 27*,
13151*4882a593Smuzhiyun#  this routine calculates the amount needed to normalize the mantissa
13152*4882a593Smuzhiyun#  for the adjusted exponent.  That number is subtracted from the exp
13153*4882a593Smuzhiyun#  if the exp was positive, and added if it was negative.  The purpose
13154*4882a593Smuzhiyun#  of this is to reduce the value of the exponent and the possibility
13155*4882a593Smuzhiyun#  of error in calculation of pwrten.
13156*4882a593Smuzhiyun#
13157*4882a593Smuzhiyun#  1. Branch on the sign of the adjusted exponent.
13158*4882a593Smuzhiyun#  2p.(positive exp)
13159*4882a593Smuzhiyun#   2. Check M16 and the digits in lwords 2 and 3 in descending order.
13160*4882a593Smuzhiyun#   3. Add one for each zero encountered until a non-zero digit.
13161*4882a593Smuzhiyun#   4. Subtract the count from the exp.
13162*4882a593Smuzhiyun#   5. Check if the exp has crossed zero in #3 above; make the exp abs
13163*4882a593Smuzhiyun#	   and set SE.
13164*4882a593Smuzhiyun#	6. Multiply the mantissa by 10**count.
13165*4882a593Smuzhiyun#  2n.(negative exp)
13166*4882a593Smuzhiyun#   2. Check the digits in lwords 3 and 2 in descending order.
13167*4882a593Smuzhiyun#   3. Add one for each zero encountered until a non-zero digit.
13168*4882a593Smuzhiyun#   4. Add the count to the exp.
13169*4882a593Smuzhiyun#   5. Check if the exp has crossed zero in #3 above; clear SE.
13170*4882a593Smuzhiyun#   6. Divide the mantissa by 10**count.
13171*4882a593Smuzhiyun#
13172*4882a593Smuzhiyun#  *Why 27?  If the adjusted exponent is within -28 < expA < 28, than
13173*4882a593Smuzhiyun#   any adjustment due to append/strip zeros will drive the resultant
13174*4882a593Smuzhiyun#   exponent towards zero.  Since all pwrten constants with a power
13175*4882a593Smuzhiyun#   of 27 or less are exact, there is no need to use this routine to
13176*4882a593Smuzhiyun#   attempt to lessen the resultant exponent.
13177*4882a593Smuzhiyun#
13178*4882a593Smuzhiyun# Register usage:
13179*4882a593Smuzhiyun#
13180*4882a593Smuzhiyun#  ap_st_z:
13181*4882a593Smuzhiyun#	(*)  d0: temp digit storage
13182*4882a593Smuzhiyun#	(*)  d1: zero count
13183*4882a593Smuzhiyun#	(*)  d2: digit count
13184*4882a593Smuzhiyun#	(*)  d3: offset pointer
13185*4882a593Smuzhiyun#	( )  d4: first word of bcd
13186*4882a593Smuzhiyun#	(*)  d5: lword counter
13187*4882a593Smuzhiyun#	( )  a0: pointer to working bcd value
13188*4882a593Smuzhiyun#	( )  FP_SCR1: working copy of original bcd value
13189*4882a593Smuzhiyun#	( )  L_SCR1: copy of original exponent word
13190*4882a593Smuzhiyun#
13191*4882a593Smuzhiyun#
13192*4882a593Smuzhiyun# First check the absolute value of the exponent to see if this
13193*4882a593Smuzhiyun# routine is necessary.  If so, then check the sign of the exponent
13194*4882a593Smuzhiyun# and do append (+) or strip (-) zeros accordingly.
13195*4882a593Smuzhiyun# This section handles a positive adjusted exponent.
13196*4882a593Smuzhiyun#
ap_st_z:
	mov.l		(%sp),%d1		# load expA (magnitude) for range test
	cmp.l		%d1,&27			# compare expA with 27
	ble.w		pwrten			# if abs(expA) <28, skip ap/st zeros
	btst		&30,(%a0)		# check sign of exp (SE bit)
	bne.b		ap_st_n			# if neg, go to neg side
	clr.l		%d1			# zero count reg
	mov.l		(%a0),%d4		# load lword 1 to d4
	bfextu		%d4{&28:&4},%d0		# get M16 in d0
	bne.b		ap_p_fx			# if M16 is non-zero, go fix exp
	addq.l		&1,%d1			# inc zero count
	mov.l		&1,%d5			# init lword counter
	mov.l		(%a0,%d5.L*4),%d4	# get lword 2 to d4
	bne.b		ap_p_cl			# if lw 2 is non-zero, go scan its digits
	addq.l		&8,%d1			# lw 2 all zero: count its 8 digits
	addq.l		&1,%d5			# inc lword counter
	mov.l		(%a0,%d5.L*4),%d4	# get lword 3 to d4
ap_p_cl:
	clr.l		%d3			# init bitfield offset reg
	mov.l		&7,%d2			# init digit counter (8 digits/lword)
ap_p_gd:
	bfextu		%d4{%d3:&4},%d0		# get next digit
	bne.b		ap_p_fx			# if non-zero, go to fix exp
	addq.l		&4,%d3			# point to next digit
	addq.l		&1,%d1			# inc zero count
	dbf.w		%d2,ap_p_gd		# get next digit
#
# Subtract the zero count from the adjusted exponent.  If the
# exponent crossed zero, take its absolute value and set SE.
#
ap_p_fx:
	mov.l		%d1,%d0			# copy zero count to d0
	mov.l		(%sp),%d1		# get adjusted exp from memory
	sub.l		%d0,%d1			# subtract count from exp
	bge.b		ap_p_fm			# if still non-negative, skip SE fixup
	neg.l		%d1			# exp went negative; take abs
	mov.l		(%a0),%d4		# load lword 1 to d4
	or.l		&0x40000000,%d4		# and set SE in d4
	or.l		&0x40000000,(%a0)	# and in memory
#
# Calculate the mantissa multiplier to compensate for the stripping of
# zeros from the mantissa.  d0 holds the zero count; each bit of the
# count selects the next PTENRN entry (12 bytes each).
#
ap_p_fm:
	lea.l		PTENRN(%pc),%a1		# get address of power-of-ten table
	clr.l		%d3			# init table index
	fmov.s		&0x3f800000,%fp1	# init fp1 to 1
	mov.l		&3,%d2			# init d2 to count bits in counter
						# (d2 is not referenced by the loop below)
ap_p_el:
	asr.l		&1,%d0			# shift lsb into carry
	bcc.b		ap_p_en			# if bit was 0, skip the multiply
	fmul.x		(%a1,%d3),%fp1		# mul by 10**(d3_bit_no)
ap_p_en:
	add.l		&12,%d3			# inc d3 to next table entry
	tst.l		%d0			# check if d0 is zero
	bne.b		ap_p_el			# if not, get next bit
	fmul.x		%fp1,%fp0		# mul mantissa by 10**(no_bits_shifted)
	bra.b		pwrten			# go calc pwrten
13251*4882a593Smuzhiyun#
13252*4882a593Smuzhiyun# This section handles a negative adjusted exponent.
13253*4882a593Smuzhiyun#
ap_st_n:
	clr.l		%d1			# clr zero counter
	mov.l		&2,%d5			# set up d5 to point to lword 3
	mov.l		(%a0,%d5.L*4),%d4	# get lword 3
	bne.b		ap_n_cl			# if not zero, check digits
	sub.l		&1,%d5			# dec d5 to point to lword 2
	addq.l		&8,%d1			# lw 3 all zero: count its 8 digits
	mov.l		(%a0,%d5.L*4),%d4	# get lword 2
ap_n_cl:
	mov.l		&28,%d3			# point to last (least significant) digit
	mov.l		&7,%d2			# init digit counter (8 digits/lword)
ap_n_gd:
	bfextu		%d4{%d3:&4},%d0		# get digit
	bne.b		ap_n_fx			# if non-zero, go to exp fix
	subq.l		&4,%d3			# point to previous digit
	addq.l		&1,%d1			# inc zero counter
	dbf.w		%d2,ap_n_gd		# get next digit
#
# Reduce the exponent magnitude by the zero count (the exponent is
# negative, so this effectively adds the count to it).  If it
# crossed zero, take the absolute value and clear SE.
#
ap_n_fx:
	mov.l		%d1,%d0			# copy counter to d0
	mov.l		(%sp),%d1		# get adjusted exp from memory
	sub.l		%d0,%d1			# subtract count from exp magnitude
	bgt.b		ap_n_fm			# if still pos, go fix mantissa
	neg.l		%d1			# take abs of exp and clr SE
	mov.l		(%a0),%d4		# load lword 1 to d4
	and.l		&0xbfffffff,%d4		# and clr SE in d4
	and.l		&0xbfffffff,(%a0)	# and in memory
#
# Calculate the mantissa divisor to compensate for the zeros removed
# from the mantissa.  d0 holds the zero count; each bit of the count
# selects the next PTENRN entry (12 bytes each).
#
ap_n_fm:
	lea.l		PTENRN(%pc),%a1		# get address of power-of-ten table
	clr.l		%d3			# init table index
	fmov.s		&0x3f800000,%fp1	# init fp1 to 1
	mov.l		&3,%d2			# init d2 to count bits in counter
						# (d2 is not referenced by the loop below)
ap_n_el:
	asr.l		&1,%d0			# shift lsb into carry
	bcc.b		ap_n_en			# if bit was 0, skip the multiply
	fmul.x		(%a1,%d3),%fp1		# mul by 10**(d3_bit_no)
ap_n_en:
	add.l		&12,%d3			# inc d3 to next table entry
	tst.l		%d0			# check if d0 is zero
	bne.b		ap_n_el			# if not, get next bit
	fdiv.x		%fp1,%fp0		# div mantissa by 10**(no_bits_shifted)
13298*4882a593Smuzhiyun#
13299*4882a593Smuzhiyun#
13300*4882a593Smuzhiyun# Calculate power-of-ten factor from adjusted and shifted exponent.
13301*4882a593Smuzhiyun#
13302*4882a593Smuzhiyun# Register usage:
13303*4882a593Smuzhiyun#
13304*4882a593Smuzhiyun#  pwrten:
13305*4882a593Smuzhiyun#	(*)  d0: temp
13306*4882a593Smuzhiyun#	( )  d1: exponent
13307*4882a593Smuzhiyun#	(*)  d2: {FPCR[6:5],SM,SE} as index in RTABLE; temp
13308*4882a593Smuzhiyun#	(*)  d3: FPCR work copy
13309*4882a593Smuzhiyun#	( )  d4: first word of bcd
13310*4882a593Smuzhiyun#	(*)  a1: RTABLE pointer
13311*4882a593Smuzhiyun#  calc_p:
13312*4882a593Smuzhiyun#	(*)  d0: temp
13313*4882a593Smuzhiyun#	( )  d1: exponent
13314*4882a593Smuzhiyun#	(*)  d3: PWRTxx table index
13315*4882a593Smuzhiyun#	( )  a0: pointer to working copy of bcd
13316*4882a593Smuzhiyun#	(*)  a1: PWRTxx pointer
13317*4882a593Smuzhiyun#	(*) fp1: power-of-ten accumulator
13318*4882a593Smuzhiyun#
13319*4882a593Smuzhiyun# Pwrten calculates the exponent factor in the selected rounding mode
13320*4882a593Smuzhiyun# according to the following table:
13321*4882a593Smuzhiyun#
13322*4882a593Smuzhiyun#	Sign of Mant  Sign of Exp  Rounding Mode  PWRTEN Rounding Mode
13323*4882a593Smuzhiyun#
13324*4882a593Smuzhiyun#	ANY	  ANY	RN	RN
13325*4882a593Smuzhiyun#
13326*4882a593Smuzhiyun#	 +	   +	RP	RP
13327*4882a593Smuzhiyun#	 -	   +	RP	RM
13328*4882a593Smuzhiyun#	 +	   -	RP	RM
13329*4882a593Smuzhiyun#	 -	   -	RP	RP
13330*4882a593Smuzhiyun#
13331*4882a593Smuzhiyun#	 +	   +	RM	RM
13332*4882a593Smuzhiyun#	 -	   +	RM	RP
13333*4882a593Smuzhiyun#	 +	   -	RM	RP
13334*4882a593Smuzhiyun#	 -	   -	RM	RM
13335*4882a593Smuzhiyun#
13336*4882a593Smuzhiyun#	 +	   +	RZ	RM
13337*4882a593Smuzhiyun#	 -	   +	RZ	RM
13338*4882a593Smuzhiyun#	 +	   -	RZ	RP
13339*4882a593Smuzhiyun#	 -	   -	RZ	RP
13340*4882a593Smuzhiyun#
13341*4882a593Smuzhiyun#
pwrten:
	mov.l		USER_FPCR(%a6),%d3	# get user's FPCR
	bfextu		%d3{&26:&2},%d2		# isolate rounding mode bits
	mov.l		(%a0),%d4		# reload 1st bcd word to d4
	asl.l		&2,%d2			# format d2 to be
	bfextu		%d4{&0:&2},%d0		# {FPCR[6],FPCR[5],SM,SE}
	add.l		%d0,%d2			# in d2 as index into RTABLE
	lea.l		RTABLE(%pc),%a1		# load rtable base
	mov.b		(%a1,%d2),%d0		# load new rounding bits from table
	clr.l		%d3			# clear d3 to force no exc and extended
	bfins		%d0,%d3{&26:&2}		# stuff new rounding bits in FPCR
	fmov.l		%d3,%fpcr		# write new FPCR
	asr.l		&1,%d0			# shift rounding bits to select the
	bcc.b		not_rp			# correct PTENxx table into a1
	lea.l		PTENRP(%pc),%a1		# it is RP
	bra.b		calc_p			# go to init section
not_rp:
	asr.l		&1,%d0			# keep checking
	bcc.b		not_rm
	lea.l		PTENRM(%pc),%a1		# it is RM
	bra.b		calc_p			# go to init section
not_rm:
	lea.l		PTENRN(%pc),%a1		# it is RN
#
# calc_p: accumulate fp1 = 10**(abs(exp)) by multiplying in the
# selected table's entry for each set bit of the exponent magnitude.
#
calc_p:
	mov.l		%d1,%d0			# copy exp to d0;use d0
	bpl.b		no_neg			# if exp is negative,
	neg.l		%d0			# invert it
	or.l		&0x40000000,(%a0)	# and set SE bit
no_neg:
	clr.l		%d3			# table index
	fmov.s		&0x3f800000,%fp1	# init fp1 to 1
e_loop:
	asr.l		&1,%d0			# shift next bit into carry
	bcc.b		e_next			# if zero, skip the mul
	fmul.x		(%a1,%d3),%fp1		# mul by 10**(d3_bit_no)
e_next:
	add.l		&12,%d3			# inc d3 to next table entry
	tst.l		%d0			# check if d0 is zero
	bne.b		e_loop			# not zero, continue shifting
13381*4882a593Smuzhiyun#
13382*4882a593Smuzhiyun#
13383*4882a593Smuzhiyun#  Check the sign of the adjusted exp and make the value in fp0 the
13384*4882a593Smuzhiyun#  same sign. If the exp was pos then multiply fp1*fp0;
13385*4882a593Smuzhiyun#  else divide fp0/fp1.
13386*4882a593Smuzhiyun#
13387*4882a593Smuzhiyun# Register Usage:
13388*4882a593Smuzhiyun#  norm:
13389*4882a593Smuzhiyun#	( )  a0: pointer to working bcd value
13390*4882a593Smuzhiyun#	(*) fp0: mantissa accumulator
13391*4882a593Smuzhiyun#	( ) fp1: scaling factor - 10**(abs(exp))
13392*4882a593Smuzhiyun#
pnorm:
	btst		&30,(%a0)		# test the sign of the exponent (SE bit)
	beq.b		mul			# if clear, exp is pos: go multiply
div:
	fdiv.x		%fp1,%fp0		# exp is negative: mant / 10**abs(exp)
	bra.b		end_dec
mul:
	fmul.x		%fp1,%fp0		# exp is positive: mant * 10**abs(exp)
13401*4882a593Smuzhiyun#
13402*4882a593Smuzhiyun#
13403*4882a593Smuzhiyun# Clean up and return with result in fp0.
13404*4882a593Smuzhiyun#
13405*4882a593Smuzhiyun# If the final mul/div in decbin incurred an inex exception,
13406*4882a593Smuzhiyun# it will be inex2, but will be reported as inex1 by get_op.
13407*4882a593Smuzhiyun#
end_dec:
	fmov.l		%fpsr,%d0		# get status register
	bclr		&inex2_bit+8,%d0	# test for inex2 and clear it
	beq.b		no_exc			# skip this if no exc
	ori.w		&inx1a_mask,2+USER_FPSR(%a6) # set INEX1/AINEX
no_exc:
	add.l		&0x4,%sp		# pop the 1-lword expA parameter
	fmovm.x		(%sp)+,&0x40		# restore fp1
	movm.l		(%sp)+,&0x3c		# restore d2-d5
	fmov.l		&0x0,%fpcr		# clear FPCR (modes and enables)
	fmov.l		&0x0,%fpsr		# clear FPSR
	rts
13420*4882a593Smuzhiyun
13421*4882a593Smuzhiyun#########################################################################
13422*4882a593Smuzhiyun# bindec(): Converts an input in extended precision format to bcd format#
13423*4882a593Smuzhiyun#									#
13424*4882a593Smuzhiyun# INPUT ***************************************************************	#
13425*4882a593Smuzhiyun#	a0 = pointer to the input extended precision value in memory.	#
13426*4882a593Smuzhiyun#	     the input may be either normalized, unnormalized, or	#
13427*4882a593Smuzhiyun#	     denormalized.						#
13428*4882a593Smuzhiyun#	d0 = contains the k-factor sign-extended to 32-bits.		#
13429*4882a593Smuzhiyun#									#
13430*4882a593Smuzhiyun# OUTPUT **************************************************************	#
13431*4882a593Smuzhiyun#	FP_SCR0(a6) = bcd format result on the stack.			#
13432*4882a593Smuzhiyun#									#
13433*4882a593Smuzhiyun# ALGORITHM ***********************************************************	#
13434*4882a593Smuzhiyun#									#
13435*4882a593Smuzhiyun#	A1.	Set RM and size ext;  Set SIGMA = sign of input.	#
13436*4882a593Smuzhiyun#		The k-factor is saved for use in d7. Clear the		#
13437*4882a593Smuzhiyun#		BINDEC_FLG for separating normalized/denormalized	#
13438*4882a593Smuzhiyun#		input.  If input is unnormalized or denormalized,	#
13439*4882a593Smuzhiyun#		normalize it.						#
13440*4882a593Smuzhiyun#									#
13441*4882a593Smuzhiyun#	A2.	Set X = abs(input).					#
13442*4882a593Smuzhiyun#									#
13443*4882a593Smuzhiyun#	A3.	Compute ILOG.						#
13444*4882a593Smuzhiyun#		ILOG is the log base 10 of the input value.  It is	#
13445*4882a593Smuzhiyun#		approximated by adding e + 0.f when the original	#
13446*4882a593Smuzhiyun#		value is viewed as 2^^e * 1.f in extended precision.	#
13447*4882a593Smuzhiyun#		This value is stored in d6.				#
13448*4882a593Smuzhiyun#									#
13449*4882a593Smuzhiyun#	A4.	Clr INEX bit.						#
13450*4882a593Smuzhiyun#		The operation in A3 above may have set INEX2.		#
13451*4882a593Smuzhiyun#									#
13452*4882a593Smuzhiyun#	A5.	Set ICTR = 0;						#
13453*4882a593Smuzhiyun#		ICTR is a flag used in A13.  It must be set before the	#
13454*4882a593Smuzhiyun#		loop entry A6.						#
13455*4882a593Smuzhiyun#									#
13456*4882a593Smuzhiyun#	A6.	Calculate LEN.						#
13457*4882a593Smuzhiyun#		LEN is the number of digits to be displayed.  The	#
13458*4882a593Smuzhiyun#		k-factor can dictate either the total number of digits,	#
13459*4882a593Smuzhiyun#		if it is a positive number, or the number of digits	#
13460*4882a593Smuzhiyun#		after the decimal point which are to be included as	#
13461*4882a593Smuzhiyun#		significant.  See the 68882 manual for examples.	#
13462*4882a593Smuzhiyun#		If LEN is computed to be greater than 17, set OPERR in	#
13463*4882a593Smuzhiyun#		USER_FPSR.  LEN is stored in d4.			#
13464*4882a593Smuzhiyun#									#
13465*4882a593Smuzhiyun#	A7.	Calculate SCALE.					#
13466*4882a593Smuzhiyun#		SCALE is equal to 10^ISCALE, where ISCALE is the number	#
13467*4882a593Smuzhiyun#		of decimal places needed to insure LEN integer digits	#
13468*4882a593Smuzhiyun#		in the output before conversion to bcd. LAMBDA is the	#
13469*4882a593Smuzhiyun#		sign of ISCALE, used in A9. Fp1 contains		#
13470*4882a593Smuzhiyun#		10^^(abs(ISCALE)) using a rounding mode which is a	#
13471*4882a593Smuzhiyun#		function of the original rounding mode and the signs	#
13472*4882a593Smuzhiyun#		of ISCALE and X.  A table is given in the code.		#
13473*4882a593Smuzhiyun#									#
13474*4882a593Smuzhiyun#	A8.	Clr INEX; Force RZ.					#
13475*4882a593Smuzhiyun#		The operation in A3 above may have set INEX2.		#
13476*4882a593Smuzhiyun#		RZ mode is forced for the scaling operation to insure	#
13477*4882a593Smuzhiyun#		only one rounding error.  The grs bits are collected in #
13478*4882a593Smuzhiyun#		the INEX flag for use in A10.				#
13479*4882a593Smuzhiyun#									#
13480*4882a593Smuzhiyun#	A9.	Scale X -> Y.						#
13481*4882a593Smuzhiyun#		The mantissa is scaled to the desired number of		#
13482*4882a593Smuzhiyun#		significant digits.  The excess digits are collected	#
13483*4882a593Smuzhiyun#		in INEX2.						#
13484*4882a593Smuzhiyun#									#
13485*4882a593Smuzhiyun#	A10.	Or in INEX.						#
13486*4882a593Smuzhiyun#		If INEX is set, round error occurred.  This is		#
13487*4882a593Smuzhiyun#		compensated for by 'or-ing' in the INEX2 flag to	#
13488*4882a593Smuzhiyun#		the lsb of Y.						#
13489*4882a593Smuzhiyun#									#
13490*4882a593Smuzhiyun#	A11.	Restore original FPCR; set size ext.			#
13491*4882a593Smuzhiyun#		Perform FINT operation in the user's rounding mode.	#
13492*4882a593Smuzhiyun#		Keep the size to extended.				#
13493*4882a593Smuzhiyun#									#
13494*4882a593Smuzhiyun#	A12.	Calculate YINT = FINT(Y) according to user's rounding	#
13495*4882a593Smuzhiyun#		mode.  The FPSP routine sintd0 is used.  The output	#
13496*4882a593Smuzhiyun#		is in fp0.						#
13497*4882a593Smuzhiyun#									#
13498*4882a593Smuzhiyun#	A13.	Check for LEN digits.					#
13499*4882a593Smuzhiyun#		If the int operation results in more than LEN digits,	#
13500*4882a593Smuzhiyun#		or less than LEN -1 digits, adjust ILOG and repeat from	#
13501*4882a593Smuzhiyun#		A6.  This test occurs only on the first pass.  If the	#
13502*4882a593Smuzhiyun#		result is exactly 10^LEN, decrement ILOG and divide	#
13503*4882a593Smuzhiyun#		the mantissa by 10.					#
13504*4882a593Smuzhiyun#									#
13505*4882a593Smuzhiyun#	A14.	Convert the mantissa to bcd.				#
13506*4882a593Smuzhiyun#		The binstr routine is used to convert the LEN digit	#
13507*4882a593Smuzhiyun#		mantissa to bcd in memory.  The input to binstr is	#
13508*4882a593Smuzhiyun#		to be a fraction; i.e. (mantissa)/10^LEN and adjusted	#
13509*4882a593Smuzhiyun#		such that the decimal point is to the left of bit 63.	#
13510*4882a593Smuzhiyun#		The bcd digits are stored in the correct position in	#
13511*4882a593Smuzhiyun#		the final string area in memory.			#
13512*4882a593Smuzhiyun#									#
13513*4882a593Smuzhiyun#	A15.	Convert the exponent to bcd.				#
13514*4882a593Smuzhiyun#		As in A14 above, the exp is converted to bcd and the	#
13515*4882a593Smuzhiyun#		digits are stored in the final string.			#
13516*4882a593Smuzhiyun#		Test the length of the final exponent string.  If the	#
13517*4882a593Smuzhiyun#		length is 4, set operr.					#
13518*4882a593Smuzhiyun#									#
13519*4882a593Smuzhiyun#	A16.	Write sign bits to final string.			#
13520*4882a593Smuzhiyun#									#
13521*4882a593Smuzhiyun#########################################################################
13522*4882a593Smuzhiyun
set	BINDEC_FLG,	EXC_TEMP	# DENORM flag: set when normalized input is a denorm

# Constants in extended precision
PLOG2:
	long		0x3FFD0000,0x9A209A84,0xFBCFF798,0x00000000	# log10(2), rounded down
PLOG2UP1:
	long		0x3FFD0000,0x9A209A84,0xFBCFF799,0x00000000	# log10(2), rounded up (+1 ulp)

# Constants in single precision (each padded to a 16-byte slot)
FONE:
	long		0x3F800000,0x00000000,0x00000000,0x00000000	# 1.0
FTWO:
	long		0x40000000,0x00000000,0x00000000,0x00000000	# 2.0
FTEN:
	long		0x41200000,0x00000000,0x00000000,0x00000000	# 10.0
F4933:
	long		0x459A2800,0x00000000,0x00000000,0x00000000	# 4933.0

# Rounding-mode translation table (values: 0=RN, 2=RM, 3=RP); rows
# match the {FPCR[6:5],LAMBDA,SIGN(X)} table given in the A7 step of
# the bindec header comment.
RBDTBL:
	byte		0,0,0,0
	byte		3,3,2,2
	byte		3,2,2,3
	byte		2,3,3,2
13546*4882a593Smuzhiyun
13547*4882a593Smuzhiyun#	Implementation Notes:
13548*4882a593Smuzhiyun#
13549*4882a593Smuzhiyun#	The registers are used as follows:
13550*4882a593Smuzhiyun#
13551*4882a593Smuzhiyun#		d0: scratch; LEN input to binstr
13552*4882a593Smuzhiyun#		d1: scratch
13553*4882a593Smuzhiyun#		d2: upper 32-bits of mantissa for binstr
13554*4882a593Smuzhiyun#		d3: scratch;lower 32-bits of mantissa for binstr
13555*4882a593Smuzhiyun#		d4: LEN
13556*4882a593Smuzhiyun#		d5: LAMBDA/ICTR
13557*4882a593Smuzhiyun#		d6: ILOG
13558*4882a593Smuzhiyun#		d7: k-factor
13559*4882a593Smuzhiyun#		a0: ptr for original operand/final result
13560*4882a593Smuzhiyun#		a1: scratch pointer
13561*4882a593Smuzhiyun#		a2: pointer to FP_X; abs(original value) in ext
13562*4882a593Smuzhiyun#		fp0: scratch
13563*4882a593Smuzhiyun#		fp1: scratch
13564*4882a593Smuzhiyun#		fp2: scratch
13565*4882a593Smuzhiyun#		F_SCR1:
13566*4882a593Smuzhiyun#		F_SCR2:
13567*4882a593Smuzhiyun#		L_SCR1:
13568*4882a593Smuzhiyun#		L_SCR2:
13569*4882a593Smuzhiyun
13570*4882a593Smuzhiyun	global		bindec
13571*4882a593Smuzhiyunbindec:
13572*4882a593Smuzhiyun	movm.l		&0x3f20,-(%sp)	#  {%d2-%d7/%a2}
13573*4882a593Smuzhiyun	fmovm.x		&0x7,-(%sp)	#  {%fp0-%fp2}
13574*4882a593Smuzhiyun
13575*4882a593Smuzhiyun# A1. Set RM and size ext. Set SIGMA = sign input;
13576*4882a593Smuzhiyun#     The k-factor is saved for use in d7.  Clear BINDEC_FLG for
13577*4882a593Smuzhiyun#     separating  normalized/denormalized input.  If the input
13578*4882a593Smuzhiyun#     is a denormalized number, set the BINDEC_FLG memory word
13579*4882a593Smuzhiyun#     to signal denorm.  If the input is unnormalized, normalize
13580*4882a593Smuzhiyun#     the input and test for denormalized result.
13581*4882a593Smuzhiyun#
13582*4882a593Smuzhiyun	fmov.l		&rm_mode*0x10,%fpcr	# set RM and ext
13583*4882a593Smuzhiyun	mov.l		(%a0),L_SCR2(%a6)	# save exponent for sign check
13584*4882a593Smuzhiyun	mov.l		%d0,%d7		# move k-factor to d7
13585*4882a593Smuzhiyun
13586*4882a593Smuzhiyun	clr.b		BINDEC_FLG(%a6)	# clr norm/denorm flag
13587*4882a593Smuzhiyun	cmpi.b		STAG(%a6),&DENORM # is input a DENORM?
13588*4882a593Smuzhiyun	bne.w		A2_str		# no; input is a NORM
13589*4882a593Smuzhiyun
13590*4882a593Smuzhiyun#
13591*4882a593Smuzhiyun# Normalize the denorm
13592*4882a593Smuzhiyun#
13593*4882a593Smuzhiyunun_de_norm:
13594*4882a593Smuzhiyun	mov.w		(%a0),%d0
13595*4882a593Smuzhiyun	and.w		&0x7fff,%d0	# strip sign of normalized exp
13596*4882a593Smuzhiyun	mov.l		4(%a0),%d1
13597*4882a593Smuzhiyun	mov.l		8(%a0),%d2
13598*4882a593Smuzhiyunnorm_loop:
13599*4882a593Smuzhiyun	sub.w		&1,%d0
13600*4882a593Smuzhiyun	lsl.l		&1,%d2
13601*4882a593Smuzhiyun	roxl.l		&1,%d1
13602*4882a593Smuzhiyun	tst.l		%d1
13603*4882a593Smuzhiyun	bge.b		norm_loop
13604*4882a593Smuzhiyun#
13605*4882a593Smuzhiyun# Test if the normalized input is denormalized
13606*4882a593Smuzhiyun#
13607*4882a593Smuzhiyun	tst.w		%d0
13608*4882a593Smuzhiyun	bgt.b		pos_exp		# if greater than zero, it is a norm
13609*4882a593Smuzhiyun	st		BINDEC_FLG(%a6)	# set flag for denorm
13610*4882a593Smuzhiyunpos_exp:
13611*4882a593Smuzhiyun	and.w		&0x7fff,%d0	# strip sign of normalized exp
13612*4882a593Smuzhiyun	mov.w		%d0,(%a0)
13613*4882a593Smuzhiyun	mov.l		%d1,4(%a0)
13614*4882a593Smuzhiyun	mov.l		%d2,8(%a0)
13615*4882a593Smuzhiyun
13616*4882a593Smuzhiyun# A2. Set X = abs(input).
13617*4882a593Smuzhiyun#
13618*4882a593SmuzhiyunA2_str:
13619*4882a593Smuzhiyun	mov.l		(%a0),FP_SCR1(%a6)	# move input to work space
13620*4882a593Smuzhiyun	mov.l		4(%a0),FP_SCR1+4(%a6)	# move input to work space
13621*4882a593Smuzhiyun	mov.l		8(%a0),FP_SCR1+8(%a6)	# move input to work space
13622*4882a593Smuzhiyun	and.l		&0x7fffffff,FP_SCR1(%a6)	# create abs(X)
13623*4882a593Smuzhiyun
13624*4882a593Smuzhiyun# A3. Compute ILOG.
13625*4882a593Smuzhiyun#     ILOG is the log base 10 of the input value.  It is approx-
13626*4882a593Smuzhiyun#     imated by adding e + 0.f when the original value is viewed
13627*4882a593Smuzhiyun#     as 2^^e * 1.f in extended precision.  This value is stored
13628*4882a593Smuzhiyun#     in d6.
13629*4882a593Smuzhiyun#
13630*4882a593Smuzhiyun# Register usage:
13631*4882a593Smuzhiyun#	Input/Output
13632*4882a593Smuzhiyun#	d0: k-factor/exponent
13633*4882a593Smuzhiyun#	d2: x/x
13634*4882a593Smuzhiyun#	d3: x/x
13635*4882a593Smuzhiyun#	d4: x/x
13636*4882a593Smuzhiyun#	d5: x/x
13637*4882a593Smuzhiyun#	d6: x/ILOG
13638*4882a593Smuzhiyun#	d7: k-factor/Unchanged
13639*4882a593Smuzhiyun#	a0: ptr for original operand/final result
13640*4882a593Smuzhiyun#	a1: x/x
13641*4882a593Smuzhiyun#	a2: x/x
13642*4882a593Smuzhiyun#	fp0: x/float(ILOG)
13643*4882a593Smuzhiyun#	fp1: x/x
13644*4882a593Smuzhiyun#	fp2: x/x
13645*4882a593Smuzhiyun#	F_SCR1:x/x
13646*4882a593Smuzhiyun#	F_SCR2:Abs(X)/Abs(X) with $3fff exponent
13647*4882a593Smuzhiyun#	L_SCR1:x/x
13648*4882a593Smuzhiyun#	L_SCR2:first word of X packed/Unchanged
13649*4882a593Smuzhiyun
13650*4882a593Smuzhiyun	tst.b		BINDEC_FLG(%a6)	# check for denorm
13651*4882a593Smuzhiyun	beq.b		A3_cont		# if clr, continue with norm
13652*4882a593Smuzhiyun	mov.l		&-4933,%d6	# force ILOG = -4933
13653*4882a593Smuzhiyun	bra.b		A4_str
13654*4882a593SmuzhiyunA3_cont:
13655*4882a593Smuzhiyun	mov.w		FP_SCR1(%a6),%d0	# move exp to d0
13656*4882a593Smuzhiyun	mov.w		&0x3fff,FP_SCR1(%a6)	# replace exponent with 0x3fff
13657*4882a593Smuzhiyun	fmov.x		FP_SCR1(%a6),%fp0	# now fp0 has 1.f
13658*4882a593Smuzhiyun	sub.w		&0x3fff,%d0	# strip off bias
13659*4882a593Smuzhiyun	fadd.w		%d0,%fp0	# add in exp
13660*4882a593Smuzhiyun	fsub.s		FONE(%pc),%fp0	# subtract off 1.0
13661*4882a593Smuzhiyun	fbge.w		pos_res		# if pos, branch
13662*4882a593Smuzhiyun	fmul.x		PLOG2UP1(%pc),%fp0	# if neg, mul by LOG2UP1
13663*4882a593Smuzhiyun	fmov.l		%fp0,%d6	# put ILOG in d6 as a lword
13664*4882a593Smuzhiyun	bra.b		A4_str		# go move out ILOG
13665*4882a593Smuzhiyunpos_res:
13666*4882a593Smuzhiyun	fmul.x		PLOG2(%pc),%fp0	# if pos, mul by LOG2
13667*4882a593Smuzhiyun	fmov.l		%fp0,%d6	# put ILOG in d6 as a lword
13668*4882a593Smuzhiyun
13669*4882a593Smuzhiyun
13670*4882a593Smuzhiyun# A4. Clr INEX bit.
13671*4882a593Smuzhiyun#     The operation in A3 above may have set INEX2.
13672*4882a593Smuzhiyun
13673*4882a593SmuzhiyunA4_str:
13674*4882a593Smuzhiyun	fmov.l		&0,%fpsr	# zero all of fpsr - nothing needed
13675*4882a593Smuzhiyun
13676*4882a593Smuzhiyun
13677*4882a593Smuzhiyun# A5. Set ICTR = 0;
13678*4882a593Smuzhiyun#     ICTR is a flag used in A13.  It must be set before the
13679*4882a593Smuzhiyun#     loop entry A6. The lower word of d5 is used for ICTR.
13680*4882a593Smuzhiyun
13681*4882a593Smuzhiyun	clr.w		%d5		# clear ICTR
13682*4882a593Smuzhiyun
13683*4882a593Smuzhiyun# A6. Calculate LEN.
13684*4882a593Smuzhiyun#     LEN is the number of digits to be displayed.  The k-factor
13685*4882a593Smuzhiyun#     can dictate either the total number of digits, if it is
13686*4882a593Smuzhiyun#     a positive number, or the number of digits after the
13687*4882a593Smuzhiyun#     original decimal point which are to be included as
13688*4882a593Smuzhiyun#     significant.  See the 68882 manual for examples.
13689*4882a593Smuzhiyun#     If LEN is computed to be greater than 17, set OPERR in
13690*4882a593Smuzhiyun#     USER_FPSR.  LEN is stored in d4.
13691*4882a593Smuzhiyun#
13692*4882a593Smuzhiyun# Register usage:
13693*4882a593Smuzhiyun#	Input/Output
13694*4882a593Smuzhiyun#	d0: exponent/Unchanged
13695*4882a593Smuzhiyun#	d2: x/x/scratch
13696*4882a593Smuzhiyun#	d3: x/x
13697*4882a593Smuzhiyun#	d4: exc picture/LEN
13698*4882a593Smuzhiyun#	d5: ICTR/Unchanged
13699*4882a593Smuzhiyun#	d6: ILOG/Unchanged
13700*4882a593Smuzhiyun#	d7: k-factor/Unchanged
13701*4882a593Smuzhiyun#	a0: ptr for original operand/final result
13702*4882a593Smuzhiyun#	a1: x/x
13703*4882a593Smuzhiyun#	a2: x/x
13704*4882a593Smuzhiyun#	fp0: float(ILOG)/Unchanged
13705*4882a593Smuzhiyun#	fp1: x/x
13706*4882a593Smuzhiyun#	fp2: x/x
13707*4882a593Smuzhiyun#	F_SCR1:x/x
13708*4882a593Smuzhiyun#	F_SCR2:Abs(X) with $3fff exponent/Unchanged
13709*4882a593Smuzhiyun#	L_SCR1:x/x
13710*4882a593Smuzhiyun#	L_SCR2:first word of X packed/Unchanged
13711*4882a593Smuzhiyun
13712*4882a593SmuzhiyunA6_str:
13713*4882a593Smuzhiyun	tst.l		%d7		# branch on sign of k
13714*4882a593Smuzhiyun	ble.b		k_neg		# if k <= 0, LEN = ILOG + 1 - k
13715*4882a593Smuzhiyun	mov.l		%d7,%d4		# if k > 0, LEN = k
13716*4882a593Smuzhiyun	bra.b		len_ck		# skip to LEN check
13717*4882a593Smuzhiyunk_neg:
13718*4882a593Smuzhiyun	mov.l		%d6,%d4		# first load ILOG to d4
13719*4882a593Smuzhiyun	sub.l		%d7,%d4		# subtract off k
13720*4882a593Smuzhiyun	addq.l		&1,%d4		# add in the 1
13721*4882a593Smuzhiyunlen_ck:
13722*4882a593Smuzhiyun	tst.l		%d4		# LEN check: branch on sign of LEN
13723*4882a593Smuzhiyun	ble.b		LEN_ng		# if neg, set LEN = 1
13724*4882a593Smuzhiyun	cmp.l		%d4,&17		# test if LEN > 17
13725*4882a593Smuzhiyun	ble.b		A7_str		# if not, forget it
13726*4882a593Smuzhiyun	mov.l		&17,%d4		# set max LEN = 17
13727*4882a593Smuzhiyun	tst.l		%d7		# if negative, never set OPERR
13728*4882a593Smuzhiyun	ble.b		A7_str		# if positive, continue
13729*4882a593Smuzhiyun	or.l		&opaop_mask,USER_FPSR(%a6)	# set OPERR & AIOP in USER_FPSR
13730*4882a593Smuzhiyun	bra.b		A7_str		# finished here
13731*4882a593SmuzhiyunLEN_ng:
13732*4882a593Smuzhiyun	mov.l		&1,%d4		# min LEN is 1
13733*4882a593Smuzhiyun
13734*4882a593Smuzhiyun
13735*4882a593Smuzhiyun# A7. Calculate SCALE.
13736*4882a593Smuzhiyun#     SCALE is equal to 10^ISCALE, where ISCALE is the number
13737*4882a593Smuzhiyun#     of decimal places needed to insure LEN integer digits
13738*4882a593Smuzhiyun#     in the output before conversion to bcd. LAMBDA is the sign
13739*4882a593Smuzhiyun#     of ISCALE, used in A9.  Fp1 contains 10^^(abs(ISCALE)) using
13740*4882a593Smuzhiyun#     the rounding mode as given in the following table (see
13741*4882a593Smuzhiyun#     Coonen, p. 7.23 as ref.; however, the SCALE variable is
13742*4882a593Smuzhiyun#     of opposite sign in bindec.sa from Coonen).
13743*4882a593Smuzhiyun#
13744*4882a593Smuzhiyun#	Initial					USE
13745*4882a593Smuzhiyun#	FPCR[6:5]	LAMBDA	SIGN(X)		FPCR[6:5]
13746*4882a593Smuzhiyun#	----------------------------------------------
13747*4882a593Smuzhiyun#	 RN	00	   0	   0		00/0	RN
13748*4882a593Smuzhiyun#	 RN	00	   0	   1		00/0	RN
13749*4882a593Smuzhiyun#	 RN	00	   1	   0		00/0	RN
13750*4882a593Smuzhiyun#	 RN	00	   1	   1		00/0	RN
13751*4882a593Smuzhiyun#	 RZ	01	   0	   0		11/3	RP
13752*4882a593Smuzhiyun#	 RZ	01	   0	   1		11/3	RP
13753*4882a593Smuzhiyun#	 RZ	01	   1	   0		10/2	RM
13754*4882a593Smuzhiyun#	 RZ	01	   1	   1		10/2	RM
13755*4882a593Smuzhiyun#	 RM	10	   0	   0		11/3	RP
13756*4882a593Smuzhiyun#	 RM	10	   0	   1		10/2	RM
13757*4882a593Smuzhiyun#	 RM	10	   1	   0		10/2	RM
13758*4882a593Smuzhiyun#	 RM	10	   1	   1		11/3	RP
13759*4882a593Smuzhiyun#	 RP	11	   0	   0		10/2	RM
13760*4882a593Smuzhiyun#	 RP	11	   0	   1		11/3	RP
13761*4882a593Smuzhiyun#	 RP	11	   1	   0		11/3	RP
13762*4882a593Smuzhiyun#	 RP	11	   1	   1		10/2	RM
13763*4882a593Smuzhiyun#
13764*4882a593Smuzhiyun# Register usage:
13765*4882a593Smuzhiyun#	Input/Output
13766*4882a593Smuzhiyun#	d0: exponent/scratch - final is 0
13767*4882a593Smuzhiyun#	d2: x/0 or 24 for A9
13768*4882a593Smuzhiyun#	d3: x/scratch - offset ptr into PTENRM array
13769*4882a593Smuzhiyun#	d4: LEN/Unchanged
13770*4882a593Smuzhiyun#	d5: 0/ICTR:LAMBDA
13771*4882a593Smuzhiyun#	d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k))
13772*4882a593Smuzhiyun#	d7: k-factor/Unchanged
13773*4882a593Smuzhiyun#	a0: ptr for original operand/final result
13774*4882a593Smuzhiyun#	a1: x/ptr to PTENRM array
13775*4882a593Smuzhiyun#	a2: x/x
13776*4882a593Smuzhiyun#	fp0: float(ILOG)/Unchanged
13777*4882a593Smuzhiyun#	fp1: x/10^ISCALE
13778*4882a593Smuzhiyun#	fp2: x/x
13779*4882a593Smuzhiyun#	F_SCR1:x/x
13780*4882a593Smuzhiyun#	F_SCR2:Abs(X) with $3fff exponent/Unchanged
13781*4882a593Smuzhiyun#	L_SCR1:x/x
13782*4882a593Smuzhiyun#	L_SCR2:first word of X packed/Unchanged
13783*4882a593Smuzhiyun
A7_str:
	tst.l		%d7		# test sign of k
	bgt.b		k_pos		# if pos and > 0, skip this
	cmp.l		%d7,%d6		# test k - ILOG
	blt.b		k_pos		# if ILOG >= k, skip this
	mov.l		%d7,%d6		# if ((k<0) & (ILOG < k)) ILOG = k
k_pos:
# ISCALE = ILOG + 1 - LEN; its magnitude selects the power of ten,
# its sign (LAMBDA) selects divide vs. multiply in A9.
	mov.l		%d6,%d0		# calc ILOG + 1 - LEN in d0
	addq.l		&1,%d0		# add the 1
	sub.l		%d4,%d0		# sub off LEN
	swap		%d5		# use upper word of d5 for LAMBDA
	clr.w		%d5		# set it zero initially
	clr.w		%d2		# set up d2 for very small case
	tst.l		%d0		# test sign of ISCALE
	bge.b		iscale		# if pos, skip next inst
	addq.w		&1,%d5		# if neg, set LAMBDA true
	cmp.l		%d0,&0xffffecd4	# test iscale <= -4908 (0xffffecd4 = -4908)
	bgt.b		no_inf		# if false, skip rest
	add.l		&24,%d0		# add in 24 to iscale
	mov.l		&24,%d2		# put 24 in d2 for A9
no_inf:
	neg.l		%d0		# and take abs of ISCALE
iscale:
# Build the 4-bit index {FPCR[6:5], LAMBDA, sign(X)} into RBDTBL
# (see the table in the header above) to pick the USE rounding mode.
	fmov.s		FONE(%pc),%fp1	# init fp1 to 1
	bfextu		USER_FPCR(%a6){&26:&2},%d1	# get initial rmode bits
	lsl.w		&1,%d1		# put them in bits 2:1
	add.w		%d5,%d1		# add in LAMBDA
	lsl.w		&1,%d1		# put them in bits 3:1
	tst.l		L_SCR2(%a6)	# test sign of original x
	bge.b		x_pos		# if pos, don't set bit 0
	addq.l		&1,%d1		# if neg, set bit 0
x_pos:
	lea.l		RBDTBL(%pc),%a2	# load rbdtbl base
	mov.b		(%a2,%d1),%d3	# load d3 with new rmode
	lsl.l		&4,%d3		# put bits in proper position
	fmov.l		%d3,%fpcr	# load bits into fpu
	lsr.l		&4,%d3		# put bits in proper position
	tst.b		%d3		# decode new rmode for pten table
	bne.b		not_rn		# if zero, it is RN
	lea.l		PTENRN(%pc),%a1	# load a1 with RN table base
	bra.b		rmode		# exit decode
not_rn:
	lsr.b		&1,%d3		# get lsb in carry
	bcc.b		not_rp2		# if carry clear, it is RM
	lea.l		PTENRP(%pc),%a1	# load a1 with RP table base
	bra.b		rmode		# exit decode
not_rp2:
	lea.l		PTENRM(%pc),%a1	# load a1 with RM table base
rmode:
	clr.l		%d3		# clr table index
# Binary-exponentiation loop: for each set bit i of abs(ISCALE),
# multiply fp1 by the 10^(2^i) entry (12 bytes each) of the table.
e_loop2:
	lsr.l		&1,%d0		# shift next bit into carry
	bcc.b		e_next2		# if zero, skip the mul
	fmul.x		(%a1,%d3),%fp1	# mul by 10**(d3_bit_no)
e_next2:
	add.l		&12,%d3		# inc d3 to next pwrten table entry
	tst.l		%d0		# test if ISCALE is zero
	bne.b		e_loop2		# if not, loop
13842*4882a593Smuzhiyun
13843*4882a593Smuzhiyun# A8. Clr INEX; Force RZ.
13844*4882a593Smuzhiyun#     The operation in A3 above may have set INEX2.
#     RZ mode is forced for the scaling operation to ensure
13846*4882a593Smuzhiyun#     only one rounding error.  The grs bits are collected in
13847*4882a593Smuzhiyun#     the INEX flag for use in A10.
13848*4882a593Smuzhiyun#
13849*4882a593Smuzhiyun# Register usage:
13850*4882a593Smuzhiyun#	Input/Output
13851*4882a593Smuzhiyun
	fmov.l		&0,%fpsr	# clr INEX (and all other status bits)
	fmov.l		&rz_mode*0x10,%fpcr	# set RZ rounding mode for the A9 scaling
13854*4882a593Smuzhiyun
13855*4882a593Smuzhiyun# A9. Scale X -> Y.
13856*4882a593Smuzhiyun#     The mantissa is scaled to the desired number of significant
#     digits.  The excess digits are collected in INEX2.  If the
#     scaling is done by a multiply, check d2 for an excess
#     power-of-ten exponent value.  If not zero,
13859*4882a593Smuzhiyun#     the iscale value would have caused the pwrten calculation
13860*4882a593Smuzhiyun#     to overflow.  Only a negative iscale can cause this, so
13861*4882a593Smuzhiyun#     multiply by 10^(d2), which is now only allowed to be 24,
13862*4882a593Smuzhiyun#     with a multiply by 10^8 and 10^16, which is exact since
13863*4882a593Smuzhiyun#     10^24 is exact.  If the input was denormalized, we must
13864*4882a593Smuzhiyun#     create a busy stack frame with the mul command and the
13865*4882a593Smuzhiyun#     two operands, and allow the fpu to complete the multiply.
13866*4882a593Smuzhiyun#
13867*4882a593Smuzhiyun# Register usage:
13868*4882a593Smuzhiyun#	Input/Output
13869*4882a593Smuzhiyun#	d0: FPCR with RZ mode/Unchanged
13870*4882a593Smuzhiyun#	d2: 0 or 24/unchanged
13871*4882a593Smuzhiyun#	d3: x/x
13872*4882a593Smuzhiyun#	d4: LEN/Unchanged
13873*4882a593Smuzhiyun#	d5: ICTR:LAMBDA
13874*4882a593Smuzhiyun#	d6: ILOG/Unchanged
13875*4882a593Smuzhiyun#	d7: k-factor/Unchanged
13876*4882a593Smuzhiyun#	a0: ptr for original operand/final result
13877*4882a593Smuzhiyun#	a1: ptr to PTENRM array/Unchanged
13878*4882a593Smuzhiyun#	a2: x/x
13879*4882a593Smuzhiyun#	fp0: float(ILOG)/X adjusted for SCALE (Y)
13880*4882a593Smuzhiyun#	fp1: 10^ISCALE/Unchanged
13881*4882a593Smuzhiyun#	fp2: x/x
13882*4882a593Smuzhiyun#	F_SCR1:x/x
13883*4882a593Smuzhiyun#	F_SCR2:Abs(X) with $3fff exponent/Unchanged
13884*4882a593Smuzhiyun#	L_SCR1:x/x
13885*4882a593Smuzhiyun#	L_SCR2:first word of X packed/Unchanged
13886*4882a593Smuzhiyun
A9_str:
	fmov.x		(%a0),%fp0	# load X from memory
	fabs.x		%fp0		# use abs(X)
	tst.w		%d5		# LAMBDA is in lower word of d5
	bne.b		sc_mul		# if neg (LAMBDA = 1), scale by mul
	fdiv.x		%fp1,%fp0	# calculate X / SCALE -> Y to fp0
	bra.w		A10_st		# branch to A10

sc_mul:
	tst.b		BINDEC_FLG(%a6)	# check for denorm
	beq.w		A9_norm		# if norm, continue with mul

# for DENORM, we must calculate:
#	fp0 = input_op * 10^ISCALE * 10^24
# since the input operand is a DENORM, we can't multiply it directly.
# so, we do the multiplication of the exponents and mantissas separately.
# in this way, we avoid underflow on intermediate stages of the
# multiplication and guarantee a result without exception.
	fmovm.x		&0x2,-(%sp)	# save 10^ISCALE to stack

# Accumulate the combined exponent in d3: DENORM exp (made negative)
# + 10^ISCALE exp + 10^8 exp + 10^16 exp, removing the $3fff bias
# after each addition.
	mov.w		(%sp),%d3	# grab exponent
	andi.w		&0x7fff,%d3	# clear sign
	ori.w		&0x8000,(%a0)	# make DENORM exp negative
	add.w		(%a0),%d3	# add DENORM exp to 10^ISCALE exp
	subi.w		&0x3fff,%d3	# subtract BIAS
	add.w		36(%a1),%d3	# add in exp of 10^8 table entry
	subi.w		&0x3fff,%d3	# subtract BIAS
	add.w		48(%a1),%d3	# add in exp of 10^16 table entry
	subi.w		&0x3fff,%d3	# subtract BIAS

	bmi.w		sc_mul_err	# if result is DENORM, punt!!!

	andi.w		&0x8000,(%sp)	# keep sign
	or.w		%d3,(%sp)	# insert new exponent
	andi.w		&0x7fff,(%a0)	# clear sign bit on DENORM again
	mov.l		0x8(%a0),-(%sp) # put input op mantissa on stk
	mov.l		0x4(%a0),-(%sp)
	mov.l		&0x3fff0000,-(%sp) # force exp to zero
	fmovm.x		(%sp)+,&0x80	# load normalized DENORM into fp0
	fmul.x		(%sp)+,%fp0	# mul by 10^ISCALE w/ the combined exponent

#	fmul.x	36(%a1),%fp0	# multiply fp0 by 10^8
#	fmul.x	48(%a1),%fp0	# multiply fp0 by 10^16
# Multiply by the 10^8 and 10^16 mantissas with their exponents
# forced to zero; the true exponents were folded into d3 above.
	mov.l		36+8(%a1),-(%sp) # get 10^8 mantissa
	mov.l		36+4(%a1),-(%sp)
	mov.l		&0x3fff0000,-(%sp) # force exp to zero
	mov.l		48+8(%a1),-(%sp) # get 10^16 mantissa
	mov.l		48+4(%a1),-(%sp)
	mov.l		&0x3fff0000,-(%sp)# force exp to zero
	fmul.x		(%sp)+,%fp0	# multiply fp0 by 10^8
	fmul.x		(%sp)+,%fp0	# multiply fp0 by 10^16
	bra.b		A10_st

sc_mul_err:
	bra.b		sc_mul_err	# hang here; combined exp went negative (punt)

A9_norm:
	tst.w		%d2		# test for small exp case
	beq.b		A9_con		# if zero, continue as normal
	fmul.x		36(%a1),%fp0	# multiply fp0 by 10^8
	fmul.x		48(%a1),%fp0	# multiply fp0 by 10^16
A9_con:
	fmul.x		%fp1,%fp0	# calculate X * SCALE -> Y to fp0
13950*4882a593Smuzhiyun
13951*4882a593Smuzhiyun# A10. Or in INEX.
13952*4882a593Smuzhiyun#      If INEX is set, round error occurred.  This is compensated
13953*4882a593Smuzhiyun#      for by 'or-ing' in the INEX2 flag to the lsb of Y.
13954*4882a593Smuzhiyun#
13955*4882a593Smuzhiyun# Register usage:
13956*4882a593Smuzhiyun#	Input/Output
13957*4882a593Smuzhiyun#	d0: FPCR with RZ mode/FPSR with INEX2 isolated
13958*4882a593Smuzhiyun#	d2: x/x
13959*4882a593Smuzhiyun#	d3: x/x
13960*4882a593Smuzhiyun#	d4: LEN/Unchanged
13961*4882a593Smuzhiyun#	d5: ICTR:LAMBDA
13962*4882a593Smuzhiyun#	d6: ILOG/Unchanged
13963*4882a593Smuzhiyun#	d7: k-factor/Unchanged
13964*4882a593Smuzhiyun#	a0: ptr for original operand/final result
13965*4882a593Smuzhiyun#	a1: ptr to PTENxx array/Unchanged
13966*4882a593Smuzhiyun#	a2: x/ptr to FP_SCR1(a6)
13967*4882a593Smuzhiyun#	fp0: Y/Y with lsb adjusted
13968*4882a593Smuzhiyun#	fp1: 10^ISCALE/Unchanged
13969*4882a593Smuzhiyun#	fp2: x/x
13970*4882a593Smuzhiyun
A10_st:
	fmov.l		%fpsr,%d0	# get FPSR
	fmov.x		%fp0,FP_SCR1(%a6)	# move Y to memory
	lea.l		FP_SCR1(%a6),%a2	# load a2 with ptr to FP_SCR1
	btst		&9,%d0		# check if INEX2 set (FPSR bit 9)
	beq.b		A11_st		# if clear, skip rest
	or.l		&1,8(%a2)	# or in 1 to lsb of mantissa (stick the round error)
	fmov.x		FP_SCR1(%a6),%fp0	# write adjusted Y back to fpu
13979*4882a593Smuzhiyun
13980*4882a593Smuzhiyun
13981*4882a593Smuzhiyun# A11. Restore original FPCR; set size ext.
13982*4882a593Smuzhiyun#      Perform FINT operation in the user's rounding mode.  Keep
13983*4882a593Smuzhiyun#      the size to extended.  The sintdo entry point in the sint
13984*4882a593Smuzhiyun#      routine expects the FPCR value to be in USER_FPCR for
13985*4882a593Smuzhiyun#      mode and precision.  The original FPCR is saved in L_SCR1.
13986*4882a593Smuzhiyun
A11_st:
	mov.l		USER_FPCR(%a6),L_SCR1(%a6)	# save original FPCR for later restore
	and.l		&0x00000030,USER_FPCR(%a6)	# set size to ext,
#					;block exceptions (keep only rmode bits 5:4)
13991*4882a593Smuzhiyun
13992*4882a593Smuzhiyun
13993*4882a593Smuzhiyun# A12. Calculate YINT = FINT(Y) according to user's rounding mode.
13994*4882a593Smuzhiyun#      The FPSP routine sintd0 is used.  The output is in fp0.
13995*4882a593Smuzhiyun#
13996*4882a593Smuzhiyun# Register usage:
13997*4882a593Smuzhiyun#	Input/Output
13998*4882a593Smuzhiyun#	d0: FPSR with AINEX cleared/FPCR with size set to ext
13999*4882a593Smuzhiyun#	d2: x/x/scratch
14000*4882a593Smuzhiyun#	d3: x/x
14001*4882a593Smuzhiyun#	d4: LEN/Unchanged
14002*4882a593Smuzhiyun#	d5: ICTR:LAMBDA/Unchanged
14003*4882a593Smuzhiyun#	d6: ILOG/Unchanged
14004*4882a593Smuzhiyun#	d7: k-factor/Unchanged
14005*4882a593Smuzhiyun#	a0: ptr for original operand/src ptr for sintdo
14006*4882a593Smuzhiyun#	a1: ptr to PTENxx array/Unchanged
14007*4882a593Smuzhiyun#	a2: ptr to FP_SCR1(a6)/Unchanged
14008*4882a593Smuzhiyun#	a6: temp pointer to FP_SCR1(a6) - orig value saved and restored
14009*4882a593Smuzhiyun#	fp0: Y/YINT
14010*4882a593Smuzhiyun#	fp1: 10^ISCALE/Unchanged
14011*4882a593Smuzhiyun#	fp2: x/x
14012*4882a593Smuzhiyun#	F_SCR1:x/x
14013*4882a593Smuzhiyun#	F_SCR2:Y adjusted for inex/Y with original exponent
14014*4882a593Smuzhiyun#	L_SCR1:x/original USER_FPCR
14015*4882a593Smuzhiyun#	L_SCR2:first word of X packed/Unchanged
14016*4882a593Smuzhiyun
A12_st:
	movm.l	&0xc0c0,-(%sp)	# save regs used by sintd0	 {%d0-%d1/%a0-%a1}
	mov.l	L_SCR1(%a6),-(%sp)	# save L_SCR1 (original FPCR)
	mov.l	L_SCR2(%a6),-(%sp)	# save L_SCR2 (first word of X)

	lea.l		FP_SCR1(%a6),%a0	# a0 is ptr to FP_SCR1(a6)
	fmov.x		%fp0,(%a0)	# move Y to memory at FP_SCR1(a6)
	tst.l		L_SCR2(%a6)	# test sign of original operand
	bge.b		do_fint12		# if pos, use Y
	or.l		&0x80000000,(%a0)	# if neg, use -Y
do_fint12:
	mov.l	USER_FPSR(%a6),-(%sp)	# save user FPSR around the fint
#	bsr	sintdo		# sint routine returns int in fp0

# Inline replacement for the old sintdo call: fint in the user's
# rounding mode (set in A11), accruing any new exceptions.
	fmov.l	USER_FPCR(%a6),%fpcr	# user's rmode, ext precision
	fmov.l	&0x0,%fpsr			# clear the AEXC bits!!!
##	mov.l		USER_FPCR(%a6),%d0	# ext prec/keep rnd mode
##	andi.l		&0x00000030,%d0
##	fmov.l		%d0,%fpcr
	fint.x		FP_SCR1(%a6),%fp0	# do fint()
	fmov.l	%fpsr,%d0	# grab resulting status
	or.w	%d0,FPSR_EXCEPT(%a6)	# accrue exceptions from the fint
##	fmov.l		&0x0,%fpcr
##	fmov.l		%fpsr,%d0		# don't keep ccodes
##	or.w		%d0,FPSR_EXCEPT(%a6)

	mov.b	(%sp),USER_FPSR(%a6)	# restore saved FPSR status byte
	add.l	&4,%sp	# pop saved FPSR

	mov.l	(%sp)+,L_SCR2(%a6)	# restore L_SCR2
	mov.l	(%sp)+,L_SCR1(%a6)	# restore L_SCR1
	movm.l	(%sp)+,&0x303	# restore regs used by sint	 {%d0-%d1/%a0-%a1}

	mov.l	L_SCR2(%a6),FP_SCR1(%a6)	# restore original exponent
	mov.l	L_SCR1(%a6),USER_FPCR(%a6)	# restore user's FPCR
14052*4882a593Smuzhiyun
14053*4882a593Smuzhiyun# A13. Check for LEN digits.
14054*4882a593Smuzhiyun#      If the int operation results in more than LEN digits,
14055*4882a593Smuzhiyun#      or less than LEN -1 digits, adjust ILOG and repeat from
14056*4882a593Smuzhiyun#      A6.  This test occurs only on the first pass.  If the
14057*4882a593Smuzhiyun#      result is exactly 10^LEN, decrement ILOG and divide
14058*4882a593Smuzhiyun#      the mantissa by 10.  The calculation of 10^LEN cannot
14059*4882a593Smuzhiyun#      be inexact, since all powers of ten up to 10^27 are exact
14060*4882a593Smuzhiyun#      in extended precision, so the use of a previous power-of-ten
14061*4882a593Smuzhiyun#      table will introduce no error.
14062*4882a593Smuzhiyun#
14063*4882a593Smuzhiyun#
14064*4882a593Smuzhiyun# Register usage:
14065*4882a593Smuzhiyun#	Input/Output
14066*4882a593Smuzhiyun#	d0: FPCR with size set to ext/scratch final = 0
14067*4882a593Smuzhiyun#	d2: x/x
14068*4882a593Smuzhiyun#	d3: x/scratch final = x
14069*4882a593Smuzhiyun#	d4: LEN/LEN adjusted
14070*4882a593Smuzhiyun#	d5: ICTR:LAMBDA/LAMBDA:ICTR
14071*4882a593Smuzhiyun#	d6: ILOG/ILOG adjusted
14072*4882a593Smuzhiyun#	d7: k-factor/Unchanged
14073*4882a593Smuzhiyun#	a0: pointer into memory for packed bcd string formation
14074*4882a593Smuzhiyun#	a1: ptr to PTENxx array/Unchanged
14075*4882a593Smuzhiyun#	a2: ptr to FP_SCR1(a6)/Unchanged
14076*4882a593Smuzhiyun#	fp0: int portion of Y/abs(YINT) adjusted
14077*4882a593Smuzhiyun#	fp1: 10^ISCALE/Unchanged
14078*4882a593Smuzhiyun#	fp2: x/10^LEN
14079*4882a593Smuzhiyun#	F_SCR1:x/x
14080*4882a593Smuzhiyun#	F_SCR2:Y with original exponent/Unchanged
14081*4882a593Smuzhiyun#	L_SCR1:original USER_FPCR/Unchanged
14082*4882a593Smuzhiyun#	L_SCR2:first word of X packed/Unchanged
14083*4882a593Smuzhiyun
A13_st:
	swap		%d5		# put ICTR in lower word of d5
	tst.w		%d5		# check if ICTR = 0 (first pass?)
	bne		not_zr		# if non-zero, go to second test
#
# Compute 10^(LEN-1) by binary exponentiation off the table in a1
#
	fmov.s		FONE(%pc),%fp2	# init fp2 to 1.0
	mov.l		%d4,%d0		# put LEN in d0
	subq.l		&1,%d0		# d0 = LEN -1
	clr.l		%d3		# clr table index
l_loop:
	lsr.l		&1,%d0		# shift next bit into carry
	bcc.b		l_next		# if zero, skip the mul
	fmul.x		(%a1,%d3),%fp2	# mul by 10**(d3_bit_no)
l_next:
	add.l		&12,%d3		# inc d3 to next pwrten table entry
	tst.l		%d0		# test if LEN is zero
	bne.b		l_loop		# if not, loop
#
# 10^LEN-1 is computed for this test and A14.  If the input was
# denormalized, check only the case in which YINT > 10^LEN.
#
	tst.b		BINDEC_FLG(%a6)	# check if input was norm
	beq.b		A13_con		# if norm, continue with checking
	fabs.x		%fp0		# take abs of YINT
	bra		test_2
#
# Compare abs(YINT) to 10^(LEN-1) and 10^LEN
#
A13_con:
	fabs.x		%fp0		# take abs of YINT
	fcmp.x		%fp0,%fp2	# compare abs(YINT) with 10^(LEN-1)
	fbge.w		test_2		# if greater, do next test
	subq.l		&1,%d6		# subtract 1 from ILOG
	mov.w		&1,%d5		# set ICTR so A13 won't adjust again
	fmov.l		&rm_mode*0x10,%fpcr	# set rmode to RM
	fmul.s		FTEN(%pc),%fp2	# compute 10^LEN
	bra.w		A6_str		# return to A6 and recompute YINT
test_2:
	fmul.s		FTEN(%pc),%fp2	# compute 10^LEN
	fcmp.x		%fp0,%fp2	# compare abs(YINT) with 10^LEN
	fblt.w		A14_st		# if less, all is ok, go to A14
	fbgt.w		fix_ex		# if greater, fix and redo
	fdiv.s		FTEN(%pc),%fp0	# if equal, divide by 10
	addq.l		&1,%d6		# and inc ILOG
	bra.b		A14_st		# and continue elsewhere
fix_ex:
	addq.l		&1,%d6		# increment ILOG by 1
	mov.w		&1,%d5		# set ICTR so A13 won't adjust again
	fmov.l		&rm_mode*0x10,%fpcr	# set rmode to RM
	bra.w		A6_str		# return to A6 and recompute YINT
#
# Since ICTR <> 0, we have already been through one adjustment,
# and shouldn't have another; this is to check if abs(YINT) = 10^LEN
# 10^LEN is again computed using whatever table is in a1 since the
# value calculated cannot be inexact.
#
not_zr:
	fmov.s		FONE(%pc),%fp2	# init fp2 to 1.0
	mov.l		%d4,%d0		# put LEN in d0
	clr.l		%d3		# clr table index
z_loop:
	lsr.l		&1,%d0		# shift next bit into carry
	bcc.b		z_next		# if zero, skip the mul
	fmul.x		(%a1,%d3),%fp2	# mul by 10**(d3_bit_no)
z_next:
	add.l		&12,%d3		# inc d3 to next pwrten table entry
	tst.l		%d0		# test if LEN is zero
	bne.b		z_loop		# if not, loop
	fabs.x		%fp0		# get abs(YINT)
	fcmp.x		%fp0,%fp2	# check if abs(YINT) = 10^LEN
	fbneq.w		A14_st		# if not, skip this
	fdiv.s		FTEN(%pc),%fp0	# divide abs(YINT) by 10
	addq.l		&1,%d6		# and inc ILOG by 1
	addq.l		&1,%d4		# and inc LEN
	fmul.s		FTEN(%pc),%fp2	# if LEN++, then get 10^^LEN
14161*4882a593Smuzhiyun
14162*4882a593Smuzhiyun# A14. Convert the mantissa to bcd.
14163*4882a593Smuzhiyun#      The binstr routine is used to convert the LEN digit
14164*4882a593Smuzhiyun#      mantissa to bcd in memory.  The input to binstr is
14165*4882a593Smuzhiyun#      to be a fraction; i.e. (mantissa)/10^LEN and adjusted
14166*4882a593Smuzhiyun#      such that the decimal point is to the left of bit 63.
14167*4882a593Smuzhiyun#      The bcd digits are stored in the correct position in
14168*4882a593Smuzhiyun#      the final string area in memory.
14169*4882a593Smuzhiyun#
14170*4882a593Smuzhiyun#
14171*4882a593Smuzhiyun# Register usage:
14172*4882a593Smuzhiyun#	Input/Output
14173*4882a593Smuzhiyun#	d0: x/LEN call to binstr - final is 0
14174*4882a593Smuzhiyun#	d1: x/0
14175*4882a593Smuzhiyun#	d2: x/ms 32-bits of mant of abs(YINT)
14176*4882a593Smuzhiyun#	d3: x/ls 32-bits of mant of abs(YINT)
14177*4882a593Smuzhiyun#	d4: LEN/Unchanged
14178*4882a593Smuzhiyun#	d5: ICTR:LAMBDA/LAMBDA:ICTR
14179*4882a593Smuzhiyun#	d6: ILOG
14180*4882a593Smuzhiyun#	d7: k-factor/Unchanged
14181*4882a593Smuzhiyun#	a0: pointer into memory for packed bcd string formation
14182*4882a593Smuzhiyun#	    /ptr to first mantissa byte in result string
14183*4882a593Smuzhiyun#	a1: ptr to PTENxx array/Unchanged
14184*4882a593Smuzhiyun#	a2: ptr to FP_SCR1(a6)/Unchanged
14185*4882a593Smuzhiyun#	fp0: int portion of Y/abs(YINT) adjusted
14186*4882a593Smuzhiyun#	fp1: 10^ISCALE/Unchanged
14187*4882a593Smuzhiyun#	fp2: 10^LEN/Unchanged
14188*4882a593Smuzhiyun#	F_SCR1:x/Work area for final result
14189*4882a593Smuzhiyun#	F_SCR2:Y with original exponent/Unchanged
14190*4882a593Smuzhiyun#	L_SCR1:original USER_FPCR/Unchanged
14191*4882a593Smuzhiyun#	L_SCR2:first word of X packed/Unchanged
14192*4882a593Smuzhiyun
A14_st:
	fmov.l		&rz_mode*0x10,%fpcr	# force rz for conversion
	fdiv.x		%fp2,%fp0	# divide abs(YINT) by 10^LEN
	lea.l		FP_SCR0(%a6),%a0	# a0 = work area for the result string
	fmov.x		%fp0,(%a0)	# move abs(YINT)/10^LEN to memory
	mov.l		4(%a0),%d2	# move 2nd word of FP_RES to d2
	mov.l		8(%a0),%d3	# move 3rd word of FP_RES to d3
	clr.l		4(%a0)		# zero word 2 of FP_RES
	clr.l		8(%a0)		# zero word 3 of FP_RES
	mov.l		(%a0),%d0	# move exponent to d0
	swap		%d0		# put exponent in lower word
	beq.b		no_sft		# if zero, don't shift
	sub.l		&0x3ffd,%d0	# sub bias less 2 to make fract
	tst.l		%d0		# check if > 1
	bgt.b		no_sft		# if so, don't shift
	neg.l		%d0		# make exp positive
# Right-align the 64-bit mantissa in d2:d3 so the decimal point
# sits left of bit 63, as binstr expects.
m_loop:
	lsr.l		&1,%d2		# shift d2:d3 right, add 0s
	roxr.l		&1,%d3		# the number of places
	dbf.w		%d0,m_loop	# given in d0
no_sft:
	tst.l		%d2		# check for mantissa of zero
	bne.b		no_zr		# if not, go on
	tst.l		%d3		# continue zero check
	beq.b		zer_m		# if zero, go directly to binstr
no_zr:
	clr.l		%d1		# put zero in d1 for addx
	add.l		&0x00000080,%d3	# inc at bit 7
	addx.l		%d1,%d2		# continue inc
	and.l		&0xffffff80,%d3	# strip off lsb not used by 882
zer_m:
	mov.l		%d4,%d0		# put LEN in d0 for binstr call
	addq.l		&3,%a0		# a0 points to M16 byte in result
	bsr		binstr		# call binstr to convert mant
14227*4882a593Smuzhiyun
14228*4882a593Smuzhiyun
14229*4882a593Smuzhiyun# A15. Convert the exponent to bcd.
14230*4882a593Smuzhiyun#      As in A14 above, the exp is converted to bcd and the
14231*4882a593Smuzhiyun#      digits are stored in the final string.
14232*4882a593Smuzhiyun#
14233*4882a593Smuzhiyun#      Digits are stored in L_SCR1(a6) on return from BINDEC as:
14234*4882a593Smuzhiyun#
14235*4882a593Smuzhiyun#	 32               16 15                0
14236*4882a593Smuzhiyun#	-----------------------------------------
14237*4882a593Smuzhiyun#	|  0 | e3 | e2 | e1 | e4 |  X |  X |  X |
14238*4882a593Smuzhiyun#	-----------------------------------------
14239*4882a593Smuzhiyun#
14240*4882a593Smuzhiyun# And are moved into their proper places in FP_SCR0.  If digit e4
14241*4882a593Smuzhiyun# is non-zero, OPERR is signaled.  In all cases, all 4 digits are
14242*4882a593Smuzhiyun# written as specified in the 881/882 manual for packed decimal.
14243*4882a593Smuzhiyun#
14244*4882a593Smuzhiyun# Register usage:
14245*4882a593Smuzhiyun#	Input/Output
14246*4882a593Smuzhiyun#	d0: x/LEN call to binstr - final is 0
14247*4882a593Smuzhiyun#	d1: x/scratch (0);shift count for final exponent packing
14248*4882a593Smuzhiyun#	d2: x/ms 32-bits of exp fraction/scratch
14249*4882a593Smuzhiyun#	d3: x/ls 32-bits of exp fraction
14250*4882a593Smuzhiyun#	d4: LEN/Unchanged
14251*4882a593Smuzhiyun#	d5: ICTR:LAMBDA/LAMBDA:ICTR
14252*4882a593Smuzhiyun#	d6: ILOG
14253*4882a593Smuzhiyun#	d7: k-factor/Unchanged
14254*4882a593Smuzhiyun#	a0: ptr to result string/ptr to L_SCR1(a6)
14255*4882a593Smuzhiyun#	a1: ptr to PTENxx array/Unchanged
14256*4882a593Smuzhiyun#	a2: ptr to FP_SCR1(a6)/Unchanged
14257*4882a593Smuzhiyun#	fp0: abs(YINT) adjusted/float(ILOG)
14258*4882a593Smuzhiyun#	fp1: 10^ISCALE/Unchanged
14259*4882a593Smuzhiyun#	fp2: 10^LEN/Unchanged
14260*4882a593Smuzhiyun#	F_SCR1:Work area for final result/BCD result
14261*4882a593Smuzhiyun#	F_SCR2:Y with original exponent/ILOG/10^4
14262*4882a593Smuzhiyun#	L_SCR1:original USER_FPCR/Exponent digits on return from binstr
14263*4882a593Smuzhiyun#	L_SCR2:first word of X packed/Unchanged
14264*4882a593Smuzhiyun
A15_st:
	tst.b		BINDEC_FLG(%a6)	# check for denorm
	beq.b		not_denorm	# if not a denorm, take normal path
	ftest.x		%fp0		# test for zero
	fbeq.w		den_zero	# if zero, use k-factor or 4933
	fmov.l		%d6,%fp0	# float ILOG
	fabs.x		%fp0		# get abs of ILOG
	bra.b		convrt
den_zero:
	tst.l		%d7		# check sign of the k-factor
	blt.b		use_ilog	# if negative, use ILOG
	fmov.s		F4933(%pc),%fp0	# force exponent to 4933
	bra.b		convrt		# do it
use_ilog:
	fmov.l		%d6,%fp0	# float ILOG
	fabs.x		%fp0		# get abs of ILOG
	bra.b		convrt
not_denorm:
	ftest.x		%fp0		# test for zero
	fbneq.w		not_zero	# if zero, force exponent
	fmov.s		FONE(%pc),%fp0	# force exponent to 1
	bra.b		convrt		# do it
not_zero:
	fmov.l		%d6,%fp0	# float ILOG
	fabs.x		%fp0		# get abs of ILOG
convrt:
# Make abs(ILOG)/10^4 a fraction for binstr, as was done for the
# mantissa in A14: fetch mantissa into d2:d3 and right-shift by
# the unbiased exponent.
	fdiv.x		24(%a1),%fp0	# compute ILOG/10^4
	fmov.x		%fp0,FP_SCR1(%a6)	# store fp0 in memory
	mov.l		4(%a2),%d2	# move word 2 to d2
	mov.l		8(%a2),%d3	# move word 3 to d3
	mov.w		(%a2),%d0	# move exp to d0
	beq.b		x_loop_fin	# if zero, skip the shift
	sub.w		&0x3ffd,%d0	# subtract off bias
	neg.w		%d0		# make exp positive
x_loop:
	lsr.l		&1,%d2		# shift d2:d3 right
	roxr.l		&1,%d3		# the number of places
	dbf.w		%d0,x_loop	# given in d0
x_loop_fin:
	clr.l		%d1		# put zero in d1 for addx
	add.l		&0x00000080,%d3	# inc at bit 7
	addx.l		%d1,%d2		# continue inc
	and.l		&0xffffff80,%d3	# strip off lsb not used by 882
	mov.l		&4,%d0		# put 4 in d0 for binstr call
	lea.l		L_SCR1(%a6),%a0	# a0 is ptr to L_SCR1 for exp digits
	bsr		binstr		# call binstr to convert exp
	mov.l		L_SCR1(%a6),%d0	# load L_SCR1 lword to d0
	mov.l		&12,%d1		# use d1 for shift count
	lsr.l		%d1,%d0		# shift d0 right by 12
	bfins		%d0,FP_SCR0(%a6){&4:&12}	# put e3:e2:e1 in FP_SCR0
	lsr.l		%d1,%d0		# shift d0 right by 12
	bfins		%d0,FP_SCR0(%a6){&16:&4}	# put e4 in FP_SCR0
	tst.b		%d0		# check if e4 is zero
	beq.b		A16_st		# if zero, skip rest
	or.l		&opaop_mask,USER_FPSR(%a6)	# set OPERR & AIOP in USER_FPSR
14320*4882a593Smuzhiyun
14321*4882a593Smuzhiyun
14322*4882a593Smuzhiyun# A16. Write sign bits to final string.
14323*4882a593Smuzhiyun#	   Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG).
14324*4882a593Smuzhiyun#
14325*4882a593Smuzhiyun# Register usage:
14326*4882a593Smuzhiyun#	Input/Output
14327*4882a593Smuzhiyun#	d0: x/scratch - final is x
14328*4882a593Smuzhiyun#	d2: x/x
14329*4882a593Smuzhiyun#	d3: x/x
14330*4882a593Smuzhiyun#	d4: LEN/Unchanged
14331*4882a593Smuzhiyun#	d5: ICTR:LAMBDA/LAMBDA:ICTR
14332*4882a593Smuzhiyun#	d6: ILOG/ILOG adjusted
14333*4882a593Smuzhiyun#	d7: k-factor/Unchanged
14334*4882a593Smuzhiyun#	a0: ptr to L_SCR1(a6)/Unchanged
14335*4882a593Smuzhiyun#	a1: ptr to PTENxx array/Unchanged
14336*4882a593Smuzhiyun#	a2: ptr to FP_SCR1(a6)/Unchanged
14337*4882a593Smuzhiyun#	fp0: float(ILOG)/Unchanged
14338*4882a593Smuzhiyun#	fp1: 10^ISCALE/Unchanged
14339*4882a593Smuzhiyun#	fp2: 10^LEN/Unchanged
14340*4882a593Smuzhiyun#	F_SCR1:BCD result with correct signs
14341*4882a593Smuzhiyun#	F_SCR2:ILOG/10^4
14342*4882a593Smuzhiyun#	L_SCR1:Exponent digits on return from binstr
14343*4882a593Smuzhiyun#	L_SCR2:first word of X packed/Unchanged
14344*4882a593Smuzhiyun
A16_st:
	clr.l		%d0		# clr d0 for collection of signs
	and.b		&0x0f,FP_SCR0(%a6)	# clear first nibble of FP_SCR0
	tst.l		L_SCR2(%a6)	# check sign of original mantissa
	bge.b		mant_p		# if pos, don't set SM
	mov.l		&2,%d0		# move 2 in to d0 for SM (bit 1)
mant_p:
	tst.l		%d6		# check sign of ILOG
	bge.b		wr_sgn		# if pos, don't set SE
	addq.l		&1,%d0		# set bit 0 in d0 for SE
wr_sgn:
	bfins		%d0,FP_SCR0(%a6){&0:&2}	# insert SM and SE into FP_SCR0

# Clean up and restore all registers used.

	fmov.l		&0,%fpsr	# clear possible inex2/ainex bits
	fmovm.x		(%sp)+,&0xe0	#  {%fp0-%fp2}
	movm.l		(%sp)+,&0x4fc	#  {%d2-%d7/%a2}
	rts
14364*4882a593Smuzhiyun
	global		PTENRN
# PTENRN: table of extended-precision powers of ten, 10^(2^n) for
# n = 0..12.  Each entry is three longs: sign/exponent word (upper 16
# bits of the first long) followed by the 64-bit mantissa.  This is the
# round-to-nearest variant; PTENRP/PTENRM below differ only in the
# final mantissa bits of the inexact entries.
PTENRN:
	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1
	long		0x40050000,0xC8000000,0x00000000	# 10 ^ 2
	long		0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
	long		0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
	long		0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
	long		0x40690000,0x9DC5ADA8,0x2B70B59E	# 10 ^ 32
	long		0x40D30000,0xC2781F49,0xFFCFA6D5	# 10 ^ 64
	long		0x41A80000,0x93BA47C9,0x80E98CE0	# 10 ^ 128
	long		0x43510000,0xAA7EEBFB,0x9DF9DE8E	# 10 ^ 256
	long		0x46A30000,0xE319A0AE,0xA60E91C7	# 10 ^ 512
	long		0x4D480000,0xC9767586,0x81750C17	# 10 ^ 1024
	long		0x5A920000,0x9E8B3B5D,0xC53D5DE5	# 10 ^ 2048
	long		0x75250000,0xC4605202,0x8A20979B	# 10 ^ 4096
14380*4882a593Smuzhiyun
	global		PTENRP
# PTENRP: same powers of ten as PTENRN, but with the inexact entries
# rounded toward plus infinity (e.g. 10^64 ends ...A6D6 here vs
# ...A6D5 in PTENRN).
PTENRP:
	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1
	long		0x40050000,0xC8000000,0x00000000	# 10 ^ 2
	long		0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
	long		0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
	long		0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
	long		0x40690000,0x9DC5ADA8,0x2B70B59E	# 10 ^ 32
	long		0x40D30000,0xC2781F49,0xFFCFA6D6	# 10 ^ 64
	long		0x41A80000,0x93BA47C9,0x80E98CE0	# 10 ^ 128
	long		0x43510000,0xAA7EEBFB,0x9DF9DE8E	# 10 ^ 256
	long		0x46A30000,0xE319A0AE,0xA60E91C7	# 10 ^ 512
	long		0x4D480000,0xC9767586,0x81750C18	# 10 ^ 1024
	long		0x5A920000,0x9E8B3B5D,0xC53D5DE5	# 10 ^ 2048
	long		0x75250000,0xC4605202,0x8A20979B	# 10 ^ 4096
14396*4882a593Smuzhiyun
	global		PTENRM
# PTENRM: same powers of ten as PTENRN, but with the inexact entries
# rounded toward minus infinity (e.g. 10^32 ends ...B59D here vs
# ...B59E in PTENRN).
PTENRM:
	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1
	long		0x40050000,0xC8000000,0x00000000	# 10 ^ 2
	long		0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
	long		0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
	long		0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
	long		0x40690000,0x9DC5ADA8,0x2B70B59D	# 10 ^ 32
	long		0x40D30000,0xC2781F49,0xFFCFA6D5	# 10 ^ 64
	long		0x41A80000,0x93BA47C9,0x80E98CDF	# 10 ^ 128
	long		0x43510000,0xAA7EEBFB,0x9DF9DE8D	# 10 ^ 256
	long		0x46A30000,0xE319A0AE,0xA60E91C6	# 10 ^ 512
	long		0x4D480000,0xC9767586,0x81750C17	# 10 ^ 1024
	long		0x5A920000,0x9E8B3B5D,0xC53D5DE4	# 10 ^ 2048
	long		0x75250000,0xC4605202,0x8A20979A	# 10 ^ 4096
14412*4882a593Smuzhiyun
14413*4882a593Smuzhiyun#########################################################################
14414*4882a593Smuzhiyun# binstr(): Converts a 64-bit binary integer to bcd.			#
14415*4882a593Smuzhiyun#									#
14416*4882a593Smuzhiyun# INPUT *************************************************************** #
14417*4882a593Smuzhiyun#	d2:d3 = 64-bit binary integer					#
14418*4882a593Smuzhiyun#	d0    = desired length (LEN)					#
14419*4882a593Smuzhiyun#	a0    = pointer to start in memory for bcd characters		#
14420*4882a593Smuzhiyun#		(This pointer must point to byte 4 of the first		#
14421*4882a593Smuzhiyun#		 lword of the packed decimal memory string.)		#
14422*4882a593Smuzhiyun#									#
14423*4882a593Smuzhiyun# OUTPUT ************************************************************** #
14424*4882a593Smuzhiyun#	a0 = pointer to LEN bcd digits representing the 64-bit integer.	#
14425*4882a593Smuzhiyun#									#
14426*4882a593Smuzhiyun# ALGORITHM ***********************************************************	#
14427*4882a593Smuzhiyun#	The 64-bit binary is assumed to have a decimal point before	#
14428*4882a593Smuzhiyun#	bit 63.  The fraction is multiplied by 10 using a mul by 2	#
14429*4882a593Smuzhiyun#	shift and a mul by 8 shift.  The bits shifted out of the	#
14430*4882a593Smuzhiyun#	msb form a decimal digit.  This process is iterated until	#
14431*4882a593Smuzhiyun#	LEN digits are formed.						#
14432*4882a593Smuzhiyun#									#
14433*4882a593Smuzhiyun# A1. Init d7 to 1.  D7 is the byte digit counter, and if 1, the	#
14434*4882a593Smuzhiyun#     digit formed will be assumed the least significant.  This is	#
14435*4882a593Smuzhiyun#     to force the first byte formed to have a 0 in the upper 4 bits.	#
14436*4882a593Smuzhiyun#									#
14437*4882a593Smuzhiyun# A2. Beginning of the loop:						#
14438*4882a593Smuzhiyun#     Copy the fraction in d2:d3 to d4:d5.				#
14439*4882a593Smuzhiyun#									#
14440*4882a593Smuzhiyun# A3. Multiply the fraction in d2:d3 by 8 using bit-field		#
14441*4882a593Smuzhiyun#     extracts and shifts.  The three msbs from d2 will go into d1.	#
14442*4882a593Smuzhiyun#									#
14443*4882a593Smuzhiyun# A4. Multiply the fraction in d4:d5 by 2 using shifts.  The msb	#
14444*4882a593Smuzhiyun#     will be collected by the carry.					#
14445*4882a593Smuzhiyun#									#
14446*4882a593Smuzhiyun# A5. Add using the carry the 64-bit quantities in d2:d3 and d4:d5	#
14447*4882a593Smuzhiyun#     into d2:d3.  D1 will contain the bcd digit formed.		#
14448*4882a593Smuzhiyun#									#
14449*4882a593Smuzhiyun# A6. Test d7.  If zero, the digit formed is the ms digit.  If non-	#
14450*4882a593Smuzhiyun#     zero, it is the ls digit.  Put the digit in its place in the	#
14451*4882a593Smuzhiyun#     upper word of d0.  If it is the ls digit, write the word		#
14452*4882a593Smuzhiyun#     from d0 to memory.						#
14453*4882a593Smuzhiyun#									#
# A7. Decrement d0 (LEN counter) and repeat the loop until zero.	#
14455*4882a593Smuzhiyun#									#
14456*4882a593Smuzhiyun#########################################################################
14457*4882a593Smuzhiyun
14458*4882a593Smuzhiyun#	Implementation Notes:
14459*4882a593Smuzhiyun#
14460*4882a593Smuzhiyun#	The registers are used as follows:
14461*4882a593Smuzhiyun#
14462*4882a593Smuzhiyun#		d0: LEN counter
14463*4882a593Smuzhiyun#		d1: temp used to form the digit
14464*4882a593Smuzhiyun#		d2: upper 32-bits of fraction for mul by 8
14465*4882a593Smuzhiyun#		d3: lower 32-bits of fraction for mul by 8
14466*4882a593Smuzhiyun#		d4: upper 32-bits of fraction for mul by 2
14467*4882a593Smuzhiyun#		d5: lower 32-bits of fraction for mul by 2
14468*4882a593Smuzhiyun#		d6: temp for bit-field extracts
14469*4882a593Smuzhiyun#		d7: byte digit formation word;digit count {0,1}
14470*4882a593Smuzhiyun#		a0: pointer into memory for packed bcd string formation
14471*4882a593Smuzhiyun#
14472*4882a593Smuzhiyun
	global		binstr
binstr:
	movm.l		&0xff00,-(%sp)	#  {%d0-%d7}

#
# A1: Init d7
# d7 low word is the digit-phase flag (0 = next digit is the first/ms
# digit of a byte, 1 = next digit is the second/ls digit); d7 high word
# holds the pending first digit.  Start at 1 so the very first byte
# formed has 0 in its upper nibble.
#
	mov.l		&1,%d7		# init d7 for second digit
	subq.l		&1,%d0		# for dbf d0 would have LEN+1 passes
#
# A2. Copy d2:d3 to d4:d5.  Start loop.
#
loop:
	mov.l		%d2,%d4		# copy the fraction before muls
	mov.l		%d3,%d5		# to d4:d5
#
# A3. Multiply d2:d3 by 8; extract msbs into d1.
#
	bfextu		%d2{&0:&3},%d1	# copy 3 msbs of d2 into d1
	asl.l		&3,%d2		# shift d2 left by 3 places
	bfextu		%d3{&0:&3},%d6	# copy 3 msbs of d3 into d6
	asl.l		&3,%d3		# shift d3 left by 3 places
	or.l		%d6,%d2		# or in msbs from d3 into d2
#
# A4. Multiply d4:d5 by 2; add carry out to d1.
#
	asl.l		&1,%d5		# mul d5 by 2
	roxl.l		&1,%d4		# mul d4 by 2
	swap		%d6		# put 0 in d6 lower word
	addx.w		%d6,%d1		# add in extend from mul by 2
#
# A5. Add mul by 8 to mul by 2 (net: fraction * 10).  D1 contains the
# decimal digit formed from the bits shifted out of the msb.
#
	add.l		%d5,%d3		# add lower 32 bits
	nop				# ERRATA FIX #13 (Rev. 1.2 6/6/90)
	addx.l		%d4,%d2		# add with extend upper 32 bits
	nop				# ERRATA FIX #13 (Rev. 1.2 6/6/90)
	addx.w		%d6,%d1		# add in extend from add to d1
	swap		%d6		# with d6 = 0; put 0 in upper word
#
# A6. Test d7 and branch.
#
	tst.w		%d7		# if zero, store digit & to loop
	beq.b		first_d		# if non-zero, form byte & write
sec_d:
	swap		%d7		# bring first digit to word d7b
	asl.w		&4,%d7		# first digit in upper 4 bits d7b
	add.w		%d1,%d7		# add in ls digit to d7b
	mov.b		%d7,(%a0)+	# store d7b byte in memory
	swap		%d7		# put LEN counter in word d7a
	clr.w		%d7		# set d7a to signal no digits done
	dbf.w		%d0,loop	# do loop some more!
	bra.b		end_bstr	# finished, so exit
first_d:
	swap		%d7		# put digit word in d7b
	mov.w		%d1,%d7		# put new digit in d7b
	swap		%d7		# put LEN counter in word d7a
	addq.w		&1,%d7		# set d7a to signal first digit done
	dbf.w		%d0,loop	# do loop some more!
	swap		%d7		# odd digit count: flush last digit
	lsl.w		&4,%d7		# move it to upper 4 bits
	mov.b		%d7,(%a0)+	# store it in memory string
#
# Clean up and return.  The LEN bcd digits have been written to memory
# through a0.  (The original comment here claimed "result in fp0", but
# no floating-point register is touched by this routine.)
#
end_bstr:
	movm.l		(%sp)+,&0xff	#  {%d0-%d7}
	rts
14541*4882a593Smuzhiyun
14542*4882a593Smuzhiyun#########################################################################
14543*4882a593Smuzhiyun# XDEF ****************************************************************	#
14544*4882a593Smuzhiyun#	facc_in_b(): dmem_read_byte failed				#
14545*4882a593Smuzhiyun#	facc_in_w(): dmem_read_word failed				#
14546*4882a593Smuzhiyun#	facc_in_l(): dmem_read_long failed				#
14547*4882a593Smuzhiyun#	facc_in_d(): dmem_read of dbl prec failed			#
14548*4882a593Smuzhiyun#	facc_in_x(): dmem_read of ext prec failed			#
14549*4882a593Smuzhiyun#									#
14550*4882a593Smuzhiyun#	facc_out_b(): dmem_write_byte failed				#
14551*4882a593Smuzhiyun#	facc_out_w(): dmem_write_word failed				#
14552*4882a593Smuzhiyun#	facc_out_l(): dmem_write_long failed				#
14553*4882a593Smuzhiyun#	facc_out_d(): dmem_write of dbl prec failed			#
14554*4882a593Smuzhiyun#	facc_out_x(): dmem_write of ext prec failed			#
14555*4882a593Smuzhiyun#									#
14556*4882a593Smuzhiyun# XREF ****************************************************************	#
14557*4882a593Smuzhiyun#	_real_access() - exit through access error handler		#
14558*4882a593Smuzhiyun#									#
14559*4882a593Smuzhiyun# INPUT ***************************************************************	#
14560*4882a593Smuzhiyun#	None								#
14561*4882a593Smuzhiyun#									#
14562*4882a593Smuzhiyun# OUTPUT **************************************************************	#
14563*4882a593Smuzhiyun#	None								#
14564*4882a593Smuzhiyun#									#
14565*4882a593Smuzhiyun# ALGORITHM ***********************************************************	#
14566*4882a593Smuzhiyun#	Flow jumps here when an FP data fetch call gets an error	#
14567*4882a593Smuzhiyun# result. This means the operating system wants an access error frame	#
14568*4882a593Smuzhiyun# made out of the current exception stack frame.			#
14569*4882a593Smuzhiyun#	So, we first call restore() which makes sure that any updated	#
14570*4882a593Smuzhiyun# -(an)+ register gets returned to its pre-exception value and then	#
14571*4882a593Smuzhiyun# we change the stack to an access error stack frame.			#
14572*4882a593Smuzhiyun#									#
14573*4882a593Smuzhiyun#########################################################################
14574*4882a593Smuzhiyun
# Entry points taken when a dmem_read of the given size returned an
# error.  Each one loads the access size (bytes) into d0, calls
# restore() to back any (an)+/-(an) register out to its pre-instruction
# value, writes a fault status longword (FSLW) describing the faulting
# read into EXC_VOFF, and joins the common access-error frame builder
# facc_finish.
facc_in_b:
	movq.l		&0x1,%d0			# one byte
	bsr.w		restore				# fix An

	mov.w		&0x0121,EXC_VOFF(%a6)		# set FSLW
	bra.w		facc_finish

facc_in_w:
	movq.l		&0x2,%d0			# two bytes
	bsr.w		restore				# fix An

	mov.w		&0x0141,EXC_VOFF(%a6)		# set FSLW
	bra.b		facc_finish

facc_in_l:
	movq.l		&0x4,%d0			# four bytes
	bsr.w		restore				# fix An

	mov.w		&0x0101,EXC_VOFF(%a6)		# set FSLW
	bra.b		facc_finish

facc_in_d:
	movq.l		&0x8,%d0			# eight bytes
	bsr.w		restore				# fix An

	mov.w		&0x0161,EXC_VOFF(%a6)		# set FSLW
	bra.b		facc_finish

facc_in_x:
	movq.l		&0xc,%d0			# twelve bytes
	bsr.w		restore				# fix An

# NOTE(review): _d and _x set the same FSLW value (0x0161) — confirm
# this is intended against the 68060 FSLW size-field encoding.
	mov.w		&0x0161,EXC_VOFF(%a6)		# set FSLW
	bra.b		facc_finish
14609*4882a593Smuzhiyun
14610*4882a593Smuzhiyun################################################################
14611*4882a593Smuzhiyun
# Entry points taken when a dmem_write of the given size returned an
# error.  Same pattern as the facc_in_* handlers above: d0 = access
# size, restore() undoes any (an)+/-(an) update, EXC_VOFF gets the
# write-fault FSLW, then fall through / branch to facc_finish.
facc_out_b:
	movq.l		&0x1,%d0			# one byte
	bsr.w		restore				# restore An

	mov.w		&0x00a1,EXC_VOFF(%a6)		# set FSLW
	bra.b		facc_finish

facc_out_w:
	movq.l		&0x2,%d0			# two bytes
	bsr.w		restore				# restore An

	mov.w		&0x00c1,EXC_VOFF(%a6)		# set FSLW
	bra.b		facc_finish

facc_out_l:
	movq.l		&0x4,%d0			# four bytes
	bsr.w		restore				# restore An

	mov.w		&0x0081,EXC_VOFF(%a6)		# set FSLW
	bra.b		facc_finish

facc_out_d:
	movq.l		&0x8,%d0			# eight bytes
	bsr.w		restore				# restore An

	mov.w		&0x00e1,EXC_VOFF(%a6)		# set FSLW
	bra.b		facc_finish
14639*4882a593Smuzhiyun
# dmem_write of extended precision (12 bytes) failed.  Falls through
# into facc_finish below.
# Use movq.l like every other facc_in_*/facc_out_* stub (the original
# used mov.l here): 0xc fits in moveq's sign-extended 8-bit immediate,
# so the result in d0 is identical and the encoding is 4 bytes shorter.
facc_out_x:
	movq.l		&0xc,%d0			# twelve bytes
	bsr.w		restore				# restore An

	mov.w		&0x00e1,EXC_VOFF(%a6)		# set FSLW
14645*4882a593Smuzhiyun
# here's where we actually create the access error frame from the
# current exception stack frame.  The current frame's fields are
# shuffled down one slot so the frame becomes:
#   SR / hi(PC) / voff(0x4008) / lo(PC) / EA / FSLW
# then control exits through the OS access error handler.
facc_finish:
	mov.l		USER_FPIAR(%a6),EXC_PC(%a6) # store current PC

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6

	mov.l		(%sp),-(%sp)		# store SR, hi(PC)
	mov.l		0x8(%sp),0x4(%sp)	# store lo(PC)
	mov.l		0xc(%sp),0x8(%sp)	# store EA
	mov.l		&0x00000001,0xc(%sp)	# store FSLW
	mov.w		0x6(%sp),0xc(%sp)	# fix FSLW (size), from EXC_VOFF set by caller
	mov.w		&0x4008,0x6(%sp)	# store voff

	btst		&0x5,(%sp)		# supervisor or user mode?
	beq.b		facc_out2		# user
	bset		&0x2,0xd(%sp)		# set supervisor TM bit

facc_out2:
	bra.l		_real_access
14670*4882a593Smuzhiyun
14671*4882a593Smuzhiyun##################################################################
14672*4882a593Smuzhiyun
# if the effective addressing mode was predecrement or postincrement,
# the emulation has already changed An to the correct post-instruction
# value. but since we're exiting to the access error handler, An must
# be returned to its pre-instruction value. we do that here.
#
# in:       d0 = access size in bytes (amount to back out of An)
# clobbers: d1 (and a0 if the register was user a7)
restore:
	mov.b		EXC_OPWORD+0x1(%a6),%d1
	andi.b		&0x38,%d1		# extract opmode
	cmpi.b		%d1,&0x18		# postinc?
	beq.w		rest_inc
	cmpi.b		%d1,&0x20		# predec?
	beq.w		rest_dec
	rts					# neither: nothing to undo
14686*4882a593Smuzhiyun
# undo a postincrement: dispatch on the register number (opword bits
# 2-0) through a pc-relative table of 16-bit offsets to the per-register
# fixup routines below, each of which subtracts d0 from An.
rest_inc:
	mov.b		EXC_OPWORD+0x1(%a6),%d1
	andi.w		&0x0007,%d1		# fetch An

	mov.w		(tbl_rest_inc.b,%pc,%d1.w*2),%d1
	jmp		(tbl_rest_inc.b,%pc,%d1.w*1)

tbl_rest_inc:
	short		ri_a0 - tbl_rest_inc
	short		ri_a1 - tbl_rest_inc
	short		ri_a2 - tbl_rest_inc
	short		ri_a3 - tbl_rest_inc
	short		ri_a4 - tbl_rest_inc
	short		ri_a5 - tbl_rest_inc
	short		ri_a6 - tbl_rest_inc
	short		ri_a7 - tbl_rest_inc
14703*4882a593Smuzhiyun
# a0/a1 live in the exception stack frame; a2-a5 are still in their
# registers; a6 is the frame pointer, so its saved copy is at (a6).
ri_a0:
	sub.l		%d0,EXC_DREGS+0x8(%a6)	# fix stacked a0
	rts
ri_a1:
	sub.l		%d0,EXC_DREGS+0xc(%a6)	# fix stacked a1
	rts
ri_a2:
	sub.l		%d0,%a2			# fix a2
	rts
ri_a3:
	sub.l		%d0,%a3			# fix a3
	rts
ri_a4:
	sub.l		%d0,%a4			# fix a4
	rts
ri_a5:
	sub.l		%d0,%a5			# fix a5
	rts
ri_a6:
	sub.l		%d0,(%a6)		# fix stacked a6
	rts
# if it's a fmove out instruction, we don't have to fix a7
# because we hadn't changed it yet. if it's an opclass two
# instruction (data moved in) and the exception was in supervisor
# mode, then a7 also wasn't updated. if it was user mode, then
# restore the correct a7 which is in the USP currently.
ri_a7:
	cmpi.b		EXC_VOFF(%a6),&0x30	# move in or out?
	bne.b		ri_a7_done		# out

	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
	bne.b		ri_a7_done		# supervisor
	movc		%usp,%a0		# fetch user stack pointer
	sub.l		%d0,%a0			# back out the increment
	movc		%a0,%usp		# write corrected USP back
ri_a7_done:
	rts
14741*4882a593Smuzhiyun
# need to invert adjustment value if the <ea> was predec: An was
# decremented by the access size, so negate d0 and reuse the
# rest_inc fixups (subtracting -size adds the size back).
rest_dec:
	neg.l		%d0
	bra.b		rest_inc
14746