/*
 * fp_util.S
 *
 * Copyright Roman Zippel, 1997.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, and the entire permission notice in its entirety,
 *    including the disclaimer of warranties.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote
 *    products derived from this software without specific prior
 *    written permission.
 *
 * ALTERNATIVELY, this product may be distributed under the terms of
 * the GNU General Public License, in which case the provisions of the GPL are
 * required INSTEAD OF the above restrictions.  (This clause is
 * necessary due to a potential bad interaction between the GPL and
 * the restrictions contained in a BSD-style copyright.)
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "fp_emu.h"

/*
 * Here are lots of conversion and normalization functions, mainly
 * used by fp_scan.S.
 * Note that these functions are optimized for "normal" numbers;
 * these are handled first and exit as fast as possible. This is
 * especially important for fp_normalize_ext/fp_conv_ext2ext, as
 * they are called very often.
 * The register usage is tuned to whatever register fp_scan.S happens
 * to have unused at that point, so be careful if you want to change
 * something here. %d0 and %d1 are always usable, sometimes %d2 (or
 * only its lower half). Most functions have to return %a0
 * unmodified, so that the caller can immediately reuse it.
 */
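	| For reference: the unpacked extended format used throughout this
	| file (struct fp_ext in fp_emu.h) is laid out as
	|	byte  0:	lowmant (extra-precision guard byte, only
	|			used with CONFIG_M68KFPU_EMU_EXTRAPREC)
	|	byte  1:	sign (0 = positive, 1 = negative)
	|	bytes 2-3:	exp (biased by 0x3fff)
	|	bytes 4-11:	64-bit mantissa with an explicit msb
	| which is why sign and exponent are often read and written below
	| as a single longword.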

	.globl	fp_ill, fp_end

	| exits from fp_scan:
	| illegal instruction
fp_ill:
	printf	,"fp_illegal\n"
	rts
	| completed instruction
fp_end:
	tst.l	(TASK_MM-8,%a2)
	jmi	1f
	tst.l	(TASK_MM-4,%a2)
	jmi	1f
	tst.l	(TASK_MM,%a2)
	jpl	2f
1:	printf	,"oops:%p,%p,%p\n",3,%a2@(TASK_MM-8),%a2@(TASK_MM-4),%a2@(TASK_MM)
2:	clr.l	%d0
	rts

	.globl	fp_conv_long2ext, fp_conv_single2ext
	.globl	fp_conv_double2ext, fp_conv_ext2ext
	.globl	fp_normalize_ext, fp_normalize_double
	.globl	fp_normalize_single, fp_normalize_single_fast
	.globl	fp_conv_ext2double, fp_conv_ext2single
	.globl	fp_conv_ext2long, fp_conv_ext2short
	.globl	fp_conv_ext2byte
	.globl	fp_finalrounding_single, fp_finalrounding_single_fast
	.globl	fp_finalrounding_double
	.globl	fp_finalrounding, fp_finaltest, fp_final

/*
 * First come several functions that convert a source operand
 * into the extended format. Note that only fp_conv_ext2ext
 * normalizes the number; it is always called after the other
 * conversion functions, which merely move the information into
 * the fp_ext structure.
 */

	| fp_conv_long2ext:
	|
	| args:	%d0 = source (32-bit long)
	|	%a0 = destination (ptr to struct fp_ext)
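	|
	| As an illustration: converting the long 5 stores sign 0, exponent
	| 0x3fff+31 and mantissa 0x00000005:00000000; the bias of 0x3fff+31
	| makes the high mantissa lword count as a plain 32-bit integer.
	| The result is deliberately left unnormalized, a later
	| fp_conv_ext2ext/fp_normalize_ext pass takes care of that.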

fp_conv_long2ext:
	printf	PCONV,"l2e: %p -> %p(",2,%d0,%a0
	clr.l	%d1			| sign defaults to zero
	tst.l	%d0
	jeq	fp_l2e_zero		| is source zero?
	jpl	1f			| positive?
	moveq	#1,%d1
	neg.l	%d0
1:	swap	%d1
	move.w	#0x3fff+31,%d1
	move.l	%d1,(%a0)+		| set sign / exp
	move.l	%d0,(%a0)+		| set mantissa
	clr.l	(%a0)
	subq.l	#8,%a0			| restore %a0
	printx	PCONV,%a0@
	printf	PCONV,")\n"
	rts
	| source is zero
fp_l2e_zero:
	clr.l	(%a0)+
	clr.l	(%a0)+
	clr.l	(%a0)
	subq.l	#8,%a0
	printx	PCONV,%a0@
	printf	PCONV,")\n"
	rts

	| fp_conv_single2ext
	| args:	%d0 = source (single-precision fp value)
	|	%a0 = dest (struct fp_ext *)
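	|
	| Single precision arrives packed as 1 sign bit, 8 exponent bits
	| (bias 0x7f) and 23 mantissa bits with an implicit leading one;
	| e.g. 1.0f is 0x3f800000. The code below re-biases the exponent
	| to 0x3fff and makes the leading one explicit (bit 31).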

fp_conv_single2ext:
	printf	PCONV,"s2e: %p -> %p(",2,%d0,%a0
	move.l	%d0,%d1
	lsl.l	#8,%d0			| shift mantissa
	lsr.l	#8,%d1			| exponent / sign
	lsr.l	#7,%d1
	lsr.w	#8,%d1
	jeq	fp_s2e_small		| zero / denormal?
	cmp.w	#0xff,%d1		| NaN / Inf?
	jeq	fp_s2e_large
	bset	#31,%d0			| set explicit bit
	add.w	#0x3fff-0x7f,%d1	| re-bias the exponent.
9:	move.l	%d1,(%a0)+		| fp_ext.sign, fp_ext.exp
	move.l	%d0,(%a0)+		| high lword of fp_ext.mant
	clr.l	(%a0)			| low lword = 0
	subq.l	#8,%a0
	printx	PCONV,%a0@
	printf	PCONV,")\n"
	rts
	| zeros and denormalized
fp_s2e_small:
	| exponent is zero, so the explicit bit is already zero too
	tst.l	%d0
	jeq	9b
	move.w	#0x4000-0x7f,%d1
	jra	9b
	| infinities and NaNs
fp_s2e_large:
	bclr	#31,%d0			| clear explicit bit
	move.w	#0x7fff,%d1
	jra	9b

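	| fp_conv_double2ext
	| args:	%a1 = source (double-precision fp value, fetched via getuser)
	|	%a0 = dest (struct fp_ext *)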
fp_conv_double2ext:
#ifdef FPU_EMU_DEBUG
	getuser.l %a1@(0),%d0,fp_err_ua2,%a1
	getuser.l %a1@(4),%d1,fp_err_ua2,%a1
	printf	PCONV,"d2e: %p%p -> %p(",3,%d0,%d1,%a0
#endif
	getuser.l (%a1)+,%d0,fp_err_ua2,%a1
	move.l	%d0,%d1
	lsl.l	#8,%d0			| shift high mantissa
	lsl.l	#3,%d0
	lsr.l	#8,%d1			| exponent / sign
	lsr.l	#7,%d1
	lsr.w	#5,%d1
	jeq	fp_d2e_small		| zero / denormal?
	cmp.w	#0x7ff,%d1		| NaN / Inf?
	jeq	fp_d2e_large
	bset	#31,%d0			| set explicit bit
	add.w	#0x3fff-0x3ff,%d1	| re-bias the exponent.
9:	move.l	%d1,(%a0)+		| fp_ext.sign, fp_ext.exp
	move.l	%d0,(%a0)+
	getuser.l (%a1)+,%d0,fp_err_ua2,%a1
	move.l	%d0,%d1
	lsl.l	#8,%d0
	lsl.l	#3,%d0
	move.l	%d0,(%a0)
	moveq	#21,%d0
	lsr.l	%d0,%d1
	or.l	%d1,-(%a0)
	subq.l	#4,%a0
	printx	PCONV,%a0@
	printf	PCONV,")\n"
	rts
	| zeros and denormalized
fp_d2e_small:
	| exponent is zero, so the explicit bit is already zero too
	tst.l	%d0
	jeq	9b
	move.w	#0x4000-0x3ff,%d1
	jra	9b
	| infinities and NaNs
fp_d2e_large:
	bclr	#31,%d0			| clear explicit bit
	move.w	#0x7fff,%d1
	jra	9b

	| fp_conv_ext2ext:
	| originally used to get a long double from userspace, now it's
	| called before arithmetic operations to make sure the number
	| is normalized [maybe rename it?].
	| args:	%a0 = dest (struct fp_ext *)
	| returns 0 in %d0 for a NaN, otherwise 1

fp_conv_ext2ext:
	printf	PCONV,"e2e: %p(",1,%a0
	printx	PCONV,%a0@
	printf	PCONV,"), "
	move.l	(%a0)+,%d0
	cmp.w	#0x7fff,%d0		| Inf / NaN?
	jeq	fp_e2e_large
	move.l	(%a0),%d0
	jpl	fp_e2e_small		| zero / denorm?
	| The high bit is set, so normalization is irrelevant.
fp_e2e_checkround:
	subq.l	#4,%a0
#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
	move.b	(%a0),%d0
	jne	fp_e2e_round
#endif
	printf	PCONV,"%p(",1,%a0
	printx	PCONV,%a0@
	printf	PCONV,")\n"
	moveq	#1,%d0
	rts
#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
fp_e2e_round:
	fp_set_sr FPSR_EXC_INEX2
	clr.b	(%a0)
	move.w	(FPD_RND,FPDATA),%d2
	jne	fp_e2e_roundother	| %d2 == 0, round to nearest
	tst.b	%d0			| test guard bit
	jpl	9f			| zero is closer
	btst	#0,(11,%a0)		| test lsb bit
	jne	fp_e2e_doroundup	| round to infinity
	lsl.b	#1,%d0			| check low bits
	jeq	9f			| round to zero
fp_e2e_doroundup:
	addq.l	#1,(8,%a0)
	jcc	9f
	addq.l	#1,(4,%a0)
	jcc	9f
	move.w	#0x8000,(4,%a0)
	addq.w	#1,(2,%a0)
9:	printf	PNORM,"%p(",1,%a0
	printx	PNORM,%a0@
	printf	PNORM,")\n"
	rts
fp_e2e_roundother:
	subq.w	#2,%d2
	jcs	9b			| %d2 < 2, round to zero
	jhi	1f			| %d2 > 2, round to +infinity
	tst.b	(1,%a0)			| to -inf
	jne	fp_e2e_doroundup	| negative, round to infinity
	jra	9b			| positive, round to zero
1:	tst.b	(1,%a0)			| to +inf
	jeq	fp_e2e_doroundup	| positive, round to infinity
	jra	9b			| negative, round to zero
#endif
	| zeros and subnormals:
	| try to normalize these anyway.
fp_e2e_small:
	jne	fp_e2e_small1		| high lword zero?
	move.l	(4,%a0),%d0
	jne	fp_e2e_small2
#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
	clr.l	%d0
	move.b	(-4,%a0),%d0
	jne	fp_e2e_small3
#endif
	| Genuine zero.
	clr.w	-(%a0)
	subq.l	#2,%a0
	printf	PNORM,"%p(",1,%a0
	printx	PNORM,%a0@
	printf	PNORM,")\n"
	moveq	#1,%d0
	rts
	| definitely subnormal, need to shift all 64 bits
fp_e2e_small1:
	bfffo	%d0{#0,#32},%d1
	move.w	-(%a0),%d2
	sub.w	%d1,%d2
	jcc	1f
	| Pathologically small, denormalize.
	add.w	%d2,%d1
	clr.w	%d2
1:	move.w	%d2,(%a0)+
	move.w	%d1,%d2
	jeq	fp_e2e_checkround
	| fancy 64-bit double-shift begins here
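	| (both mantissa lwords are shifted left by %d2; the top %d2 bits
	| of the low lword are then shifted down and ORed into the vacated
	| low end of the high lword, giving a true 64-bit shift built out
	| of 32-bit operations)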
	lsl.l	%d2,%d0
	move.l	%d0,(%a0)+
	move.l	(%a0),%d0
	move.l	%d0,%d1
	lsl.l	%d2,%d0
	move.l	%d0,(%a0)
	neg.w	%d2
	and.w	#0x1f,%d2
	lsr.l	%d2,%d1
	or.l	%d1,-(%a0)
#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
fp_e2e_extra1:
	clr.l	%d0
	move.b	(-4,%a0),%d0
	neg.w	%d2
	add.w	#24,%d2
	jcc	1f
	clr.b	(-4,%a0)
	lsl.l	%d2,%d0
	or.l	%d0,(4,%a0)
	jra	fp_e2e_checkround
1:	addq.w	#8,%d2
	lsl.l	%d2,%d0
	move.b	%d0,(-4,%a0)
	lsr.l	#8,%d0
	or.l	%d0,(4,%a0)
#endif
	jra	fp_e2e_checkround
	| pathologically small subnormal
fp_e2e_small2:
	bfffo	%d0{#0,#32},%d1
	add.w	#32,%d1
	move.w	-(%a0),%d2
	sub.w	%d1,%d2
	jcc	1f
	| Beyond pathologically small, denormalize.
	add.w	%d2,%d1
	clr.w	%d2
1:	move.w	%d2,(%a0)+
	ext.l	%d1
	jeq	fp_e2e_checkround
	clr.l	(4,%a0)
	sub.w	#32,%d2
	jcs	1f
	lsl.l	%d1,%d0			| lower lword only needs to be shifted
	move.l	%d0,(%a0)		| into the higher lword
#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
	clr.l	%d0
	move.b	(-4,%a0),%d0
	clr.b	(-4,%a0)
	neg.w	%d1
	add.w	#32,%d1
	bfins	%d0,(%a0){%d1,#8}
#endif
	jra	fp_e2e_checkround
1:	neg.w	%d1			| lower lword is split between
	bfins	%d0,(%a0){%d1,#32}	| the higher and lower lwords
#ifndef CONFIG_M68KFPU_EMU_EXTRAPREC
	jra	fp_e2e_checkround
#else
	move.w	%d1,%d2
	jra	fp_e2e_extra1
	| These are extremely small numbers that will mostly end up as zero
	| anyway, so this is only important for correct rounding.
fp_e2e_small3:
	bfffo	%d0{#24,#8},%d1
	add.w	#40,%d1
	move.w	-(%a0),%d2
	sub.w	%d1,%d2
	jcc	1f
	| Pathologically small, denormalize.
	add.w	%d2,%d1
	clr.w	%d2
1:	move.w	%d2,(%a0)+
	ext.l	%d1
	jeq	fp_e2e_checkround
	cmp.w	#8,%d1
	jcs	2f
1:	clr.b	(-4,%a0)
	sub.w	#64,%d1
	jcs	1f
	add.w	#24,%d1
	lsl.l	%d1,%d0
	move.l	%d0,(%a0)
	jra	fp_e2e_checkround
1:	neg.w	%d1
	bfins	%d0,(%a0){%d1,#8}
	jra	fp_e2e_checkround
2:	lsl.l	%d1,%d0
	move.b	%d0,(-4,%a0)
	lsr.l	#8,%d0
	move.b	%d0,(7,%a0)
	jra	fp_e2e_checkround
#endif
1:	move.l	%d0,%d1			| lower lword is split between
	lsl.l	%d2,%d0			| the higher and lower lwords
	move.l	%d0,(%a0)
	move.l	%d1,%d0
	neg.w	%d2
	add.w	#32,%d2
	lsr.l	%d2,%d0
	move.l	%d0,-(%a0)
	jra	fp_e2e_checkround
	| Infinities and NaNs
fp_e2e_large:
	move.l	(%a0)+,%d0
	jne	3f
1:	tst.l	(%a0)
	jne	4f
	moveq	#1,%d0
2:	subq.l	#8,%a0
	printf	PCONV,"%p(",1,%a0
	printx	PCONV,%a0@
	printf	PCONV,")\n"
	rts
	| we may have a NaN; shift off the highest bit
3:	lsl.l	#1,%d0
	jeq	1b
	| we have a NaN, clear the return value
4:	clr.l	%d0
	jra	2b


/*
 * Normalization functions.  Call these on the output of general
 * FP operators and before any conversion into the destination
 * formats. fp_normalize_ext always has to be called first; the
 * following conversion functions expect an already normalized
 * number.
 */

	| fp_normalize_ext:
	| normalize an extended number in extended (unpacked) format; it
	| basically does the same as fp_conv_ext2ext, but additionally
	| does the necessary postprocessing checks.
	| args:	%a0 (struct fp_ext *)
	| NOTE: it does _not_ modify %a0/%a1 or the upper word of %d2
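	|
	| Normalizing shifts the mantissa left until its msb (the explicit
	| integer bit) is set, decrementing the exponent by one for each
	| bit shifted; e.g. exp 0x4000, mant 0x00000001:00000000 becomes
	| exp 0x3fe1, mant 0x80000000:00000000.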

fp_normalize_ext:
	printf	PNORM,"ne: %p(",1,%a0
	printx	PNORM,%a0@
	printf	PNORM,"), "
	move.l	(%a0)+,%d0
	cmp.w	#0x7fff,%d0		| Inf / NaN?
	jeq	fp_ne_large
	move.l	(%a0),%d0
	jpl	fp_ne_small		| zero / denorm?
	| The high bit is set, so normalization is irrelevant.
fp_ne_checkround:
	subq.l	#4,%a0
#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
	move.b	(%a0),%d0
	jne	fp_ne_round
#endif
	printf	PNORM,"%p(",1,%a0
	printx	PNORM,%a0@
	printf	PNORM,")\n"
	rts
#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
fp_ne_round:
	fp_set_sr FPSR_EXC_INEX2
	clr.b	(%a0)
	move.w	(FPD_RND,FPDATA),%d2
	jne	fp_ne_roundother	| %d2 == 0, round to nearest
	tst.b	%d0			| test guard bit
	jpl	9f			| zero is closer
	btst	#0,(11,%a0)		| test lsb bit
	jne	fp_ne_doroundup		| round to infinity
	lsl.b	#1,%d0			| check low bits
	jeq	9f			| round to zero
fp_ne_doroundup:
	addq.l	#1,(8,%a0)
	jcc	9f
	addq.l	#1,(4,%a0)
	jcc	9f
	addq.w	#1,(2,%a0)
	move.w	#0x8000,(4,%a0)
9:	printf	PNORM,"%p(",1,%a0
	printx	PNORM,%a0@
	printf	PNORM,")\n"
	rts
fp_ne_roundother:
	subq.w	#2,%d2
	jcs	9b			| %d2 < 2, round to zero
	jhi	1f			| %d2 > 2, round to +infinity
	tst.b	(1,%a0)			| to -inf
	jne	fp_ne_doroundup		| negative, round to infinity
	jra	9b			| positive, round to zero
1:	tst.b	(1,%a0)			| to +inf
	jeq	fp_ne_doroundup		| positive, round to infinity
	jra	9b			| negative, round to zero
#endif
	| Zeros and subnormal numbers
	| These are probably merely subnormal rather than "denormalized"
	| numbers, so we will try to make them normal again.
fp_ne_small:
	jne	fp_ne_small1		| high lword zero?
	move.l	(4,%a0),%d0
	jne	fp_ne_small2
#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
	clr.l	%d0
	move.b	(-4,%a0),%d0
	jne	fp_ne_small3
#endif
	| Genuine zero.
	clr.w	-(%a0)
	subq.l	#2,%a0
	printf	PNORM,"%p(",1,%a0
	printx	PNORM,%a0@
	printf	PNORM,")\n"
	rts
	| Subnormal.
fp_ne_small1:
	bfffo	%d0{#0,#32},%d1
	move.w	-(%a0),%d2
	sub.w	%d1,%d2
	jcc	1f
	| Pathologically small, denormalize.
	add.w	%d2,%d1
	clr.w	%d2
	fp_set_sr FPSR_EXC_UNFL
1:	move.w	%d2,(%a0)+
	move.w	%d1,%d2
	jeq	fp_ne_checkround
	| This is exactly the same 64-bit double shift as seen above.
	lsl.l	%d2,%d0
	move.l	%d0,(%a0)+
	move.l	(%a0),%d0
	move.l	%d0,%d1
	lsl.l	%d2,%d0
	move.l	%d0,(%a0)
	neg.w	%d2
	and.w	#0x1f,%d2
	lsr.l	%d2,%d1
	or.l	%d1,-(%a0)
#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
fp_ne_extra1:
	clr.l	%d0
	move.b	(-4,%a0),%d0
	neg.w	%d2
	add.w	#24,%d2
	jcc	1f
	clr.b	(-4,%a0)
	lsl.l	%d2,%d0
	or.l	%d0,(4,%a0)
	jra	fp_ne_checkround
1:	addq.w	#8,%d2
	lsl.l	%d2,%d0
	move.b	%d0,(-4,%a0)
	lsr.l	#8,%d0
	or.l	%d0,(4,%a0)
#endif
	jra	fp_ne_checkround
	| May or may not be subnormal; if so, only 32 bits to shift.
fp_ne_small2:
	bfffo	%d0{#0,#32},%d1
	add.w	#32,%d1
	move.w	-(%a0),%d2
	sub.w	%d1,%d2
	jcc	1f
	| Beyond pathologically small, denormalize.
	add.w	%d2,%d1
	clr.w	%d2
	fp_set_sr FPSR_EXC_UNFL
1:	move.w	%d2,(%a0)+
	ext.l	%d1
	jeq	fp_ne_checkround
	clr.l	(4,%a0)
	sub.w	#32,%d1
	jcs	1f
	lsl.l	%d1,%d0			| lower lword only needs to be shifted
	move.l	%d0,(%a0)		| into the higher lword
#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
	clr.l	%d0
	move.b	(-4,%a0),%d0
	clr.b	(-4,%a0)
	neg.w	%d1
	add.w	#32,%d1
	bfins	%d0,(%a0){%d1,#8}
#endif
	jra	fp_ne_checkround
1:	neg.w	%d1			| lower lword is split between
	bfins	%d0,(%a0){%d1,#32}	| the higher and lower lwords
#ifndef CONFIG_M68KFPU_EMU_EXTRAPREC
	jra	fp_ne_checkround
#else
	move.w	%d1,%d2
	jra	fp_ne_extra1
	| These are extremely small numbers that will mostly end up as zero
	| anyway, so this is only important for correct rounding.
fp_ne_small3:
	bfffo	%d0{#24,#8},%d1
	add.w	#40,%d1
	move.w	-(%a0),%d2
	sub.w	%d1,%d2
	jcc	1f
	| Pathologically small, denormalize.
	add.w	%d2,%d1
	clr.w	%d2
1:	move.w	%d2,(%a0)+
	ext.l	%d1
	jeq	fp_ne_checkround
	cmp.w	#8,%d1
	jcs	2f
1:	clr.b	(-4,%a0)
	sub.w	#64,%d1
	jcs	1f
	add.w	#24,%d1
	lsl.l	%d1,%d0
	move.l	%d0,(%a0)
	jra	fp_ne_checkround
1:	neg.w	%d1
	bfins	%d0,(%a0){%d1,#8}
	jra	fp_ne_checkround
2:	lsl.l	%d1,%d0
	move.b	%d0,(-4,%a0)
	lsr.l	#8,%d0
	move.b	%d0,(7,%a0)
	jra	fp_ne_checkround
#endif
	| Infinities and NaNs, again, same as above.
fp_ne_large:
	move.l	(%a0)+,%d0
	jne	3f
1:	tst.l	(%a0)
	jne	4f
2:	subq.l	#8,%a0
	printf	PNORM,"%p(",1,%a0
	printx	PNORM,%a0@
	printf	PNORM,")\n"
	rts
	| we may have a NaN; shift off the highest bit
3:	move.l	%d0,%d1
	lsl.l	#1,%d1
	jne	4f
	clr.l	(-4,%a0)
	jra	1b
	| we have a NaN, test if it is signaling
4:	bset	#30,%d0
	jne	2b
	fp_set_sr FPSR_EXC_SNAN
	move.l	%d0,(-4,%a0)
	jra	2b

	| These next two do rounding as per the IEEE standard.
	| The values for the rounding modes appear to be:
	| 0:	Round to nearest
	| 1:	Round to zero
	| 2:	Round to -Infinity
	| 3:	Round to +Infinity
	| Both functions expect that fp_normalize was already
	| called (and the extended argument is already normalized
	| as far as possible); they are used if a different
	| rounding precision is selected and before converting
	| into single/double.
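	|
	| Worked example for round to nearest: if the guard bit is set and
	| all lower bits are zero, the value sits exactly halfway, so the
	| mantissa is rounded up only if its lsb is already 1 ("round to
	| even"); any nonzero lower bit means the upper neighbour is
	| closer and the mantissa is always rounded up.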

	| fp_normalize_double:
	| normalize an extended with double (52-bit) precision
	| args:	 %a0 (struct fp_ext *)
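	|
	| A double keeps a 53-bit significand (52 stored bits), so the low
	| 11 bits of the 64-bit extended mantissa have to be rounded away;
	| the exponent is rebiased from 0x3fff to 0x3ff, which is where
	| the 0x4000-0x3ff and 0x7fe limits below come from.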

fp_normalize_double:
	printf	PNORM,"nd: %p(",1,%a0
	printx	PNORM,%a0@
	printf	PNORM,"), "
	move.l	(%a0)+,%d2
	tst.w	%d2
	jeq	fp_nd_zero		| zero / denormalized
	cmp.w	#0x7fff,%d2
	jeq	fp_nd_huge		| NaN / infinity.
	sub.w	#0x4000-0x3ff,%d2	| will the exponent fit?
	jcs	fp_nd_small		| too small.
	cmp.w	#0x7fe,%d2
	jcc	fp_nd_large		| too big.
	addq.l	#4,%a0
	move.l	(%a0),%d0		| low lword of mantissa
	| now, round off the low 11 bits.
fp_nd_round:
	moveq	#21,%d1
	lsl.l	%d1,%d0			| keep 11 low bits.
	jne	fp_nd_checkround	| Are they non-zero?
	| nothing to do here
9:	subq.l	#8,%a0
	printf	PNORM,"%p(",1,%a0
	printx	PNORM,%a0@
	printf	PNORM,")\n"
	rts
	| Be careful with the X bit! It contains the lsb
	| from the shift above; it is needed for round to nearest.
fp_nd_checkround:
	fp_set_sr FPSR_EXC_INEX2	| INEX2 bit
	and.w	#0xf800,(2,%a0)		| clear bits 0-10
	move.w	(FPD_RND,FPDATA),%d2	| rounding mode
	jne	2f			| %d2 == 0, round to nearest
	tst.l	%d0			| test guard bit
	jpl	9b			| zero is closer
	| here we test the X bit by adding it to %d2
	clr.w	%d2			| first set Z bit, addx only clears it
	addx.w	%d2,%d2			| test lsb bit
	| IEEE754-specified "round to even" behaviour.  If the lsb
	| is set, then the number is odd, so rounding works like
	| in grade-school arithmetic (i.e. 1.5 rounds to 2.0).
	| Otherwise, an equal distance rounds towards zero, so as not
	| to produce an odd number.  This is strange, but it is what
	| the standard says.
	jne	fp_nd_doroundup		| round to infinity
	lsl.l	#1,%d0			| check low bits
	jeq	9b			| round to zero
fp_nd_doroundup:
	| round (the mantissa, that is) towards infinity
	add.l	#0x800,(%a0)
	jcc	9b			| no overflow, good.
	addq.l	#1,-(%a0)		| extend to high lword
	jcc	1f			| no overflow, good.
	| Yow! we have managed to overflow the mantissa.  Since this
	| only happens when the mantissa was 0xffffffff:fffff800, it is
	| now zero, so reset the high bit, and increment the exponent.
	move.w	#0x8000,(%a0)
	addq.w	#1,-(%a0)
	cmp.w	#0x43ff,(%a0)+		| exponent now overflown?
	jeq	fp_nd_large		| yes, so make it infinity.
1:	subq.l	#4,%a0
	printf	PNORM,"%p(",1,%a0
	printx	PNORM,%a0@
	printf	PNORM,")\n"
	rts
2:	subq.w	#2,%d2
	jcs	9b			| %d2 < 2, round to zero
	jhi	3f			| %d2 > 2, round to +infinity
	| Round to +Inf or -Inf.  High word of %d2 contains the
	| sign of the number, by the way.
	swap	%d2			| to -inf
	tst.b	%d2
	jne	fp_nd_doroundup		| negative, round to infinity
	jra	9b			| positive, round to zero
3:	swap	%d2			| to +inf
	tst.b	%d2
	jeq	fp_nd_doroundup		| positive, round to infinity
	jra	9b			| negative, round to zero
	| Exponent underflow.  Try to make a denormal, and set it to
	| the smallest possible fraction if this fails.
fp_nd_small:
	fp_set_sr FPSR_EXC_UNFL		| set UNFL bit
	move.w	#0x3c01,(-2,%a0)	| 2**-1022
	neg.w	%d2			| degree of underflow
	cmp.w	#32,%d2			| single or double shift?
	jcc	1f
	| Again, another 64-bit double shift.
	move.l	(%a0),%d0
	move.l	%d0,%d1
	lsr.l	%d2,%d0
	move.l	%d0,(%a0)+
	move.l	(%a0),%d0
	lsr.l	%d2,%d0
	neg.w	%d2
	add.w	#32,%d2
	lsl.l	%d2,%d1
	or.l	%d1,%d0
	move.l	(%a0),%d1
	move.l	%d0,(%a0)
	| Check to see if we shifted off any significant bits
	lsl.l	%d2,%d1
	jeq	fp_nd_round		| Nope, round.
	bset	#0,%d0			| Yes, so set the "sticky bit".
	jra	fp_nd_round		| Now, round.
	| Another 64-bit single shift and store
1:	sub.w	#32,%d2
	cmp.w	#32,%d2			| Do we really need to shift?
	jcc	2f			| No, the number is too small.
	move.l	(%a0),%d0
	clr.l	(%a0)+
	move.l	%d0,%d1
	lsr.l	%d2,%d0
	neg.w	%d2
	add.w	#32,%d2
	| Again, check to see if we shifted off any significant bits.
	tst.l	(%a0)
	jeq	1f
	bset	#0,%d0			| Sticky bit.
1:	move.l	%d0,(%a0)
	lsl.l	%d2,%d1
	jeq	fp_nd_round
	bset	#0,%d0
	jra	fp_nd_round
	| Sorry, the number is just too small.
2:	clr.l	(%a0)+
	clr.l	(%a0)
	moveq	#1,%d0			| Smallest possible fraction,
	jra	fp_nd_round		| round as desired.
	| zero and denormalized
fp_nd_zero:
	tst.l	(%a0)+
	jne	1f
	tst.l	(%a0)
	jne	1f
	subq.l	#8,%a0
	printf	PNORM,"%p(",1,%a0
	printx	PNORM,%a0@
	printf	PNORM,")\n"
	rts				| zero.  nothing to do.
	| These are not merely subnormal numbers, but true denormals,
	| i.e. pathologically small (exponent is 2**-16383) numbers.
	| It is clearly impossible for even a normal extended number
	| with that exponent to fit into double precision, so just
	| write these ones off as "too darn small".
1:	fp_set_sr FPSR_EXC_UNFL		| Set UNFL bit
	clr.l	(%a0)
	clr.l	-(%a0)
	move.w	#0x3c01,-(%a0)		| i.e. 2**-1022
	addq.l	#6,%a0
	moveq	#1,%d0
	jra	fp_nd_round		| round.
	| Exponent overflow.  Just call it infinity.
fp_nd_large:
	move.w	#0x7ff,%d0
	and.w	(6,%a0),%d0
	jeq	1f
	fp_set_sr FPSR_EXC_INEX2
1:	fp_set_sr FPSR_EXC_OVFL
	move.w	(FPD_RND,FPDATA),%d2
	jne	3f			| %d2 == 0, round to nearest
1:	move.w	#0x7fff,(-2,%a0)
	clr.l	(%a0)+
	clr.l	(%a0)
2:	subq.l	#8,%a0
	printf	PNORM,"%p(",1,%a0
	printx	PNORM,%a0@
	printf	PNORM,")\n"
	rts
3:	subq.w	#2,%d2
	jcs	5f			| %d2 < 2, round to zero
	jhi	4f			| %d2 > 2, round to +infinity
	tst.b	(-3,%a0)		| to -inf
	jne	1b
	jra	5f
4:	tst.b	(-3,%a0)		| to +inf
	jeq	1b
5:	move.w	#0x43fe,(-2,%a0)
	moveq	#-1,%d0
	move.l	%d0,(%a0)+
	move.w	#0xf800,%d0
	move.l	%d0,(%a0)
	jra	2b
	| Infinities or NaNs
fp_nd_huge:
	subq.l	#4,%a0
	printf	PNORM,"%p(",1,%a0
	printx	PNORM,%a0@
	printf	PNORM,")\n"
	rts

	| fp_normalize_single:
	| normalize an extended with single (23-bit) precision
	| args:	 %a0 (struct fp_ext *)
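	|
	| A single keeps a 24-bit significand (23 stored bits), so the low
	| 8 bits of the high mantissa lword are rounded away and the low
	| lword only contributes a sticky bit; the exponent is rebiased
	| from 0x3fff to 0x7f.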

fp_normalize_single:
	printf	PNORM,"ns: %p(",1,%a0
	printx	PNORM,%a0@
	printf	PNORM,") "
	addq.l	#2,%a0
	move.w	(%a0)+,%d2
	jeq	fp_ns_zero		| zero / denormalized
	cmp.w	#0x7fff,%d2
	jeq	fp_ns_huge		| NaN / infinity.
	sub.w	#0x4000-0x7f,%d2	| will the exponent fit?
	jcs	fp_ns_small		| too small.
	cmp.w	#0xfe,%d2
	jcc	fp_ns_large		| too big.
	move.l	(%a0)+,%d0		| get high lword of mantissa
fp_ns_round:
	tst.l	(%a0)			| check the low lword
	jeq	1f
	| Set a sticky bit if it is non-zero.  This should only
	| affect the rounding in what would otherwise be equal-
	| distance situations, which is what we want it to do.
	bset	#0,%d0
1:	clr.l	(%a0)			| zap it from memory.
	| now, round off the low 8 bits of the hi lword.
	tst.b	%d0			| 8 low bits.
	jne	fp_ns_checkround	| Are they non-zero?
	| nothing to do here
	subq.l	#8,%a0
	printf	PNORM,"%p(",1,%a0
	printx	PNORM,%a0@
	printf	PNORM,")\n"
	rts
fp_ns_checkround:
	fp_set_sr FPSR_EXC_INEX2	| INEX2 bit
	clr.b	-(%a0)			| clear low byte of high lword
	subq.l	#3,%a0
	move.w	(FPD_RND,FPDATA),%d2	| rounding mode
	jne	2f			| %d2 == 0, round to nearest
	tst.b	%d0			| test guard bit
	jpl	9f			| zero is closer
	btst	#8,%d0			| test lsb bit
	| round to even behaviour, see above.
	jne	fp_ns_doroundup		| round to infinity
	lsl.b	#1,%d0			| check low bits
	jeq	9f			| round to zero
fp_ns_doroundup:
	| round (the mantissa, that is) towards infinity
	add.l	#0x100,(%a0)
	jcc	9f			| no overflow, good.
	| Overflow.  This means that the mantissa was 0xffffff00, so it
	| is now zero.  We will set the mantissa to reflect this, and
	| increment the exponent (checking for overflow there too).
	move.w	#0x8000,(%a0)
	addq.w	#1,-(%a0)
	cmp.w	#0x407f,(%a0)+		| exponent now overflown?
	jeq	fp_ns_large		| yes, so make it infinity.
9:	subq.l	#4,%a0
	printf	PNORM,"%p(",1,%a0
	printx	PNORM,%a0@
	printf	PNORM,")\n"
	rts
	| check nondefault rounding modes
2:	subq.w	#2,%d2
	jcs	9b			| %d2 < 2, round to zero
	jhi	3f			| %d2 > 2, round to +infinity
	tst.b	(-3,%a0)		| to -inf
	jne	fp_ns_doroundup		| negative, round to infinity
	jra	9b			| positive, round to zero
3:	tst.b	(-3,%a0)		| to +inf
	jeq	fp_ns_doroundup		| positive, round to infinity
	jra	9b			| negative, round to zero
	| Exponent underflow.  Try to make a denormal, and set it to
	| the smallest possible fraction if this fails.
fp_ns_small:
	fp_set_sr FPSR_EXC_UNFL		| set UNFL bit
	move.w	#0x3f81,(-2,%a0)	| 2**-126
	neg.w	%d2			| degree of underflow
	cmp.w	#32,%d2			| single or double shift?
	jcc	2f
	| a 32-bit shift.
	move.l	(%a0),%d0
	move.l	%d0,%d1
	lsr.l	%d2,%d0
	move.l	%d0,(%a0)+
	| Check to see if we shifted off any significant bits.
	neg.w	%d2
	add.w	#32,%d2
	lsl.l	%d2,%d1
	jeq	1f
	bset	#0,%d0			| Sticky bit.
	| Check the lower lword
1:	tst.l	(%a0)
	jeq	fp_ns_round
	clr	(%a0)
	bset	#0,%d0			| Sticky bit.
	jra	fp_ns_round
	| Sorry, the number is just too small.
2:	clr.l	(%a0)+
	clr.l	(%a0)
	moveq	#1,%d0			| Smallest possible fraction,
	jra	fp_ns_round		| round as desired.
	| Exponent overflow.  Just call it infinity.
fp_ns_large:
	tst.b	(3,%a0)
	jeq	1f
	fp_set_sr FPSR_EXC_INEX2
1:	fp_set_sr FPSR_EXC_OVFL
	move.w	(FPD_RND,FPDATA),%d2
	jne	3f			| %d2 == 0, round to nearest
1:	move.w	#0x7fff,(-2,%a0)
	clr.l	(%a0)+
	clr.l	(%a0)
2:	subq.l	#8,%a0
	printf	PNORM,"%p(",1,%a0
	printx	PNORM,%a0@
	printf	PNORM,")\n"
	rts
3:	subq.w	#2,%d2
	jcs	5f			| %d2 < 2, round to zero
	jhi	4f			| %d2 > 2, round to +infinity
	tst.b	(-3,%a0)		| to -inf
	jne	1b
	jra	5f
4:	tst.b	(-3,%a0)		| to +inf
	jeq	1b
5:	move.w	#0x407e,(-2,%a0)
	move.l	#0xffffff00,(%a0)+
	clr.l	(%a0)
	jra	2b
	| zero and denormalized
fp_ns_zero:
	tst.l	(%a0)+
	jne	1f
	tst.l	(%a0)
	jne	1f
	subq.l	#8,%a0
	printf	PNORM,"%p(",1,%a0
	printx	PNORM,%a0@
	printf	PNORM,")\n"
	rts				| zero.  nothing to do.
	| These are not merely subnormal numbers, but true denormals,
	| i.e. pathologically small (exponent is 2**-16383) numbers.
	| It is clearly impossible for even a normal extended number
	| with that exponent to fit into single precision, so just
	| write these ones off as "too darn small".
1:	fp_set_sr FPSR_EXC_UNFL		| Set UNFL bit
	clr.l	(%a0)
	clr.l	-(%a0)
	move.w	#0x3f81,-(%a0)		| i.e. 2**-126
	addq.l	#6,%a0
	moveq	#1,%d0
	jra	fp_ns_round		| round.
	| Infinities or NaNs
fp_ns_huge:
	subq.l	#4,%a0
	printf	PNORM,"%p(",1,%a0
	printx	PNORM,%a0@
	printf	PNORM,")\n"
	rts

	| fp_normalize_single_fast:
	| normalize an extended with single (23-bit) precision;
	| this is only used by fsgldiv/fsgldmul, where the
	| operand is not completely normalized.
	| args:	 %a0 (struct fp_ext *)

fp_normalize_single_fast:
	printf	PNORM,"nsf: %p(",1,%a0
	printx	PNORM,%a0@
	printf	PNORM,") "
	addq.l	#2,%a0
	move.w	(%a0)+,%d2
	cmp.w	#0x7fff,%d2
	jeq	fp_nsf_huge		| NaN / infinity.
	move.l	(%a0)+,%d0		| get high lword of mantissa
fp_nsf_round:
	tst.l	(%a0)			| check the low lword
	jeq	1f
	| Set a sticky bit if it is non-zero.  This should only
	| affect the rounding in what would otherwise be equal-
	| distance situations, which is what we want it to do.
	bset	#0,%d0
1:	clr.l	(%a0)			| zap it from memory.
	| now, round off the low 8 bits of the hi lword.
	tst.b	%d0			| 8 low bits.
	jne	fp_nsf_checkround	| Are they non-zero?
	| nothing to do here
	subq.l	#8,%a0
	printf	PNORM,"%p(",1,%a0
	printx	PNORM,%a0@
	printf	PNORM,")\n"
	rts
fp_nsf_checkround:
	fp_set_sr FPSR_EXC_INEX2	| INEX2 bit
	clr.b	-(%a0)			| clear low byte of high lword
	subq.l	#3,%a0
	move.w	(FPD_RND,FPDATA),%d2	| rounding mode
	jne	2f			| %d2 == 0, round to nearest
	tst.b	%d0			| test guard bit
	jpl	9f			| zero is closer
	btst	#8,%d0			| test lsb bit
	| round to even behaviour, see above.
	jne	fp_nsf_doroundup	| round to infinity
	lsl.b	#1,%d0			| check low bits
	jeq	9f			| round to zero
fp_nsf_doroundup:
	| round (the mantissa, that is) towards infinity
	add.l	#0x100,(%a0)
	jcc	9f			| no overflow, good.
	| Overflow.  This means that the mantissa was 0xffffff00, so it
	| is now zero.  We will set the mantissa to reflect this, and
	| increment the exponent (checking for overflow there too).
	move.w	#0x8000,(%a0)
	addq.w	#1,-(%a0)
	cmp.w	#0x407f,(%a0)+		| exponent now overflown?
	jeq	fp_nsf_large		| yes, so make it infinity.
9:	subq.l	#4,%a0
	printf	PNORM,"%p(",1,%a0
	printx	PNORM,%a0@
	printf	PNORM,")\n"
	rts
	| check nondefault rounding modes
2:	subq.w	#2,%d2
	jcs	9b			| %d2 < 2, round to zero
	jhi	3f			| %d2 > 2, round to +infinity
	tst.b	(-3,%a0)		| to -inf
	jne	fp_nsf_doroundup	| negative, round to infinity
	jra	9b			| positive, round to zero
3:	tst.b	(-3,%a0)		| to +inf
	jeq	fp_nsf_doroundup	| positive, round to infinity
	jra	9b			| negative, round to zero
	| Exponent overflow.  Just call it infinity.
fp_nsf_large:
	tst.b	(3,%a0)
	jeq	1f
	fp_set_sr FPSR_EXC_INEX2
1:	fp_set_sr FPSR_EXC_OVFL
	move.w	(FPD_RND,FPDATA),%d2
	jne	3f			| %d2 == 0, round to nearest
1:	move.w	#0x7fff,(-2,%a0)
	clr.l	(%a0)+
	clr.l	(%a0)
2:	subq.l	#8,%a0
	printf	PNORM,"%p(",1,%a0
	printx	PNORM,%a0@
	printf	PNORM,")\n"
	rts
3:	subq.w	#2,%d2
	jcs	5f			| %d2 < 2, round to zero
	jhi	4f			| %d2 > 2, round to +infinity
	tst.b	(-3,%a0)		| to -inf
	jne	1b
	jra	5f
4:	tst.b	(-3,%a0)		| to +inf
	jeq	1b
5:	move.w	#0x407e,(-2,%a0)
	move.l	#0xffffff00,(%a0)+
	clr.l	(%a0)
	jra	2b
	| Infinities or NaNs
fp_nsf_huge:
	subq.l	#4,%a0
	printf	PNORM,"%p(",1,%a0
	printx	PNORM,%a0@
	printf	PNORM,")\n"
	rts

	| conv_ext2int (macro):
	| Generates a subroutine that converts an extended value to an
	| integer of a given size, again, with the appropriate type of
	| rounding.

	| Macro arguments:
	| s:	size, as given in an assembly instruction.
	| b:	number of bits in that size.

	| Subroutine arguments:
	| %a0:	source (struct fp_ext *)

	| Returns the integer in %d0 (like it should)
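	|
	| For instance, the 32-bit instantiation below (conv_ext2int l,32)
	| sets inf to (1<<31)-1 = 0x7fffffff; overflowed positive
	| conversions return that value and negative ones return
	| 0x7fffffff+1 = 0x80000000 (MININT), with OPERR set.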
1132*4882a593Smuzhiyun
1133*4882a593Smuzhiyun.macro conv_ext2int s,b
1134*4882a593Smuzhiyun	.set	inf,(1<<(\b-1))-1	| i.e. MAXINT
1135*4882a593Smuzhiyun	printf	PCONV,"e2i%d: %p(",2,#\b,%a0
1136*4882a593Smuzhiyun	printx	PCONV,%a0@
1137*4882a593Smuzhiyun	printf	PCONV,") "
1138*4882a593Smuzhiyun	addq.l	#2,%a0
1139*4882a593Smuzhiyun	move.w	(%a0)+,%d2		| exponent
1140*4882a593Smuzhiyun	jeq	fp_e2i_zero\b		| zero / denorm (== 0, here)
1141*4882a593Smuzhiyun	cmp.w	#0x7fff,%d2
1142*4882a593Smuzhiyun	jeq	fp_e2i_huge\b		| Inf / NaN
1143*4882a593Smuzhiyun	sub.w	#0x3ffe,%d2
1144*4882a593Smuzhiyun	jcs	fp_e2i_small\b
1145*4882a593Smuzhiyun	cmp.w	#\b,%d2
1146*4882a593Smuzhiyun	jhi	fp_e2i_large\b
1147*4882a593Smuzhiyun	move.l	(%a0),%d0
1148*4882a593Smuzhiyun	move.l	%d0,%d1
1149*4882a593Smuzhiyun	lsl.l	%d2,%d1
1150*4882a593Smuzhiyun	jne	fp_e2i_round\b
1151*4882a593Smuzhiyun	tst.l	(4,%a0)
1152*4882a593Smuzhiyun	jne	fp_e2i_round\b
1153*4882a593Smuzhiyun	neg.w	%d2
1154*4882a593Smuzhiyun	add.w	#32,%d2
1155*4882a593Smuzhiyun	lsr.l	%d2,%d0
1156*4882a593Smuzhiyun9:	tst.w	(-4,%a0)
1157*4882a593Smuzhiyun	jne	1f
1158*4882a593Smuzhiyun	tst.\s	%d0
1159*4882a593Smuzhiyun	jmi	fp_e2i_large\b
1160*4882a593Smuzhiyun	printf	PCONV,"-> %p\n",1,%d0
1161*4882a593Smuzhiyun	rts
1162*4882a593Smuzhiyun1:	neg.\s	%d0
1163*4882a593Smuzhiyun	jeq	1f
1164*4882a593Smuzhiyun	jpl	fp_e2i_large\b
1165*4882a593Smuzhiyun1:	printf	PCONV,"-> %p\n",1,%d0
1166*4882a593Smuzhiyun	rts
1167*4882a593Smuzhiyunfp_e2i_round\b:
1168*4882a593Smuzhiyun	fp_set_sr FPSR_EXC_INEX2	| INEX2 bit
1169*4882a593Smuzhiyun	neg.w	%d2
1170*4882a593Smuzhiyun	add.w	#32,%d2
1171*4882a593Smuzhiyun	.if	\b>16
1172*4882a593Smuzhiyun	jeq	5f
1173*4882a593Smuzhiyun	.endif
1174*4882a593Smuzhiyun	lsr.l	%d2,%d0
1175*4882a593Smuzhiyun	move.w	(FPD_RND,FPDATA),%d2	| rounding mode
1176*4882a593Smuzhiyun	jne	2f			| %d2 == 0, round to nearest
1177*4882a593Smuzhiyun	tst.l	%d1			| test guard bit
1178*4882a593Smuzhiyun	jpl	9b			| zero is closer
1179*4882a593Smuzhiyun	btst	%d2,%d0			| test lsb bit (%d2 still 0)
1180*4882a593Smuzhiyun	jne	fp_e2i_doroundup\b
1181*4882a593Smuzhiyun	lsl.l	#1,%d1			| check low bits
1182*4882a593Smuzhiyun	jne	fp_e2i_doroundup\b
1183*4882a593Smuzhiyun	tst.l	(4,%a0)
1184*4882a593Smuzhiyun	jeq	9b
1185*4882a593Smuzhiyunfp_e2i_doroundup\b:
1186*4882a593Smuzhiyun	addq.l	#1,%d0
1187*4882a593Smuzhiyun	jra	9b
1188*4882a593Smuzhiyun	| check nondefault rounding modes
1189*4882a593Smuzhiyun2:	subq.w	#2,%d2
1190*4882a593Smuzhiyun	jcs	9b			| %d2 < 2, round to zero
1191*4882a593Smuzhiyun	jhi	3f			| %d2 > 2, round to +infinity
1192*4882a593Smuzhiyun	tst.w	(-4,%a0)		| to -inf
1193*4882a593Smuzhiyun	jne	fp_e2i_doroundup\b	| negative, round to infinity
1194*4882a593Smuzhiyun	jra	9b			| positive, round to zero
1195*4882a593Smuzhiyun3:	tst.w	(-4,%a0)		| to +inf
1196*4882a593Smuzhiyun	jeq	fp_e2i_doroundup\b	| positive, round to infinity
1197*4882a593Smuzhiyun	jra	9b	| negative, round to zero
1198*4882a593Smuzhiyun	| we are only want -2**127 get correctly rounded here,
1199*4882a593Smuzhiyun	| since the guard bit is in the lower lword.
1200*4882a593Smuzhiyun	| everything else ends up anyway as overflow.
	.if	\b>16
5:	move.w	(FPD_RND,FPDATA),%d2	| rounding mode
	jne	2b			| %d2 == 0, round to nearest
	move.l	(4,%a0),%d1		| test guard bit
	jpl	9b			| zero is closer
	lsl.l	#1,%d1			| check low bits
	jne	fp_e2i_doroundup\b
	jra	9b
	.endif
fp_e2i_zero\b:
	clr.l	%d0
	tst.l	(%a0)+
	jne	1f
	tst.l	(%a0)
	jeq	3f
1:	subq.l	#4,%a0
	fp_clr_sr FPSR_EXC_UNFL		| fp_normalize_ext has set this bit
fp_e2i_small\b:
	fp_set_sr FPSR_EXC_INEX2
	clr.l	%d0
	move.w	(FPD_RND,FPDATA),%d2	| rounding mode
	subq.w	#2,%d2
	jcs	3f			| %d2 < 2, round to nearest/zero
	jhi	2f			| %d2 > 2, round to +infinity
	tst.w	(-4,%a0)		| to -inf
	jeq	3f
	subq.\s	#1,%d0
	jra	3f
2:	tst.w	(-4,%a0)		| to +inf
	jne	3f
	addq.\s	#1,%d0
3:	printf	PCONV,"-> %p\n",1,%d0
	rts
fp_e2i_large\b:
	fp_set_sr FPSR_EXC_OPERR
	move.\s	#inf,%d0
	tst.w	(-4,%a0)
	jeq	1f
	addq.\s	#1,%d0
1:	printf	PCONV,"-> %p\n",1,%d0
	rts
fp_e2i_huge\b:
	move.\s	(%a0),%d0
	tst.l	(%a0)
	jne	1f
	tst.l	(4,%a0)			| high lword clear: check the low one
	jeq	fp_e2i_large\b		| both clear: Inf, treat as overflow
	| fp_normalize_ext has set this bit already
	| and made the number nonsignaling
1:	fp_tst_sr FPSR_EXC_SNAN
	jne	1f
	fp_set_sr FPSR_EXC_OPERR
1:	printf	PCONV,"-> %p\n",1,%d0
	rts
.endm
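
/*
 * A rough C model (illustrative only; ext_to_int32 and its argument
 * layout are hypothetical, not emulator API) of the in-range path of
 * conv_ext2int for the b=32 case with default (to-nearest) rounding:
 *
 *	#include <stdint.h>
 *
 *	// sign: nonzero if negative; exp: 15-bit biased exponent;
 *	// man: 64-bit mantissa with explicit integer bit at bit 63
 *	int32_t ext_to_int32(int sign, uint16_t exp, uint64_t man)
 *	{
 *		int d = exp - 0x3ffe;	// value lies in [2^(d-1), 2^d)
 *		uint64_t q, rest;
 *
 *		if (d < 1 || d > 32)	// |x| < 1 and huge paths omitted
 *			return 0;
 *		q = man >> (64 - d);	// integer part of the magnitude
 *		rest = man << d;	// guard bit + sticky bits
 *		if (rest > (1ULL << 63) ||		// above halfway
 *		    (rest == (1ULL << 63) && (q & 1)))	// tie: to even
 *			q++;
 *		if (q > (sign ? 0x80000000ULL : 0x7fffffffULL))
 *			return sign ? INT32_MIN : INT32_MAX;	// OPERR
 *		return sign ? (int32_t)-(uint32_t)q : (int32_t)q;
 *	}
 */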

fp_conv_ext2long:
	conv_ext2int l,32

fp_conv_ext2short:
	conv_ext2int w,16

fp_conv_ext2byte:
	conv_ext2int b,8
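
/*
 * The three instantiations above expand the macro with (size, bits) =
 * (l,32), (w,16) and (b,8), so the "inf" clamp value (1<<(\b-1))-1
 * works out to 0x7fffffff, 0x7fff and 0x7f respectively; the extra
 * addq in fp_e2i_large turns that into 0x80000000/0x8000/0x80 (i.e.
 * MININT) for negative overflows.
 */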

fp_conv_ext2double:
	jsr	fp_normalize_double
	printf	PCONV,"e2d: %p(",1,%a0
	printx	PCONV,%a0@
	printf	PCONV,"), "
	move.l	(%a0)+,%d2
	cmp.w	#0x7fff,%d2
	jne	1f
	move.w	#0x7ff,%d2
	move.l	(%a0)+,%d0
	jra	2f
1:	sub.w	#0x3fff-0x3ff,%d2
	move.l	(%a0)+,%d0
	jmi	2f
	clr.w	%d2
2:	lsl.w	#5,%d2
	lsl.l	#7,%d2
	lsl.l	#8,%d2
	move.l	%d0,%d1
	lsl.l	#1,%d0
	lsr.l	#4,%d0
	lsr.l	#8,%d0
	or.l	%d2,%d0
	putuser.l %d0,(%a1)+,fp_err_ua2,%a1
	moveq	#21,%d0
	lsl.l	%d0,%d1
	move.l	(%a0),%d0
	lsr.l	#4,%d0
	lsr.l	#7,%d0
	or.l	%d1,%d0
	putuser.l %d0,(%a1),fp_err_ua2,%a1
#ifdef FPU_EMU_DEBUG
	getuser.l %a1@(-4),%d0,fp_err_ua2,%a1
	getuser.l %a1@(0),%d1,fp_err_ua2,%a1
	printf	PCONV,"%p(%08x%08x)\n",3,%a1,%d0,%d1
#endif
	rts
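
/*
 * A compact C sketch (repack_ext_to_double is a hypothetical helper,
 * for illustration only) of the bit repacking done above: extended
 * (15-bit exponent, explicit integer bit, 64-bit mantissa) to IEEE
 * double (11-bit exponent, 52-bit implicit-one mantissa), assuming
 * fp_normalize_double has already rounded the mantissa and brought
 * the exponent into range:
 *
 *	#include <stdint.h>
 *
 *	uint64_t repack_ext_to_double(int sign, uint16_t exp, uint64_t man)
 *	{
 *		uint64_t e;
 *		if (exp == 0x7fff)
 *			e = 0x7ff;		    // Inf/NaN
 *		else if (!(man >> 63))
 *			e = 0;			    // zero (integer bit clear)
 *		else
 *			e = exp - (0x3fff - 0x3ff); // rebias 16383 -> 1023
 *		return ((uint64_t)(sign != 0) << 63) | (e << 52)
 *			| ((man << 1) >> 12);	    // drop explicit bit, keep 52
 *	}
 */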

fp_conv_ext2single:
	jsr	fp_normalize_single
	printf	PCONV,"e2s: %p(",1,%a0
	printx	PCONV,%a0@
	printf	PCONV,"), "
	move.l	(%a0)+,%d1
	cmp.w	#0x7fff,%d1
	jne	1f
	move.w	#0xff,%d1
	move.l	(%a0)+,%d0
	jra	2f
1:	sub.w	#0x3fff-0x7f,%d1
	move.l	(%a0)+,%d0
	jmi	2f
	clr.w	%d1
2:	lsl.w	#8,%d1
	lsl.l	#7,%d1
	lsl.l	#8,%d1
	bclr	#31,%d0
	lsr.l	#8,%d0
	or.l	%d1,%d0
	printf	PCONV,"%08x\n",1,%d0
	rts
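
/*
 * The single-precision repack mirrors the double case, except that the
 * result is only left in %d0 rather than stored through %a1.  A C
 * sketch (hypothetical helper name, illustration only):
 *
 *	#include <stdint.h>
 *
 *	uint32_t repack_ext_to_single(int sign, uint16_t exp, uint32_t hi_man)
 *	{
 *		uint32_t e;
 *		if (exp == 0x7fff)
 *			e = 0xff;		   // Inf/NaN
 *		else if (!(hi_man >> 31))
 *			e = 0;			   // zero (integer bit clear)
 *		else
 *			e = exp - (0x3fff - 0x7f); // rebias 16383 -> 127
 *		return ((uint32_t)(sign != 0) << 31) | (e << 23)
 *			| ((hi_man & 0x7fffffff) >> 8); // 23 mantissa bits
 *	}
 */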
1327*4882a593Smuzhiyun
	| Special return addresses for instructions that encode
	| the rounding precision in the opcode (e.g. fsmove, fdmove).

fp_finalrounding_single:
	addq.l	#8,%sp
	jsr	fp_normalize_ext
	jsr	fp_normalize_single
	jra	fp_finaltest

fp_finalrounding_single_fast:
	addq.l	#8,%sp
	jsr	fp_normalize_ext
	jsr	fp_normalize_single_fast
	jra	fp_finaltest

fp_finalrounding_double:
	addq.l	#8,%sp
	jsr	fp_normalize_ext
	jsr	fp_normalize_double
	jra	fp_finaltest

	| fp_finaltest:
	| Set the emulated status register based on the outcome of an
	| emulated instruction.

fp_finalrounding:
	addq.l	#8,%sp
|	printf	,"f: %p\n",1,%a0
	jsr	fp_normalize_ext
	move.w	(FPD_PREC,FPDATA),%d0
	subq.w	#1,%d0
	jcs	fp_finaltest
	jne	1f
	jsr	fp_normalize_single
	jra	2f
1:	jsr	fp_normalize_double
2:|	printf	,"f: %p\n",1,%a0
fp_finaltest:
	| First, we do some of the obvious tests for the exception
	| status byte and condition code bytes of fp_sr here, so that
	| they do not have to be handled individually by every
	| emulated instruction.
	clr.l	%d0
	addq.l	#1,%a0
	tst.b	(%a0)+			| sign
	jeq	1f
	bset	#FPSR_CC_NEG-24,%d0	| N bit
1:	cmp.w	#0x7fff,(%a0)+		| exponent
	jeq	2f
	| test for zero
	moveq	#FPSR_CC_Z-24,%d1
	tst.l	(%a0)+
	jne	9f
	tst.l	(%a0)
	jne	9f
	jra	8f
	| infinity and NaN
2:	moveq	#FPSR_CC_NAN-24,%d1
	move.l	(%a0)+,%d2
	lsl.l	#1,%d2			| ignore high bit
	jne	8f
	tst.l	(%a0)
	jne	8f
	moveq	#FPSR_CC_INF-24,%d1
8:	bset	%d1,%d0
9:	move.b	%d0,(FPD_FPSR+0,FPDATA)	| set condition test result
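
/*
 * The condition-code logic above, restated as a C sketch (fpsr_cc is
 * hypothetical, not emulator API): at most one of Z/Inf/NaN is set,
 * and N is independent of the other three.  Bit values follow the
 * 68881 FPSR condition code byte (NaN=0x01, I=0x02, Z=0x04, N=0x08).
 *
 *	#include <stdint.h>
 *
 *	uint8_t fpsr_cc(int sign, uint16_t exp, uint64_t man)
 *	{
 *		uint8_t cc = 0;
 *		if (sign)
 *			cc |= 0x08;		// N
 *		if (exp == 0x7fff) {
 *			if (man << 1)		// payload, high bit ignored
 *				cc |= 0x01;	// NaN
 *			else
 *				cc |= 0x02;	// Inf
 *		} else if (man == 0) {
 *			cc |= 0x04;		// Z
 *		}
 *		return cc;
 *	}
 */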
	| move instructions enter here
	| Here, we test things in the exception status byte, and set
	| other things in the accrued exception byte accordingly.
	| Emulated instructions can set various things in the former,
	| as defined in fp_emu.h.
fp_final:
	move.l	(FPD_FPSR,FPDATA),%d0
#if 0
	btst	#FPSR_EXC_SNAN,%d0	| EXC_SNAN
	jne	1f
	btst	#FPSR_EXC_OPERR,%d0	| EXC_OPERR
	jeq	2f
1:	bset	#FPSR_AEXC_IOP,%d0	| set IOP bit
2:	btst	#FPSR_EXC_OVFL,%d0	| EXC_OVFL
	jeq	1f
	bset	#FPSR_AEXC_OVFL,%d0	| set OVFL bit
1:	btst	#FPSR_EXC_UNFL,%d0	| EXC_UNFL
	jeq	1f
	btst	#FPSR_EXC_INEX2,%d0	| EXC_INEX2
	jeq	1f
	bset	#FPSR_AEXC_UNFL,%d0	| set UNFL bit
1:	btst	#FPSR_EXC_DZ,%d0	| EXC_DZ
	jeq	1f
	bset	#FPSR_AEXC_DZ,%d0	| set DZ bit
1:	btst	#FPSR_EXC_OVFL,%d0	| EXC_OVFL
	jne	1f
	btst	#FPSR_EXC_INEX2,%d0	| EXC_INEX2
	jne	1f
	btst	#FPSR_EXC_INEX1,%d0	| EXC_INEX1
	jeq	2f
1:	bset	#FPSR_AEXC_INEX,%d0	| set INEX bit
2:	move.l	%d0,(FPD_FPSR,FPDATA)
#else
	| same as above, greatly optimized, but untested (yet)
	move.l	%d0,%d2
	lsr.l	#5,%d0
	move.l	%d0,%d1
	lsr.l	#4,%d1
	or.l	%d0,%d1
	and.b	#0x08,%d1
	move.l	%d2,%d0
	lsr.l	#6,%d0
	or.l	%d1,%d0
	move.l	%d2,%d1
	lsr.l	#4,%d1
	or.b	#0xdf,%d1
	and.b	%d1,%d0
	move.l	%d2,%d1
	lsr.l	#7,%d1
	and.b	#0x80,%d1
	or.b	%d1,%d0
	and.b	#0xf8,%d0
	or.b	%d0,%d2
	move.l	%d2,(FPD_FPSR,FPDATA)
#endif
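
/*
 * The EXC -> AEXC mapping implemented above (both variants), as a C
 * sketch of the readable #if 0 version.  Bit numbers are assumptions
 * taken from the standard 68881/68882 FPSR layout (EXC byte bits 15-8,
 * AEXC byte bits 7-3); the FPSR_EXC_*/FPSR_AEXC_* constants in
 * fp_emu.h should match, but check there before relying on this.
 *
 *	#include <stdint.h>
 *
 *	uint32_t accrue_exceptions(uint32_t fpsr)
 *	{
 *		int snan  = fpsr & (1 << 14);
 *		int operr = fpsr & (1 << 13);
 *		int ovfl  = fpsr & (1 << 12);
 *		int unfl  = fpsr & (1 << 11);
 *		int dz    = fpsr & (1 << 10);
 *		int inex2 = fpsr & (1 << 9);
 *		int inex1 = fpsr & (1 << 8);
 *
 *		if (snan || operr)	fpsr |= 1 << 7;	// IOP
 *		if (ovfl)		fpsr |= 1 << 6;	// OVFL
 *		if (unfl && inex2)	fpsr |= 1 << 5;	// UNFL
 *		if (dz)			fpsr |= 1 << 4;	// DZ
 *		if (ovfl || inex2 || inex1)
 *					fpsr |= 1 << 3;	// INEX
 *		return fpsr;
 *	}
 */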
	move.b	(FPD_FPSR+2,FPDATA),%d0
	and.b	(FPD_FPCR+2,FPDATA),%d0
	jeq	1f
	printf	,"send signal!!!\n"
1:	jra	fp_end