m68k/fpsp040/res_func.S

*4882a593Smuzhiyun|
*4882a593Smuzhiyun|	res_func.sa 3.9 7/29/91
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| Normalizes denormalized numbers if necessary and updates the
*4882a593Smuzhiyun| stack frame.  The function is then restored back into the
*4882a593Smuzhiyun| machine and the 040 completes the operation.  This routine
*4882a593Smuzhiyun| is only used by the unsupported data type/format handler.
*4882a593Smuzhiyun| (Exception vector 55).
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| For packed move out (fmove.p fpm,<ea>) the operation is
*4882a593Smuzhiyun| completed here; data is packed and moved to user memory.
*4882a593Smuzhiyun| The stack is restored to the 040 only in the case of a
*4882a593Smuzhiyun| reportable exception in the conversion.
*4882a593Smuzhiyun|
*4882a593Smuzhiyun|
*4882a593Smuzhiyun|		Copyright (C) Motorola, Inc. 1990
*4882a593Smuzhiyun|			All Rights Reserved
*4882a593Smuzhiyun|
*4882a593Smuzhiyun|       For details on the license for this file, please see the
*4882a593Smuzhiyun|       file, README, in this same directory.
*4882a593Smuzhiyun
*4882a593SmuzhiyunRES_FUNC:    |idnt    2,1 | Motorola 040 Floating Point Software Package
*4882a593Smuzhiyun
*4882a593Smuzhiyun	|section	8
*4882a593Smuzhiyun
*4882a593Smuzhiyun#include "fpsp.h"
*4882a593Smuzhiyun
*4882a593Smuzhiyunsp_bnds:	.short	0x3f81,0x407e	|single: 0x3f81 is the bound tested in cu_nmrs
*4882a593Smuzhiyun		.short	0x3f6a,0x0000	|0x3f6a = 0x3f81 - 23
*4882a593Smuzhiyundp_bnds:	.short	0x3c01,0x43fe	|double: 0x3c01 is the bound tested in cu_nmrd
*4882a593Smuzhiyun		.short	0x3bcd,0x0000	|0x3bcd = 0x3c01 - 52
| NOTE(review): neither table is referenced in the visible part of this
| file; presumably they hold exponent limits for single/double results.
| Confirm against the remainder of the file before relying on this.
*4882a593Smuzhiyun
*4882a593Smuzhiyun	|xref	mem_write
*4882a593Smuzhiyun	|xref	bindec
*4882a593Smuzhiyun	|xref	get_fline
*4882a593Smuzhiyun	|xref	round
*4882a593Smuzhiyun	|xref	denorm
*4882a593Smuzhiyun	|xref	dest_ext
*4882a593Smuzhiyun	|xref	dest_dbl
*4882a593Smuzhiyun	|xref	dest_sgl
*4882a593Smuzhiyun	|xref	unf_sub
*4882a593Smuzhiyun	|xref	nrm_set
*4882a593Smuzhiyun	|xref	dnrm_lp
*4882a593Smuzhiyun	|xref	ovf_res
*4882a593Smuzhiyun	|xref	reg_dest
*4882a593Smuzhiyun	|xref	t_ovfl
*4882a593Smuzhiyun	|xref	t_unfl
*4882a593Smuzhiyun
*4882a593Smuzhiyun	.global	res_func
*4882a593Smuzhiyun	.global	p_move
*4882a593Smuzhiyun
|
| res_func: entry point.  Clears the DNRM_FLG, RES_FLG and CU_ONLY
| bytes in the a6 frame.  For a dyadic instruction (DY_MO_FLG nonzero)
| whose destination tag has bit 7 set (denorm), FPTEMP is normalized
| in place via nrm_set, the tag is rewritten and DNRM_FLG gets $0f.
| Every path continues at monadic.
| NOTE(review): nrm_set is defined outside this file; its register
| usage is not visible here.
|
*4882a593Smuzhiyunres_func:
*4882a593Smuzhiyun	clrb	DNRM_FLG(%a6)	|no denormalized operand seen yet
*4882a593Smuzhiyun	clrb	RES_FLG(%a6)	|no restore requested yet (fix_stk sets it)
*4882a593Smuzhiyun	clrb	CU_ONLY(%a6)	|not flagged as a cu-only instruction yet
*4882a593Smuzhiyun	tstb	DY_MO_FLG(%a6)	|0 = monadic, nonzero = dyadic
*4882a593Smuzhiyun	beqs	monadic
*4882a593Smuzhiyundyadic:
*4882a593Smuzhiyun	btstb	#7,DTAG(%a6)	|if dop = norm=000, zero=001,
*4882a593Smuzhiyun|				;inf=010 or nan=011
*4882a593Smuzhiyun	beqs	monadic		|then branch
*4882a593Smuzhiyun|				;else denorm
*4882a593Smuzhiyun| HANDLE DESTINATION DENORM HERE
*4882a593Smuzhiyun|				;set dtag to norm
*4882a593Smuzhiyun|				;write the tag & fpte15 to the fstack
*4882a593Smuzhiyun	leal	FPTEMP(%a6),%a0
*4882a593Smuzhiyun
*4882a593Smuzhiyun	bclrb	#sign_bit,LOCAL_EX(%a0)	|clear sign bit; Z reflects its old value
*4882a593Smuzhiyun	sne	LOCAL_SGN(%a0)		|LOCAL_SGN = $ff if the sign bit was set
*4882a593Smuzhiyun
*4882a593Smuzhiyun	bsr	nrm_set		|normalize number (exp will go negative)
*4882a593Smuzhiyun	bclrb	#sign_bit,LOCAL_EX(%a0) |get rid of false sign
*4882a593Smuzhiyun	bfclr	LOCAL_SGN(%a0){#0:#8}	|change back to IEEE ext format
*4882a593Smuzhiyun	beqs	dpos			|sign byte was zero: operand is positive
*4882a593Smuzhiyun	bsetb	#sign_bit,LOCAL_EX(%a0)	|else put the sign bit back
*4882a593Smuzhiyundpos:
*4882a593Smuzhiyun	bfclr	DTAG(%a6){#0:#4}	|set tag to normalized, FPTE15 = 0
*4882a593Smuzhiyun	bsetb	#4,DTAG(%a6)	|set FPTE15
*4882a593Smuzhiyun	orb	#0x0f,DNRM_FLG(%a6)	|$0f = destination op was denormalized
*4882a593Smuzhiyunmonadic:
| Source operand handling.  a0 is pointed at ETEMP.  Move-out
| instructions go to opclass3 (not in the visible part of this file),
| a denormalized source goes to mon_dnrm, dyadic instructions go to
| normal, and monadic instructions are split on bit 0 of the command.
*4882a593Smuzhiyun	leal	ETEMP(%a6),%a0
*4882a593Smuzhiyun	btstb	#direction_bit,CMDREG1B(%a6)	|check direction
*4882a593Smuzhiyun	bne	opclass3			|it is a mv out
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| At this point, only opclass 0 and 2 possible
*4882a593Smuzhiyun|
*4882a593Smuzhiyun	btstb	#7,STAG(%a6)	|if sop = norm=000, zero=001,
*4882a593Smuzhiyun|				;inf=010 or nan=011
*4882a593Smuzhiyun	bne	mon_dnrm	|else denorm
*4882a593Smuzhiyun	tstb	DY_MO_FLG(%a6)	|source is not a denorm: dyadic
*4882a593Smuzhiyun	bne	normal		|instructions continue at normal
*4882a593Smuzhiyun
*4882a593Smuzhiyun| At this point:
*4882a593Smuzhiyun|	monadic instructions:	fabs  = $18  fneg   = $1a  ftst   = $3a
*4882a593Smuzhiyun|				fmove = $00  fsmove = $40  fdmove = $44
*4882a593Smuzhiyun|				fsqrt = $05* fssqrt = $41  fdsqrt = $45
*4882a593Smuzhiyun|				(*fsqrt reencoded to $05)
*4882a593Smuzhiyun|
*4882a593Smuzhiyun	movew	CMDREG1B(%a6),%d0	|get command register
*4882a593Smuzhiyun	andil	#0x7f,%d0			|strip to only command word
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| At this point, fabs, fneg, fsmove, fdmove, ftst, fsqrt, fssqrt, and
*4882a593Smuzhiyun| fdsqrt are possible.
*4882a593Smuzhiyun| Bit 0 clear (fmove, fabs, fneg, ftst, fsmove, fdmove): fall into cu_norm
*4882a593Smuzhiyun| Bit 0 set (fsqrt, fssqrt, fdsqrt): goto normal
*4882a593Smuzhiyun|
*4882a593Smuzhiyun	btstl	#0,%d0
*4882a593Smuzhiyun	bne	normal			|weed out fsqrt instructions
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| cu_norm handles fmove, fabs, fneg and ftst with a source operand that
*4882a593Smuzhiyun| is not denormalized.  The routine round is used (in the fmove code) to
*4882a593Smuzhiyun| round the input for the destination precision and mode.
*4882a593Smuzhiyun|
*4882a593Smuzhiyuncu_norm:
*4882a593Smuzhiyun	st	CU_ONLY(%a6)		|set cu-only inst flag
*4882a593Smuzhiyun	movew	CMDREG1B(%a6),%d0
*4882a593Smuzhiyun	andib	#0x3b,%d0		|isolate bits to select inst
| The mask drops bits 2 and 6, so fsmove ($40) and fdmove ($44)
| compare equal to fmove ($00).  ftst is the fall-through case.
*4882a593Smuzhiyun	tstb	%d0
*4882a593Smuzhiyun	beql	cu_nmove	|if zero, it is an fmove
*4882a593Smuzhiyun	cmpib	#0x18,%d0
*4882a593Smuzhiyun	beql	cu_nabs		|if $18, it is fabs
*4882a593Smuzhiyun	cmpib	#0x1a,%d0
*4882a593Smuzhiyun	beql	cu_nneg		|if $1a, it is fneg
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| Inst is ftst.  Check the source operand and set the cc's accordingly.
*4882a593Smuzhiyun| No write is done, so simply rts.
*4882a593Smuzhiyun|
| In: a0 points at the operand.  Out: N, Z, inf or nan bits ORed into
| USER_FPSR; LOCAL_SGN(a0) is overwritten with the sign.  Uses d0.
*4882a593Smuzhiyuncu_ntst:
*4882a593Smuzhiyun	movew	LOCAL_EX(%a0),%d0	|d0.w = sign and exponent word
*4882a593Smuzhiyun	bclrl	#15,%d0			|strip the sign; Z = 1 if it was clear
*4882a593Smuzhiyun	sne	LOCAL_SGN(%a0)		|record the sign (Scc does not change cc)
*4882a593Smuzhiyun	beqs	cu_ntpo			|so this still tests the old sign bit
*4882a593Smuzhiyun	orl	#neg_mask,USER_FPSR(%a6) |set N
*4882a593Smuzhiyuncu_ntpo:
*4882a593Smuzhiyun	cmpiw	#0x7fff,%d0	|test for inf/nan
*4882a593Smuzhiyun	bnes	cu_ntcz
*4882a593Smuzhiyun	tstl	LOCAL_HI(%a0)	|max exponent: nonzero mantissa means nan
*4882a593Smuzhiyun	bnes	cu_ntn
*4882a593Smuzhiyun	tstl	LOCAL_LO(%a0)
*4882a593Smuzhiyun	bnes	cu_ntn
*4882a593Smuzhiyun	orl	#inf_mask,USER_FPSR(%a6)
*4882a593Smuzhiyun	rts
*4882a593Smuzhiyuncu_ntn:
*4882a593Smuzhiyun	orl	#nan_mask,USER_FPSR(%a6)
*4882a593Smuzhiyun	movel	ETEMP_EX(%a6),FPTEMP_EX(%a6)	|set up fptemp sign for
*4882a593Smuzhiyun|						;snan handler
*4882a593Smuzhiyun
*4882a593Smuzhiyun	rts
*4882a593Smuzhiyuncu_ntcz:
*4882a593Smuzhiyun	tstl	LOCAL_HI(%a0)	|Z is set only if both mantissa longs are 0
*4882a593Smuzhiyun	bnel	cu_ntsx
*4882a593Smuzhiyun	tstl	LOCAL_LO(%a0)
*4882a593Smuzhiyun	bnel	cu_ntsx
*4882a593Smuzhiyun	orl	#z_mask,USER_FPSR(%a6)
*4882a593Smuzhiyuncu_ntsx:
*4882a593Smuzhiyun	rts
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| Inst is fabs.  Execute the absolute value function on the input.
*4882a593Smuzhiyun| Branch to the fmove code.  If the operand is NaN or zero, write it as is.
*4882a593Smuzhiyun|
| NOTE(review): a source tagged zero is written without clearing its
| sign bit here; confirm that wr_etemp (not visible) or the caller
| accounts for fabs of a negative zero.
*4882a593Smuzhiyuncu_nabs:
*4882a593Smuzhiyun	moveb	STAG(%a6),%d0
*4882a593Smuzhiyun	btstl	#5,%d0			|test for NaN or zero
*4882a593Smuzhiyun	bne	wr_etemp		|if either, simply write it
*4882a593Smuzhiyun	bclrb	#7,LOCAL_EX(%a0)		|do abs
*4882a593Smuzhiyun	bras	cu_nmove		|fmove code will finish
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| Inst is fneg.  Execute the negate value function on the input.
*4882a593Smuzhiyun| Fall through to the fmove code.  If the operand is NaN or zero, write it as is.
*4882a593Smuzhiyun|
| NOTE(review): as in cu_nabs, a source tagged zero bypasses the sign
| change; confirm this is intended.
*4882a593Smuzhiyuncu_nneg:
*4882a593Smuzhiyun	moveb	STAG(%a6),%d0
*4882a593Smuzhiyun	btstl	#5,%d0			|test for NaN or zero
*4882a593Smuzhiyun	bne	wr_etemp		|if either, simply write it
*4882a593Smuzhiyun	bchgb	#7,LOCAL_EX(%a0)		|do neg
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| Inst is fmove.  This code also handles all result writes.
*4882a593Smuzhiyun| If bit 2 is set, round is forced to double.  If it is clear,
*4882a593Smuzhiyun| and bit 6 is set, round is forced to single.  If both are clear,
*4882a593Smuzhiyun| the round precision is found in the fpcr.  If the rounding precision
*4882a593Smuzhiyun| is double or single, round the result before the write.
*4882a593Smuzhiyun|
| Any source whose tag bits (7-5 of STAG) are not all zero is written
| unrounded through wr_etemp.  Falls through to cu_nmnr when neither
| forced-precision bit is set.
*4882a593Smuzhiyuncu_nmove:
*4882a593Smuzhiyun	moveb	STAG(%a6),%d0
*4882a593Smuzhiyun	andib	#0xe0,%d0			|isolate stag bits
*4882a593Smuzhiyun	bne	wr_etemp		|if not norm, simply write it
*4882a593Smuzhiyun	btstb	#2,CMDREG1B+1(%a6)	|check for rd
*4882a593Smuzhiyun	bne	cu_nmrd
*4882a593Smuzhiyun	btstb	#6,CMDREG1B+1(%a6)	|check for rs
*4882a593Smuzhiyun	bne	cu_nmrs
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| The move or operation is not with forced precision.  Inputs that are
*4882a593Smuzhiyun| not norms were already written by cu_nmove.  Use the top two bits of
*4882a593Smuzhiyun| the FPCR_MODE byte to select the rounding precision.
*4882a593Smuzhiyun|
*4882a593Smuzhiyuncu_nmnr:
*4882a593Smuzhiyun	bfextu	FPCR_MODE(%a6){#0:#2},%d0	|d0 = precision field
*4882a593Smuzhiyun	tstb	%d0			|check for extended
*4882a593Smuzhiyun	beq	cu_wrexn		|if so, just write result
*4882a593Smuzhiyun	cmpib	#1,%d0			|check for single
*4882a593Smuzhiyun	beq	cu_nmrs			|fall through to double
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| The move is fdmove or round precision is double.
*4882a593Smuzhiyun|
| Exponents at or below $3c01 go to cu_nunfl with d0 = 2.  Otherwise
| the operand is rounded to double by round (d0 = 0, d1 = precision in
| the upper word and rmode in the lower word) and an exponent of $43ff
| or more afterwards is passed to cu_novfl.
*4882a593Smuzhiyuncu_nmrd:
*4882a593Smuzhiyun	movel	#2,%d0			|set up the size for denorm
*4882a593Smuzhiyun	movew	LOCAL_EX(%a0),%d1		|compare exponent to double threshold
*4882a593Smuzhiyun	andw	#0x7fff,%d1		|drop the sign bit
*4882a593Smuzhiyun	cmpw	#0x3c01,%d1
*4882a593Smuzhiyun	bls	cu_nunfl		|unsigned <= threshold: underflow path
*4882a593Smuzhiyun	bfextu	FPCR_MODE(%a6){#2:#2},%d1	|get rmode
*4882a593Smuzhiyun	orl	#0x00020000,%d1		|or in rprec (double)
*4882a593Smuzhiyun	clrl	%d0			|clear g,r,s for round
*4882a593Smuzhiyun	bclrb	#sign_bit,LOCAL_EX(%a0)	|convert to internal format
*4882a593Smuzhiyun	sne	LOCAL_SGN(%a0)
*4882a593Smuzhiyun	bsrl	round
*4882a593Smuzhiyun	bfclr	LOCAL_SGN(%a0){#0:#8}	|back to ext format; Z = sign byte was 0
*4882a593Smuzhiyun	beqs	cu_nmrdc
*4882a593Smuzhiyun	bsetb	#sign_bit,LOCAL_EX(%a0)
*4882a593Smuzhiyuncu_nmrdc:
*4882a593Smuzhiyun	movew	LOCAL_EX(%a0),%d1		|check for overflow
*4882a593Smuzhiyun	andw	#0x7fff,%d1
*4882a593Smuzhiyun	cmpw	#0x43ff,%d1
*4882a593Smuzhiyun	bge	cu_novfl		|take care of overflow case
*4882a593Smuzhiyun	bra	cu_wrexn
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| The move is fsmove or round precision is single.
*4882a593Smuzhiyun|
| Same shape as cu_nmrd with the single thresholds: exponents at or
| below $3f81 go to cu_nunfl with d0 = 1; after rounding, an exponent
| below $407f is written through cu_wrexn, anything else falls
| through into cu_novfl.
*4882a593Smuzhiyuncu_nmrs:
*4882a593Smuzhiyun	movel	#1,%d0			|size for denorm (cu_nmrd passes 2)
*4882a593Smuzhiyun	movew	LOCAL_EX(%a0),%d1
*4882a593Smuzhiyun	andw	#0x7fff,%d1		|drop the sign bit
*4882a593Smuzhiyun	cmpw	#0x3f81,%d1
*4882a593Smuzhiyun	bls	cu_nunfl		|unsigned <= threshold: underflow path
*4882a593Smuzhiyun	bfextu	FPCR_MODE(%a6){#2:#2},%d1	|get rmode
*4882a593Smuzhiyun	orl	#0x00010000,%d1		|or in rprec (single)
*4882a593Smuzhiyun	clrl	%d0			|clear g,r,s for round
*4882a593Smuzhiyun	bclrb	#sign_bit,LOCAL_EX(%a0)	|convert to internal format
*4882a593Smuzhiyun	sne	LOCAL_SGN(%a0)
*4882a593Smuzhiyun	bsrl	round
*4882a593Smuzhiyun	bfclr	LOCAL_SGN(%a0){#0:#8}	|back to ext format; Z = sign byte was 0
*4882a593Smuzhiyun	beqs	cu_nmrsc
*4882a593Smuzhiyun	bsetb	#sign_bit,LOCAL_EX(%a0)
*4882a593Smuzhiyuncu_nmrsc:
*4882a593Smuzhiyun	movew	LOCAL_EX(%a0),%d1		|check for overflow
*4882a593Smuzhiyun	andw	#0x7FFF,%d1
*4882a593Smuzhiyun	cmpw	#0x407f,%d1
*4882a593Smuzhiyun	blt	cu_wrexn		|in range: write it; else fall into cu_novfl
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| The operand is above precision boundaries.  Use t_ovfl to
*4882a593Smuzhiyun| generate the correct value.
*4882a593Smuzhiyun|
| NOTE(review): t_ovfl is external; what it expects in a0/d0/d1 and
| where it leaves the result is not visible in this file.
*4882a593Smuzhiyuncu_novfl:
*4882a593Smuzhiyun	bsr	t_ovfl
*4882a593Smuzhiyun	bra	cu_wrexn
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| The operand is below precision boundaries.  Use denorm to
*4882a593Smuzhiyun| generate the correct value.
*4882a593Smuzhiyun|
| In: d0 = 1 (single) or 2 (double) as set by cu_nmrs/cu_nmrd.
| The operand is switched to the internal format (sign in LOCAL_SGN)
| around the call and back afterwards.  Falls through to cu_nucont.
| NOTE(review): denorm is external; its outputs are not visible here.
*4882a593Smuzhiyuncu_nunfl:
*4882a593Smuzhiyun	bclrb	#sign_bit,LOCAL_EX(%a0)	|clear sign bit; Z reflects its old value
*4882a593Smuzhiyun	sne	LOCAL_SGN(%a0)		|LOCAL_SGN = $ff if the sign bit was set
*4882a593Smuzhiyun	bsr	denorm
*4882a593Smuzhiyun	bfclr	LOCAL_SGN(%a0){#0:#8}	|change back to IEEE ext format
*4882a593Smuzhiyun	beqs	cu_nucont		|sign byte was zero: positive
*4882a593Smuzhiyun	bsetb	#sign_bit,LOCAL_EX(%a0)
*4882a593Smuzhiyuncu_nucont:
| Build the round argument in d1 (precision in the upper word, rmode
| in the lower word) and round the value produced by denorm.  The
| precision is double or single when forced by the instruction,
| otherwise it is taken from the top two bits of FPCR_MODE.
| NOTE(review): d0 is passed to round as left by denorm (not visible).
*4882a593Smuzhiyun	bfextu	FPCR_MODE(%a6){#2:#2},%d1	|d1 = rmode, upper bits zero
*4882a593Smuzhiyun	btstb	#2,CMDREG1B+1(%a6)	|check for rd
*4882a593Smuzhiyun	bne	inst_d
*4882a593Smuzhiyun	btstb	#6,CMDREG1B+1(%a6)	|check for rs
*4882a593Smuzhiyun	bne	inst_s
*4882a593Smuzhiyun	swap	%d1			|work on the (zero) upper word
*4882a593Smuzhiyun	moveb	FPCR_MODE(%a6),%d1
*4882a593Smuzhiyun	lsrb	#6,%d1			|keep only the two precision bits
*4882a593Smuzhiyun	swap	%d1			|precision now upper, rmode lower
*4882a593Smuzhiyun	bra	inst_sd
*4882a593Smuzhiyuninst_d:
*4882a593Smuzhiyun	orl	#0x00020000,%d1		|forced double
*4882a593Smuzhiyun	bra	inst_sd
*4882a593Smuzhiyuninst_s:
*4882a593Smuzhiyun	orl	#0x00010000,%d1		|forced single
*4882a593Smuzhiyuninst_sd:
*4882a593Smuzhiyun	bclrb	#sign_bit,LOCAL_EX(%a0)	|convert to internal format
*4882a593Smuzhiyun	sne	LOCAL_SGN(%a0)
*4882a593Smuzhiyun	bsrl	round
*4882a593Smuzhiyun	bfclr	LOCAL_SGN(%a0){#0:#8}	|back to ext format; Z = sign byte was 0
*4882a593Smuzhiyun	beqs	cu_nuflp
*4882a593Smuzhiyun	bsetb	#sign_bit,LOCAL_EX(%a0)
*4882a593Smuzhiyuncu_nuflp:
| Post-process the rounded underflow result: set AUNFL if the round
| was inexact, patch in an lsb when the mantissa became zero but the
| rounding mode is directed away from zero for this sign, set Z and
| UNFL in USER_FPSR, then copy the 12 bytes at (a0) to ETEMP and fall
| through to cu_wrexn.
*4882a593Smuzhiyun	btstb	#inex2_bit,FPSR_EXCEPT(%a6)
*4882a593Smuzhiyun	beqs	cu_nuninx
*4882a593Smuzhiyun	orl	#aunfl_mask,USER_FPSR(%a6) |if the round was inex, set AUNFL
*4882a593Smuzhiyuncu_nuninx:
*4882a593Smuzhiyun	tstl	LOCAL_HI(%a0)		|test for zero
*4882a593Smuzhiyun	bnes	cu_nunzro
*4882a593Smuzhiyun	tstl	LOCAL_LO(%a0)
*4882a593Smuzhiyun	bnes	cu_nunzro
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| The mantissa is zero from the denorm loop.  Check sign and rmode
*4882a593Smuzhiyun| to see if rounding should have occurred which would leave the lsb.
*4882a593Smuzhiyun|
*4882a593Smuzhiyun	movel	USER_FPCR(%a6),%d0
*4882a593Smuzhiyun	andil	#0x30,%d0		|isolate rmode
*4882a593Smuzhiyun	cmpil	#0x20,%d0
*4882a593Smuzhiyun	blts	cu_nzro			|$00 or $10: result stays zero
*4882a593Smuzhiyun	bnes	cu_nrp			|$30: rp; $20 falls into cu_nrm
*4882a593Smuzhiyuncu_nrm:
*4882a593Smuzhiyun	tstw	LOCAL_EX(%a0)	|rm: if negative, set lsb
*4882a593Smuzhiyun	bges	cu_nzro
| NOTE(review): the lsb position is chosen from the FPCR precision
| bit, not from the forced-precision bits of the instruction.
*4882a593Smuzhiyun	btstb	#7,FPCR_MODE(%a6) |check for double
*4882a593Smuzhiyun	beqs	cu_nincs
*4882a593Smuzhiyun	bras	cu_nincd
*4882a593Smuzhiyuncu_nrp:
*4882a593Smuzhiyun	tstw	LOCAL_EX(%a0)	|if positive, set lsb
*4882a593Smuzhiyun	blts	cu_nzro
*4882a593Smuzhiyun	btstb	#7,FPCR_MODE(%a6) |check for double
*4882a593Smuzhiyun	beqs	cu_nincs
*4882a593Smuzhiyuncu_nincd:
*4882a593Smuzhiyun	orl	#0x800,LOCAL_LO(%a0) |inc for double
*4882a593Smuzhiyun	bra	cu_nunzro
*4882a593Smuzhiyuncu_nincs:
*4882a593Smuzhiyun	orl	#0x100,LOCAL_HI(%a0) |inc for single
*4882a593Smuzhiyun	bra	cu_nunzro
*4882a593Smuzhiyuncu_nzro:
*4882a593Smuzhiyun	orl	#z_mask,USER_FPSR(%a6)
*4882a593Smuzhiyun	moveb	STAG(%a6),%d0
*4882a593Smuzhiyun	andib	#0xe0,%d0
| NOTE(review): ck_in_com in this file treats tag $20 as zero and $40
| as inf, and cu_nmove only lets tag $00 reach this path, so this
| compare does not look like it can match.  Confirm the intent.
*4882a593Smuzhiyun	cmpib	#0x40,%d0		|check if input was tagged zero
*4882a593Smuzhiyun	beqs	cu_numv
*4882a593Smuzhiyuncu_nunzro:
*4882a593Smuzhiyun	orl	#unfl_mask,USER_FPSR(%a6) |set unfl
*4882a593Smuzhiyuncu_numv:
| NOTE(review): a0 was set to ETEMP at monadic; this copy matters only
| if denorm returns a different a0, which is not visible here.
*4882a593Smuzhiyun	movel	(%a0),ETEMP(%a6)
*4882a593Smuzhiyun	movel	4(%a0),ETEMP_HI(%a6)
*4882a593Smuzhiyun	movel	8(%a0),ETEMP_LO(%a6)
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| Write the result via wr_etemp, setting the fpsr cc bits.  NaN and Inf
*4882a593Smuzhiyun| bypass cu_wrexn.
*4882a593Smuzhiyun|
| Only the sign/exponent word at LOCAL_EX(a0) is examined: $0000 and
| $8000 set Z, and a set sign bit sets N.
*4882a593Smuzhiyuncu_wrexn:
*4882a593Smuzhiyun	tstw	LOCAL_EX(%a0)		|test for zero
*4882a593Smuzhiyun	beqs	cu_wrzero
*4882a593Smuzhiyun	cmpw	#0x8000,LOCAL_EX(%a0)	|test for negative zero
*4882a593Smuzhiyun	bnes	cu_wreon
*4882a593Smuzhiyuncu_wrzero:
*4882a593Smuzhiyun	orl	#z_mask,USER_FPSR(%a6)	|set Z bit
*4882a593Smuzhiyuncu_wreon:
*4882a593Smuzhiyun	tstw	LOCAL_EX(%a0)
*4882a593Smuzhiyun	bpl	wr_etemp		|sign clear: done with cc bits
*4882a593Smuzhiyun	orl	#neg_mask,USER_FPSR(%a6)
*4882a593Smuzhiyun	bra	wr_etemp
*4882a593Smuzhiyun
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| HANDLE SOURCE DENORM HERE
*4882a593Smuzhiyun|
*4882a593Smuzhiyun|				;clear denorm stag to norm
*4882a593Smuzhiyun|				;write the new tag & ete15 to the fstack
*4882a593Smuzhiyunmon_dnrm:
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| At this point, check for the cases in which normalizing the
*4882a593Smuzhiyun| denorm produces incorrect results.
*4882a593Smuzhiyun|
*4882a593Smuzhiyun	tstb	DY_MO_FLG(%a6)	|all cases of dyadic instructions would
*4882a593Smuzhiyun	bnes	nrm_src		|require normalization of denorm
*4882a593Smuzhiyun
*4882a593Smuzhiyun| At this point:
*4882a593Smuzhiyun|	monadic instructions:	fabs  = $18  fneg   = $1a  ftst   = $3a
*4882a593Smuzhiyun|				fmove = $00  fsmove = $40  fdmove = $44
*4882a593Smuzhiyun|				fsqrt = $05* fssqrt = $41  fdsqrt = $45
*4882a593Smuzhiyun|				(*fsqrt reencoded to $05)
*4882a593Smuzhiyun|
*4882a593Smuzhiyun	movew	CMDREG1B(%a6),%d0	|get command register
*4882a593Smuzhiyun	andil	#0x7f,%d0			|strip to only command word
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| At this point, fabs, fneg, fsmove, fdmove, ftst, fsqrt, fssqrt, and
*4882a593Smuzhiyun| fdsqrt are possible.
*4882a593Smuzhiyun| For cases fabs, fneg, fsmove, and fdmove goto cu_dnrm (do not normalize)
*4882a593Smuzhiyun| For cases fsqrt, fssqrt, and fdsqrt goto nrm_src (do normalize)
*4882a593Smuzhiyun|
*4882a593Smuzhiyun	btstl	#0,%d0
*4882a593Smuzhiyun	bnes	nrm_src		|weed out fsqrt instructions
*4882a593Smuzhiyun	st	CU_ONLY(%a6)	|set cu-only inst flag
*4882a593Smuzhiyun	bra	cu_dnrm		|fmove, fabs, fneg, ftst
*4882a593Smuzhiyun|				;cases go to cu_dnrm
*4882a593Smuzhiyunnrm_src:
*4882a593Smuzhiyun	bclrb	#sign_bit,LOCAL_EX(%a0)	|clear sign bit; Z reflects its old value
*4882a593Smuzhiyun	sne	LOCAL_SGN(%a0)		|LOCAL_SGN = $ff if the sign bit was set
*4882a593Smuzhiyun	bsr	nrm_set		|normalize number (exponent will go
*4882a593Smuzhiyun|				; negative)
*4882a593Smuzhiyun	bclrb	#sign_bit,LOCAL_EX(%a0) |get rid of false sign
*4882a593Smuzhiyun
*4882a593Smuzhiyun	bfclr	LOCAL_SGN(%a0){#0:#8}	|change back to IEEE ext format
*4882a593Smuzhiyun	beqs	spos
*4882a593Smuzhiyun	bsetb	#sign_bit,LOCAL_EX(%a0)
*4882a593Smuzhiyunspos:
*4882a593Smuzhiyun	bfclr	STAG(%a6){#0:#4}	|set tag to normalized, ETE15 = 0
*4882a593Smuzhiyun	bsetb	#4,STAG(%a6)	|set ETE15
*4882a593Smuzhiyun	orb	#0xf0,DNRM_FLG(%a6)	|$f0 = source op was denormalized
*4882a593Smuzhiyunnormal:
| Common exit for instructions that are handed back to the 040.  If
| either operand was denormalized the wrap-around checks at ck_wrap
| run first; otherwise fall into fix_stk, which sets RES_FLG and
| returns to the caller.
*4882a593Smuzhiyun	tstb	DNRM_FLG(%a6)	|check if any of the ops were denorms
*4882a593Smuzhiyun	bne	ck_wrap		|if so, check if it is a potential
*4882a593Smuzhiyun|				;wrap-around case
*4882a593Smuzhiyunfix_stk:
| NOTE(review): the meaning of $fe in CU_SAVEPC and of the E1 bit is
| defined by the 040 frame layout (fpsp.h); confirm there.
*4882a593Smuzhiyun	moveb	#0xfe,CU_SAVEPC(%a6)
*4882a593Smuzhiyun	bclrb	#E1,E_BYTE(%a6)
*4882a593Smuzhiyun
*4882a593Smuzhiyun	clrw	NMNEXC(%a6)
*4882a593Smuzhiyun
*4882a593Smuzhiyun	st	RES_FLG(%a6)	|indicate that a restore is needed
*4882a593Smuzhiyun	rts
*4882a593Smuzhiyun
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| cu_dnrm handles all cu-only instructions (fmove, fabs, fneg, and
*4882a593Smuzhiyun| ftst) completely in software without an frestore to the 040.
*4882a593Smuzhiyun|
| Reached from mon_dnrm with a denormalized source; a0 points at
| ETEMP.  ftst is the fall-through case (cu_dtst).
*4882a593Smuzhiyuncu_dnrm:
*4882a593Smuzhiyun	st	CU_ONLY(%a6)		|already set by mon_dnrm; set again here
*4882a593Smuzhiyun	movew	CMDREG1B(%a6),%d0
*4882a593Smuzhiyun	andib	#0x3b,%d0		|isolate bits to select inst
*4882a593Smuzhiyun	tstb	%d0
*4882a593Smuzhiyun	beql	cu_dmove	|if zero, it is an fmove
*4882a593Smuzhiyun	cmpib	#0x18,%d0
*4882a593Smuzhiyun	beql	cu_dabs		|if $18, it is fabs
*4882a593Smuzhiyun	cmpib	#0x1a,%d0
*4882a593Smuzhiyun	beql	cu_dneg		|if $1a, it is fneg
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| Inst is ftst.  Check the source operand and set the cc's accordingly.
*4882a593Smuzhiyun| No write is done, so simply rts.
*4882a593Smuzhiyun|
| Same instruction sequence as cu_ntst.  In: a0 points at the operand.
| Out: N, Z, inf or nan bits ORed into USER_FPSR; LOCAL_SGN(a0) is
| overwritten with the sign.  Uses d0.
*4882a593Smuzhiyuncu_dtst:
*4882a593Smuzhiyun	movew	LOCAL_EX(%a0),%d0	|d0.w = sign and exponent word
*4882a593Smuzhiyun	bclrl	#15,%d0			|strip the sign; Z = 1 if it was clear
*4882a593Smuzhiyun	sne	LOCAL_SGN(%a0)		|record the sign (Scc does not change cc)
*4882a593Smuzhiyun	beqs	cu_dtpo			|so this still tests the old sign bit
*4882a593Smuzhiyun	orl	#neg_mask,USER_FPSR(%a6) |set N
*4882a593Smuzhiyuncu_dtpo:
*4882a593Smuzhiyun	cmpiw	#0x7fff,%d0	|test for inf/nan
*4882a593Smuzhiyun	bnes	cu_dtcz
*4882a593Smuzhiyun	tstl	LOCAL_HI(%a0)	|max exponent: nonzero mantissa means nan
*4882a593Smuzhiyun	bnes	cu_dtn
*4882a593Smuzhiyun	tstl	LOCAL_LO(%a0)
*4882a593Smuzhiyun	bnes	cu_dtn
*4882a593Smuzhiyun	orl	#inf_mask,USER_FPSR(%a6)
*4882a593Smuzhiyun	rts
*4882a593Smuzhiyuncu_dtn:
*4882a593Smuzhiyun	orl	#nan_mask,USER_FPSR(%a6)
*4882a593Smuzhiyun	movel	ETEMP_EX(%a6),FPTEMP_EX(%a6)	|set up fptemp sign for
*4882a593Smuzhiyun|						;snan handler
*4882a593Smuzhiyun	rts
*4882a593Smuzhiyuncu_dtcz:
*4882a593Smuzhiyun	tstl	LOCAL_HI(%a0)	|Z is set only if both mantissa longs are 0
*4882a593Smuzhiyun	bnel	cu_dtsx
*4882a593Smuzhiyun	tstl	LOCAL_LO(%a0)
*4882a593Smuzhiyun	bnel	cu_dtsx
*4882a593Smuzhiyun	orl	#z_mask,USER_FPSR(%a6)
*4882a593Smuzhiyuncu_dtsx:
*4882a593Smuzhiyun	rts
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| Inst is fabs.  Execute the absolute value function on the input.
*4882a593Smuzhiyun| Branch to the fmove code.
*4882a593Smuzhiyun|
*4882a593Smuzhiyuncu_dabs:
*4882a593Smuzhiyun	bclrb	#7,LOCAL_EX(%a0)		|do abs
*4882a593Smuzhiyun	bras	cu_dmove		|fmove code will finish
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| Inst is fneg.  Execute the negate value function on the input.
*4882a593Smuzhiyun| Fall through to the fmove code.
*4882a593Smuzhiyun|
*4882a593Smuzhiyuncu_dneg:
*4882a593Smuzhiyun	bchgb	#7,LOCAL_EX(%a0)		|do neg
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| Inst is fmove.  This code also handles all result writes.
*4882a593Smuzhiyun| If bit 2 is set, round is forced to double.  If it is clear,
*4882a593Smuzhiyun| and bit 6 is set, round is forced to single.  If both are clear,
*4882a593Smuzhiyun| the round precision is found in the fpcr.  If the rounding precision
*4882a593Smuzhiyun| is double or single, the result is zero, and the mode is checked
*4882a593Smuzhiyun| to determine if the lsb of the result should be set.
*4882a593Smuzhiyun|
| Falls through to cu_dmnr when neither forced-precision bit is set.
*4882a593Smuzhiyuncu_dmove:
*4882a593Smuzhiyun	btstb	#2,CMDREG1B+1(%a6)	|check for rd
*4882a593Smuzhiyun	bne	cu_dmrd
*4882a593Smuzhiyun	btstb	#6,CMDREG1B+1(%a6)	|check for rs
*4882a593Smuzhiyun	bne	cu_dmrs
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| The move or operation is not with forced precision.  Use the
*4882a593Smuzhiyun| top two bits of the FPCR_MODE byte to get the rounding precision.
*4882a593Smuzhiyun|
*4882a593Smuzhiyuncu_dmnr:
*4882a593Smuzhiyun	bfextu	FPCR_MODE(%a6){#0:#2},%d0	|d0 = precision field
*4882a593Smuzhiyun	tstb	%d0			|check for extended
*4882a593Smuzhiyun	beq	cu_wrexd		|if so, just write result
*4882a593Smuzhiyun	cmpib	#1,%d0			|check for single
*4882a593Smuzhiyun	beq	cu_dmrs			|fall through to double
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| The move is fdmove or round precision is double.  Result is zero.
*4882a593Smuzhiyun| Check rmode for rp or rm and set lsb accordingly.
*4882a593Smuzhiyun|
| The lsb is kept only when the rounding direction points away from
| zero for the sign of the operand: rp (3) for positive, rm (2) for
| negative.
*4882a593Smuzhiyuncu_dmrd:
*4882a593Smuzhiyun	bfextu	FPCR_MODE(%a6){#2:#2},%d1	|get rmode
*4882a593Smuzhiyun	tstw	LOCAL_EX(%a0)		|check sign
*4882a593Smuzhiyun	blts	cu_dmdn
*4882a593Smuzhiyun	cmpib	#3,%d1			|check for rp
*4882a593Smuzhiyun	bne	cu_dpd			|load double pos zero
*4882a593Smuzhiyun	bra	cu_dpdr			|load double pos zero w/lsb
*4882a593Smuzhiyuncu_dmdn:
*4882a593Smuzhiyun	cmpib	#2,%d1			|check for rm
*4882a593Smuzhiyun	bne	cu_dnd			|load double neg zero
*4882a593Smuzhiyun	bra	cu_dndr			|load double neg zero w/lsb
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| The move is fsmove or round precision is single.  Result is zero.
*4882a593Smuzhiyun| Check for rp or rm and set lsb accordingly.
*4882a593Smuzhiyun|
| Same decision as cu_dmrd, dispatching to the single-format loaders.
*4882a593Smuzhiyuncu_dmrs:
*4882a593Smuzhiyun	bfextu	FPCR_MODE(%a6){#2:#2},%d1	|get rmode
*4882a593Smuzhiyun	tstw	LOCAL_EX(%a0)		|check sign
*4882a593Smuzhiyun	blts	cu_dmsn
*4882a593Smuzhiyun	cmpib	#3,%d1			|check for rp
*4882a593Smuzhiyun	bne	cu_spd			|load single pos zero
*4882a593Smuzhiyun	bra	cu_spdr			|load single pos zero w/lsb
*4882a593Smuzhiyuncu_dmsn:
*4882a593Smuzhiyun	cmpib	#2,%d1			|check for rm
*4882a593Smuzhiyun	bne	cu_snd			|load single neg zero
*4882a593Smuzhiyun	bra	cu_sndr			|load single neg zero w/lsb
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| The precision is extended, so the result in etemp is correct.
*4882a593Smuzhiyun| Simply set unfl (not inex2 or aunfl) and write the result to
*4882a593Smuzhiyun| the correct fp register.
| N is set whenever the sign/exponent word is nonzero.
| NOTE(review): this equals a sign test only if the exponent field of
| the denorm is zero - confirm.
*4882a593Smuzhiyuncu_wrexd:
*4882a593Smuzhiyun	orl	#unfl_mask,USER_FPSR(%a6)
*4882a593Smuzhiyun	tstw	LOCAL_EX(%a0)
*4882a593Smuzhiyun	beq	wr_etemp		|word is zero: positive, N stays clear
*4882a593Smuzhiyun	orl	#neg_mask,USER_FPSR(%a6)
*4882a593Smuzhiyun	bra	wr_etemp
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| These routines write +/- zero in double format.  The routines
*4882a593Smuzhiyun| cu_dpdr and cu_dndr set the double lsb.
*4882a593Smuzhiyun|
| Each routine overwrites the operand at a0 (exponent $3c01 with the
| wanted sign, mantissa zero or $800 in the low long), ORs the
| matching Z/N bits and unfinx_mask into USER_FPSR, and leaves through
| wr_etemp.  Z is set only by the variants without the lsb.
*4882a593Smuzhiyuncu_dpd:
*4882a593Smuzhiyun	movel	#0x3c010000,LOCAL_EX(%a0)	|force pos double zero
*4882a593Smuzhiyun	clrl	LOCAL_HI(%a0)
*4882a593Smuzhiyun	clrl	LOCAL_LO(%a0)
*4882a593Smuzhiyun	orl	#z_mask,USER_FPSR(%a6)
*4882a593Smuzhiyun	orl	#unfinx_mask,USER_FPSR(%a6)
*4882a593Smuzhiyun	bra	wr_etemp
*4882a593Smuzhiyuncu_dpdr:
*4882a593Smuzhiyun	movel	#0x3c010000,LOCAL_EX(%a0)	|force pos double zero
*4882a593Smuzhiyun	clrl	LOCAL_HI(%a0)
*4882a593Smuzhiyun	movel	#0x800,LOCAL_LO(%a0)	|with lsb set
*4882a593Smuzhiyun	orl	#unfinx_mask,USER_FPSR(%a6)
*4882a593Smuzhiyun	bra	wr_etemp
*4882a593Smuzhiyuncu_dnd:
*4882a593Smuzhiyun	movel	#0xbc010000,LOCAL_EX(%a0)	|force neg double zero
*4882a593Smuzhiyun	clrl	LOCAL_HI(%a0)
*4882a593Smuzhiyun	clrl	LOCAL_LO(%a0)
*4882a593Smuzhiyun	orl	#z_mask,USER_FPSR(%a6)
*4882a593Smuzhiyun	orl	#neg_mask,USER_FPSR(%a6)
*4882a593Smuzhiyun	orl	#unfinx_mask,USER_FPSR(%a6)
*4882a593Smuzhiyun	bra	wr_etemp
*4882a593Smuzhiyuncu_dndr:
*4882a593Smuzhiyun	movel	#0xbc010000,LOCAL_EX(%a0)	|force neg double zero
*4882a593Smuzhiyun	clrl	LOCAL_HI(%a0)
*4882a593Smuzhiyun	movel	#0x800,LOCAL_LO(%a0)	|with lsb set
*4882a593Smuzhiyun	orl	#neg_mask,USER_FPSR(%a6)
*4882a593Smuzhiyun	orl	#unfinx_mask,USER_FPSR(%a6)
*4882a593Smuzhiyun	bra	wr_etemp
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| These routines write +/- zero in single format.  The routines
*4882a593Smuzhiyun| cu_spdr and cu_sndr set the single lsb.
*4882a593Smuzhiyun|
| Each routine overwrites the operand at a0 (exponent $3f81 with the
| wanted sign, mantissa zero or $100 in the high long), ORs the
| matching Z/N bits and unfinx_mask into USER_FPSR, and leaves through
| wr_etemp.  Z is set only by the variants without the lsb.
*4882a593Smuzhiyuncu_spd:
*4882a593Smuzhiyun	movel	#0x3f810000,LOCAL_EX(%a0)	|force pos single zero
*4882a593Smuzhiyun	clrl	LOCAL_HI(%a0)
*4882a593Smuzhiyun	clrl	LOCAL_LO(%a0)
*4882a593Smuzhiyun	orl	#z_mask,USER_FPSR(%a6)
*4882a593Smuzhiyun	orl	#unfinx_mask,USER_FPSR(%a6)
*4882a593Smuzhiyun	bra	wr_etemp
*4882a593Smuzhiyuncu_spdr:
*4882a593Smuzhiyun	movel	#0x3f810000,LOCAL_EX(%a0)	|force pos single zero
*4882a593Smuzhiyun	movel	#0x100,LOCAL_HI(%a0)	|with lsb set
*4882a593Smuzhiyun	clrl	LOCAL_LO(%a0)
*4882a593Smuzhiyun	orl	#unfinx_mask,USER_FPSR(%a6)
*4882a593Smuzhiyun	bra	wr_etemp
*4882a593Smuzhiyuncu_snd:
*4882a593Smuzhiyun	movel	#0xbf810000,LOCAL_EX(%a0)	|force neg single zero
*4882a593Smuzhiyun	clrl	LOCAL_HI(%a0)
*4882a593Smuzhiyun	clrl	LOCAL_LO(%a0)
*4882a593Smuzhiyun	orl	#z_mask,USER_FPSR(%a6)
*4882a593Smuzhiyun	orl	#neg_mask,USER_FPSR(%a6)
*4882a593Smuzhiyun	orl	#unfinx_mask,USER_FPSR(%a6)
*4882a593Smuzhiyun	bra	wr_etemp
*4882a593Smuzhiyuncu_sndr:
*4882a593Smuzhiyun	movel	#0xbf810000,LOCAL_EX(%a0)	|force neg single zero
*4882a593Smuzhiyun	movel	#0x100,LOCAL_HI(%a0)	|with lsb set
*4882a593Smuzhiyun	clrl	LOCAL_LO(%a0)
*4882a593Smuzhiyun	orl	#neg_mask,USER_FPSR(%a6)
*4882a593Smuzhiyun	orl	#unfinx_mask,USER_FPSR(%a6)
*4882a593Smuzhiyun	bra	wr_etemp
*4882a593Smuzhiyun
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| This code checks for 16-bit overflow conditions on dyadic
*4882a593Smuzhiyun| operations which are not restorable into the floating-point
*4882a593Smuzhiyun| unit and must be completed in software.  Basically, this
*4882a593Smuzhiyun| condition exists with a very large norm and a denorm.  One
*4882a593Smuzhiyun| of the operands must be denormalized to enter this code.
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| Flags used:
*4882a593Smuzhiyun|	DY_MO_FLG contains 0 for monadic op, $ff for dyadic
*4882a593Smuzhiyun|	DNRM_FLG contains $00 for neither op denormalized
*4882a593Smuzhiyun|	                  $0f for the destination op denormalized
*4882a593Smuzhiyun|	                  $f0 for the source op denormalized
*4882a593Smuzhiyun|	                  $ff for both ops denormalized
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| The wrap-around condition occurs for add, sub, div, and cmp
*4882a593Smuzhiyun| when
*4882a593Smuzhiyun|
*4882a593Smuzhiyun|	abs(dest_exp - src_exp) >= $8000
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| and for mul when
*4882a593Smuzhiyun|
*4882a593Smuzhiyun|	(dest_exp + src_exp) < $0
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| we must process the operation here if this case is true.
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| The rts following the frcfpn routine is the exit from res_func
*4882a593Smuzhiyun| for this condition.  The restore flag (RES_FLG) is left clear.
*4882a593Smuzhiyun| No frestore is done unless an exception is to be reported.
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| For fadd:
*4882a593Smuzhiyun|	if(sign_of(dest) != sign_of(src))
*4882a593Smuzhiyun|		replace exponent of src with $3fff (keep sign)
*4882a593Smuzhiyun|		use fpu to perform dest+new_src (user's rmode and X)
*4882a593Smuzhiyun|		clr sticky
*4882a593Smuzhiyun|	else
*4882a593Smuzhiyun|		set sticky
*4882a593Smuzhiyun|	call round with user's precision and mode
*4882a593Smuzhiyun|	move result to fpn and wbtemp
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| For fsub:
*4882a593Smuzhiyun|	if(sign_of(dest) == sign_of(src))
*4882a593Smuzhiyun|		replace exponent of src with $3fff (keep sign)
*4882a593Smuzhiyun|		use fpu to perform dest+new_src (user's rmode and X)
*4882a593Smuzhiyun|		clr sticky
*4882a593Smuzhiyun|	else
*4882a593Smuzhiyun|		set sticky
*4882a593Smuzhiyun|	call round with user's precision and mode
*4882a593Smuzhiyun|	move result to fpn and wbtemp
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| For fdiv/fsgldiv:
*4882a593Smuzhiyun|	if(both operands are denorm)
*4882a593Smuzhiyun|		restore_to_fpu;
*4882a593Smuzhiyun|	if(dest is norm)
*4882a593Smuzhiyun|		force_ovf;
*4882a593Smuzhiyun|	else(dest is denorm)
*4882a593Smuzhiyun|		force_unf:
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| For fcmp:
*4882a593Smuzhiyun|	if(dest is norm)
*4882a593Smuzhiyun|		N = sign_of(dest);
*4882a593Smuzhiyun|	else(dest is denorm)
*4882a593Smuzhiyun|		N = sign_of(src);
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| For fmul:
*4882a593Smuzhiyun|	if(both operands are denorm)
*4882a593Smuzhiyun|		force_unf;
*4882a593Smuzhiyun|	if((dest_exp + src_exp) < 0)
*4882a593Smuzhiyun|		force_unf:
*4882a593Smuzhiyun|	else
*4882a593Smuzhiyun|		restore_to_fpu;
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| local equates:
| Values of (CMDREG1B & $3b) that ck_wrap compares against.  divcode
| is not referenced in the visible code: fdiv is the fall-through case
| of the ck_wrap dispatch.
*4882a593Smuzhiyun	.set	addcode,0x22
*4882a593Smuzhiyun	.set	subcode,0x28
*4882a593Smuzhiyun	.set	mulcode,0x23
*4882a593Smuzhiyun	.set	divcode,0x20
*4882a593Smuzhiyun	.set	cmpcode,0x38
*4882a593Smuzhiyunck_wrap:
| ck_wrap: entered from normal when DNRM_FLG is nonzero.  Monadic
| instructions (only fsqrt with a denormalized source gets here) are
| handed straight to fix_stk; dyadic ones are dispatched on
| CMDREG1B & $3b.  fdiv is the fall-through case (wrap_div follows).
|
| The DY_MO_FLG test must be live.  The branch that enters here is a
| bne, so Z is clear on entry; without the tstb the beq below could
| never be taken and fsqrt would run through the fdiv wrap checks
| against whatever FPTEMP and DTAG happen to hold.
*4882a593Smuzhiyun	tstb	DY_MO_FLG(%a6)	|check for fsqrt
*4882a593Smuzhiyun	beq	fix_stk		|if zero, it is fsqrt
*4882a593Smuzhiyun	movew	CMDREG1B(%a6),%d0
*4882a593Smuzhiyun	andiw	#0x3b,%d0		|strip to command bits
*4882a593Smuzhiyun	cmpiw	#addcode,%d0
*4882a593Smuzhiyun	beq	wrap_add
*4882a593Smuzhiyun	cmpiw	#subcode,%d0
*4882a593Smuzhiyun	beq	wrap_sub
*4882a593Smuzhiyun	cmpiw	#mulcode,%d0
*4882a593Smuzhiyun	beq	wrap_mul
*4882a593Smuzhiyun	cmpiw	#cmpcode,%d0
*4882a593Smuzhiyun	beq	wrap_cmp
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| Inst is fdiv.
*4882a593Smuzhiyun|
*4882a593Smuzhiyunwrap_div:
*4882a593Smuzhiyun	cmpb	#0xff,DNRM_FLG(%a6) |if both ops denorm,
*4882a593Smuzhiyun	beq	fix_stk		 |restore to fpu
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| One of the ops is denormalized.  Test for wrap condition
*4882a593Smuzhiyun| and force the result.
*4882a593Smuzhiyun|
*4882a593Smuzhiyun	cmpb	#0x0f,DNRM_FLG(%a6) |check for dest denorm
*4882a593Smuzhiyun	bnes	div_srcd
*4882a593Smuzhiyundiv_destd:
| Destination was the denorm.  If the source is tagged zero, inf or
| nan, or the exponent difference is below the limit, restore to the
| fpu; otherwise store the result sign in WBTEMP_SGN and go to
| force_unf (not in the visible part of this file).
*4882a593Smuzhiyun	bsrl	ckinf_ns		|Z clear on return = zero/inf/nan
*4882a593Smuzhiyun	bne	fix_stk
*4882a593Smuzhiyun	bfextu	ETEMP_EX(%a6){#1:#15},%d0	|get src exp (always pos)
*4882a593Smuzhiyun	bfexts	FPTEMP_EX(%a6){#1:#15},%d1	|get dest exp (always neg)
*4882a593Smuzhiyun	subl	%d1,%d0			|subtract dest from src
| NOTE(review): the limit here is $7fff while div_srcd uses $8000 and
| the description of the wrap condition says >= $8000.  Confirm.
*4882a593Smuzhiyun	cmpl	#0x7fff,%d0
*4882a593Smuzhiyun	blt	fix_stk			|if less, not wrap case
*4882a593Smuzhiyun	clrb	WBTEMP_SGN(%a6)
*4882a593Smuzhiyun	movew	ETEMP_EX(%a6),%d0		|find the sign of the result
*4882a593Smuzhiyun	movew	FPTEMP_EX(%a6),%d1
*4882a593Smuzhiyun	eorw	%d1,%d0			|bit 15 = src sign xor dest sign
*4882a593Smuzhiyun	andiw	#0x8000,%d0
*4882a593Smuzhiyun	beq	force_unf		|signs equal: positive result
*4882a593Smuzhiyun	st	WBTEMP_SGN(%a6)		|signs differ: negative result
*4882a593Smuzhiyun	bra	force_unf
*4882a593Smuzhiyun
*4882a593Smuzhiyunckinf_ns:
| ckinf_ns / ckinf_nd: classify the source (STAG) or destination
| (DTAG) tag.  Returns d0 = -1 with Z clear when tag bits 6-5 say
| zero, inf or nan, and d0 = 0 with Z set otherwise.  Callers branch
| on the condition codes left by the final clrl/moveq (rts does not
| change them).
*4882a593Smuzhiyun	moveb	STAG(%a6),%d0		|check source tag for inf or nan
*4882a593Smuzhiyun	bra	ck_in_com
*4882a593Smuzhiyunckinf_nd:
*4882a593Smuzhiyun	moveb	DTAG(%a6),%d0		|check destination tag for inf or nan
*4882a593Smuzhiyunck_in_com:
*4882a593Smuzhiyun	andib	#0x60,%d0			|isolate tag bits
*4882a593Smuzhiyun	cmpb	#0x40,%d0			|is it inf?
*4882a593Smuzhiyun	beq	nan_or_inf		|not wrap case
*4882a593Smuzhiyun	cmpb	#0x60,%d0			|is it nan?
*4882a593Smuzhiyun	beq	nan_or_inf		|yes, not wrap case
*4882a593Smuzhiyun	cmpb	#0x20,%d0			|is it a zero?
*4882a593Smuzhiyun	beq	nan_or_inf		|yes
*4882a593Smuzhiyun	clrl	%d0
*4882a593Smuzhiyun	rts				|tag bits 6-5 are clear: it is a norm,
*4882a593Smuzhiyun|					;check wrap case
*4882a593Smuzhiyunnan_or_inf:
*4882a593Smuzhiyun	moveql	#-1,%d0
*4882a593Smuzhiyun	rts
*4882a593Smuzhiyun
*4882a593Smuzhiyun
*4882a593Smuzhiyun
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| fdiv with the source as the denorm.  ckinf_nd returns non-zero when
*4882a593Smuzhiyun| the destination tag is zero, inf or nan; those cases go to fix_stk.
*4882a593Smuzhiyun| Otherwise d0 = dest exp - src exp, and a difference below 0x8000
*4882a593Smuzhiyun| also goes to fix_stk.  The remaining case sets WBTEMP_SGN to the
*4882a593Smuzhiyun| XOR of the operand signs and falls through into force_ovf.
*4882a593Smuzhiyun|
*4882a593Smuzhiyundiv_srcd:
*4882a593Smuzhiyun	bsrl	ckinf_nd
*4882a593Smuzhiyun	bne	fix_stk
*4882a593Smuzhiyun	bfextu	FPTEMP_EX(%a6){#1:#15},%d0	|get dest exp (always pos)
*4882a593Smuzhiyun	bfexts	ETEMP_EX(%a6){#1:#15},%d1	|get src exp (always neg)
*4882a593Smuzhiyun	subl	%d1,%d0			|subtract src from dest
*4882a593Smuzhiyun	cmpl	#0x8000,%d0
*4882a593Smuzhiyun	blt	fix_stk			|if less, not wrap case
*4882a593Smuzhiyun	clrb	WBTEMP_SGN(%a6)
*4882a593Smuzhiyun	movew	ETEMP_EX(%a6),%d0		|find the sign of the result
*4882a593Smuzhiyun	movew	FPTEMP_EX(%a6),%d1
*4882a593Smuzhiyun	eorw	%d1,%d0
*4882a593Smuzhiyun	andiw	#0x8000,%d0
*4882a593Smuzhiyun	beqs	force_ovf		|signs alike: WBTEMP_SGN stays clear
*4882a593Smuzhiyun	st	WBTEMP_SGN(%a6)		|signs differ: mark result negative
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| This code handles the case of the instruction resulting in
*4882a593Smuzhiyun| an overflow condition.
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| On entry WBTEMP_SGN already holds the result sign (set by the
*4882a593Smuzhiyun| div_srcd code that falls into this label).  The routine clears E1
*4882a593Smuzhiyun| in E_BYTE, ORs ovfl_inx_mask into USER_FPSR, clears NMNEXC, then
*4882a593Smuzhiyun| calls ovf_res with a0 -> WBTEMP and d0 = rounding precision:
*4882a593Smuzhiyun| 1 (forced single) or 2 (forced double) when CMDREG1B bit 6 is set,
*4882a593Smuzhiyun| otherwise the top two bits of FPCR_MODE.  Finally the WBTEMP_SGN
*4882a593Smuzhiyun| byte is folded back into the sign bit of WBTEMP_EX and control
*4882a593Smuzhiyun| goes to frcfpn to write the destination register.
*4882a593Smuzhiyun|
*4882a593Smuzhiyunforce_ovf:
*4882a593Smuzhiyun	bclrb	#E1,E_BYTE(%a6)
*4882a593Smuzhiyun	orl	#ovfl_inx_mask,USER_FPSR(%a6)
*4882a593Smuzhiyun	clrw	NMNEXC(%a6)
*4882a593Smuzhiyun	leal	WBTEMP(%a6),%a0		|point a0 to memory location
*4882a593Smuzhiyun	movew	CMDREG1B(%a6),%d0
*4882a593Smuzhiyun	btstl	#6,%d0			|test for forced precision
*4882a593Smuzhiyun	beqs	frcovf_fpcr
*4882a593Smuzhiyun	btstl	#2,%d0			|check for double
*4882a593Smuzhiyun	bnes	frcovf_dbl
*4882a593Smuzhiyun	movel	#0x1,%d0			|inst is forced single
*4882a593Smuzhiyun	bras	frcovf_rnd
*4882a593Smuzhiyunfrcovf_dbl:
*4882a593Smuzhiyun	movel	#0x2,%d0			|inst is forced double
*4882a593Smuzhiyun	bras	frcovf_rnd
*4882a593Smuzhiyunfrcovf_fpcr:
*4882a593Smuzhiyun	bfextu	FPCR_MODE(%a6){#0:#2},%d0	|inst not forced - use fpcr prec
*4882a593Smuzhiyunfrcovf_rnd:
*4882a593Smuzhiyun
*4882a593Smuzhiyun| The 881/882 does not set inex2 for the following case, so the
*4882a593Smuzhiyun| line is commented out to be compatible with 881/882
*4882a593Smuzhiyun|	tst.b	%d0
*4882a593Smuzhiyun|	beq.b	frcovf_x
*4882a593Smuzhiyun|	or.l	#inex2_mask,USER_FPSR(%a6) ;if prec is s or d, set inex2
*4882a593Smuzhiyun
*4882a593Smuzhiyun|frcovf_x:
*4882a593Smuzhiyun	bsrl	ovf_res			|get correct result based on
*4882a593Smuzhiyun|					;round precision/mode.  This
*4882a593Smuzhiyun|					;sets FPSR_CC correctly
*4882a593Smuzhiyun|					;returns in external format
*4882a593Smuzhiyun	bfclr	WBTEMP_SGN(%a6){#0:#8}	|test, then clear, the sign byte
*4882a593Smuzhiyun	beq	frcfpn			|byte was zero: result is positive
*4882a593Smuzhiyun	bsetb	#sign_bit,WBTEMP_EX(%a6)	|byte was set: result is negative
*4882a593Smuzhiyun	bra	frcfpn
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| Inst is fadd.
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| Both operands denorm (DNRM_FLG = 0xff) goes to fix_stk.  Otherwise
*4882a593Smuzhiyun| the non-denorm operand's tag is checked with ckinf_ns/ckinf_nd
*4882a593Smuzhiyun| (zero, inf or nan goes to fix_stk) and the exponent difference
*4882a593Smuzhiyun| norm - denorm is compared with 0x8000; a smaller difference also
*4882a593Smuzhiyun| goes to fix_stk.  The remaining cases continue at add_wrap
*4882a593Smuzhiyun| (add_srcd reaches it by fall-through).
*4882a593Smuzhiyun|
*4882a593Smuzhiyunwrap_add:
*4882a593Smuzhiyun	cmpb	#0xff,DNRM_FLG(%a6) |if both ops denorm,
*4882a593Smuzhiyun	beq	fix_stk		 |restore to fpu
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| One of the ops is denormalized.  Test for wrap condition
*4882a593Smuzhiyun| and complete the instruction.
*4882a593Smuzhiyun|
*4882a593Smuzhiyun	cmpb	#0x0f,DNRM_FLG(%a6) |check for dest denorm
*4882a593Smuzhiyun	bnes	add_srcd
*4882a593Smuzhiyunadd_destd:
*4882a593Smuzhiyun	bsrl	ckinf_ns
*4882a593Smuzhiyun	bne	fix_stk
*4882a593Smuzhiyun	bfextu	ETEMP_EX(%a6){#1:#15},%d0	|get src exp (always pos)
*4882a593Smuzhiyun	bfexts	FPTEMP_EX(%a6){#1:#15},%d1	|get dest exp (always neg)
*4882a593Smuzhiyun	subl	%d1,%d0			|subtract dest from src
*4882a593Smuzhiyun	cmpl	#0x8000,%d0
*4882a593Smuzhiyun	blt	fix_stk			|if less, not wrap case
*4882a593Smuzhiyun	bra	add_wrap
*4882a593Smuzhiyunadd_srcd:
*4882a593Smuzhiyun	bsrl	ckinf_nd
*4882a593Smuzhiyun	bne	fix_stk
*4882a593Smuzhiyun	bfextu	FPTEMP_EX(%a6){#1:#15},%d0	|get dest exp (always pos)
*4882a593Smuzhiyun	bfexts	ETEMP_EX(%a6){#1:#15},%d1	|get src exp (always neg)
*4882a593Smuzhiyun	subl	%d1,%d0			|subtract src from dest
*4882a593Smuzhiyun	cmpl	#0x8000,%d0
*4882a593Smuzhiyun	blt	fix_stk			|if less, not wrap case
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| Check the signs of the operands.  If they are unlike, the fpu
*4882a593Smuzhiyun| can be used to add the norm and 1.0 with the sign of the
*4882a593Smuzhiyun| denorm and it will correctly generate the result in extended
*4882a593Smuzhiyun| precision.  We can then call round with no sticky and the result
*4882a593Smuzhiyun| will be correct for the user's rounding mode and precision.  If
*4882a593Smuzhiyun| the signs are the same, we call round with the sticky bit set
*4882a593Smuzhiyun| and the result will be correct for the user's rounding mode and
*4882a593Smuzhiyun| precision.
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| Both unlike-sign paths below follow the same steps: overwrite the
*4882a593Smuzhiyun| denorm's exponent word with its sign plus 0x3fff, load FPCR with
*4882a593Smuzhiyun| only bits 5-4 of USER_FPCR, add on the FPU, OR the resulting FPSR
*4882a593Smuzhiyun| into USER_FPSR, store the sum to WBTEMP, then call round with
*4882a593Smuzhiyun| a0 -> WBTEMP, d0 = 0 (no sticky) and d1 = precision (upper word) |
*4882a593Smuzhiyun| rounding mode (low bits).  The sign is moved into WBTEMP_SGN for
*4882a593Smuzhiyun| the round call and moved back to the sign bit afterwards.
*4882a593Smuzhiyun|
*4882a593Smuzhiyunadd_wrap:
*4882a593Smuzhiyun	movew	ETEMP_EX(%a6),%d0
*4882a593Smuzhiyun	movew	FPTEMP_EX(%a6),%d1
*4882a593Smuzhiyun	eorw	%d1,%d0
*4882a593Smuzhiyun	andiw	#0x8000,%d0		|non-zero iff the signs differ
*4882a593Smuzhiyun	beq	add_same
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| The signs are unlike.
*4882a593Smuzhiyun|
*4882a593Smuzhiyun	cmpb	#0x0f,DNRM_FLG(%a6) |is dest the denorm?
*4882a593Smuzhiyun	bnes	add_u_srcd
*4882a593Smuzhiyun	movew	FPTEMP_EX(%a6),%d0
*4882a593Smuzhiyun	andiw	#0x8000,%d0
*4882a593Smuzhiyun	orw	#0x3fff,%d0	|force the exponent to +/- 1
*4882a593Smuzhiyun	movew	%d0,FPTEMP_EX(%a6) |in the denorm
*4882a593Smuzhiyun	movel	USER_FPCR(%a6),%d0
*4882a593Smuzhiyun	andil	#0x30,%d0
*4882a593Smuzhiyun	fmovel	%d0,%fpcr		|set up users rmode and X
*4882a593Smuzhiyun	fmovex	ETEMP(%a6),%fp0
*4882a593Smuzhiyun	faddx	FPTEMP(%a6),%fp0
*4882a593Smuzhiyun	leal	WBTEMP(%a6),%a0	|point a0 to wbtemp in frame
*4882a593Smuzhiyun	fmovel	%fpsr,%d1
*4882a593Smuzhiyun	orl	%d1,USER_FPSR(%a6) |capture cc's and inex from fadd
*4882a593Smuzhiyun	fmovex	%fp0,WBTEMP(%a6)	|write result to memory
*4882a593Smuzhiyun	lsrl	#4,%d0		|put rmode in lower 2 bits
*4882a593Smuzhiyun	movel	USER_FPCR(%a6),%d1
*4882a593Smuzhiyun	andil	#0xc0,%d1
*4882a593Smuzhiyun	lsrl	#6,%d1		|put precision in upper word
*4882a593Smuzhiyun	swap	%d1
*4882a593Smuzhiyun	orl	%d0,%d1		|set up for round call
*4882a593Smuzhiyun	clrl	%d0		|force sticky to zero
*4882a593Smuzhiyun	bclrb	#sign_bit,WBTEMP_EX(%a6)
*4882a593Smuzhiyun	sne	WBTEMP_SGN(%a6)	|use internal format for round
*4882a593Smuzhiyun	bsrl	round		|round result to users rmode & prec
*4882a593Smuzhiyun	bfclr	WBTEMP_SGN(%a6){#0:#8}	|convert back to IEEE ext format
*4882a593Smuzhiyun	beq	frcfpnr
*4882a593Smuzhiyun	bsetb	#sign_bit,WBTEMP_EX(%a6)
*4882a593Smuzhiyun	bra	frcfpnr
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| Unlike signs, source is the denorm: same sequence as above with
*4882a593Smuzhiyun| the exponent forced in ETEMP instead of FPTEMP.
*4882a593Smuzhiyun|
*4882a593Smuzhiyunadd_u_srcd:
*4882a593Smuzhiyun	movew	ETEMP_EX(%a6),%d0
*4882a593Smuzhiyun	andiw	#0x8000,%d0
*4882a593Smuzhiyun	orw	#0x3fff,%d0	|force the exponent to +/- 1
*4882a593Smuzhiyun	movew	%d0,ETEMP_EX(%a6) |in the denorm
*4882a593Smuzhiyun	movel	USER_FPCR(%a6),%d0
*4882a593Smuzhiyun	andil	#0x30,%d0
*4882a593Smuzhiyun	fmovel	%d0,%fpcr		|set up users rmode and X
*4882a593Smuzhiyun	fmovex	ETEMP(%a6),%fp0
*4882a593Smuzhiyun	faddx	FPTEMP(%a6),%fp0
*4882a593Smuzhiyun	fmovel	%fpsr,%d1
*4882a593Smuzhiyun	orl	%d1,USER_FPSR(%a6) |capture cc's and inex from fadd
*4882a593Smuzhiyun	leal	WBTEMP(%a6),%a0	|point a0 to wbtemp in frame
*4882a593Smuzhiyun	fmovex	%fp0,WBTEMP(%a6)	|write result to memory
*4882a593Smuzhiyun	lsrl	#4,%d0		|put rmode in lower 2 bits
*4882a593Smuzhiyun	movel	USER_FPCR(%a6),%d1
*4882a593Smuzhiyun	andil	#0xc0,%d1
*4882a593Smuzhiyun	lsrl	#6,%d1		|put precision in upper word
*4882a593Smuzhiyun	swap	%d1
*4882a593Smuzhiyun	orl	%d0,%d1		|set up for round call
*4882a593Smuzhiyun	clrl	%d0		|force sticky to zero
*4882a593Smuzhiyun	bclrb	#sign_bit,WBTEMP_EX(%a6)
*4882a593Smuzhiyun	sne	WBTEMP_SGN(%a6)	|use internal format for round
*4882a593Smuzhiyun	bsrl	round		|round result to users rmode & prec
*4882a593Smuzhiyun	bfclr	WBTEMP_SGN(%a6){#0:#8}	|convert back to IEEE ext format
*4882a593Smuzhiyun	beq	frcfpnr
*4882a593Smuzhiyun	bsetb	#sign_bit,WBTEMP_EX(%a6)
*4882a593Smuzhiyun	bra	frcfpnr
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| Signs are alike:
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| The operand that is not the denorm (ETEMP when the dest is the
*4882a593Smuzhiyun| denorm, FPTEMP when the source is) is rounded in place by calling
*4882a593Smuzhiyun| round with a0 -> that operand, d0 = 0x20000000 (sticky set) and
*4882a593Smuzhiyun| d1 = precision (upper word) | rounding mode (low bits).  The
*4882a593Smuzhiyun| rounded value is copied to WBTEMP, neg_mask is ORed into USER_FPSR
*4882a593Smuzhiyun| unless the exponent word tests greater than zero as a signed word,
*4882a593Smuzhiyun| and add_ckovf checks for an exponent of 0x7fff.
*4882a593Smuzhiyun|
*4882a593Smuzhiyunadd_same:
*4882a593Smuzhiyun	cmpb	#0x0f,DNRM_FLG(%a6) |is dest the denorm?
*4882a593Smuzhiyun	bnes	add_s_srcd
*4882a593Smuzhiyunadd_s_destd:
*4882a593Smuzhiyun	leal	ETEMP(%a6),%a0
*4882a593Smuzhiyun	movel	USER_FPCR(%a6),%d0
*4882a593Smuzhiyun	andil	#0x30,%d0
*4882a593Smuzhiyun	lsrl	#4,%d0		|put rmode in lower 2 bits
*4882a593Smuzhiyun	movel	USER_FPCR(%a6),%d1
*4882a593Smuzhiyun	andil	#0xc0,%d1
*4882a593Smuzhiyun	lsrl	#6,%d1		|put precision in upper word
*4882a593Smuzhiyun	swap	%d1
*4882a593Smuzhiyun	orl	%d0,%d1		|set up for round call
*4882a593Smuzhiyun	movel	#0x20000000,%d0	|set sticky for round
*4882a593Smuzhiyun	bclrb	#sign_bit,ETEMP_EX(%a6)
*4882a593Smuzhiyun	sne	ETEMP_SGN(%a6)	|use internal format for round
*4882a593Smuzhiyun	bsrl	round		|round result to users rmode & prec
*4882a593Smuzhiyun	bfclr	ETEMP_SGN(%a6){#0:#8}	|convert back to IEEE ext format
*4882a593Smuzhiyun	beqs	add_s_dclr
*4882a593Smuzhiyun	bsetb	#sign_bit,ETEMP_EX(%a6)
*4882a593Smuzhiyunadd_s_dclr:
*4882a593Smuzhiyun	leal	WBTEMP(%a6),%a0
*4882a593Smuzhiyun	movel	ETEMP(%a6),(%a0)	|write result to wbtemp
*4882a593Smuzhiyun	movel	ETEMP_HI(%a6),4(%a0)
*4882a593Smuzhiyun	movel	ETEMP_LO(%a6),8(%a0)
*4882a593Smuzhiyun	tstw	ETEMP_EX(%a6)
*4882a593Smuzhiyun	bgt	add_ckovf
*4882a593Smuzhiyun	orl	#neg_mask,USER_FPSR(%a6)
*4882a593Smuzhiyun	bra	add_ckovf
*4882a593Smuzhiyunadd_s_srcd:
*4882a593Smuzhiyun	leal	FPTEMP(%a6),%a0
*4882a593Smuzhiyun	movel	USER_FPCR(%a6),%d0
*4882a593Smuzhiyun	andil	#0x30,%d0
*4882a593Smuzhiyun	lsrl	#4,%d0		|put rmode in lower 2 bits
*4882a593Smuzhiyun	movel	USER_FPCR(%a6),%d1
*4882a593Smuzhiyun	andil	#0xc0,%d1
*4882a593Smuzhiyun	lsrl	#6,%d1		|put precision in upper word
*4882a593Smuzhiyun	swap	%d1
*4882a593Smuzhiyun	orl	%d0,%d1		|set up for round call
*4882a593Smuzhiyun	movel	#0x20000000,%d0	|set sticky for round
*4882a593Smuzhiyun	bclrb	#sign_bit,FPTEMP_EX(%a6)
*4882a593Smuzhiyun	sne	FPTEMP_SGN(%a6)	|use internal format for round
*4882a593Smuzhiyun	bsrl	round		|round result to users rmode & prec
*4882a593Smuzhiyun	bfclr	FPTEMP_SGN(%a6){#0:#8}	|convert back to IEEE ext format
*4882a593Smuzhiyun	beqs	add_s_sclr
*4882a593Smuzhiyun	bsetb	#sign_bit,FPTEMP_EX(%a6)
*4882a593Smuzhiyunadd_s_sclr:
*4882a593Smuzhiyun	leal	WBTEMP(%a6),%a0
*4882a593Smuzhiyun	movel	FPTEMP(%a6),(%a0)	|write result to wbtemp
*4882a593Smuzhiyun	movel	FPTEMP_HI(%a6),4(%a0)
*4882a593Smuzhiyun	movel	FPTEMP_LO(%a6),8(%a0)
*4882a593Smuzhiyun	tstw	FPTEMP_EX(%a6)
*4882a593Smuzhiyun	bgt	add_ckovf
*4882a593Smuzhiyun	orl	#neg_mask,USER_FPSR(%a6)
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| Common tail: a0 -> WBTEMP.  Any exponent other than 0x7fff goes
*4882a593Smuzhiyun| straight to frcfpnr.
*4882a593Smuzhiyun|
*4882a593Smuzhiyunadd_ckovf:
*4882a593Smuzhiyun	movew	WBTEMP_EX(%a6),%d0
*4882a593Smuzhiyun	andiw	#0x7fff,%d0		|drop the sign, keep the exponent
*4882a593Smuzhiyun	cmpiw	#0x7fff,%d0
*4882a593Smuzhiyun	bne	frcfpnr
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| The result has overflowed to $7fff exponent.  Set I, ovfl,
*4882a593Smuzhiyun| and aovfl, and clr the mantissa (incorrectly set by the
*4882a593Smuzhiyun| round routine.)
*4882a593Smuzhiyun|
*4882a593Smuzhiyun	orl	#inf_mask+ovfl_inx_mask,USER_FPSR(%a6)
*4882a593Smuzhiyun	clrl	4(%a0)			|clears the long at WBTEMP+4 only
*4882a593Smuzhiyun	bra	frcfpnr
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| Inst is fsub.
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| Same screening as wrap_add: both operands denorm goes to fix_stk;
*4882a593Smuzhiyun| a zero, inf or nan tag on the non-denorm operand goes to fix_stk;
*4882a593Smuzhiyun| an exponent difference (norm - denorm) below 0x8000 goes to
*4882a593Smuzhiyun| fix_stk.  The remaining cases continue at sub_wrap (sub_srcd
*4882a593Smuzhiyun| reaches it by fall-through).
*4882a593Smuzhiyun|
*4882a593Smuzhiyunwrap_sub:
*4882a593Smuzhiyun	cmpb	#0xff,DNRM_FLG(%a6) |if both ops denorm,
*4882a593Smuzhiyun	beq	fix_stk		 |restore to fpu
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| One of the ops is denormalized.  Test for wrap condition
*4882a593Smuzhiyun| and complete the instruction.
*4882a593Smuzhiyun|
*4882a593Smuzhiyun	cmpb	#0x0f,DNRM_FLG(%a6) |check for dest denorm
*4882a593Smuzhiyun	bnes	sub_srcd
*4882a593Smuzhiyunsub_destd:
*4882a593Smuzhiyun	bsrl	ckinf_ns
*4882a593Smuzhiyun	bne	fix_stk
*4882a593Smuzhiyun	bfextu	ETEMP_EX(%a6){#1:#15},%d0	|get src exp (always pos)
*4882a593Smuzhiyun	bfexts	FPTEMP_EX(%a6){#1:#15},%d1	|get dest exp (always neg)
*4882a593Smuzhiyun	subl	%d1,%d0			|subtract dest from src
*4882a593Smuzhiyun	cmpl	#0x8000,%d0
*4882a593Smuzhiyun	blt	fix_stk			|if less, not wrap case
*4882a593Smuzhiyun	bra	sub_wrap
*4882a593Smuzhiyunsub_srcd:
*4882a593Smuzhiyun	bsrl	ckinf_nd
*4882a593Smuzhiyun	bne	fix_stk
*4882a593Smuzhiyun	bfextu	FPTEMP_EX(%a6){#1:#15},%d0	|get dest exp (always pos)
*4882a593Smuzhiyun	bfexts	ETEMP_EX(%a6){#1:#15},%d1	|get src exp (always neg)
*4882a593Smuzhiyun	subl	%d1,%d0			|subtract src from dest
*4882a593Smuzhiyun	cmpl	#0x8000,%d0
*4882a593Smuzhiyun	blt	fix_stk			|if less, not wrap case
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| Check the signs of the operands.  If they are alike, the fpu
*4882a593Smuzhiyun| can be used to subtract from the norm 1.0 with the sign of the
*4882a593Smuzhiyun| denorm and it will correctly generate the result in extended
*4882a593Smuzhiyun| precision.  We can then call round with no sticky and the result
*4882a593Smuzhiyun| will be correct for the user's rounding mode and precision.  If
*4882a593Smuzhiyun| the signs are unlike, we call round with the sticky bit set
*4882a593Smuzhiyun| and the result will be correct for the user's rounding mode and
*4882a593Smuzhiyun| precision.
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| Both like-sign paths below follow the same steps: overwrite the
*4882a593Smuzhiyun| denorm's exponent word with its sign plus 0x3fff, load FPCR with
*4882a593Smuzhiyun| only bits 5-4 of USER_FPCR, compute FPTEMP - ETEMP on the FPU, OR
*4882a593Smuzhiyun| the resulting FPSR into USER_FPSR, store the difference to WBTEMP,
*4882a593Smuzhiyun| then call round with a0 -> WBTEMP, d0 = 0 (no sticky) and
*4882a593Smuzhiyun| d1 = precision (upper word) | rounding mode (low bits).
*4882a593Smuzhiyun|
*4882a593Smuzhiyunsub_wrap:
*4882a593Smuzhiyun	movew	ETEMP_EX(%a6),%d0
*4882a593Smuzhiyun	movew	FPTEMP_EX(%a6),%d1
*4882a593Smuzhiyun	eorw	%d1,%d0
*4882a593Smuzhiyun	andiw	#0x8000,%d0		|non-zero iff the signs differ
*4882a593Smuzhiyun	bne	sub_diff
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| The signs are alike.
*4882a593Smuzhiyun|
*4882a593Smuzhiyun	cmpb	#0x0f,DNRM_FLG(%a6) |is dest the denorm?
*4882a593Smuzhiyun	bnes	sub_u_srcd
*4882a593Smuzhiyun	movew	FPTEMP_EX(%a6),%d0
*4882a593Smuzhiyun	andiw	#0x8000,%d0
*4882a593Smuzhiyun	orw	#0x3fff,%d0	|force the exponent to +/- 1
*4882a593Smuzhiyun	movew	%d0,FPTEMP_EX(%a6) |in the denorm
*4882a593Smuzhiyun	movel	USER_FPCR(%a6),%d0
*4882a593Smuzhiyun	andil	#0x30,%d0
*4882a593Smuzhiyun	fmovel	%d0,%fpcr		|set up users rmode and X
*4882a593Smuzhiyun	fmovex	FPTEMP(%a6),%fp0
*4882a593Smuzhiyun	fsubx	ETEMP(%a6),%fp0
*4882a593Smuzhiyun	fmovel	%fpsr,%d1
*4882a593Smuzhiyun	orl	%d1,USER_FPSR(%a6) |capture cc's and inex from fsub
*4882a593Smuzhiyun	leal	WBTEMP(%a6),%a0	|point a0 to wbtemp in frame
*4882a593Smuzhiyun	fmovex	%fp0,WBTEMP(%a6)	|write result to memory
*4882a593Smuzhiyun	lsrl	#4,%d0		|put rmode in lower 2 bits
*4882a593Smuzhiyun	movel	USER_FPCR(%a6),%d1
*4882a593Smuzhiyun	andil	#0xc0,%d1
*4882a593Smuzhiyun	lsrl	#6,%d1		|put precision in upper word
*4882a593Smuzhiyun	swap	%d1
*4882a593Smuzhiyun	orl	%d0,%d1		|set up for round call
*4882a593Smuzhiyun	clrl	%d0		|force sticky to zero
*4882a593Smuzhiyun	bclrb	#sign_bit,WBTEMP_EX(%a6)
*4882a593Smuzhiyun	sne	WBTEMP_SGN(%a6)	|use internal format for round
*4882a593Smuzhiyun	bsrl	round		|round result to users rmode & prec
*4882a593Smuzhiyun	bfclr	WBTEMP_SGN(%a6){#0:#8}	|convert back to IEEE ext format
*4882a593Smuzhiyun	beq	frcfpnr
*4882a593Smuzhiyun	bsetb	#sign_bit,WBTEMP_EX(%a6)
*4882a593Smuzhiyun	bra	frcfpnr
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| Like signs, source is the denorm: same sequence as above with the
*4882a593Smuzhiyun| exponent forced in ETEMP instead of FPTEMP.
*4882a593Smuzhiyun|
*4882a593Smuzhiyunsub_u_srcd:
*4882a593Smuzhiyun	movew	ETEMP_EX(%a6),%d0
*4882a593Smuzhiyun	andiw	#0x8000,%d0
*4882a593Smuzhiyun	orw	#0x3fff,%d0	|force the exponent to +/- 1
*4882a593Smuzhiyun	movew	%d0,ETEMP_EX(%a6) |in the denorm
*4882a593Smuzhiyun	movel	USER_FPCR(%a6),%d0
*4882a593Smuzhiyun	andil	#0x30,%d0
*4882a593Smuzhiyun	fmovel	%d0,%fpcr		|set up users rmode and X
*4882a593Smuzhiyun	fmovex	FPTEMP(%a6),%fp0
*4882a593Smuzhiyun	fsubx	ETEMP(%a6),%fp0
*4882a593Smuzhiyun	fmovel	%fpsr,%d1
*4882a593Smuzhiyun	orl	%d1,USER_FPSR(%a6) |capture cc's and inex from fsub
*4882a593Smuzhiyun	leal	WBTEMP(%a6),%a0	|point a0 to wbtemp in frame
*4882a593Smuzhiyun	fmovex	%fp0,WBTEMP(%a6)	|write result to memory
*4882a593Smuzhiyun	lsrl	#4,%d0		|put rmode in lower 2 bits
*4882a593Smuzhiyun	movel	USER_FPCR(%a6),%d1
*4882a593Smuzhiyun	andil	#0xc0,%d1
*4882a593Smuzhiyun	lsrl	#6,%d1		|put precision in upper word
*4882a593Smuzhiyun	swap	%d1
*4882a593Smuzhiyun	orl	%d0,%d1		|set up for round call
*4882a593Smuzhiyun	clrl	%d0		|force sticky to zero
*4882a593Smuzhiyun	bclrb	#sign_bit,WBTEMP_EX(%a6)
*4882a593Smuzhiyun	sne	WBTEMP_SGN(%a6)	|use internal format for round
*4882a593Smuzhiyun	bsrl	round		|round result to users rmode & prec
*4882a593Smuzhiyun	bfclr	WBTEMP_SGN(%a6){#0:#8}	|convert back to IEEE ext format
*4882a593Smuzhiyun	beq	frcfpnr
*4882a593Smuzhiyun	bsetb	#sign_bit,WBTEMP_EX(%a6)
*4882a593Smuzhiyun	bra	frcfpnr
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| Signs are unlike:
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| The operand that is not the denorm is rounded in place by calling
*4882a593Smuzhiyun| round with a0 -> that operand, d0 = 0x20000000 (sticky set) and
*4882a593Smuzhiyun| d1 = precision (upper word) | rounding mode (low bits), then copied
*4882a593Smuzhiyun| to WBTEMP.  When the dest is the denorm the source's sign is
*4882a593Smuzhiyun| flipped first.  sub_ckovf then checks for an exponent of 0x7fff.
*4882a593Smuzhiyun|
*4882a593Smuzhiyunsub_diff:
*4882a593Smuzhiyun	cmpb	#0x0f,DNRM_FLG(%a6) |is dest the denorm?
*4882a593Smuzhiyun	bnes	sub_s_srcd
*4882a593Smuzhiyunsub_s_destd:
*4882a593Smuzhiyun	leal	ETEMP(%a6),%a0
*4882a593Smuzhiyun	movel	USER_FPCR(%a6),%d0
*4882a593Smuzhiyun	andil	#0x30,%d0
*4882a593Smuzhiyun	lsrl	#4,%d0		|put rmode in lower 2 bits
*4882a593Smuzhiyun	movel	USER_FPCR(%a6),%d1
*4882a593Smuzhiyun	andil	#0xc0,%d1
*4882a593Smuzhiyun	lsrl	#6,%d1		|put precision in upper word
*4882a593Smuzhiyun	swap	%d1
*4882a593Smuzhiyun	orl	%d0,%d1		|set up for round call
*4882a593Smuzhiyun	movel	#0x20000000,%d0	|set sticky for round
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| Since the dest is the denorm, the sign is the opposite of the
*4882a593Smuzhiyun| norm sign.
*4882a593Smuzhiyun|
*4882a593Smuzhiyun	eoriw	#0x8000,ETEMP_EX(%a6)	|flip sign on result
*4882a593Smuzhiyun	tstw	ETEMP_EX(%a6)
*4882a593Smuzhiyun	bgts	sub_s_dwr
*4882a593Smuzhiyun	orl	#neg_mask,USER_FPSR(%a6)
*4882a593Smuzhiyunsub_s_dwr:
*4882a593Smuzhiyun	bclrb	#sign_bit,ETEMP_EX(%a6)
*4882a593Smuzhiyun	sne	ETEMP_SGN(%a6)	|use internal format for round
*4882a593Smuzhiyun	bsrl	round		|round result to users rmode & prec
*4882a593Smuzhiyun	bfclr	ETEMP_SGN(%a6){#0:#8}	|convert back to IEEE ext format
*4882a593Smuzhiyun	beqs	sub_s_dclr
*4882a593Smuzhiyun	bsetb	#sign_bit,ETEMP_EX(%a6)
*4882a593Smuzhiyunsub_s_dclr:
*4882a593Smuzhiyun	leal	WBTEMP(%a6),%a0
*4882a593Smuzhiyun	movel	ETEMP(%a6),(%a0)	|write result to wbtemp
*4882a593Smuzhiyun	movel	ETEMP_HI(%a6),4(%a0)
*4882a593Smuzhiyun	movel	ETEMP_LO(%a6),8(%a0)
*4882a593Smuzhiyun	bra	sub_ckovf
*4882a593Smuzhiyunsub_s_srcd:
*4882a593Smuzhiyun	leal	FPTEMP(%a6),%a0
*4882a593Smuzhiyun	movel	USER_FPCR(%a6),%d0
*4882a593Smuzhiyun	andil	#0x30,%d0
*4882a593Smuzhiyun	lsrl	#4,%d0		|put rmode in lower 2 bits
*4882a593Smuzhiyun	movel	USER_FPCR(%a6),%d1
*4882a593Smuzhiyun	andil	#0xc0,%d1
*4882a593Smuzhiyun	lsrl	#6,%d1		|put precision in upper word
*4882a593Smuzhiyun	swap	%d1
*4882a593Smuzhiyun	orl	%d0,%d1		|set up for round call
*4882a593Smuzhiyun	movel	#0x20000000,%d0	|set sticky for round
*4882a593Smuzhiyun	bclrb	#sign_bit,FPTEMP_EX(%a6)
*4882a593Smuzhiyun	sne	FPTEMP_SGN(%a6)	|use internal format for round
*4882a593Smuzhiyun	bsrl	round		|round result to users rmode & prec
*4882a593Smuzhiyun	bfclr	FPTEMP_SGN(%a6){#0:#8}	|convert back to IEEE ext format
*4882a593Smuzhiyun	beqs	sub_s_sclr
*4882a593Smuzhiyun	bsetb	#sign_bit,FPTEMP_EX(%a6)
*4882a593Smuzhiyunsub_s_sclr:
*4882a593Smuzhiyun	leal	WBTEMP(%a6),%a0
*4882a593Smuzhiyun	movel	FPTEMP(%a6),(%a0)	|write result to wbtemp
*4882a593Smuzhiyun	movel	FPTEMP_HI(%a6),4(%a0)
*4882a593Smuzhiyun	movel	FPTEMP_LO(%a6),8(%a0)
*4882a593Smuzhiyun	tstw	FPTEMP_EX(%a6)
*4882a593Smuzhiyun	bgt	sub_ckovf
*4882a593Smuzhiyun	orl	#neg_mask,USER_FPSR(%a6)
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| Common tail: a0 -> WBTEMP.  Any exponent other than 0x7fff goes
*4882a593Smuzhiyun| straight to frcfpnr.
*4882a593Smuzhiyun|
*4882a593Smuzhiyunsub_ckovf:
*4882a593Smuzhiyun	movew	WBTEMP_EX(%a6),%d0
*4882a593Smuzhiyun	andiw	#0x7fff,%d0		|drop the sign, keep the exponent
*4882a593Smuzhiyun	cmpiw	#0x7fff,%d0
*4882a593Smuzhiyun	bne	frcfpnr
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| The result has overflowed to $7fff exponent.  Set I, ovfl,
*4882a593Smuzhiyun| and aovfl, and clr the mantissa (incorrectly set by the
*4882a593Smuzhiyun| round routine.)
*4882a593Smuzhiyun|
*4882a593Smuzhiyun	orl	#inf_mask+ovfl_inx_mask,USER_FPSR(%a6)
*4882a593Smuzhiyun	clrl	4(%a0)			|clears the long at WBTEMP+4 only
*4882a593Smuzhiyun	bra	frcfpnr
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| Inst is fcmp.
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| Same screening as wrap_add (both denorm, zero/inf/nan tag, or an
*4882a593Smuzhiyun| exponent difference below 0x8000 all go to fix_stk).  In the wrap
*4882a593Smuzhiyun| case nothing is written to a register: the routine only decides
*4882a593Smuzhiyun| whether to OR neg_mask into USER_FPSR, then returns.
*4882a593Smuzhiyun|   dest denorm: N is set when the source is not negative.
*4882a593Smuzhiyun|   src denorm:  N is set when the dest is negative.
*4882a593Smuzhiyun|
*4882a593Smuzhiyunwrap_cmp:
*4882a593Smuzhiyun	cmpb	#0xff,DNRM_FLG(%a6) |if both ops denorm,
*4882a593Smuzhiyun	beq	fix_stk		 |restore to fpu
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| One of the ops is denormalized.  Test for wrap condition
*4882a593Smuzhiyun| and complete the instruction.
*4882a593Smuzhiyun|
*4882a593Smuzhiyun	cmpb	#0x0f,DNRM_FLG(%a6) |check for dest denorm
*4882a593Smuzhiyun	bnes	cmp_srcd
*4882a593Smuzhiyuncmp_destd:
*4882a593Smuzhiyun	bsrl	ckinf_ns
*4882a593Smuzhiyun	bne	fix_stk
*4882a593Smuzhiyun	bfextu	ETEMP_EX(%a6){#1:#15},%d0	|get src exp (always pos)
*4882a593Smuzhiyun	bfexts	FPTEMP_EX(%a6){#1:#15},%d1	|get dest exp (always neg)
*4882a593Smuzhiyun	subl	%d1,%d0			|subtract dest from src
*4882a593Smuzhiyun	cmpl	#0x8000,%d0
*4882a593Smuzhiyun	blt	fix_stk			|if less, not wrap case
*4882a593Smuzhiyun	tstw	ETEMP_EX(%a6)		|set N to ~sign_of(src)
*4882a593Smuzhiyun	bge	cmp_setn
*4882a593Smuzhiyun	rts
*4882a593Smuzhiyuncmp_srcd:
*4882a593Smuzhiyun	bsrl	ckinf_nd
*4882a593Smuzhiyun	bne	fix_stk
*4882a593Smuzhiyun	bfextu	FPTEMP_EX(%a6){#1:#15},%d0	|get dest exp (always pos)
*4882a593Smuzhiyun	bfexts	ETEMP_EX(%a6){#1:#15},%d1	|get src exp (always neg)
*4882a593Smuzhiyun	subl	%d1,%d0			|subtract src from dest
*4882a593Smuzhiyun	cmpl	#0x8000,%d0
*4882a593Smuzhiyun	blt	fix_stk			|if less, not wrap case
*4882a593Smuzhiyun	tstw	FPTEMP_EX(%a6)		|set N to sign_of(dest)
*4882a593Smuzhiyun	blt	cmp_setn
*4882a593Smuzhiyun	rts
*4882a593Smuzhiyuncmp_setn:
*4882a593Smuzhiyun	orl	#neg_mask,USER_FPSR(%a6)
*4882a593Smuzhiyun	rts
*4882a593Smuzhiyun
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| Inst is fmul.
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| Unlike the other wrap_* routines, both operands denorm goes to
*4882a593Smuzhiyun| force_unf rather than fix_stk.  With one denorm, a zero, inf or nan
*4882a593Smuzhiyun| tag on the other operand goes to fix_stk; otherwise the two
*4882a593Smuzhiyun| exponents are added and a sum greater than zero goes to fix_stk.
*4882a593Smuzhiyun| A sum of zero or less goes to force_unf (mul_srcd reaches it by
*4882a593Smuzhiyun| fall-through).
*4882a593Smuzhiyun|
*4882a593Smuzhiyunwrap_mul:
*4882a593Smuzhiyun	cmpb	#0xff,DNRM_FLG(%a6) |if both ops denorm,
*4882a593Smuzhiyun	beq	force_unf	|force an underflow (really!)
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| One of the ops is denormalized.  Test for wrap condition
*4882a593Smuzhiyun| and complete the instruction.
*4882a593Smuzhiyun|
*4882a593Smuzhiyun	cmpb	#0x0f,DNRM_FLG(%a6) |check for dest denorm
*4882a593Smuzhiyun	bnes	mul_srcd
*4882a593Smuzhiyunmul_destd:
*4882a593Smuzhiyun	bsrl	ckinf_ns
*4882a593Smuzhiyun	bne	fix_stk
*4882a593Smuzhiyun	bfextu	ETEMP_EX(%a6){#1:#15},%d0	|get src exp (always pos)
*4882a593Smuzhiyun	bfexts	FPTEMP_EX(%a6){#1:#15},%d1	|get dest exp (always neg)
*4882a593Smuzhiyun	addl	%d1,%d0			|add dest exp to src exp
*4882a593Smuzhiyun	bgt	fix_stk
*4882a593Smuzhiyun	bra	force_unf
*4882a593Smuzhiyunmul_srcd:
*4882a593Smuzhiyun	bsrl	ckinf_nd
*4882a593Smuzhiyun	bne	fix_stk
*4882a593Smuzhiyun	bfextu	FPTEMP_EX(%a6){#1:#15},%d0	|get dest exp (always pos)
*4882a593Smuzhiyun	bfexts	ETEMP_EX(%a6){#1:#15},%d1	|get src exp (always neg)
*4882a593Smuzhiyun	addl	%d1,%d0			|add src exp to dest exp
*4882a593Smuzhiyun	bgt	fix_stk
*4882a593Smuzhiyun
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| This code handles the case of the instruction resulting in
*4882a593Smuzhiyun| an underflow condition.
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| Clears E1 in E_BYTE, ORs unfinx_mask into USER_FPSR, clears NMNEXC
*4882a593Smuzhiyun| and sets WBTEMP_SGN to the XOR of the ETEMP and FPTEMP signs.  It
*4882a593Smuzhiyun| then calls unf_sub with a0 -> WBTEMP and d0 = rounding precision:
*4882a593Smuzhiyun| 1 (forced single) or 2 (forced double) when CMDREG1B bit 6 is set,
*4882a593Smuzhiyun| otherwise the top two bits of FPCR_MODE.  Afterwards the
*4882a593Smuzhiyun| WBTEMP_SGN byte is folded back into the sign bit of WBTEMP_EX and
*4882a593Smuzhiyun| control goes to frcfpn to write the destination register.
*4882a593Smuzhiyun|
*4882a593Smuzhiyunforce_unf:
*4882a593Smuzhiyun	bclrb	#E1,E_BYTE(%a6)
*4882a593Smuzhiyun	orl	#unfinx_mask,USER_FPSR(%a6)
*4882a593Smuzhiyun	clrw	NMNEXC(%a6)
*4882a593Smuzhiyun	clrb	WBTEMP_SGN(%a6)
*4882a593Smuzhiyun	movew	ETEMP_EX(%a6),%d0		|find the sign of the result
*4882a593Smuzhiyun	movew	FPTEMP_EX(%a6),%d1
*4882a593Smuzhiyun	eorw	%d1,%d0
*4882a593Smuzhiyun	andiw	#0x8000,%d0
*4882a593Smuzhiyun	beqs	frcunfcont
*4882a593Smuzhiyun	st	WBTEMP_SGN(%a6)
*4882a593Smuzhiyunfrcunfcont:
*4882a593Smuzhiyun	lea	WBTEMP(%a6),%a0		|point a0 to memory location
*4882a593Smuzhiyun	movew	CMDREG1B(%a6),%d0
*4882a593Smuzhiyun	btstl	#6,%d0			|test for forced precision
*4882a593Smuzhiyun	beqs	frcunf_fpcr
*4882a593Smuzhiyun	btstl	#2,%d0			|check for double
*4882a593Smuzhiyun	bnes	frcunf_dbl
*4882a593Smuzhiyun	movel	#0x1,%d0			|inst is forced single
*4882a593Smuzhiyun	bras	frcunf_rnd
*4882a593Smuzhiyunfrcunf_dbl:
*4882a593Smuzhiyun	movel	#0x2,%d0			|inst is forced double
*4882a593Smuzhiyun	bras	frcunf_rnd
*4882a593Smuzhiyunfrcunf_fpcr:
*4882a593Smuzhiyun	bfextu	FPCR_MODE(%a6){#0:#2},%d0	|inst not forced - use fpcr prec
*4882a593Smuzhiyunfrcunf_rnd:
*4882a593Smuzhiyun	bsrl	unf_sub			|get correct result based on
*4882a593Smuzhiyun|					;round precision/mode.  This
*4882a593Smuzhiyun|					;sets FPSR_CC correctly
*4882a593Smuzhiyun	bfclr	WBTEMP_SGN(%a6){#0:#8}	|convert back to IEEE ext format
*4882a593Smuzhiyun	beqs	frcfpn
*4882a593Smuzhiyun	bsetb	#sign_bit,WBTEMP_EX(%a6)
*4882a593Smuzhiyun	bra	frcfpn
*4882a593Smuzhiyun
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| Write the result to the user's fpn.  All results must be HUGE to be
*4882a593Smuzhiyun| written; otherwise the results would have overflowed or underflowed.
*4882a593Smuzhiyun| If the rounding precision is single or double, the ovf_res routine
*4882a593Smuzhiyun| is needed to correctly supply the max value.
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| Precision selection: CMDREG1B bit 6 set means a forced-precision
*4882a593Smuzhiyun| instruction (bit 2 picks double over single); otherwise the top two
*4882a593Smuzhiyun| bits of FPCR_MODE are used, and a value of zero (extended) skips
*4882a593Smuzhiyun| straight to frcfpn.  For single/double the sign is moved into
*4882a593Smuzhiyun| WBTEMP_SGN, ovf_res is called with d0 = precision (a0 is left as
*4882a593Smuzhiyun| set by the code that branched here), the sign is restored, and
*4882a593Smuzhiyun| ovfinx_mask is ORed into USER_FPSR before falling into frcfpn.
*4882a593Smuzhiyun|
*4882a593Smuzhiyunfrcfpnr:
*4882a593Smuzhiyun	movew	CMDREG1B(%a6),%d0
*4882a593Smuzhiyun	btstl	#6,%d0			|test for forced precision
*4882a593Smuzhiyun	beqs	frcfpn_fpcr
*4882a593Smuzhiyun	btstl	#2,%d0			|check for double
*4882a593Smuzhiyun	bnes	frcfpn_dbl
*4882a593Smuzhiyun	movel	#0x1,%d0			|inst is forced single
*4882a593Smuzhiyun	bras	frcfpn_rnd
*4882a593Smuzhiyunfrcfpn_dbl:
*4882a593Smuzhiyun	movel	#0x2,%d0			|inst is forced double
*4882a593Smuzhiyun	bras	frcfpn_rnd
*4882a593Smuzhiyunfrcfpn_fpcr:
*4882a593Smuzhiyun	bfextu	FPCR_MODE(%a6){#0:#2},%d0	|inst not forced - use fpcr prec
*4882a593Smuzhiyun	tstb	%d0
*4882a593Smuzhiyun	beqs	frcfpn			|if extended, write what you got
*4882a593Smuzhiyunfrcfpn_rnd:
*4882a593Smuzhiyun	bclrb	#sign_bit,WBTEMP_EX(%a6)
*4882a593Smuzhiyun	sne	WBTEMP_SGN(%a6)		|use internal format for ovf_res
*4882a593Smuzhiyun	bsrl	ovf_res			|get correct result based on
*4882a593Smuzhiyun|					;round precision/mode.  This
*4882a593Smuzhiyun|					;sets FPSR_CC correctly
*4882a593Smuzhiyun	bfclr	WBTEMP_SGN(%a6){#0:#8}	|convert back to IEEE ext format
*4882a593Smuzhiyun	beqs	frcfpn_clr
*4882a593Smuzhiyun	bsetb	#sign_bit,WBTEMP_EX(%a6)
*4882a593Smuzhiyunfrcfpn_clr:
*4882a593Smuzhiyun	orl	#ovfinx_mask,USER_FPSR(%a6)
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| Perform the write.
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| Copies the 12-byte value at WBTEMP to the destination register
*4882a593Smuzhiyun| named by the 3-bit field at bit offset 6 of CMDREG1B.  For fp4-fp7
*4882a593Smuzhiyun| the value is loaded directly with fmovemx, using a one-bit dynamic
*4882a593Smuzhiyun| register list in d0 (bit 7-n selects fpn).  For fp0-fp3 the value
*4882a593Smuzhiyun| is stored into the matching USER_FPn slot of the frame instead.
*4882a593Smuzhiyun| NOTE(review): presumably the USER_FPn slots are reloaded into
*4882a593Smuzhiyun| fp0-fp3 on exit from the handler - confirm against the exit code.
*4882a593Smuzhiyun|
*4882a593Smuzhiyunfrcfpn:
*4882a593Smuzhiyun	bfextu	CMDREG1B(%a6){#6:#3},%d0	|extract fp destination register
*4882a593Smuzhiyun	cmpib	#3,%d0
*4882a593Smuzhiyun	bles	frc0123			|check if dest is fp0-fp3
*4882a593Smuzhiyun	movel	#7,%d1
*4882a593Smuzhiyun	subl	%d0,%d1			|d1 = 7 - register number
*4882a593Smuzhiyun	clrl	%d0
*4882a593Smuzhiyun	bsetl	%d1,%d0			|d0 = single-register fmovem mask
*4882a593Smuzhiyun	fmovemx WBTEMP(%a6),%d0
*4882a593Smuzhiyun	rts
*4882a593Smuzhiyunfrc0123:
*4882a593Smuzhiyun	cmpib	#0,%d0
*4882a593Smuzhiyun	beqs	frc0_dst
*4882a593Smuzhiyun	cmpib	#1,%d0
*4882a593Smuzhiyun	beqs	frc1_dst
*4882a593Smuzhiyun	cmpib	#2,%d0
*4882a593Smuzhiyun	beqs	frc2_dst
*4882a593Smuzhiyunfrc3_dst:
*4882a593Smuzhiyun	movel	WBTEMP_EX(%a6),USER_FP3(%a6)
*4882a593Smuzhiyun	movel	WBTEMP_HI(%a6),USER_FP3+4(%a6)
*4882a593Smuzhiyun	movel	WBTEMP_LO(%a6),USER_FP3+8(%a6)
*4882a593Smuzhiyun	rts
*4882a593Smuzhiyunfrc2_dst:
*4882a593Smuzhiyun	movel	WBTEMP_EX(%a6),USER_FP2(%a6)
*4882a593Smuzhiyun	movel	WBTEMP_HI(%a6),USER_FP2+4(%a6)
*4882a593Smuzhiyun	movel	WBTEMP_LO(%a6),USER_FP2+8(%a6)
*4882a593Smuzhiyun	rts
*4882a593Smuzhiyunfrc1_dst:
*4882a593Smuzhiyun	movel	WBTEMP_EX(%a6),USER_FP1(%a6)
*4882a593Smuzhiyun	movel	WBTEMP_HI(%a6),USER_FP1+4(%a6)
*4882a593Smuzhiyun	movel	WBTEMP_LO(%a6),USER_FP1+8(%a6)
*4882a593Smuzhiyun	rts
*4882a593Smuzhiyunfrc0_dst:
*4882a593Smuzhiyun	movel	WBTEMP_EX(%a6),USER_FP0(%a6)
*4882a593Smuzhiyun	movel	WBTEMP_HI(%a6),USER_FP0+4(%a6)
*4882a593Smuzhiyun	movel	WBTEMP_LO(%a6),USER_FP0+8(%a6)
*4882a593Smuzhiyun	rts
*4882a593Smuzhiyun
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| Write etemp to fpn.
*4882a593Smuzhiyun| A check is made on enabled and signalled snan exceptions,
*4882a593Smuzhiyun| and the destination is not overwritten if this condition exists.
*4882a593Smuzhiyun| This code is designed to make fmoveins of unsupported data types
*4882a593Smuzhiyun| faster.
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| If snan is both signalled (FPSR_EXCEPT) and enabled (FPCR_ENABLE)
*4882a593Smuzhiyun| the routine copies the ETEMP exponent long to FPTEMP, sets N in
*4882a593Smuzhiyun| USER_FPSR when the top byte of ETEMP is negative, and returns
*4882a593Smuzhiyun| without touching the destination.  In every other case control
*4882a593Smuzhiyun| goes to fmoveinc, which performs the register write.
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| N is set with neg_mask, the long-sized mask used by every other
*4882a593Smuzhiyun| "orl ...,USER_FPSR(%a6)" in this file.  neg_bit is a bit number
*4882a593Smuzhiyun| meant for bit instructions; ORing it into the long would set
*4882a593Smuzhiyun| unrelated low-order bits and leave N clear.
*4882a593Smuzhiyun|
*4882a593Smuzhiyunwr_etemp:
*4882a593Smuzhiyun	btstb	#snan_bit,FPSR_EXCEPT(%a6)	|if snan is set, and
*4882a593Smuzhiyun	beqs	fmoveinc		|enabled, force restore
*4882a593Smuzhiyun	btstb	#snan_bit,FPCR_ENABLE(%a6) |and don't overwrite
*4882a593Smuzhiyun	beqs	fmoveinc		|the dest
*4882a593Smuzhiyun	movel	ETEMP_EX(%a6),FPTEMP_EX(%a6)	|set up fptemp sign for
*4882a593Smuzhiyun|						;snan handler
*4882a593Smuzhiyun	tstb	ETEMP(%a6)		|check for negative
*4882a593Smuzhiyun	blts	snan_neg
*4882a593Smuzhiyun	rts
*4882a593Smuzhiyunsnan_neg:
*4882a593Smuzhiyun	orl	#neg_mask,USER_FPSR(%a6)	|snan is negative; set N
*4882a593Smuzhiyun	rts
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| fmoveinc: complete an fmove-in by writing ETEMP to the destination
*4882a593Smuzhiyun| register.  Clears NMNEXC and E1, then sets condition-code bits in
*4882a593Smuzhiyun| USER_FPSR from the source tag: I for inf (0x40), NaN for nan (0x60,
*4882a593Smuzhiyun| which also copies the ETEMP exponent long to FPTEMP), Z for zero
*4882a593Smuzhiyun| (0x20); N is added in each of those cases when the exponent word
*4882a593Smuzhiyun| at LOCAL_EX(%a0) is negative.  Any other tag sets nothing here.
*4882a593Smuzhiyun| NOTE(review): a0 is not loaded in this routine; the sign tests
*4882a593Smuzhiyun| rely on the caller having left a0 pointing at the operand -
*4882a593Smuzhiyun| confirm against the callers of wr_etemp/fmoveinc.
*4882a593Smuzhiyun|
*4882a593Smuzhiyunfmoveinc:
*4882a593Smuzhiyun	clrw	NMNEXC(%a6)
*4882a593Smuzhiyun	bclrb	#E1,E_BYTE(%a6)
*4882a593Smuzhiyun	moveb	STAG(%a6),%d0		|check if stag is inf
*4882a593Smuzhiyun	andib	#0xe0,%d0
*4882a593Smuzhiyun	cmpib	#0x40,%d0
*4882a593Smuzhiyun	bnes	fminc_cnan
*4882a593Smuzhiyun	orl	#inf_mask,USER_FPSR(%a6) |if inf, nothing yet has set I
*4882a593Smuzhiyun	tstw	LOCAL_EX(%a0)		|check sign
*4882a593Smuzhiyun	bges	fminc_con
*4882a593Smuzhiyun	orl	#neg_mask,USER_FPSR(%a6)
*4882a593Smuzhiyun	bra	fminc_con
*4882a593Smuzhiyunfminc_cnan:
*4882a593Smuzhiyun	cmpib	#0x60,%d0			|check if stag is NaN
*4882a593Smuzhiyun	bnes	fminc_czero
*4882a593Smuzhiyun	orl	#nan_mask,USER_FPSR(%a6) |if nan, nothing yet has set NaN
*4882a593Smuzhiyun	movel	ETEMP_EX(%a6),FPTEMP_EX(%a6)	|set up fptemp sign for
*4882a593Smuzhiyun|						;snan handler
*4882a593Smuzhiyun	tstw	LOCAL_EX(%a0)		|check sign
*4882a593Smuzhiyun	bges	fminc_con
*4882a593Smuzhiyun	orl	#neg_mask,USER_FPSR(%a6)
*4882a593Smuzhiyun	bra	fminc_con
*4882a593Smuzhiyunfminc_czero:
*4882a593Smuzhiyun	cmpib	#0x20,%d0			|check if zero
*4882a593Smuzhiyun	bnes	fminc_con
*4882a593Smuzhiyun	orl	#z_mask,USER_FPSR(%a6)	|if zero, set Z
*4882a593Smuzhiyun	tstw	LOCAL_EX(%a0)		|check sign
*4882a593Smuzhiyun	bges	fminc_con
*4882a593Smuzhiyun	orl	#neg_mask,USER_FPSR(%a6)
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| Write ETEMP to the register named by the 3-bit field at bit offset
*4882a593Smuzhiyun| 6 of CMDREG1B: fp4-fp7 are loaded directly with fmovemx (bit 7-n
*4882a593Smuzhiyun| of d0 selects fpn); fp0-fp3 are stored to the USER_FPn frame slots.
*4882a593Smuzhiyun|
*4882a593Smuzhiyunfminc_con:
*4882a593Smuzhiyun	bfextu	CMDREG1B(%a6){#6:#3},%d0	|extract fp destination register
*4882a593Smuzhiyun	cmpib	#3,%d0
*4882a593Smuzhiyun	bles	fp0123			|check if dest is fp0-fp3
*4882a593Smuzhiyun	movel	#7,%d1
*4882a593Smuzhiyun	subl	%d0,%d1			|d1 = 7 - register number
*4882a593Smuzhiyun	clrl	%d0
*4882a593Smuzhiyun	bsetl	%d1,%d0			|d0 = single-register fmovem mask
*4882a593Smuzhiyun	fmovemx ETEMP(%a6),%d0
*4882a593Smuzhiyun	rts
*4882a593Smuzhiyun
*4882a593Smuzhiyunfp0123:
*4882a593Smuzhiyun	cmpib	#0,%d0
*4882a593Smuzhiyun	beqs	fp0_dst
*4882a593Smuzhiyun	cmpib	#1,%d0
*4882a593Smuzhiyun	beqs	fp1_dst
*4882a593Smuzhiyun	cmpib	#2,%d0
*4882a593Smuzhiyun	beqs	fp2_dst
*4882a593Smuzhiyunfp3_dst:
*4882a593Smuzhiyun	movel	ETEMP_EX(%a6),USER_FP3(%a6)
*4882a593Smuzhiyun	movel	ETEMP_HI(%a6),USER_FP3+4(%a6)
*4882a593Smuzhiyun	movel	ETEMP_LO(%a6),USER_FP3+8(%a6)
*4882a593Smuzhiyun	rts
*4882a593Smuzhiyunfp2_dst:
*4882a593Smuzhiyun	movel	ETEMP_EX(%a6),USER_FP2(%a6)
*4882a593Smuzhiyun	movel	ETEMP_HI(%a6),USER_FP2+4(%a6)
*4882a593Smuzhiyun	movel	ETEMP_LO(%a6),USER_FP2+8(%a6)
*4882a593Smuzhiyun	rts
*4882a593Smuzhiyunfp1_dst:
*4882a593Smuzhiyun	movel	ETEMP_EX(%a6),USER_FP1(%a6)
*4882a593Smuzhiyun	movel	ETEMP_HI(%a6),USER_FP1+4(%a6)
*4882a593Smuzhiyun	movel	ETEMP_LO(%a6),USER_FP1+8(%a6)
*4882a593Smuzhiyun	rts
*4882a593Smuzhiyunfp0_dst:
*4882a593Smuzhiyun	movel	ETEMP_EX(%a6),USER_FP0(%a6)
*4882a593Smuzhiyun	movel	ETEMP_HI(%a6),USER_FP0+4(%a6)
*4882a593Smuzhiyun	movel	ETEMP_LO(%a6),USER_FP0+8(%a6)
*4882a593Smuzhiyun	rts
*4882a593Smuzhiyun
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| opclass3: entry for move-out instructions.  Sets the CU_ONLY byte
*4882a593Smuzhiyun| to 0xff, then dispatches on bits 11-10 of CMDREG1B: both set means
*4882a593Smuzhiyun| a packed move-out (pack_out); anything else goes to mv_out.
*4882a593Smuzhiyun|
*4882a593Smuzhiyunopclass3:
*4882a593Smuzhiyun	st	CU_ONLY(%a6)
*4882a593Smuzhiyun	movew	CMDREG1B(%a6),%d0	|check if packed moveout
*4882a593Smuzhiyun	andiw	#0x0c00,%d0	|isolate last 2 bits of size field
*4882a593Smuzhiyun	cmpiw	#0x0c00,%d0	|if size is 011 or 111, it is packed
*4882a593Smuzhiyun	beq	pack_out	|else it is norm or denorm
*4882a593Smuzhiyun	bra	mv_out
*4882a593Smuzhiyun
*4882a593Smuzhiyun
*4882a593Smuzhiyun|
*4882a593Smuzhiyun|	MOVE OUT
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| mv_tbl holds one handler address per value of the 3-bit specifier
*4882a593Smuzhiyun| that mv_out extracts from CMDREG1B (bit offset 3, width 3).  The
*4882a593Smuzhiyun| entries for values 3 and 7 point at mvout_end; those values are
*4882a593Smuzhiyun| the packed sizes that opclass3 sends to pack_out instead.
*4882a593Smuzhiyun| mv_out uses a0 and d1 and does not return to its caller directly:
*4882a593Smuzhiyun| it jumps to the selected handler.
*4882a593Smuzhiyun|
*4882a593Smuzhiyun
*4882a593Smuzhiyunmv_tbl:
*4882a593Smuzhiyun	.long	li
*4882a593Smuzhiyun	.long	sgp
*4882a593Smuzhiyun	.long	xp
*4882a593Smuzhiyun	.long	mvout_end	|should never be taken
*4882a593Smuzhiyun	.long	wi
*4882a593Smuzhiyun	.long	dp
*4882a593Smuzhiyun	.long	bi
*4882a593Smuzhiyun	.long	mvout_end	|should never be taken
*4882a593Smuzhiyunmv_out:
*4882a593Smuzhiyun	bfextu	CMDREG1B(%a6){#3:#3},%d1	|put source specifier in d1
*4882a593Smuzhiyun	leal	mv_tbl,%a0
*4882a593Smuzhiyun	movel	%a0@(%d1:l:4),%a0	|fetch handler address: mv_tbl[d1]
*4882a593Smuzhiyun	jmp	(%a0)
*4882a593Smuzhiyun
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| Exits for the move-out handlers.
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| mvout_end  -- exit for move-out to memory.  The aunfl bit is set
*4882a593Smuzhiyun|		 if the result is inex and unfl is signalled.
*4882a593Smuzhiyun| mvouti_end -- exit for move-out to int register.  The aunfl bit is
*4882a593Smuzhiyun|		 not set in any case for this move.
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| Both exits run the same tail: clear NMNEXC and E1, clear the FPSR,
*4882a593Smuzhiyun| and return ETEMP to extended format from internal extended format
*4882a593Smuzhiyun| so that gen_except will have a correctly signed value for ovfl/unfl
*4882a593Smuzhiyun| handlers.
*4882a593Smuzhiyun|
*4882a593Smuzhiyunmvout_end:
*4882a593Smuzhiyun	btstb	#inex2_bit,FPSR_EXCEPT(%a6)
*4882a593Smuzhiyun	beqs	no_aufl			|not inexact: leave aunfl alone
*4882a593Smuzhiyun	btstb	#unfl_bit,FPSR_EXCEPT(%a6)
*4882a593Smuzhiyun	beqs	no_aufl			|no underflow: leave aunfl alone
*4882a593Smuzhiyun	bsetb	#aunfl_bit,FPSR_AEXCEPT(%a6)
*4882a593Smuzhiyunno_aufl:
*4882a593Smuzhiyunmvouti_end:
*4882a593Smuzhiyun	clrw	NMNEXC(%a6)
*4882a593Smuzhiyun	bclrb	#E1,E_BYTE(%a6)
*4882a593Smuzhiyun	fmovel	#0,%FPSR			|clear any cc bits from res_func
*4882a593Smuzhiyun	bfclr	ETEMP_SGN(%a6){#0:#8}	|clear sign byte; Z tells if it was 0
*4882a593Smuzhiyun	beqs	mvout_con		|it was zero: sign bit stays clear
*4882a593Smuzhiyun	bsetb	#sign_bit,ETEMP_EX(%a6)	|it was nonzero: restore sign bit
*4882a593Smuzhiyunmvout_con:
*4882a593Smuzhiyunmvouti_con:
*4882a593Smuzhiyun	rts
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| li is used to handle a long integer source specifier
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| The result is built in L_SCR1(a6) and d0 carries the byte count (4)
*4882a593Smuzhiyun| to the common integer routines.  Operands at or beyond the largest
*4882a593Smuzhiyun| positive/negative long are handled by lo_plrg/lo_nlrg; everything
*4882a593Smuzhiyun| in between is converted by the 040 itself.
*4882a593Smuzhiyun|
*4882a593Smuzhiyun
*4882a593Smuzhiyunli:
*4882a593Smuzhiyun	moveql	#4,%d0		|set byte count
*4882a593Smuzhiyun
*4882a593Smuzhiyun	btstb	#7,STAG(%a6)	|check for extended denorm
*4882a593Smuzhiyun	bne	int_dnrm	|if so, branch
*4882a593Smuzhiyun
*4882a593Smuzhiyun	fmovemx ETEMP(%a6),%fp0-%fp0
*4882a593Smuzhiyun	fcmpd	#0x41dfffffffc00000,%fp0
*4882a593Smuzhiyun| 41dfffffffc00000 in dbl prec = 401d0000fffffffe00000000 in ext prec
*4882a593Smuzhiyun| (this is 2^31 - 1)
*4882a593Smuzhiyun	fbge	lo_plrg
*4882a593Smuzhiyun	fcmpd	#0xc1e0000000000000,%fp0
*4882a593Smuzhiyun| c1e0000000000000 in dbl prec = c01e00008000000000000000 in ext prec
*4882a593Smuzhiyun| (this is -2^31)
*4882a593Smuzhiyun	fble	lo_nlrg
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| at this point, the answer is between the largest pos and neg values
*4882a593Smuzhiyun|
*4882a593Smuzhiyun	movel	USER_FPCR(%a6),%d1	|use user's rounding mode
*4882a593Smuzhiyun	andil	#0x30,%d1
*4882a593Smuzhiyun	fmovel	%d1,%fpcr
*4882a593Smuzhiyun	fmovel	%fp0,L_SCR1(%a6)	|let the 040 perform conversion
*4882a593Smuzhiyun	fmovel %fpsr,%d1
*4882a593Smuzhiyun	orl	%d1,USER_FPSR(%a6)	|capture inex2/ainex if set
*4882a593Smuzhiyun	bra	int_wrt
*4882a593Smuzhiyun
*4882a593Smuzhiyun
*4882a593Smuzhiyun| lo_plrg/lo_nlrg: the integer move into L_SCR1 does not change the
*4882a593Smuzhiyun| FPU condition codes, so the fbeq still tests the fcmpd done in li.
*4882a593Smuzhiyunlo_plrg:
*4882a593Smuzhiyun	movel	#0x7fffffff,L_SCR1(%a6)	|answer is largest positive int
*4882a593Smuzhiyun	fbeq	int_wrt			|exact answer
*4882a593Smuzhiyun	fcmpd	#0x41dfffffffe00000,%fp0
*4882a593Smuzhiyun| 41dfffffffe00000 in dbl prec = 401d0000ffffffff00000000 in ext prec
*4882a593Smuzhiyun| (this is 2^31 - 0.5)
*4882a593Smuzhiyun	fbge	int_operr		|set operr
*4882a593Smuzhiyun	bra	int_inx			|set inexact
*4882a593Smuzhiyun
*4882a593Smuzhiyunlo_nlrg:
*4882a593Smuzhiyun	movel	#0x80000000,L_SCR1(%a6)	|answer is largest negative int
*4882a593Smuzhiyun	fbeq	int_wrt			|exact answer
*4882a593Smuzhiyun	fcmpd	#0xc1e0000000100000,%fp0
*4882a593Smuzhiyun| c1e0000000100000 in dbl prec = c01e00008000000080000000 in ext prec
*4882a593Smuzhiyun| (this is -(2^31 + 0.5))
*4882a593Smuzhiyun	fblt	int_operr		|set operr
*4882a593Smuzhiyun	bra	int_inx			|set inexact
*4882a593Smuzhiyun
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| wi is used to handle a word integer source specifier
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| Same structure as li, with a byte count of 2 and the word-sized
*4882a593Smuzhiyun| limits: the result word is built at L_SCR1(a6).
*4882a593Smuzhiyun|
*4882a593Smuzhiyun
*4882a593Smuzhiyunwi:
*4882a593Smuzhiyun	moveql	#2,%d0		|set byte count
*4882a593Smuzhiyun
*4882a593Smuzhiyun	btstb	#7,STAG(%a6)	|check for extended denorm
*4882a593Smuzhiyun	bne	int_dnrm	|branch if so
*4882a593Smuzhiyun
*4882a593Smuzhiyun	fmovemx ETEMP(%a6),%fp0-%fp0
*4882a593Smuzhiyun	fcmps	#0x46fffe00,%fp0
*4882a593Smuzhiyun| 46fffe00 in sgl prec = 400d0000fffe000000000000 in ext prec
*4882a593Smuzhiyun| (this is 2^15 - 1)
*4882a593Smuzhiyun	fbge	wo_plrg
*4882a593Smuzhiyun	fcmps	#0xc7000000,%fp0
*4882a593Smuzhiyun| c7000000 in sgl prec = c00e00008000000000000000 in ext prec
*4882a593Smuzhiyun| (this is -2^15)
*4882a593Smuzhiyun	fble	wo_nlrg
*4882a593Smuzhiyun
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| at this point, the answer is between the largest pos and neg values
*4882a593Smuzhiyun|
*4882a593Smuzhiyun	movel	USER_FPCR(%a6),%d1	|use user's rounding mode
*4882a593Smuzhiyun	andil	#0x30,%d1
*4882a593Smuzhiyun	fmovel	%d1,%fpcr
*4882a593Smuzhiyun	fmovew	%fp0,L_SCR1(%a6)	|let the 040 perform conversion
*4882a593Smuzhiyun	fmovel %fpsr,%d1
*4882a593Smuzhiyun	orl	%d1,USER_FPSR(%a6)	|capture inex2/ainex if set
*4882a593Smuzhiyun	bra	int_wrt
*4882a593Smuzhiyun
*4882a593Smuzhiyun| wo_plrg/wo_nlrg: the integer move into L_SCR1 does not change the
*4882a593Smuzhiyun| FPU condition codes, so the fbeq still tests the fcmps done in wi.
*4882a593Smuzhiyunwo_plrg:
*4882a593Smuzhiyun	movew	#0x7fff,L_SCR1(%a6)	|answer is largest positive int
*4882a593Smuzhiyun	fbeq	int_wrt			|exact answer
*4882a593Smuzhiyun	fcmps	#0x46ffff00,%fp0
*4882a593Smuzhiyun| 46ffff00 in sgl prec = 400d0000ffff000000000000 in ext prec
*4882a593Smuzhiyun| (this is 2^15 - 0.5)
*4882a593Smuzhiyun	fbge	int_operr		|set operr
*4882a593Smuzhiyun	bra	int_inx			|set inexact
*4882a593Smuzhiyun
*4882a593Smuzhiyunwo_nlrg:
*4882a593Smuzhiyun	movew	#0x8000,L_SCR1(%a6)	|answer is largest negative int
*4882a593Smuzhiyun	fbeq	int_wrt			|exact answer
*4882a593Smuzhiyun	fcmps	#0xc7000080,%fp0
*4882a593Smuzhiyun| c7000080 in sgl prec = c00e00008000800000000000 in ext prec
*4882a593Smuzhiyun| (this is -(2^15 + 0.5))
*4882a593Smuzhiyun	fblt	int_operr		|set operr
*4882a593Smuzhiyun	bra	int_inx			|set inexact
*4882a593Smuzhiyun
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| bi is used to handle a byte integer source specifier
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| Same structure as li, with a byte count of 1 and the byte-sized
*4882a593Smuzhiyun| limits: the result byte is built at L_SCR1(a6).
*4882a593Smuzhiyun|
*4882a593Smuzhiyun
*4882a593Smuzhiyunbi:
*4882a593Smuzhiyun	moveql	#1,%d0		|set byte count
*4882a593Smuzhiyun
*4882a593Smuzhiyun	btstb	#7,STAG(%a6)	|check for extended denorm
*4882a593Smuzhiyun	bne	int_dnrm	|branch if so
*4882a593Smuzhiyun
*4882a593Smuzhiyun	fmovemx ETEMP(%a6),%fp0-%fp0
*4882a593Smuzhiyun	fcmps	#0x42fe0000,%fp0
*4882a593Smuzhiyun| 42fe0000 in sgl prec = 40050000fe00000000000000 in ext prec
*4882a593Smuzhiyun| (this is 2^7 - 1)
*4882a593Smuzhiyun	fbge	by_plrg
*4882a593Smuzhiyun	fcmps	#0xc3000000,%fp0
*4882a593Smuzhiyun| c3000000 in sgl prec = c00600008000000000000000 in ext prec
*4882a593Smuzhiyun| (this is -2^7)
*4882a593Smuzhiyun	fble	by_nlrg
*4882a593Smuzhiyun
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| at this point, the answer is between the largest pos and neg values
*4882a593Smuzhiyun|
*4882a593Smuzhiyun	movel	USER_FPCR(%a6),%d1	|use user's rounding mode
*4882a593Smuzhiyun	andil	#0x30,%d1
*4882a593Smuzhiyun	fmovel	%d1,%fpcr
*4882a593Smuzhiyun	fmoveb	%fp0,L_SCR1(%a6)	|let the 040 perform conversion
*4882a593Smuzhiyun	fmovel %fpsr,%d1
*4882a593Smuzhiyun	orl	%d1,USER_FPSR(%a6)	|capture inex2/ainex if set
*4882a593Smuzhiyun	bra	int_wrt
*4882a593Smuzhiyun
*4882a593Smuzhiyun| by_plrg/by_nlrg: the integer move into L_SCR1 does not change the
*4882a593Smuzhiyun| FPU condition codes, so the fbeq still tests the fcmps done in bi.
*4882a593Smuzhiyunby_plrg:
*4882a593Smuzhiyun	moveb	#0x7f,L_SCR1(%a6)		|answer is largest positive int
*4882a593Smuzhiyun	fbeq	int_wrt			|exact answer
*4882a593Smuzhiyun	fcmps	#0x42ff0000,%fp0
*4882a593Smuzhiyun| 42ff0000 in sgl prec = 40050000ff00000000000000 in ext prec
*4882a593Smuzhiyun| (this is 2^7 - 0.5)
*4882a593Smuzhiyun	fbge	int_operr		|set operr
*4882a593Smuzhiyun	bra	int_inx			|set inexact
*4882a593Smuzhiyun
*4882a593Smuzhiyunby_nlrg:
*4882a593Smuzhiyun	moveb	#0x80,L_SCR1(%a6)	|answer is largest negative int
*4882a593Smuzhiyun	fbeq	int_wrt			|exact answer
*4882a593Smuzhiyun	fcmps	#0xc3008000,%fp0
*4882a593Smuzhiyun| c3008000 in sgl prec = c00600008080000000000000 in ext prec
*4882a593Smuzhiyun| (this is -(2^7 + 0.5))
*4882a593Smuzhiyun	fblt	int_operr		|set operr
*4882a593Smuzhiyun	bra	int_inx			|set inexact
*4882a593Smuzhiyun
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| Common integer routines
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| int_dnrm---account for possible nonzero result for round up with positive
*4882a593Smuzhiyun| operand and round down for negative answer.  In the first case (result = 1)
*4882a593Smuzhiyun| byte-width (stored in d0) of result must be honored.  In the second case,
*4882a593Smuzhiyun| -1 in L_SCR1(a6) will cover all contingencies (FMOVE.B/W/L out).
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| Input:  d0 = byte count of the destination (1, 2 or 4)
*4882a593Smuzhiyun| Every path ends in int_inx, which flags the result as inexact and
*4882a593Smuzhiyun| continues at int_wrt.
*4882a593Smuzhiyun
*4882a593Smuzhiyunint_dnrm:
*4882a593Smuzhiyun	movel	#0,L_SCR1(%a6)	| initialize result to 0
*4882a593Smuzhiyun	bfextu	FPCR_MODE(%a6){#2:#2},%d1	| d1 is the rounding mode
*4882a593Smuzhiyun	cmpb	#2,%d1
*4882a593Smuzhiyun	bmis	int_inx		| if RN or RZ, done
*4882a593Smuzhiyun	bnes	int_rp		| if RP, continue below
*4882a593Smuzhiyun	tstw	ETEMP(%a6)	| RM: store -1 in L_SCR1 if src is negative
*4882a593Smuzhiyun	bpls	int_inx		| otherwise result is 0
*4882a593Smuzhiyun	movel	#-1,L_SCR1(%a6)
*4882a593Smuzhiyun	bras	int_inx
*4882a593Smuzhiyunint_rp:
*4882a593Smuzhiyun	tstw	ETEMP(%a6)	| RP: store +1 of proper width in L_SCR1 if
*4882a593Smuzhiyun|				; source is greater than 0
*4882a593Smuzhiyun	bmis	int_inx		| otherwise, result is 0
*4882a593Smuzhiyun	lea	L_SCR1(%a6),%a1	| a1 is address of L_SCR1
*4882a593Smuzhiyun	addal	%d0,%a1		| offset by destination width -1
*4882a593Smuzhiyun	subal	#1,%a1		| a1 = last byte of the d0-byte result
*4882a593Smuzhiyun	bsetb	#0,(%a1)		| set low bit at a1 address
*4882a593Smuzhiyunint_inx:
*4882a593Smuzhiyun	oril	#inx2a_mask,USER_FPSR(%a6)	| flag the result as inexact
*4882a593Smuzhiyun	bras	int_wrt		| skip over int_operr
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| int_operr -- integer overflow: save the source in FPTEMP, set
*4882a593Smuzhiyun| operr/aiop in USER_FPSR and fall into int_wrt.
*4882a593Smuzhiyun| int_wrt   -- write the d0-byte result held in L_SCR1(a6) to the
*4882a593Smuzhiyun| destination: a data register when EXC_EA is zero, user memory
*4882a593Smuzhiyun| otherwise.  Leaves through mvouti_end.
*4882a593Smuzhiyun|
*4882a593Smuzhiyunint_operr:
*4882a593Smuzhiyun	fmovemx %fp0-%fp0,FPTEMP(%a6)	|FPTEMP must contain the extended
*4882a593Smuzhiyun|				;precision source that needs to be
*4882a593Smuzhiyun|				;converted to integer this is required
*4882a593Smuzhiyun|				;if the operr exception is enabled.
*4882a593Smuzhiyun|				;set operr/aiop (no inex2 on int ovfl)
*4882a593Smuzhiyun
*4882a593Smuzhiyun	oril	#opaop_mask,USER_FPSR(%a6)
*4882a593Smuzhiyun|				;fall through to perform int_wrt
*4882a593Smuzhiyunint_wrt:
*4882a593Smuzhiyun	movel	EXC_EA(%a6),%a1	|load destination address
*4882a593Smuzhiyun	tstl	%a1		|check to see if it is a dest register
*4882a593Smuzhiyun	beqs	wrt_dn		|write data register
*4882a593Smuzhiyun	lea	L_SCR1(%a6),%a0	|point to supervisor source address
*4882a593Smuzhiyun	bsrl	mem_write	|d0 still holds the byte count
*4882a593Smuzhiyun	bra	mvouti_end
*4882a593Smuzhiyun
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| wrt_dn -- the integer destination is a data register.
*4882a593Smuzhiyun| Builds the size:reg selector that reg_dest expects in d1 from the
*4882a593Smuzhiyun| register number returned by get_fline and the byte count in d0:
*4882a593Smuzhiyun|	long (4 bytes): reg + 0x10
*4882a593Smuzhiyun|	word (2 bytes): reg + 8
*4882a593Smuzhiyun|	byte (1 byte):  reg
*4882a593Smuzhiyun| Leaves through mvouti_end.
*4882a593Smuzhiyun|
*4882a593Smuzhiyunwrt_dn:
*4882a593Smuzhiyun	movel	%d0,-(%sp)	|d0 currently contains the size to write
*4882a593Smuzhiyun	bsrl	get_fline	|get_fline returns Dn in d0
*4882a593Smuzhiyun	andiw	#0x7,%d0		|isolate register
*4882a593Smuzhiyun	movel	(%sp)+,%d1	|get size
*4882a593Smuzhiyun	cmpil	#4,%d1		|most frequent case
*4882a593Smuzhiyun	beqs	sz_long
*4882a593Smuzhiyun	cmpil	#2,%d1
*4882a593Smuzhiyun	bnes	sz_con		|not word either: byte, nothing to add
*4882a593Smuzhiyun	orl	#8,%d0		|add 'word' size to register#
*4882a593Smuzhiyun	bras	sz_con
*4882a593Smuzhiyunsz_long:
*4882a593Smuzhiyun	orl	#0x10,%d0		|add 'long' size to register#
*4882a593Smuzhiyunsz_con:
*4882a593Smuzhiyun	movel	%d0,%d1		|reg_dest expects size:reg in d1
*4882a593Smuzhiyun	bsrl	reg_dest	|load proper data register
*4882a593Smuzhiyun	bra	mvouti_end
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| xp -- mv_tbl entry for the extended precision move out.
*4882a593Smuzhiyun| Moves the sign of ETEMP out of the exponent word into LOCAL_SGN,
*4882a593Smuzhiyun| then goes to xdnrm for an extended denorm or to do_fp with d0 = 0.
*4882a593Smuzhiyun|
*4882a593Smuzhiyunxp:
*4882a593Smuzhiyun	lea	ETEMP(%a6),%a0
*4882a593Smuzhiyun	bclrb	#sign_bit,LOCAL_EX(%a0)	|clear sign bit; Z = it was clear
*4882a593Smuzhiyun	sne	LOCAL_SGN(%a0)		|LOCAL_SGN = 0xff if it was set
*4882a593Smuzhiyun	btstb	#7,STAG(%a6)	|check for extended denorm
*4882a593Smuzhiyun	bne	xdnrm
*4882a593Smuzhiyun	clrl	%d0		|round precision 0 for do_fp
*4882a593Smuzhiyun	bras	do_fp		|do normal case
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| sgp -- mv_tbl entry for the single precision move out.
*4882a593Smuzhiyun| Moves the sign of ETEMP into LOCAL_SGN, then classifies the exponent
*4882a593Smuzhiyun| word against the sp_bnds table:
*4882a593Smuzhiyun|	extended denorm source   -> sp_catas
*4882a593Smuzhiyun|	exponent < sp_bnds[0]    -> sp_under (a1 and d0 are reused there)
*4882a593Smuzhiyun|	exponent > sp_bnds[1]    -> sp_over
*4882a593Smuzhiyun|	otherwise                -> do_fp with d0 = 1
*4882a593Smuzhiyun|
*4882a593Smuzhiyunsgp:
*4882a593Smuzhiyun	lea	ETEMP(%a6),%a0
*4882a593Smuzhiyun	bclrb	#sign_bit,LOCAL_EX(%a0)	|clear sign bit; Z = it was clear
*4882a593Smuzhiyun	sne	LOCAL_SGN(%a0)		|LOCAL_SGN = 0xff if it was set
*4882a593Smuzhiyun	btstb	#7,STAG(%a6)	|check for extended denorm
*4882a593Smuzhiyun	bne	sp_catas	|branch if so
*4882a593Smuzhiyun	movew	LOCAL_EX(%a0),%d0	|d0 = exponent word, sign removed
*4882a593Smuzhiyun	lea	sp_bnds,%a1
*4882a593Smuzhiyun	cmpw	(%a1),%d0	|below the first bound?
*4882a593Smuzhiyun	blt	sp_under
*4882a593Smuzhiyun	cmpw	2(%a1),%d0	|above the second bound?
*4882a593Smuzhiyun	bgt	sp_over
*4882a593Smuzhiyun	movel	#1,%d0		|set destination format to single
*4882a593Smuzhiyun	bras	do_fp		|do normal case
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| dp -- mv_tbl entry for the double precision move out.  Same shape as
*4882a593Smuzhiyun| sgp, using dp_bnds and the dp_catas/dp_under/dp_over handlers; the
*4882a593Smuzhiyun| in-range case falls through to do_fp with d0 = 2.
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| do_fp -- common in-range path for xp, sgp and dp.
*4882a593Smuzhiyun| Input:  a0 = operand (ETEMP) with its sign held in LOCAL_SGN
*4882a593Smuzhiyun|	 d0 = value set by the caller: 0 (xp), 1 (sgp), 2 (dp)
*4882a593Smuzhiyun| Rounds the operand with the rounding mode from FPCR_MODE, then stores
*4882a593Smuzhiyun| it to EXC_EA through dest_ext, dest_sgl or dest_dbl, chosen from the
*4882a593Smuzhiyun| format field of CMDREG1B.  Leaves through mvout_end.
*4882a593Smuzhiyun|
*4882a593Smuzhiyundp:
*4882a593Smuzhiyun	lea	ETEMP(%a6),%a0
*4882a593Smuzhiyun	bclrb	#sign_bit,LOCAL_EX(%a0)	|clear sign bit; Z = it was clear
*4882a593Smuzhiyun	sne	LOCAL_SGN(%a0)		|LOCAL_SGN = 0xff if it was set
*4882a593Smuzhiyun
*4882a593Smuzhiyun	btstb	#7,STAG(%a6)	|check for extended denorm
*4882a593Smuzhiyun	bne	dp_catas	|branch if so
*4882a593Smuzhiyun
*4882a593Smuzhiyun	movew	LOCAL_EX(%a0),%d0	|d0 = exponent word, sign removed
*4882a593Smuzhiyun	lea	dp_bnds,%a1
*4882a593Smuzhiyun
*4882a593Smuzhiyun	cmpw	(%a1),%d0	|below the first bound?
*4882a593Smuzhiyun	blt	dp_under
*4882a593Smuzhiyun	cmpw	2(%a1),%d0	|above the second bound?
*4882a593Smuzhiyun	bgt	dp_over
*4882a593Smuzhiyun
*4882a593Smuzhiyun	movel	#2,%d0		|set destination format to double
*4882a593Smuzhiyun|				;fall through to do_fp
*4882a593Smuzhiyun|
*4882a593Smuzhiyundo_fp:
*4882a593Smuzhiyun	bfextu	FPCR_MODE(%a6){#2:#2},%d1	|rnd mode in d1
*4882a593Smuzhiyun	swap	%d0			|rnd prec in upper word
*4882a593Smuzhiyun	addl	%d0,%d1			|d1 has PREC/MODE info
*4882a593Smuzhiyun
*4882a593Smuzhiyun	clrl	%d0			|clear g,r,s
*4882a593Smuzhiyun
*4882a593Smuzhiyun	bsrl	round			|round
*4882a593Smuzhiyun
*4882a593Smuzhiyun	movel	%a0,%a1			|a1 = rounded operand
*4882a593Smuzhiyun	movel	EXC_EA(%a6),%a0		|a0 = destination address
*4882a593Smuzhiyun
*4882a593Smuzhiyun	bfextu	CMDREG1B(%a6){#3:#3},%d1	|extract destination format
*4882a593Smuzhiyun|					;at this point only the dest
*4882a593Smuzhiyun|					;formats sgl, dbl, ext are
*4882a593Smuzhiyun|					;possible
*4882a593Smuzhiyun	cmpb	#2,%d1
*4882a593Smuzhiyun	bgts	ddbl			|double=5, extended=2, single=1
*4882a593Smuzhiyun	bnes	dsgl
*4882a593Smuzhiyun|					;fall through to dext
*4882a593Smuzhiyundext:
*4882a593Smuzhiyun	bsrl	dest_ext
*4882a593Smuzhiyun	bra	mvout_end
*4882a593Smuzhiyundsgl:
*4882a593Smuzhiyun	bsrl	dest_sgl
*4882a593Smuzhiyun	bra	mvout_end
*4882a593Smuzhiyunddbl:
*4882a593Smuzhiyun	bsrl	dest_dbl
*4882a593Smuzhiyun	bra	mvout_end
*4882a593Smuzhiyun
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| Handle possible denorm or catastrophic underflow cases here
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| xdnrm -- extended precision move out of an extended denorm (from xp).
*4882a593Smuzhiyun| Input: a0 = ETEMP in internal extended format.
*4882a593Smuzhiyun| Copies the operand to WBTEMP via set_xop, sets wbtemp15, stores the
*4882a593Smuzhiyun| operand to EXC_EA with dest_ext and signals unfl.
*4882a593Smuzhiyun|
*4882a593Smuzhiyunxdnrm:
*4882a593Smuzhiyun	bsr	set_xop		|initialize WBTEMP
*4882a593Smuzhiyun	bsetb	#wbtemp15_bit,WB_BYTE(%a6) |set wbtemp15
*4882a593Smuzhiyun
*4882a593Smuzhiyun	movel	%a0,%a1		|a1 has the operand input
*4882a593Smuzhiyun	movel	EXC_EA(%a6),%a0	|a0 has the destination pointer
*4882a593Smuzhiyun	bsrl	dest_ext	|store to memory
*4882a593Smuzhiyun	bsetb	#unfl_bit,FPSR_EXCEPT(%a6)
*4882a593Smuzhiyun	bra	mvout_end
*4882a593Smuzhiyun
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| sp_under -- single precision underflow (entered from sgp).
*4882a593Smuzhiyun| Input: a0 = operand, a1 = sp_bnds, d0 = exponent word.
*4882a593Smuzhiyun| An exponent below the third sp_bnds word is catastrophic and goes
*4882a593Smuzhiyun| to sp_catas; otherwise the operand is denormalized and rounded by
*4882a593Smuzhiyun| dpspdnrm, stored with dest_sgl, and unfl is signalled.
*4882a593Smuzhiyun|
*4882a593Smuzhiyunsp_under:
*4882a593Smuzhiyun	bsetb	#etemp15_bit,STAG(%a6)
*4882a593Smuzhiyun
*4882a593Smuzhiyun	cmpw	4(%a1),%d0
*4882a593Smuzhiyun	blts	sp_catas	|catastrophic underflow case
*4882a593Smuzhiyun
*4882a593Smuzhiyun	movel	#1,%d0		|load in round precision
*4882a593Smuzhiyun	movel	#sgl_thresh,%d1	|load in single denorm threshold
*4882a593Smuzhiyun	bsrl	dpspdnrm	|expects d1 to have the proper
*4882a593Smuzhiyun|				;denorm threshold
*4882a593Smuzhiyun	bsrl	dest_sgl	|stores value to destination
*4882a593Smuzhiyun	bsetb	#unfl_bit,FPSR_EXCEPT(%a6)
*4882a593Smuzhiyun	bra	mvout_end	|exit
*4882a593Smuzhiyun
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| dp_under -- double precision underflow (entered from dp).
*4882a593Smuzhiyun| Input: a0 = operand, a1 = dp_bnds, d0 = exponent word.
*4882a593Smuzhiyun| An exponent below the third dp_bnds word is catastrophic and goes
*4882a593Smuzhiyun| to dp_catas; otherwise the operand is denormalized and rounded by
*4882a593Smuzhiyun| dpspdnrm, stored with dest_dbl, and unfl is signalled.
*4882a593Smuzhiyun|
*4882a593Smuzhiyundp_under:
*4882a593Smuzhiyun	bsetb	#etemp15_bit,STAG(%a6)
*4882a593Smuzhiyun
*4882a593Smuzhiyun	cmpw	4(%a1),%d0
*4882a593Smuzhiyun	blts	dp_catas	|catastrophic underflow case
*4882a593Smuzhiyun
*4882a593Smuzhiyun	movel	#dbl_thresh,%d1	|load in double precision threshold
*4882a593Smuzhiyun	movel	#2,%d0		|load in round precision
*4882a593Smuzhiyun	bsrl	dpspdnrm	|expects d1 to have proper
*4882a593Smuzhiyun|				;denorm threshold
*4882a593Smuzhiyun|				;expects d0 to have round precision
*4882a593Smuzhiyun	bsrl	dest_dbl	|store value to destination
*4882a593Smuzhiyun	bsetb	#unfl_bit,FPSR_EXCEPT(%a6)
*4882a593Smuzhiyun	bra	mvout_end	|exit
*4882a593Smuzhiyun
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| Handle catastrophic underflow cases here
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| sp_catas -- single precision catastrophic underflow.
*4882a593Smuzhiyun| Input: a0 = operand in internal extended format.
*4882a593Smuzhiyun| unf_sub produces the result with d0 = 1 (single); USER_FPSR is saved
*4882a593Smuzhiyun| around the call and restored afterwards.  The result is stored with
*4882a593Smuzhiyun| dest_sgl and unfinx_mask is ORed into USER_FPSR.
*4882a593Smuzhiyun|
*4882a593Smuzhiyunsp_catas:
*4882a593Smuzhiyun| Temp fix for z bit set in unf_sub
*4882a593Smuzhiyun	movel	USER_FPSR(%a6),-(%a7)
*4882a593Smuzhiyun
*4882a593Smuzhiyun	movel	#1,%d0		|set round precision to sgl
*4882a593Smuzhiyun
*4882a593Smuzhiyun	bsrl	unf_sub		|a0 points to result
*4882a593Smuzhiyun
*4882a593Smuzhiyun	movel	(%a7)+,USER_FPSR(%a6)
*4882a593Smuzhiyun
*4882a593Smuzhiyun	movel	#1,%d0
*4882a593Smuzhiyun	subw	%d0,LOCAL_EX(%a0) |account for difference between
*4882a593Smuzhiyun|				;denorm/norm bias
*4882a593Smuzhiyun
*4882a593Smuzhiyun	movel	%a0,%a1		|a1 has the operand input
*4882a593Smuzhiyun	movel	EXC_EA(%a6),%a0	|a0 has the destination pointer
*4882a593Smuzhiyun
*4882a593Smuzhiyun	bsrl	dest_sgl	|store the result
*4882a593Smuzhiyun	oril	#unfinx_mask,USER_FPSR(%a6)
*4882a593Smuzhiyun	bra	mvout_end
*4882a593Smuzhiyun
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| dp_catas -- double precision catastrophic underflow.
*4882a593Smuzhiyun| Input: a0 = operand in internal extended format.
*4882a593Smuzhiyun| unf_sub produces the result with d0 = 2 (double); USER_FPSR is saved
*4882a593Smuzhiyun| around the call and restored afterwards.  The result is stored with
*4882a593Smuzhiyun| dest_dbl and unfinx_mask is ORed into USER_FPSR.
*4882a593Smuzhiyun|
*4882a593Smuzhiyundp_catas:
*4882a593Smuzhiyun| Temp fix for z bit set in unf_sub
*4882a593Smuzhiyun	movel	USER_FPSR(%a6),-(%a7)
*4882a593Smuzhiyun
*4882a593Smuzhiyun	movel	#2,%d0		|set round precision to dbl
*4882a593Smuzhiyun	bsrl	unf_sub		|a0 points to result
*4882a593Smuzhiyun
*4882a593Smuzhiyun	movel	(%a7)+,USER_FPSR(%a6)
*4882a593Smuzhiyun
*4882a593Smuzhiyun	movel	#1,%d0
*4882a593Smuzhiyun	subw	%d0,LOCAL_EX(%a0) |account for difference between
*4882a593Smuzhiyun|				;denorm/norm bias
*4882a593Smuzhiyun
*4882a593Smuzhiyun	movel	%a0,%a1		|a1 has the operand input
*4882a593Smuzhiyun	movel	EXC_EA(%a6),%a0	|a0 has the destination pointer
*4882a593Smuzhiyun
*4882a593Smuzhiyun	bsrl	dest_dbl	|store the result
*4882a593Smuzhiyun	oril	#unfinx_mask,USER_FPSR(%a6)
*4882a593Smuzhiyun	bra	mvout_end
*4882a593Smuzhiyun
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| Handle catastrophic overflow cases here
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| sp_over -- single precision overflow (entered from sgp).
*4882a593Smuzhiyun| Copies ETEMP to FP_SCR1 and lets ovf_res build the result there with
*4882a593Smuzhiyun| d0 = 1 (single); USER_FPSR is saved around the call and restored
*4882a593Smuzhiyun| afterwards.  The result is stored with dest_sgl and ovfinx_mask is
*4882a593Smuzhiyun| ORed into USER_FPSR.
*4882a593Smuzhiyun|
*4882a593Smuzhiyunsp_over:
*4882a593Smuzhiyun| Temp fix for z bit set in ovf_res
*4882a593Smuzhiyun	movel	USER_FPSR(%a6),-(%a7)
*4882a593Smuzhiyun
*4882a593Smuzhiyun	movel	#1,%d0		|set round precision to sgl
*4882a593Smuzhiyun	leal	FP_SCR1(%a6),%a0	|use FP_SCR1 for creating result
*4882a593Smuzhiyun	movel	ETEMP_EX(%a6),(%a0)
*4882a593Smuzhiyun	movel	ETEMP_HI(%a6),4(%a0)
*4882a593Smuzhiyun	movel	ETEMP_LO(%a6),8(%a0)
*4882a593Smuzhiyun	bsrl	ovf_res
*4882a593Smuzhiyun
*4882a593Smuzhiyun	movel	(%a7)+,USER_FPSR(%a6)
*4882a593Smuzhiyun
*4882a593Smuzhiyun	movel	%a0,%a1		|a1 has the operand input
*4882a593Smuzhiyun	movel	EXC_EA(%a6),%a0	|a0 has the destination pointer
*4882a593Smuzhiyun	bsrl	dest_sgl	|store the result
*4882a593Smuzhiyun	orl	#ovfinx_mask,USER_FPSR(%a6)
*4882a593Smuzhiyun	bra	mvout_end
*4882a593Smuzhiyun
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| dp_over -- double precision overflow (entered from dp).
*4882a593Smuzhiyun| Copies ETEMP to FP_SCR1 and lets ovf_res build the result there with
*4882a593Smuzhiyun| d0 = 2 (double); USER_FPSR is saved around the call and restored
*4882a593Smuzhiyun| afterwards.  The result is stored with dest_dbl and ovfinx_mask is
*4882a593Smuzhiyun| ORed into USER_FPSR.
*4882a593Smuzhiyun|
*4882a593Smuzhiyundp_over:
*4882a593Smuzhiyun| Temp fix for z bit set in ovf_res
*4882a593Smuzhiyun	movel	USER_FPSR(%a6),-(%a7)
*4882a593Smuzhiyun
*4882a593Smuzhiyun	movel	#2,%d0		|set round precision to dbl
*4882a593Smuzhiyun	leal	FP_SCR1(%a6),%a0	|use FP_SCR1 for creating result
*4882a593Smuzhiyun	movel	ETEMP_EX(%a6),(%a0)
*4882a593Smuzhiyun	movel	ETEMP_HI(%a6),4(%a0)
*4882a593Smuzhiyun	movel	ETEMP_LO(%a6),8(%a0)
*4882a593Smuzhiyun	bsrl	ovf_res
*4882a593Smuzhiyun
*4882a593Smuzhiyun	movel	(%a7)+,USER_FPSR(%a6)
*4882a593Smuzhiyun
*4882a593Smuzhiyun	movel	%a0,%a1		|a1 has the operand input
*4882a593Smuzhiyun	movel	EXC_EA(%a6),%a0	|a0 has the destination pointer
*4882a593Smuzhiyun	bsrl	dest_dbl	|store the result
*4882a593Smuzhiyun	orl	#ovfinx_mask,USER_FPSR(%a6)
*4882a593Smuzhiyun	bra	mvout_end
*4882a593Smuzhiyun
*4882a593Smuzhiyun|
*4882a593Smuzhiyun|	DPSPDNRM
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| This subroutine takes an extended normalized number and denormalizes
*4882a593Smuzhiyun| it to the given round precision. This subroutine also decrements
*4882a593Smuzhiyun| the input operand's exponent by 1 to account for the fact that
*4882a593Smuzhiyun| dest_sgl or dest_dbl expects a normalized number's bias.
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| Input: a0  points to a normalized number in internal extended format
*4882a593Smuzhiyun|	 d0  is the round precision (=1 for sgl; =2 for dbl)
*4882a593Smuzhiyun|	 d1  is the single precision or double precision
*4882a593Smuzhiyun|	     denorm threshold
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| Output: (In the format for dest_sgl or dest_dbl)
*4882a593Smuzhiyun|	 a0   points to the destination
*4882a593Smuzhiyun|	 a1   points to the operand
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| Exceptions: Reports inexact 2 exception by setting USER_FPSR bits
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| The round precision is kept on the stack across the dnrm_lp call
*4882a593Smuzhiyun| and read back from there when the PREC/MODE word for round is built.
*4882a593Smuzhiyun|
*4882a593Smuzhiyundpspdnrm:
*4882a593Smuzhiyun	movel	%d0,-(%a7)	|save round precision
*4882a593Smuzhiyun	clrl	%d0		|clear initial g,r,s
*4882a593Smuzhiyun	bsrl	dnrm_lp		|careful with d0, it's needed by round
*4882a593Smuzhiyun
*4882a593Smuzhiyun	bfextu	FPCR_MODE(%a6){#2:#2},%d1 |get rounding mode
*4882a593Smuzhiyun	swap	%d1
*4882a593Smuzhiyun	movew	2(%a7),%d1	|set rounding precision
*4882a593Smuzhiyun	swap	%d1		|at this point d1 has PREC/MODE info
*4882a593Smuzhiyun	bsrl	round		|round result, sets the inex bit in
*4882a593Smuzhiyun|				;USER_FPSR if needed
*4882a593Smuzhiyun
*4882a593Smuzhiyun	movew	#1,%d0
*4882a593Smuzhiyun	subw	%d0,LOCAL_EX(%a0) |account for difference in denorm
*4882a593Smuzhiyun|				;vs norm bias
*4882a593Smuzhiyun
*4882a593Smuzhiyun	movel	%a0,%a1		|a1 has the operand input
*4882a593Smuzhiyun	movel	EXC_EA(%a6),%a0	|a0 has the destination pointer
*4882a593Smuzhiyun	addw	#4,%a7		|pop stack
*4882a593Smuzhiyun	rts
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| SET_XOP initializes WBTEMP with the value pointed to by a0
*4882a593Smuzhiyun| input: a0 points to input operand in the internal extended format
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| The copy in WBTEMP is put back into extended format: the sign byte
*4882a593Smuzhiyun| is cleared and, if it was nonzero, the sign bit is set in WBTEMP_EX.
*4882a593Smuzhiyun| Four bits of STAG are cleared on the way out.  a0 is not modified.
*4882a593Smuzhiyun|
*4882a593Smuzhiyunset_xop:
*4882a593Smuzhiyun	movel	LOCAL_EX(%a0),WBTEMP_EX(%a6)
*4882a593Smuzhiyun	movel	LOCAL_HI(%a0),WBTEMP_HI(%a6)
*4882a593Smuzhiyun	movel	LOCAL_LO(%a0),WBTEMP_LO(%a6)
*4882a593Smuzhiyun	bfclr	WBTEMP_SGN(%a6){#0:#8}	|clear sign byte; Z tells if it was 0
*4882a593Smuzhiyun	beqs	sxop			|it was zero: positive, nothing to set
*4882a593Smuzhiyun	bsetb	#sign_bit,WBTEMP_EX(%a6)
*4882a593Smuzhiyunsxop:
*4882a593Smuzhiyun	bfclr	STAG(%a6){#5:#4}	|clear wbtm66,wbtm1,wbtm0,sbit
*4882a593Smuzhiyun	rts
*4882a593Smuzhiyun|
*4882a593Smuzhiyun|	P_MOVE
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| Packed move out.  pack_out jumps through p_movet using the top three
*4882a593Smuzhiyun| bits of the byte at STAG (the source tag).  p_regd is the jump table
*4882a593Smuzhiyun| used by p_move to fetch a dynamic k-factor from data register Dn.
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| NOTE(review): the extracted tag is 3 bits wide but p_movet has only
*4882a593Smuzhiyun| five entries, so tags 5-7 would index into p_regd.  Presumably those
*4882a593Smuzhiyun| tag values never reach this point -- confirm against the tagging code.
*4882a593Smuzhiyun|
*4882a593Smuzhiyunp_movet:
*4882a593Smuzhiyun	.long	p_move		|tag 0
*4882a593Smuzhiyun	.long	p_movez		|tag 1
*4882a593Smuzhiyun	.long	p_movei		|tag 2
*4882a593Smuzhiyun	.long	p_moven		|tag 3
*4882a593Smuzhiyun	.long	p_move		|tag 4
*4882a593Smuzhiyunp_regd:
*4882a593Smuzhiyun	.long	p_dyd0
*4882a593Smuzhiyun	.long	p_dyd1
*4882a593Smuzhiyun	.long	p_dyd2
*4882a593Smuzhiyun	.long	p_dyd3
*4882a593Smuzhiyun	.long	p_dyd4
*4882a593Smuzhiyun	.long	p_dyd5
*4882a593Smuzhiyun	.long	p_dyd6
*4882a593Smuzhiyun	.long	p_dyd7
*4882a593Smuzhiyun
*4882a593Smuzhiyunpack_out:
*4882a593Smuzhiyun	leal	p_movet,%a0	|load jmp table address
*4882a593Smuzhiyun	movew	STAG(%a6),%d0	|get source tag
*4882a593Smuzhiyun	bfextu	%d0{#16:#3},%d0	|isolate source bits
*4882a593Smuzhiyun	movel	(%a0,%d0.w*4),%a0	|load a0 with routine label for tag
*4882a593Smuzhiyun	jmp	(%a0)		|go to the routine
*4882a593Smuzhiyun
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| p_write -- common tail of the packed move-out routines.
*4882a593Smuzhiyun| Input: a0 = address of the 12-byte value to write (set by the caller)
*4882a593Smuzhiyun| Writes 12 bytes to the address in EXC_EA with mem_write, then clears
*4882a593Smuzhiyun| the CU_SAVEPC byte and the top three bits of DTAG before returning.
*4882a593Smuzhiyun|
*4882a593Smuzhiyunp_write:
*4882a593Smuzhiyun	movel	#0x0c,%d0	|get byte count
*4882a593Smuzhiyun	movel	EXC_EA(%a6),%a1	|get the destination address
*4882a593Smuzhiyun	bsr	mem_write	|write the user's destination
*4882a593Smuzhiyun	moveb	#0,CU_SAVEPC(%a6) |set the cu save pc to all 0's
*4882a593Smuzhiyun
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| Also note that the dtag must be set to norm here - this is because
*4882a593Smuzhiyun| the 040 uses the dtag to execute the correct microcode.
*4882a593Smuzhiyun|
*4882a593Smuzhiyun        bfclr    DTAG(%a6){#0:#3}  |set dtag to norm
*4882a593Smuzhiyun
*4882a593Smuzhiyun	rts
*4882a593Smuzhiyun
*4882a593Smuzhiyun| Notes on handling of special case (zero, inf, and nan) inputs:
*4882a593Smuzhiyun|	1. Operr is not signalled if the k-factor is greater than 18.
*4882a593Smuzhiyun|	2. Per the manual, status bits are not set.
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| p_move -- packed move out of the operand in ETEMP.
*4882a593Smuzhiyun| The k-factor is either static (low 7 bits of CMDREG1B) or dynamic
*4882a593Smuzhiyun| (low 7 bits of the data register named by bits 6-4 of CMDREG1B,
*4882a593Smuzhiyun| fetched by the p_dydN routines, which re-enter at statick).  The
*4882a593Smuzhiyun| sign-extended k-factor is passed to bindec in d0, and the 12 bytes
*4882a593Smuzhiyun| at FP_SCR1 are then written out by p_write.
*4882a593Smuzhiyun|
*4882a593Smuzhiyun
*4882a593Smuzhiyunp_move:
*4882a593Smuzhiyun	movew	CMDREG1B(%a6),%d0
*4882a593Smuzhiyun	btstl	#kfact_bit,%d0	|test for dynamic k-factor
*4882a593Smuzhiyun	beqs	statick		|if clear, k-factor is static
*4882a593Smuzhiyundynamick:
*4882a593Smuzhiyun	bfextu	%d0{#25:#3},%d0	|isolate register for dynamic k-factor
*4882a593Smuzhiyun	lea	p_regd,%a0
*4882a593Smuzhiyun	movel	%a0@(%d0:l:4),%a0	|a0 = p_regd[register number]
*4882a593Smuzhiyun	jmp	(%a0)		|fetch Dn into d0, then on to statick
*4882a593Smuzhiyunstatick:
*4882a593Smuzhiyun	andiw	#0x007f,%d0	|get k-factor
*4882a593Smuzhiyun	bfexts	%d0{#25:#7},%d0	|sign extend d0 for bindec
*4882a593Smuzhiyun	leal	ETEMP(%a6),%a0	|a0 will point to the packed decimal
*4882a593Smuzhiyun	bsrl	bindec		|perform the convert; data at a6
*4882a593Smuzhiyun	leal	FP_SCR1(%a6),%a0	|load a0 with result address
*4882a593Smuzhiyun	bral	p_write
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| p_movez -- p_movet entry for tag 1 (the zero case, per the notes on
*4882a593Smuzhiyun| special inputs).  Keeps only the first word of ETEMP, clears the
*4882a593Smuzhiyun| remaining ten bytes and writes the 12 bytes out via p_write.
*4882a593Smuzhiyun|
*4882a593Smuzhiyunp_movez:
*4882a593Smuzhiyun	leal	ETEMP(%a6),%a0	|a0 will point to the packed decimal
*4882a593Smuzhiyun	clrw	2(%a0)		|clear lower word of exp
*4882a593Smuzhiyun	clrl	4(%a0)		|load second lword of ZERO
*4882a593Smuzhiyun	clrl	8(%a0)		|load third lword of ZERO
*4882a593Smuzhiyun	bra	p_write		|go write results
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| p_movei / p_moven -- p_movet entries for tags 2 and 3.
*4882a593Smuzhiyun| Both clear the word at ETEMP+2 and write the 12 bytes of ETEMP out
*4882a593Smuzhiyun| via p_write.  p_movei clears the FPSR first and then shares the
*4882a593Smuzhiyun| p_moven code.
*4882a593Smuzhiyun|
*4882a593Smuzhiyunp_movei:
*4882a593Smuzhiyun	fmovel	#0,%FPSR		|clear aiop
*4882a593Smuzhiyun|				;fall through to p_moven
*4882a593Smuzhiyunp_moven:
*4882a593Smuzhiyun	leal	ETEMP(%a6),%a0	|p_write takes its source from a0
*4882a593Smuzhiyun	clrw	2(%a0)		|zero the low word of the first lword
*4882a593Smuzhiyun	bra	p_write		|write it to the user's destination
*4882a593Smuzhiyun
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| Routines to read the dynamic k-factor from Dn.
*4882a593Smuzhiyun|
*4882a593Smuzhiyun| Reached through the p_regd table from dynamick.  Each routine loads
*4882a593Smuzhiyun| the value of Dn into d0 and continues at statick.  D0 and D1 are read
*4882a593Smuzhiyun| from USER_D0/USER_D1 in the frame; D2-D7 are read from the live
*4882a593Smuzhiyun| registers (presumably because only d0/d1 are used as scratch by the
*4882a593Smuzhiyun| handler -- confirm against the frame setup code).
*4882a593Smuzhiyun|
*4882a593Smuzhiyunp_dyd0:
*4882a593Smuzhiyun	movel	USER_D0(%a6),%d0
*4882a593Smuzhiyun	bras	statick
*4882a593Smuzhiyunp_dyd1:
*4882a593Smuzhiyun	movel	USER_D1(%a6),%d0
*4882a593Smuzhiyun	bras	statick
*4882a593Smuzhiyunp_dyd2:
*4882a593Smuzhiyun	movel	%d2,%d0
*4882a593Smuzhiyun	bras	statick
*4882a593Smuzhiyunp_dyd3:
*4882a593Smuzhiyun	movel	%d3,%d0
*4882a593Smuzhiyun	bras	statick
*4882a593Smuzhiyunp_dyd4:
*4882a593Smuzhiyun	movel	%d4,%d0
*4882a593Smuzhiyun	bras	statick
*4882a593Smuzhiyunp_dyd5:
*4882a593Smuzhiyun	movel	%d5,%d0
*4882a593Smuzhiyun	bras	statick
*4882a593Smuzhiyunp_dyd6:
*4882a593Smuzhiyun	movel	%d6,%d0
*4882a593Smuzhiyun	bra	statick
*4882a593Smuzhiyunp_dyd7:
*4882a593Smuzhiyun	movel	%d7,%d0
*4882a593Smuzhiyun	bra	statick
*4882a593Smuzhiyun
*4882a593Smuzhiyun	|end