/* SPDX-License-Identifier: GPL-2.0 */
	.file	"reg_round.S"
/*---------------------------------------------------------------------------+
 |  reg_round.S                                                              |
 |                                                                           |
 |  Rounding/truncation/etc for FPU basic arithmetic functions.              |
 |                                                                           |
 | Copyright (C) 1993,1995,1997                                              |
 |       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,                      |
 |       Australia.  E-mail billm@suburbia.net                               |
 |                                                                           |
 | This code has four possible entry points.                                 |
 | The following must be entered by a jmp instruction:                       |
 |   fpu_reg_round, fpu_reg_round_sqrt, and fpu_Arith_exit.                  |
 |                                                                           |
 | NOTE(review): only fpu_reg_round and fpu_Arith_exit are defined in this   |
 | file as currently visible; fpu_reg_round_sqrt is mentioned above but not  |
 | present here -- confirm against the rest of the math-emu sources.         |
 |                                                                           |
 | The FPU_round entry point is intended to be used by C code.               |
 | From C, call as:                                                          |
 | int FPU_round(FPU_REG *arg, unsigned int extent, unsigned int control_w)  |
 |                                                                           |
 | Return value is the tag of the answer, or-ed with FPU_Exception if        |
 | one was raised, or -1 on internal error.                                  |
 |                                                                           |
 | For correct "up" and "down" rounding, the argument must have the correct  |
 | sign.                                                                     |
 |                                                                           |
 +---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------+
 | Four entry points.                                                        |
 |                                                                           |
 | Needed by both the fpu_reg_round and fpu_reg_round_sqrt entry points:     |
 |  %eax:%ebx  64 bit significand                                            |
 |  %edx       32 bit extension of the significand                           |
 |  %edi       pointer to an FPU_REG for the result to be stored             |
 |  stack      calling function must have set up a C stack frame and         |
 |             pushed %esi, %edi, and %ebx                                   |
 |                                                                           |
 | Needed just for the fpu_reg_round_sqrt entry point:                       |
 |  %cx  A control word in the same format as the FPU control word.          |
 | Otherwise, PARAM4 must give such a value.                                 |
 |                                                                           |
 |                                                                           |
 | The significand and its extension are assumed to be exact in the          |
 | following sense:                                                          |
 |   If the significand by itself is the exact result then the significand   |
 |   extension (%edx) must contain 0, otherwise the significand extension    |
 |   must be non-zero.                                                       |
 |   If the significand extension is non-zero then the significand is        |
 |   smaller than the magnitude of the correct exact result by an amount     |
 |   greater than zero and less than one ls bit of the significand.          |
 |   The significand extension is only required to have three possible       |
 |   non-zero values:                                                        |
 |       less than 0x80000000  <=> the significand is less than 1/2 an ls    |
 |                                 bit smaller than the magnitude of the     |
 |                                 true exact result.                        |
 |         exactly 0x80000000  <=> the significand is exactly 1/2 an ls bit  |
 |                                 smaller than the magnitude of the true    |
 |                                 exact result.                             |
 |    greater than 0x80000000  <=> the significand is more than 1/2 an ls    |
 |                                 bit smaller than the magnitude of the     |
 |                                 true exact result.                        |
 |                                                                           |
 +---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------+
 | The code in this module has become quite complex, but it should handle    |
 | all of the FPU flags which are set at this stage of the basic arithmetic  |
 | computations.                                                             |
 | There are a few rare cases where the results are not set identically to   |
 | a real FPU. These require a bit more thought because at this stage the    |
 | results of the code here appear to be more consistent...                  |
 | This may be changed in a future version.                                  |
 +---------------------------------------------------------------------------*/

#include "fpu_emu.h"
#include "exception.h"
#include "control_w.h"

/* Flags for FPU_bits_lost */
#define	LOST_DOWN	$1
#define	LOST_UP		$2

/* Flags for FPU_denormal */
#define	DENORMAL	$1
#define	UNMASKED_UNDERFLOW $2


#ifndef NON_REENTRANT_FPU
/* Make the code re-entrant by putting
   local storage on the stack: */
#define FPU_bits_lost	(%esp)
#define FPU_denormal	1(%esp)

#else
/* Not re-entrant, so we can gain speed by putting
   local storage in a static area: */
.data
	.align 4,0
FPU_bits_lost:
	.byte	0
FPU_denormal:
	.byte	0
#endif /* NON_REENTRANT_FPU */


.text
.globl fpu_reg_round
.globl fpu_Arith_exit

/* Entry point when called from C */
SYM_FUNC_START(FPU_round)
	pushl	%ebp
	movl	%esp,%ebp
	pushl	%esi
	pushl	%edi
	pushl	%ebx

	movl	PARAM1,%edi		/* %edi = destination FPU_REG */
	movl	SIGH(%edi),%eax		/* %eax:%ebx = 64 bit significand */
	movl	SIGL(%edi),%ebx
	movl	PARAM2,%edx		/* %edx = significand extension */

fpu_reg_round:			/* Normal entry point */
	movl	PARAM4,%ecx		/* %ecx = control word */

#ifndef NON_REENTRANT_FPU
	pushl	%ebx		/* adjust the stack pointer */
#endif /* NON_REENTRANT_FPU */

#ifdef PARANOID
/* Cannot use this here yet */
/*	orl	%eax,%eax */
/*	jns	L_entry_bugged */
#endif /* PARANOID */

	cmpw	EXP_UNDER,EXP(%edi)
	jle	L_Make_denorm			/* The number is a de-normal */

	movb	$0,FPU_denormal			/* 0 -> not a de-normal */

Denorm_done:
	movb	$0,FPU_bits_lost		/* No bits yet lost in rounding */

	/* Keep a full copy of the control word in %esi; %ecx is used
	   as scratch for the precision-control dispatch below. */
	movl	%ecx,%esi
	andl	CW_PC,%ecx
	cmpl	PR_64_BITS,%ecx
	je	LRound_To_64

	cmpl	PR_53_BITS,%ecx
	je	LRound_To_53

	cmpl	PR_24_BITS,%ecx
	je	LRound_To_24

#ifdef PECULIAR_486
/* With the precision control bits set to 01 "(reserved)", a real 80486
   behaves as if the precision control bits were set to 11 "64 bits" */
	cmpl	PR_RESERVED_BITS,%ecx
	je	LRound_To_64
#ifdef PARANOID
	jmp	L_bugged_denorm_486
#endif /* PARANOID */
#else
#ifdef PARANOID
	jmp	L_bugged_denorm	/* There is no bug, just a bad control word */
#endif /* PARANOID */
#endif /* PECULIAR_486 */


/* Round etc to 24 bit precision */
LRound_To_24:
	movl	%esi,%ecx
	andl	CW_RC,%ecx		/* Extract the rounding-control field */
	cmpl	RC_RND,%ecx
	je	LRound_nearest_24

	cmpl	RC_CHOP,%ecx
	je	LCheck_truncate_24

	cmpl	RC_UP,%ecx		/* Towards +infinity */
	je	LUp_24

	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
	je	LDown_24

#ifdef PARANOID
	jmp	L_bugged_round24
#endif /* PARANOID */

LUp_24:
	cmpb	SIGN_POS,PARAM5
	jne	LCheck_truncate_24	/* If negative then  up==truncate */

	jmp	LCheck_24_round_up

LDown_24:
	cmpb	SIGN_POS,PARAM5
	je	LCheck_truncate_24	/* If positive then  down==truncate */

LCheck_24_round_up:
	/* Round up only if any bit below the 24-bit significand is set */
	movl	%eax,%ecx
	andl	$0x000000ff,%ecx
	orl	%ebx,%ecx
	orl	%edx,%ecx
	jnz	LDo_24_round_up
	jmp	L_Re_normalise

LRound_nearest_24:
	/* Do rounding of the 24th bit if needed (nearest or even) */
	movl	%eax,%ecx
	andl	$0x000000ff,%ecx
	cmpl	$0x00000080,%ecx
	jc	LCheck_truncate_24	/* less than half, no increment needed */

	jne	LGreater_Half_24	/* greater than half, increment needed */

	/* Possibly half, we need to check the ls bits */
	orl	%ebx,%ebx
	jnz	LGreater_Half_24	/* greater than half, increment needed */

	orl	%edx,%edx
	jnz	LGreater_Half_24	/* greater than half, increment needed */

	/* Exactly half, increment only if 24th bit is 1 (round to even) */
	testl	$0x00000100,%eax
	jz	LDo_truncate_24

LGreater_Half_24:	/* Rounding: increment at the 24th bit */
LDo_24_round_up:
	andl	$0xffffff00,%eax	/* Truncate to 24 bits */
	xorl	%ebx,%ebx
	movb	LOST_UP,FPU_bits_lost
	addl	$0x00000100,%eax
	jmp	LCheck_Round_Overflow

LCheck_truncate_24:
	movl	%eax,%ecx
	andl	$0x000000ff,%ecx
	orl	%ebx,%ecx
	orl	%edx,%ecx
	jz	L_Re_normalise		/* No truncation needed */

LDo_truncate_24:
	andl	$0xffffff00,%eax	/* Truncate to 24 bits */
	xorl	%ebx,%ebx
	movb	LOST_DOWN,FPU_bits_lost
	jmp	L_Re_normalise


/* Round etc to 53 bit precision */
LRound_To_53:
	movl	%esi,%ecx
	andl	CW_RC,%ecx
	cmpl	RC_RND,%ecx
	je	LRound_nearest_53

	cmpl	RC_CHOP,%ecx
	je	LCheck_truncate_53

	cmpl	RC_UP,%ecx		/* Towards +infinity */
	je	LUp_53

	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
	je	LDown_53

#ifdef PARANOID
	jmp	L_bugged_round53
#endif /* PARANOID */

LUp_53:
	cmpb	SIGN_POS,PARAM5
	jne	LCheck_truncate_53	/* If negative then  up==truncate */

	jmp	LCheck_53_round_up

LDown_53:
	cmpb	SIGN_POS,PARAM5
	je	LCheck_truncate_53	/* If positive then  down==truncate */

LCheck_53_round_up:
	/* Round up only if any of the low 11 bits or the extension is set */
	movl	%ebx,%ecx
	andl	$0x000007ff,%ecx
	orl	%edx,%ecx
	jnz	LDo_53_round_up
	jmp	L_Re_normalise

LRound_nearest_53:
	/* Do rounding of the 53rd bit if needed (nearest or even) */
	movl	%ebx,%ecx
	andl	$0x000007ff,%ecx
	cmpl	$0x00000400,%ecx
	jc	LCheck_truncate_53	/* less than half, no increment needed */

	jnz	LGreater_Half_53	/* greater than half, increment needed */

	/* Possibly half, we need to check the ls bits */
	orl	%edx,%edx
	jnz	LGreater_Half_53	/* greater than half, increment needed */

	/* Exactly half, increment only if 53rd bit is 1 (round to even) */
	testl	$0x00000800,%ebx
	jz	LTruncate_53

LGreater_Half_53:	/* Rounding: increment at the 53rd bit */
LDo_53_round_up:
	movb	LOST_UP,FPU_bits_lost
	andl	$0xfffff800,%ebx	/* Truncate to 53 bits */
	addl	$0x00000800,%ebx
	adcl	$0,%eax			/* Carry propagates into the high word */
	jmp	LCheck_Round_Overflow

LCheck_truncate_53:
	movl	%ebx,%ecx
	andl	$0x000007ff,%ecx
	orl	%edx,%ecx
	jz	L_Re_normalise

LTruncate_53:
	movb	LOST_DOWN,FPU_bits_lost
	andl	$0xfffff800,%ebx	/* Truncate to 53 bits */
	jmp	L_Re_normalise


/* Round etc to 64 bit precision */
LRound_To_64:
	movl	%esi,%ecx
	andl	CW_RC,%ecx
	cmpl	RC_RND,%ecx
	je	LRound_nearest_64

	cmpl	RC_CHOP,%ecx
	je	LCheck_truncate_64

	cmpl	RC_UP,%ecx		/* Towards +infinity */
	je	LUp_64

	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
	je	LDown_64

#ifdef PARANOID
	jmp	L_bugged_round64
#endif /* PARANOID */

LUp_64:
	cmpb	SIGN_POS,PARAM5
	jne	LCheck_truncate_64	/* If negative then  up==truncate */

	orl	%edx,%edx		/* Any extension bits mean inexact */
	jnz	LDo_64_round_up
	jmp	L_Re_normalise

LDown_64:
	cmpb	SIGN_POS,PARAM5
	je	LCheck_truncate_64	/* If positive then  down==truncate */

	orl	%edx,%edx
	jnz	LDo_64_round_up
	jmp	L_Re_normalise

LRound_nearest_64:
	cmpl	$0x80000000,%edx
	jc	LCheck_truncate_64	/* Extension < half an ls bit */

	jne	LDo_64_round_up		/* Extension > half an ls bit */

	/* Now test for round-to-even */
	testb	$1,%bl
	jz	LCheck_truncate_64

LDo_64_round_up:
	movb	LOST_UP,FPU_bits_lost
	addl	$1,%ebx
	adcl	$0,%eax

LCheck_Round_Overflow:
	jnc	L_Re_normalise

	/* Overflow, adjust the result (significand to 1.0) */
	rcrl	$1,%eax			/* Shift the carry back into the msb */
	rcrl	$1,%ebx
	incw	EXP(%edi)
	jmp	L_Re_normalise

LCheck_truncate_64:
	orl	%edx,%edx
	jz	L_Re_normalise

LTruncate_64:
	movb	LOST_DOWN,FPU_bits_lost

L_Re_normalise:
	testb	$0xff,FPU_denormal
	jnz	Normalise_result

L_Normalised:
	movl	TAG_Valid,%edx

L_deNormalised:
	cmpb	LOST_UP,FPU_bits_lost
	je	L_precision_lost_up

	cmpb	LOST_DOWN,FPU_bits_lost
	je	L_precision_lost_down

L_no_precision_loss:
	/* store the result */

L_Store_significand:
	movl	%eax,SIGH(%edi)
	movl	%ebx,SIGL(%edi)

	cmpw	EXP_OVER,EXP(%edi)
	jge	L_overflow

	movl	%edx,%eax		/* Return value = tag (in %edx) */

	/* Convert the exponent to 80x87 form. */
	addw	EXTENDED_Ebias,EXP(%edi)
	andw	$0x7fff,EXP(%edi)

fpu_reg_round_signed_special_exit:

	cmpb	SIGN_POS,PARAM5
	je	fpu_reg_round_special_exit

	orw	$0x8000,EXP(%edi)	/* Negative sign for the result. */

fpu_reg_round_special_exit:

#ifndef NON_REENTRANT_FPU
	popl	%ebx		/* adjust the stack pointer */
#endif /* NON_REENTRANT_FPU */

fpu_Arith_exit:
	popl	%ebx
	popl	%edi
	popl	%esi
	leave
	RET


/*
 * Set the FPU status flags to represent precision loss due to
 * round-up.
 */
L_precision_lost_up:
	push	%edx
	push	%eax
	call	set_precision_flag_up
	popl	%eax
	popl	%edx
	jmp	L_no_precision_loss

/*
 * Set the FPU status flags to represent precision loss due to
 * truncation.
 */
L_precision_lost_down:
	push	%edx
	push	%eax
	call	set_precision_flag_down
	popl	%eax
	popl	%edx
	jmp	L_no_precision_loss


/*
 * The number is a denormal (which might get rounded up to a normal)
 * Shift the number right the required number of bits, which will
 * have to be undone later...
 */
L_Make_denorm:
	/* The action to be taken depends upon whether the underflow
	   exception is masked */
	testb	CW_Underflow,%cl		/* Underflow mask. */
	jz	Unmasked_underflow		/* Do not make a denormal. */

	movb	DENORMAL,FPU_denormal

	pushl	%ecx		/* Save */
	movw	EXP_UNDER+1,%cx
	subw	EXP(%edi),%cx	/* %cx = required right-shift count */

	cmpw	$64,%cx		/* shrd only works for 0..31 bits */
	jnc	Denorm_shift_more_than_63

	cmpw	$32,%cx		/* shrd only works for 0..31 bits */
	jnc	Denorm_shift_more_than_32

/*
 * We got here without jumps by assuming that the most common requirement
 *  is for a small de-normalising shift.
 * Shift by [1..31] bits
 */
	addw	%cx,EXP(%edi)
	orl	%edx,%edx	/* extension */
	setne	%ch		/* Save whether %edx is non-zero */
	xorl	%edx,%edx
	shrd	%cl,%ebx,%edx
	shrd	%cl,%eax,%ebx
	shr	%cl,%eax
	orb	%ch,%dl		/* Sticky bit: shifted-out bits stay non-zero */
	popl	%ecx
	jmp	Denorm_done

/* Shift by [32..63] bits */
Denorm_shift_more_than_32:
	addw	%cx,EXP(%edi)
	subb	$32,%cl
	orl	%edx,%edx
	setne	%ch
	orb	%ch,%bl
	xorl	%edx,%edx
	shrd	%cl,%ebx,%edx
	shrd	%cl,%eax,%ebx
	shr	%cl,%eax
	orl	%edx,%edx		/* test these 32 bits */
	setne	%cl
	orb	%ch,%bl
	orb	%cl,%bl
	movl	%ebx,%edx		/* Move the result down one word */
	movl	%eax,%ebx
	xorl	%eax,%eax
	popl	%ecx
	jmp	Denorm_done

/* Shift by [64..) bits */
Denorm_shift_more_than_63:
	cmpw	$64,%cx
	jne	Denorm_shift_more_than_64

/* Exactly 64 bit shift */
	addw	%cx,EXP(%edi)
	xorl	%ecx,%ecx
	orl	%edx,%edx
	setne	%cl
	orl	%ebx,%ebx
	setne	%ch
	orb	%ch,%cl
	orb	%cl,%al		/* Collapse all discarded bits into one sticky bit */
	movl	%eax,%edx
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	popl	%ecx
	jmp	Denorm_done

Denorm_shift_more_than_64:
	movw	EXP_UNDER+1,EXP(%edi)
/* This is easy, %eax must be non-zero, so.. */
	movl	$1,%edx
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	popl	%ecx
	jmp	Denorm_done


Unmasked_underflow:
	movb	UNMASKED_UNDERFLOW,FPU_denormal
	jmp	Denorm_done


/* Undo the de-normalisation. */
Normalise_result:
	cmpb	UNMASKED_UNDERFLOW,FPU_denormal
	je	Signal_underflow

/* The number must be a denormal if we got here. */
#ifdef PARANOID
	/* But check it... just in case. */
	cmpw	EXP_UNDER+1,EXP(%edi)
	jne	L_norm_bugged
#endif /* PARANOID */

#ifdef PECULIAR_486
	/*
	 * This implements a special feature of 80486 behaviour.
	 * Underflow will be signalled even if the number is
	 * not a denormal after rounding.
	 * This difference occurs only for masked underflow, and not
	 * in the unmasked case.
	 * Actual 80486 behaviour differs from this in some circumstances.
	 */
	orl	%eax,%eax		/* ms bits */
	js	LPseudoDenormal		/* Will be masked underflow */
#else
	orl	%eax,%eax		/* ms bits */
	js	L_Normalised		/* No longer a denormal */
#endif /* PECULIAR_486 */

	jnz	LDenormal_adj_exponent

	orl	%ebx,%ebx
	jz	L_underflow_to_zero	/* The contents are zero */

LDenormal_adj_exponent:
	decw	EXP(%edi)

LPseudoDenormal:
	testb	$0xff,FPU_bits_lost	/* bits lost == underflow */
	movl	TAG_Special,%edx
	jz	L_deNormalised

	/* There must be a masked underflow */
	push	%eax
	pushl	EX_Underflow
	call	EXCEPTION
	popl	%eax
	popl	%eax
	movl	TAG_Special,%edx
	jmp	L_deNormalised


/*
 * The operations resulted in a number too small to represent.
 * Masked response.
 */
L_underflow_to_zero:
	push	%eax
	call	set_precision_flag_down
	popl	%eax

	push	%eax
	pushl	EX_Underflow
	call	EXCEPTION
	popl	%eax
	popl	%eax

/* Reduce the exponent to EXP_UNDER */
	movw	EXP_UNDER,EXP(%edi)
	movl	TAG_Zero,%edx
	jmp	L_Store_significand


/* The operations resulted in a number too large to represent. */
L_overflow:
	addw	EXTENDED_Ebias,EXP(%edi)	/* Set for unmasked response. */
	push	%edi
	call	arith_overflow
	pop	%edi
	jmp	fpu_reg_round_signed_special_exit


Signal_underflow:
	/* The number may have been changed to a non-denormal */
	/* by the rounding operations. */
	cmpw	EXP_UNDER,EXP(%edi)
	jle	Do_unmasked_underflow

	jmp	L_Normalised

Do_unmasked_underflow:
	/* Increase the exponent by the magic number */
	addw	$(3*(1<<13)),EXP(%edi)
	push	%eax
	pushl	EX_Underflow
	call	EXCEPTION
	popl	%eax
	popl	%eax
	jmp	L_Normalised


#ifdef PARANOID
#ifdef PECULIAR_486
L_bugged_denorm_486:
	pushl	EX_INTERNAL|0x236
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit
#else
L_bugged_denorm:
	pushl	EX_INTERNAL|0x230
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit
#endif /* PECULIAR_486 */

L_bugged_round24:
	pushl	EX_INTERNAL|0x231
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit

L_bugged_round53:
	pushl	EX_INTERNAL|0x232
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit

L_bugged_round64:
	pushl	EX_INTERNAL|0x233
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit

L_norm_bugged:
	pushl	EX_INTERNAL|0x234
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit

L_entry_bugged:
	pushl	EX_INTERNAL|0x235
	call	EXCEPTION
	popl	%ebx
L_exception_exit:
	mov	$-1,%eax	/* -1 on internal error */
	jmp	fpu_reg_round_special_exit
#endif /* PARANOID */

SYM_FUNC_END(FPU_round)