/* xref: /OK3568_Linux_fs/kernel/arch/x86/math-emu/reg_round.S (revision 4882a59341e53eb6f0b4789bf948001014eff981) */
/* SPDX-License-Identifier: GPL-2.0 */
	.file "reg_round.S"
/*---------------------------------------------------------------------------+
 |  reg_round.S                                                              |
 |                                                                           |
 | Rounding/truncation/etc for FPU basic arithmetic functions.               |
 |                                                                           |
 | Copyright (C) 1993,1995,1997                                              |
 |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
 |                       Australia.  E-mail billm@suburbia.net               |
 |                                                                           |
 | This code has four possible entry points.                                 |
 | The following must be entered by a jmp instruction:                       |
 |   fpu_reg_round, fpu_reg_round_sqrt, and fpu_Arith_exit.                  |
 |                                                                           |
 | The FPU_round entry point is intended to be used by C code.               |
 | From C, call as:                                                          |
 |  int FPU_round(FPU_REG *arg, unsigned int extent, unsigned int control_w) |
 |                                                                           |
 |    Return value is the tag of the answer, or-ed with FPU_Exception if     |
 |    one was raised, or -1 on internal error.                               |
 |                                                                           |
 | For correct "up" and "down" rounding, the argument must have the correct  |
 | sign.                                                                     |
 |                                                                           |
 +---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------+
 | Four entry points.                                                        |
 |                                                                           |
 | Needed by both the fpu_reg_round and fpu_reg_round_sqrt entry points:     |
 |  %eax:%ebx  64 bit significand                                            |
 |  %edx       32 bit extension of the significand                           |
 |  %edi       pointer to an FPU_REG for the result to be stored             |
 |  stack      calling function must have set up a C stack frame and         |
 |             pushed %esi, %edi, and %ebx                                   |
 |                                                                           |
 | Needed just for the fpu_reg_round_sqrt entry point:                       |
 |  %cx  A control word in the same format as the FPU control word.          |
 | Otherwise, PARAM4 must give such a value.                                 |
 |                                                                           |
 |                                                                           |
 | The significand and its extension are assumed to be exact in the          |
 | following sense:                                                          |
 |   If the significand by itself is the exact result then the significand   |
 |   extension (%edx) must contain 0, otherwise the significand extension    |
 |   must be non-zero.                                                       |
 |   If the significand extension is non-zero then the significand is        |
 |   smaller than the magnitude of the correct exact result by an amount     |
 |   greater than zero and less than one ls bit of the significand.          |
 |   The significand extension is only required to have three possible       |
 |   non-zero values:                                                        |
 |       less than 0x80000000  <=> the significand is less than 1/2 an ls    |
 |                                 bit smaller than the magnitude of the     |
 |                                 true exact result.                        |
 |         exactly 0x80000000  <=> the significand is exactly 1/2 an ls bit  |
 |                                 smaller than the magnitude of the true    |
 |                                 exact result.                             |
 |    greater than 0x80000000  <=> the significand is more than 1/2 an ls    |
 |                                 bit smaller than the magnitude of the     |
 |                                 true exact result.                        |
 |                                                                           |
 +---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------+
 |  The code in this module has become quite complex, but it should handle   |
 |  all of the FPU flags which are set at this stage of the basic arithmetic |
 |  computations.                                                            |
 |  There are a few rare cases where the results are not set identically to  |
 |  a real FPU. These require a bit more thought because at this stage the   |
 |  results of the code here appear to be more consistent...                 |
 |  This may be changed in a future version.                                 |
 +---------------------------------------------------------------------------*/


#include "fpu_emu.h"
#include "exception.h"
#include "control_w.h"

/* Flags for FPU_bits_lost */
#define	LOST_DOWN	$1
#define	LOST_UP		$2

/* Flags for FPU_denormal */
#define	DENORMAL	$1
#define	UNMASKED_UNDERFLOW $2


#ifndef NON_REENTRANT_FPU
/*	Make the code re-entrant by putting
	local storage on the stack: */
#define FPU_bits_lost	(%esp)
#define FPU_denormal	1(%esp)

#else
/*	Not re-entrant, so we can gain speed by putting
	local storage in a static area: */
.data
	.align 4,0
FPU_bits_lost:
	.byte	0
FPU_denormal:
	.byte	0
#endif /* NON_REENTRANT_FPU */


.text
.globl fpu_reg_round
.globl fpu_Arith_exit

/* Entry point when called from C */
SYM_FUNC_START(FPU_round)
	pushl	%ebp
	movl	%esp,%ebp
	pushl	%esi
	pushl	%edi
	pushl	%ebx

	movl	PARAM1,%edi
	movl	SIGH(%edi),%eax
	movl	SIGL(%edi),%ebx
	movl	PARAM2,%edx

fpu_reg_round:			/* Normal entry point */
	movl	PARAM4,%ecx

#ifndef NON_REENTRANT_FPU
	/* Reserve stack bytes used by the FPU_bits_lost and
	   FPU_denormal macros above (pushed value is irrelevant). */
	pushl	%ebx		/* adjust the stack pointer */
#endif /* NON_REENTRANT_FPU */

#ifdef PARANOID
/* Cannot use this here yet */
/*	orl	%eax,%eax */
/*	jns	L_entry_bugged */
#endif /* PARANOID */

	cmpw	EXP_UNDER,EXP(%edi)
	jle	L_Make_denorm			/* The number is a de-normal */

	movb	$0,FPU_denormal			/* 0 -> not a de-normal */

Denorm_done:
	movb	$0,FPU_bits_lost		/* No bits yet lost in rounding */

	/* Keep the full control word in %esi; isolate the
	   precision-control (PC) field in %ecx. */
	movl	%ecx,%esi
	andl	CW_PC,%ecx
	cmpl	PR_64_BITS,%ecx
	je	LRound_To_64

	cmpl	PR_53_BITS,%ecx
	je	LRound_To_53

	cmpl	PR_24_BITS,%ecx
	je	LRound_To_24

#ifdef PECULIAR_486
/* With the precision control bits set to 01 "(reserved)", a real 80486
   behaves as if the precision control bits were set to 11 "64 bits" */
	cmpl	PR_RESERVED_BITS,%ecx
	je	LRound_To_64
#ifdef PARANOID
	jmp	L_bugged_denorm_486
#endif /* PARANOID */
#else
#ifdef PARANOID
	jmp	L_bugged_denorm	/* There is no bug, just a bad control word */
#endif /* PARANOID */
#endif /* PECULIAR_486 */


/* Round etc to 24 bit precision */
LRound_To_24:
	/* Isolate the rounding-control (RC) field of the control word. */
	movl	%esi,%ecx
	andl	CW_RC,%ecx
	cmpl	RC_RND,%ecx
	je	LRound_nearest_24

	cmpl	RC_CHOP,%ecx
	je	LCheck_truncate_24

	cmpl	RC_UP,%ecx		/* Towards +infinity */
	je	LUp_24

	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
	je	LDown_24

#ifdef PARANOID
	jmp	L_bugged_round24
#endif /* PARANOID */

LUp_24:
	cmpb	SIGN_POS,PARAM5
	jne	LCheck_truncate_24	/* If negative then  up==truncate */

	jmp	LCheck_24_round_up

LDown_24:
	cmpb	SIGN_POS,PARAM5
	je	LCheck_truncate_24	/* If positive then  down==truncate */

LCheck_24_round_up:
	/* Round up only if some bits below the 24-bit significand
	   (low byte of %eax, %ebx, or the extension) are set. */
	movl	%eax,%ecx
	andl	$0x000000ff,%ecx
	orl	%ebx,%ecx
	orl	%edx,%ecx
	jnz	LDo_24_round_up
	jmp	L_Re_normalise

LRound_nearest_24:
	/* Do rounding of the 24th bit if needed (nearest or even) */
	movl	%eax,%ecx
	andl	$0x000000ff,%ecx
	cmpl	$0x00000080,%ecx
	jc	LCheck_truncate_24	/* less than half, no increment needed */

	jne	LGreater_Half_24	/* greater than half, increment needed */

	/* Possibly half, we need to check the ls bits */
	orl	%ebx,%ebx
	jnz	LGreater_Half_24	/* greater than half, increment needed */

	orl	%edx,%edx
	jnz	LGreater_Half_24	/* greater than half, increment needed */

	/* Exactly half, increment only if 24th bit is 1 (round to even) */
	testl	$0x00000100,%eax
	jz	LDo_truncate_24

LGreater_Half_24:			/* Rounding: increment at the 24th bit */
LDo_24_round_up:
	andl	$0xffffff00,%eax	/* Truncate to 24 bits */
	xorl	%ebx,%ebx
	movb	LOST_UP,FPU_bits_lost
	addl	$0x00000100,%eax
	jmp	LCheck_Round_Overflow

LCheck_truncate_24:
	movl	%eax,%ecx
	andl	$0x000000ff,%ecx
	orl	%ebx,%ecx
	orl	%edx,%ecx
	jz	L_Re_normalise		/* No truncation needed */

LDo_truncate_24:
	andl	$0xffffff00,%eax	/* Truncate to 24 bits */
	xorl	%ebx,%ebx
	movb	LOST_DOWN,FPU_bits_lost
	jmp	L_Re_normalise


/* Round etc to 53 bit precision */
LRound_To_53:
	/* Isolate the rounding-control (RC) field of the control word. */
	movl	%esi,%ecx
	andl	CW_RC,%ecx
	cmpl	RC_RND,%ecx
	je	LRound_nearest_53

	cmpl	RC_CHOP,%ecx
	je	LCheck_truncate_53

	cmpl	RC_UP,%ecx		/* Towards +infinity */
	je	LUp_53

	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
	je	LDown_53

#ifdef PARANOID
	jmp	L_bugged_round53
#endif /* PARANOID */

LUp_53:
	cmpb	SIGN_POS,PARAM5
	jne	LCheck_truncate_53	/* If negative then  up==truncate */

	jmp	LCheck_53_round_up

LDown_53:
	cmpb	SIGN_POS,PARAM5
	je	LCheck_truncate_53	/* If positive then  down==truncate */

LCheck_53_round_up:
	/* Round up only if some bits below the 53-bit significand
	   (low 11 bits of %ebx or the extension) are set. */
	movl	%ebx,%ecx
	andl	$0x000007ff,%ecx
	orl	%edx,%ecx
	jnz	LDo_53_round_up
	jmp	L_Re_normalise

LRound_nearest_53:
	/* Do rounding of the 53rd bit if needed (nearest or even) */
	movl	%ebx,%ecx
	andl	$0x000007ff,%ecx
	cmpl	$0x00000400,%ecx
	jc	LCheck_truncate_53	/* less than half, no increment needed */

	jnz	LGreater_Half_53	/* greater than half, increment needed */

	/* Possibly half, we need to check the ls bits */
	orl	%edx,%edx
	jnz	LGreater_Half_53	/* greater than half, increment needed */

	/* Exactly half, increment only if 53rd bit is 1 (round to even) */
	testl	$0x00000800,%ebx
	jz	LTruncate_53

LGreater_Half_53:			/* Rounding: increment at the 53rd bit */
LDo_53_round_up:
	movb	LOST_UP,FPU_bits_lost
	andl	$0xfffff800,%ebx	/* Truncate to 53 bits */
	addl	$0x00000800,%ebx
	adcl	$0,%eax			/* carry into the high word */
	jmp	LCheck_Round_Overflow

LCheck_truncate_53:
	movl	%ebx,%ecx
	andl	$0x000007ff,%ecx
	orl	%edx,%ecx
	jz	L_Re_normalise

LTruncate_53:
	movb	LOST_DOWN,FPU_bits_lost
	andl	$0xfffff800,%ebx	/* Truncate to 53 bits */
	jmp	L_Re_normalise


/* Round etc to 64 bit precision */
LRound_To_64:
	/* Isolate the rounding-control (RC) field of the control word. */
	movl	%esi,%ecx
	andl	CW_RC,%ecx
	cmpl	RC_RND,%ecx
	je	LRound_nearest_64

	cmpl	RC_CHOP,%ecx
	je	LCheck_truncate_64

	cmpl	RC_UP,%ecx		/* Towards +infinity */
	je	LUp_64

	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
	je	LDown_64

#ifdef PARANOID
	jmp	L_bugged_round64
#endif /* PARANOID */

LUp_64:
	cmpb	SIGN_POS,PARAM5
	jne	LCheck_truncate_64	/* If negative then  up==truncate */

	orl	%edx,%edx
	jnz	LDo_64_round_up
	jmp	L_Re_normalise

LDown_64:
	cmpb	SIGN_POS,PARAM5
	je	LCheck_truncate_64	/* If positive then  down==truncate */

	orl	%edx,%edx
	jnz	LDo_64_round_up
	jmp	L_Re_normalise

LRound_nearest_64:
	/* At 64 bits the extension %edx alone decides nearest-or-even. */
	cmpl	$0x80000000,%edx
	jc	LCheck_truncate_64

	jne	LDo_64_round_up

	/* Now test for round-to-even */
	testb	$1,%bl
	jz	LCheck_truncate_64

LDo_64_round_up:
	movb	LOST_UP,FPU_bits_lost
	addl	$1,%ebx
	adcl	$0,%eax

LCheck_Round_Overflow:
	/* CF here comes from the increment above (carry out of bit 63). */
	jnc	L_Re_normalise

	/* Overflow, adjust the result (significand to 1.0) */
	rcrl	$1,%eax
	rcrl	$1,%ebx
	incw	EXP(%edi)
	jmp	L_Re_normalise

LCheck_truncate_64:
	orl	%edx,%edx
	jz	L_Re_normalise

LTruncate_64:
	movb	LOST_DOWN,FPU_bits_lost

L_Re_normalise:
	testb	$0xff,FPU_denormal
	jnz	Normalise_result

L_Normalised:
	movl	TAG_Valid,%edx

L_deNormalised:
	cmpb	LOST_UP,FPU_bits_lost
	je	L_precision_lost_up

	cmpb	LOST_DOWN,FPU_bits_lost
	je	L_precision_lost_down

L_no_precision_loss:
	/* store the result */

L_Store_significand:
	movl	%eax,SIGH(%edi)
	movl	%ebx,SIGL(%edi)

	cmpw	EXP_OVER,EXP(%edi)
	jge	L_overflow

	movl	%edx,%eax		/* return value = tag */

	/* Convert the exponent to 80x87 form. */
	addw	EXTENDED_Ebias,EXP(%edi)
	andw	$0x7fff,EXP(%edi)

fpu_reg_round_signed_special_exit:
	/* Apply the sign given in PARAM5 to the stored result, then exit. */

	cmpb	SIGN_POS,PARAM5
	je	fpu_reg_round_special_exit

	orw	$0x8000,EXP(%edi)	/* Negative sign for the result. */

fpu_reg_round_special_exit:

#ifndef NON_REENTRANT_FPU
	/* Release the FPU_bits_lost/FPU_denormal stack bytes. */
	popl	%ebx		/* adjust the stack pointer */
#endif /* NON_REENTRANT_FPU */

fpu_Arith_exit:
	popl	%ebx
	popl	%edi
	popl	%esi
	leave
	RET


/*
 * Set the FPU status flags to represent precision loss due to
 * round-up.
 */
L_precision_lost_up:
	push	%edx
	push	%eax
	call	set_precision_flag_up
	popl	%eax
	popl	%edx
	jmp	L_no_precision_loss

/*
 * Set the FPU status flags to represent precision loss due to
 * truncation.
 */
L_precision_lost_down:
	push	%edx
	push	%eax
	call	set_precision_flag_down
	popl	%eax
	popl	%edx
	jmp	L_no_precision_loss


/*
 * The number is a denormal (which might get rounded up to a normal)
 * Shift the number right the required number of bits, which will
 * have to be undone later...
 */
L_Make_denorm:
	/* The action to be taken depends upon whether the underflow
	   exception is masked */
	testb	CW_Underflow,%cl		/* Underflow mask. */
	jz	Unmasked_underflow		/* Do not make a denormal. */

	movb	DENORMAL,FPU_denormal

	pushl	%ecx		/* Save */
	movw	EXP_UNDER+1,%cx
	subw	EXP(%edi),%cx	/* %cx = number of bits to shift right */

	cmpw	$64,%cx	/* shrd only works for 0..31 bits */
	jnc	Denorm_shift_more_than_63

	cmpw	$32,%cx	/* shrd only works for 0..31 bits */
	jnc	Denorm_shift_more_than_32

/*
 * We got here without jumps by assuming that the most common requirement
 *   is for a small de-normalising shift.
 * Shift by [1..31] bits
 */
	addw	%cx,EXP(%edi)
	orl	%edx,%edx	/* extension */
	setne	%ch		/* Save whether %edx is non-zero */
	xorl	%edx,%edx
	shrd	%cl,%ebx,%edx
	shrd	%cl,%eax,%ebx
	shr	%cl,%eax
	orb	%ch,%dl		/* sticky: keep track of the shifted-out bits */
	popl	%ecx
	jmp	Denorm_done

/* Shift by [32..63] bits */
Denorm_shift_more_than_32:
	addw	%cx,EXP(%edi)
	subb	$32,%cl
	orl	%edx,%edx
	setne	%ch
	orb	%ch,%bl
	xorl	%edx,%edx
	shrd	%cl,%ebx,%edx
	shrd	%cl,%eax,%ebx
	shr	%cl,%eax
	orl	%edx,%edx		/* test these 32 bits */
	setne	%cl
	orb	%ch,%bl
	orb	%cl,%bl
	movl	%ebx,%edx
	movl	%eax,%ebx
	xorl	%eax,%eax
	popl	%ecx
	jmp	Denorm_done

/* Shift by [64..) bits */
Denorm_shift_more_than_63:
	cmpw	$64,%cx
	jne	Denorm_shift_more_than_64

/* Exactly 64 bit shift */
	addw	%cx,EXP(%edi)
	xorl	%ecx,%ecx
	orl	%edx,%edx
	setne	%cl
	orl	%ebx,%ebx
	setne	%ch
	orb	%ch,%cl
	orb	%cl,%al
	movl	%eax,%edx
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	popl	%ecx
	jmp	Denorm_done

Denorm_shift_more_than_64:
	movw	EXP_UNDER+1,EXP(%edi)
/* This is easy, %eax must be non-zero, so.. */
	movl	$1,%edx
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	popl	%ecx
	jmp	Denorm_done


Unmasked_underflow:
	movb	UNMASKED_UNDERFLOW,FPU_denormal
	jmp	Denorm_done


/* Undo the de-normalisation. */
Normalise_result:
	cmpb	UNMASKED_UNDERFLOW,FPU_denormal
	je	Signal_underflow

/* The number must be a denormal if we got here. */
#ifdef PARANOID
	/* But check it... just in case. */
	cmpw	EXP_UNDER+1,EXP(%edi)
	jne	L_norm_bugged
#endif /* PARANOID */

#ifdef PECULIAR_486
	/*
	 * This implements a special feature of 80486 behaviour.
	 * Underflow will be signalled even if the number is
	 * not a denormal after rounding.
	 * This difference occurs only for masked underflow, and not
	 * in the unmasked case.
	 * Actual 80486 behaviour differs from this in some circumstances.
	 */
	orl	%eax,%eax		/* ms bits */
	js	LPseudoDenormal		/* Will be masked underflow */
#else
	orl	%eax,%eax		/* ms bits */
	js	L_Normalised		/* No longer a denormal */
#endif /* PECULIAR_486 */

	jnz	LDenormal_adj_exponent

	orl	%ebx,%ebx
	jz	L_underflow_to_zero	/* The contents are zero */

LDenormal_adj_exponent:
	decw	EXP(%edi)

LPseudoDenormal:
	testb	$0xff,FPU_bits_lost	/* bits lost == underflow */
	movl	TAG_Special,%edx
	jz	L_deNormalised

	/* There must be a masked underflow */
	push	%eax
	pushl	EX_Underflow
	call	EXCEPTION
	popl	%eax
	popl	%eax
	movl	TAG_Special,%edx
	jmp	L_deNormalised


/*
 * The operations resulted in a number too small to represent.
 * Masked response.
 */
L_underflow_to_zero:
	push	%eax
	call	set_precision_flag_down
	popl	%eax

	push	%eax
	pushl	EX_Underflow
	call	EXCEPTION
	popl	%eax
	popl	%eax

/* Reduce the exponent to EXP_UNDER */
	movw	EXP_UNDER,EXP(%edi)
	movl	TAG_Zero,%edx
	jmp	L_Store_significand


/* The operations resulted in a number too large to represent. */
L_overflow:
	addw	EXTENDED_Ebias,EXP(%edi)	/* Set for unmasked response. */
	push	%edi
	call	arith_overflow
	pop	%edi
	jmp	fpu_reg_round_signed_special_exit


Signal_underflow:
	/* The number may have been changed to a non-denormal */
	/* by the rounding operations. */
	cmpw	EXP_UNDER,EXP(%edi)
	jle	Do_unmasked_underflow

	jmp	L_Normalised

Do_unmasked_underflow:
	/* Increase the exponent by the magic number */
	addw	$(3*(1<<13)),EXP(%edi)
	push	%eax
	pushl	EX_Underflow
	call	EXCEPTION
	popl	%eax
	popl	%eax
	jmp	L_Normalised


#ifdef PARANOID
#ifdef PECULIAR_486
L_bugged_denorm_486:
	pushl	EX_INTERNAL|0x236
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit
#else
L_bugged_denorm:
	pushl	EX_INTERNAL|0x230
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit
#endif /* PECULIAR_486 */

L_bugged_round24:
	pushl	EX_INTERNAL|0x231
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit

L_bugged_round53:
	pushl	EX_INTERNAL|0x232
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit

L_bugged_round64:
	pushl	EX_INTERNAL|0x233
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit

L_norm_bugged:
	pushl	EX_INTERNAL|0x234
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit

L_entry_bugged:
	pushl	EX_INTERNAL|0x235
	call	EXCEPTION
	popl	%ebx
L_exception_exit:
	mov	$-1,%eax	/* internal-error return value */
	jmp	fpu_reg_round_special_exit
#endif /* PARANOID */

SYM_FUNC_END(FPU_round)