/* SPDX-License-Identifier: GPL-2.0 */
	.file	"reg_round.S"
/*---------------------------------------------------------------------------+
 |  reg_round.S                                                              |
 |                                                                           |
 |  Rounding/truncation/etc for FPU basic arithmetic functions.              |
 |                                                                           |
 | Copyright (C) 1993,1995,1997                                              |
 |       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,                      |
 |       Australia.  E-mail billm@suburbia.net                               |
 |                                                                           |
 | This code has four possible entry points.                                 |
 | The following must be entered by a jmp instruction:                       |
 |   fpu_reg_round, fpu_reg_round_sqrt, and fpu_Arith_exit.                  |
 |                                                                           |
 | NOTE(review): only fpu_reg_round and fpu_Arith_exit are defined in this   |
 | file as currently visible; fpu_reg_round_sqrt is mentioned above but not  |
 | present here -- confirm against the rest of the math-emu sources.         |
 |                                                                           |
 | The FPU_round entry point is intended to be used by C code.               |
 | From C, call as:                                                          |
 | int FPU_round(FPU_REG *arg, unsigned int extent, unsigned int control_w)  |
 |                                                                           |
 | Return value is the tag of the answer, or-ed with FPU_Exception if        |
 | one was raised, or -1 on internal error.                                  |
 |                                                                           |
 | For correct "up" and "down" rounding, the argument must have the correct  |
 | sign.                                                                     |
 |                                                                           |
 +---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------+
 | Four entry points.                                                        |
 |                                                                           |
 | Needed by both the fpu_reg_round and fpu_reg_round_sqrt entry points:     |
 |  %eax:%ebx  64 bit significand                                            |
 |  %edx       32 bit extension of the significand                           |
 |  %edi       pointer to an FPU_REG for the result to be stored             |
 |  stack      calling function must have set up a C stack frame and         |
 |             pushed %esi, %edi, and %ebx                                   |
 |                                                                           |
 | Needed just for the fpu_reg_round_sqrt entry point:                       |
 |  %cx  A control word in the same format as the FPU control word.          |
 | Otherwise, PARAM4 must give such a value.                                 |
 |                                                                           |
 |                                                                           |
 | The significand and its extension are assumed to be exact in the          |
 | following sense:                                                          |
 |   If the significand by itself is the exact result then the significand   |
 |   extension (%edx) must contain 0, otherwise the significand extension    |
 |   must be non-zero.                                                       |
 |   If the significand extension is non-zero then the significand is        |
 |   smaller than the magnitude of the correct exact result by an amount     |
 |   greater than zero and less than one ls bit of the significand.          |
 |   The significand extension is only required to have three possible       |
 |   non-zero values:                                                        |
 |       less than 0x80000000  <=> the significand is less than 1/2 an ls    |
 |                                 bit smaller than the magnitude of the     |
 |                                 true exact result.                        |
 |         exactly 0x80000000  <=> the significand is exactly 1/2 an ls bit  |
 |                                 smaller than the magnitude of the true    |
 |                                 exact result.                             |
 |    greater than 0x80000000  <=> the significand is more than 1/2 an ls    |
 |                                 bit smaller than the magnitude of the     |
 |                                 true exact result.                        |
 |                                                                           |
 +---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------+
 | The code in this module has become quite complex, but it should handle    |
 | all of the FPU flags which are set at this stage of the basic arithmetic  |
 | computations.                                                             |
 | There are a few rare cases where the results are not set identically to   |
 | a real FPU. These require a bit more thought because at this stage the    |
 | results of the code here appear to be more consistent...                  |
 | This may be changed in a future version.                                  |
 +---------------------------------------------------------------------------*/

#include "fpu_emu.h"
#include "exception.h"
#include "control_w.h"

/* Flags for FPU_bits_lost */
#define	LOST_DOWN	$1
#define	LOST_UP		$2

/* Flags for FPU_denormal */
#define	DENORMAL	$1
#define	UNMASKED_UNDERFLOW $2


#ifndef NON_REENTRANT_FPU
/* Make the code re-entrant by putting
   local storage on the stack: */
#define FPU_bits_lost	(%esp)
#define FPU_denormal	1(%esp)

#else
/* Not re-entrant, so we can gain speed by putting
   local storage in a static area: */
.data
	.align 4,0
FPU_bits_lost:
	.byte	0
FPU_denormal:
	.byte	0
#endif /* NON_REENTRANT_FPU */


.text
.globl fpu_reg_round
.globl fpu_Arith_exit

/* Entry point when called from C */
SYM_FUNC_START(FPU_round)
	pushl	%ebp
	movl	%esp,%ebp
	pushl	%esi
	pushl	%edi
	pushl	%ebx

	movl	PARAM1,%edi		/* %edi = destination FPU_REG */
	movl	SIGH(%edi),%eax		/* %eax:%ebx = 64 bit significand */
	movl	SIGL(%edi),%ebx
	movl	PARAM2,%edx		/* %edx = significand extension */

fpu_reg_round:			/* Normal entry point */
	movl	PARAM4,%ecx		/* %ecx = control word */

#ifndef NON_REENTRANT_FPU
	pushl	%ebx		/* adjust the stack pointer */
#endif /* NON_REENTRANT_FPU */

#ifdef PARANOID
/* Cannot use this here yet */
/*	orl	%eax,%eax */
/*	jns	L_entry_bugged */
#endif /* PARANOID */

	cmpw	EXP_UNDER,EXP(%edi)
	jle	L_Make_denorm			/* The number is a de-normal */

	movb	$0,FPU_denormal			/* 0 -> not a de-normal */

Denorm_done:
	movb	$0,FPU_bits_lost		/* No bits yet lost in rounding */

	/* Keep a full copy of the control word in %esi; %ecx is used
	   as scratch for the precision-control dispatch below. */
	movl	%ecx,%esi
	andl	CW_PC,%ecx
	cmpl	PR_64_BITS,%ecx
	je	LRound_To_64

	cmpl	PR_53_BITS,%ecx
	je	LRound_To_53

	cmpl	PR_24_BITS,%ecx
	je	LRound_To_24

#ifdef PECULIAR_486
/* With the precision control bits set to 01 "(reserved)", a real 80486
   behaves as if the precision control bits were set to 11 "64 bits" */
	cmpl	PR_RESERVED_BITS,%ecx
	je	LRound_To_64
#ifdef PARANOID
	jmp	L_bugged_denorm_486
#endif /* PARANOID */
#else
#ifdef PARANOID
	jmp	L_bugged_denorm	/* There is no bug, just a bad control word */
#endif /* PARANOID */
#endif /* PECULIAR_486 */


/* Round etc to 24 bit precision */
LRound_To_24:
	movl	%esi,%ecx
	andl	CW_RC,%ecx		/* Extract the rounding-control field */
	cmpl	RC_RND,%ecx
	je	LRound_nearest_24

	cmpl	RC_CHOP,%ecx
	je	LCheck_truncate_24

	cmpl	RC_UP,%ecx		/* Towards +infinity */
	je	LUp_24

	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
	je	LDown_24

#ifdef PARANOID
	jmp	L_bugged_round24
#endif /* PARANOID */

LUp_24:
	cmpb	SIGN_POS,PARAM5
	jne	LCheck_truncate_24	/* If negative then  up==truncate */

	jmp	LCheck_24_round_up

LDown_24:
	cmpb	SIGN_POS,PARAM5
	je	LCheck_truncate_24	/* If positive then  down==truncate */

LCheck_24_round_up:
	/* Round up only if any bit below the 24-bit significand is set */
	movl	%eax,%ecx
	andl	$0x000000ff,%ecx
	orl	%ebx,%ecx
	orl	%edx,%ecx
	jnz	LDo_24_round_up
	jmp	L_Re_normalise

LRound_nearest_24:
	/* Do rounding of the 24th bit if needed (nearest or even) */
	movl	%eax,%ecx
	andl	$0x000000ff,%ecx
	cmpl	$0x00000080,%ecx
	jc	LCheck_truncate_24	/* less than half, no increment needed */

	jne	LGreater_Half_24	/* greater than half, increment needed */

	/* Possibly half, we need to check the ls bits */
	orl	%ebx,%ebx
	jnz	LGreater_Half_24	/* greater than half, increment needed */

	orl	%edx,%edx
	jnz	LGreater_Half_24	/* greater than half, increment needed */

	/* Exactly half, increment only if 24th bit is 1 (round to even) */
	testl	$0x00000100,%eax
	jz	LDo_truncate_24

LGreater_Half_24:	/* Rounding: increment at the 24th bit */
LDo_24_round_up:
	andl	$0xffffff00,%eax	/* Truncate to 24 bits */
	xorl	%ebx,%ebx
	movb	LOST_UP,FPU_bits_lost
	addl	$0x00000100,%eax
	jmp	LCheck_Round_Overflow

LCheck_truncate_24:
	movl	%eax,%ecx
	andl	$0x000000ff,%ecx
	orl	%ebx,%ecx
	orl	%edx,%ecx
	jz	L_Re_normalise		/* No truncation needed */

LDo_truncate_24:
	andl	$0xffffff00,%eax	/* Truncate to 24 bits */
	xorl	%ebx,%ebx
	movb	LOST_DOWN,FPU_bits_lost
	jmp	L_Re_normalise


/* Round etc to 53 bit precision */
LRound_To_53:
	movl	%esi,%ecx
	andl	CW_RC,%ecx
	cmpl	RC_RND,%ecx
	je	LRound_nearest_53

	cmpl	RC_CHOP,%ecx
	je	LCheck_truncate_53

	cmpl	RC_UP,%ecx		/* Towards +infinity */
	je	LUp_53

	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
	je	LDown_53

#ifdef PARANOID
	jmp	L_bugged_round53
#endif /* PARANOID */

LUp_53:
	cmpb	SIGN_POS,PARAM5
	jne	LCheck_truncate_53	/* If negative then  up==truncate */

	jmp	LCheck_53_round_up

LDown_53:
	cmpb	SIGN_POS,PARAM5
	je	LCheck_truncate_53	/* If positive then  down==truncate */

LCheck_53_round_up:
	/* Round up only if any of the low 11 bits or the extension is set */
	movl	%ebx,%ecx
	andl	$0x000007ff,%ecx
	orl	%edx,%ecx
	jnz	LDo_53_round_up
	jmp	L_Re_normalise

LRound_nearest_53:
	/* Do rounding of the 53rd bit if needed (nearest or even) */
	movl	%ebx,%ecx
	andl	$0x000007ff,%ecx
	cmpl	$0x00000400,%ecx
	jc	LCheck_truncate_53	/* less than half, no increment needed */

	jnz	LGreater_Half_53	/* greater than half, increment needed */

	/* Possibly half, we need to check the ls bits */
	orl	%edx,%edx
	jnz	LGreater_Half_53	/* greater than half, increment needed */

	/* Exactly half, increment only if 53rd bit is 1 (round to even) */
	testl	$0x00000800,%ebx
	jz	LTruncate_53

LGreater_Half_53:	/* Rounding: increment at the 53rd bit */
LDo_53_round_up:
	movb	LOST_UP,FPU_bits_lost
	andl	$0xfffff800,%ebx	/* Truncate to 53 bits */
	addl	$0x00000800,%ebx
	adcl	$0,%eax			/* Carry propagates into the high word */
	jmp	LCheck_Round_Overflow

LCheck_truncate_53:
	movl	%ebx,%ecx
	andl	$0x000007ff,%ecx
	orl	%edx,%ecx
	jz	L_Re_normalise

LTruncate_53:
	movb	LOST_DOWN,FPU_bits_lost
	andl	$0xfffff800,%ebx	/* Truncate to 53 bits */
	jmp	L_Re_normalise


/* Round etc to 64 bit precision */
LRound_To_64:
	movl	%esi,%ecx
	andl	CW_RC,%ecx
	cmpl	RC_RND,%ecx
	je	LRound_nearest_64

	cmpl	RC_CHOP,%ecx
	je	LCheck_truncate_64

	cmpl	RC_UP,%ecx		/* Towards +infinity */
	je	LUp_64

	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
	je	LDown_64

#ifdef PARANOID
	jmp	L_bugged_round64
#endif /* PARANOID */

LUp_64:
	cmpb	SIGN_POS,PARAM5
	jne	LCheck_truncate_64	/* If negative then  up==truncate */

	orl	%edx,%edx		/* Any extension bits mean inexact */
	jnz	LDo_64_round_up
	jmp	L_Re_normalise

LDown_64:
	cmpb	SIGN_POS,PARAM5
	je	LCheck_truncate_64	/* If positive then  down==truncate */

	orl	%edx,%edx
	jnz	LDo_64_round_up
	jmp	L_Re_normalise

LRound_nearest_64:
	cmpl	$0x80000000,%edx
	jc	LCheck_truncate_64	/* Extension < half an ls bit */

	jne	LDo_64_round_up		/* Extension > half an ls bit */

	/* Now test for round-to-even */
	testb	$1,%bl
	jz	LCheck_truncate_64

LDo_64_round_up:
	movb	LOST_UP,FPU_bits_lost
	addl	$1,%ebx
	adcl	$0,%eax

LCheck_Round_Overflow:
	jnc	L_Re_normalise

	/* Overflow, adjust the result (significand to 1.0) */
	rcrl	$1,%eax			/* Shift the carry back into the msb */
	rcrl	$1,%ebx
	incw	EXP(%edi)
	jmp	L_Re_normalise

LCheck_truncate_64:
	orl	%edx,%edx
	jz	L_Re_normalise

LTruncate_64:
	movb	LOST_DOWN,FPU_bits_lost

L_Re_normalise:
	testb	$0xff,FPU_denormal
	jnz	Normalise_result

L_Normalised:
	movl	TAG_Valid,%edx

L_deNormalised:
	cmpb	LOST_UP,FPU_bits_lost
	je	L_precision_lost_up

	cmpb	LOST_DOWN,FPU_bits_lost
	je	L_precision_lost_down

L_no_precision_loss:
	/* store the result */

L_Store_significand:
	movl	%eax,SIGH(%edi)
	movl	%ebx,SIGL(%edi)

	cmpw	EXP_OVER,EXP(%edi)
	jge	L_overflow

	movl	%edx,%eax		/* Return value = tag (in %edx) */

	/* Convert the exponent to 80x87 form. */
	addw	EXTENDED_Ebias,EXP(%edi)
	andw	$0x7fff,EXP(%edi)

fpu_reg_round_signed_special_exit:

	cmpb	SIGN_POS,PARAM5
	je	fpu_reg_round_special_exit

	orw	$0x8000,EXP(%edi)	/* Negative sign for the result. */

fpu_reg_round_special_exit:

#ifndef NON_REENTRANT_FPU
	popl	%ebx		/* adjust the stack pointer */
#endif /* NON_REENTRANT_FPU */

fpu_Arith_exit:
	popl	%ebx
	popl	%edi
	popl	%esi
	leave
	RET


/*
 * Set the FPU status flags to represent precision loss due to
 * round-up.
 */
L_precision_lost_up:
	push	%edx
	push	%eax
	call	set_precision_flag_up
	popl	%eax
	popl	%edx
	jmp	L_no_precision_loss

/*
 * Set the FPU status flags to represent precision loss due to
 * truncation.
 */
L_precision_lost_down:
	push	%edx
	push	%eax
	call	set_precision_flag_down
	popl	%eax
	popl	%edx
	jmp	L_no_precision_loss


/*
 * The number is a denormal (which might get rounded up to a normal)
 * Shift the number right the required number of bits, which will
 * have to be undone later...
 */
L_Make_denorm:
	/* The action to be taken depends upon whether the underflow
	   exception is masked */
	testb	CW_Underflow,%cl		/* Underflow mask. */
	jz	Unmasked_underflow		/* Do not make a denormal. */

	movb	DENORMAL,FPU_denormal

	pushl	%ecx		/* Save */
	movw	EXP_UNDER+1,%cx
	subw	EXP(%edi),%cx	/* %cx = required right-shift count */

	cmpw	$64,%cx		/* shrd only works for 0..31 bits */
	jnc	Denorm_shift_more_than_63

	cmpw	$32,%cx		/* shrd only works for 0..31 bits */
	jnc	Denorm_shift_more_than_32

/*
 * We got here without jumps by assuming that the most common requirement
 *  is for a small de-normalising shift.
 * Shift by [1..31] bits
 */
	addw	%cx,EXP(%edi)
	orl	%edx,%edx	/* extension */
	setne	%ch		/* Save whether %edx is non-zero */
	xorl	%edx,%edx
	shrd	%cl,%ebx,%edx
	shrd	%cl,%eax,%ebx
	shr	%cl,%eax
	orb	%ch,%dl		/* Sticky bit: shifted-out bits stay non-zero */
	popl	%ecx
	jmp	Denorm_done

/* Shift by [32..63] bits */
Denorm_shift_more_than_32:
	addw	%cx,EXP(%edi)
	subb	$32,%cl
	orl	%edx,%edx
	setne	%ch
	orb	%ch,%bl
	xorl	%edx,%edx
	shrd	%cl,%ebx,%edx
	shrd	%cl,%eax,%ebx
	shr	%cl,%eax
	orl	%edx,%edx		/* test these 32 bits */
	setne	%cl
	orb	%ch,%bl
	orb	%cl,%bl
	movl	%ebx,%edx		/* Move the result down one word */
	movl	%eax,%ebx
	xorl	%eax,%eax
	popl	%ecx
	jmp	Denorm_done

/* Shift by [64..) bits */
Denorm_shift_more_than_63:
	cmpw	$64,%cx
	jne	Denorm_shift_more_than_64

/* Exactly 64 bit shift */
	addw	%cx,EXP(%edi)
	xorl	%ecx,%ecx
	orl	%edx,%edx
	setne	%cl
	orl	%ebx,%ebx
	setne	%ch
	orb	%ch,%cl
	orb	%cl,%al		/* Collapse all discarded bits into one sticky bit */
	movl	%eax,%edx
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	popl	%ecx
	jmp	Denorm_done

Denorm_shift_more_than_64:
	movw	EXP_UNDER+1,EXP(%edi)
/* This is easy, %eax must be non-zero, so.. */
	movl	$1,%edx
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	popl	%ecx
	jmp	Denorm_done


Unmasked_underflow:
	movb	UNMASKED_UNDERFLOW,FPU_denormal
	jmp	Denorm_done


/* Undo the de-normalisation. */
Normalise_result:
	cmpb	UNMASKED_UNDERFLOW,FPU_denormal
	je	Signal_underflow

/* The number must be a denormal if we got here. */
#ifdef PARANOID
	/* But check it... just in case. */
	cmpw	EXP_UNDER+1,EXP(%edi)
	jne	L_norm_bugged
#endif /* PARANOID */

#ifdef PECULIAR_486
	/*
	 * This implements a special feature of 80486 behaviour.
	 * Underflow will be signalled even if the number is
	 * not a denormal after rounding.
	 * This difference occurs only for masked underflow, and not
	 * in the unmasked case.
	 * Actual 80486 behaviour differs from this in some circumstances.
	 */
	orl	%eax,%eax		/* ms bits */
	js	LPseudoDenormal		/* Will be masked underflow */
#else
	orl	%eax,%eax		/* ms bits */
	js	L_Normalised		/* No longer a denormal */
#endif /* PECULIAR_486 */

	jnz	LDenormal_adj_exponent

	orl	%ebx,%ebx
	jz	L_underflow_to_zero	/* The contents are zero */

LDenormal_adj_exponent:
	decw	EXP(%edi)

LPseudoDenormal:
	testb	$0xff,FPU_bits_lost	/* bits lost == underflow */
	movl	TAG_Special,%edx
	jz	L_deNormalised

	/* There must be a masked underflow */
	push	%eax
	pushl	EX_Underflow
	call	EXCEPTION
	popl	%eax
	popl	%eax
	movl	TAG_Special,%edx
	jmp	L_deNormalised


/*
 * The operations resulted in a number too small to represent.
 * Masked response.
 */
L_underflow_to_zero:
	push	%eax
	call	set_precision_flag_down
	popl	%eax

	push	%eax
	pushl	EX_Underflow
	call	EXCEPTION
	popl	%eax
	popl	%eax

/* Reduce the exponent to EXP_UNDER */
	movw	EXP_UNDER,EXP(%edi)
	movl	TAG_Zero,%edx
	jmp	L_Store_significand


/* The operations resulted in a number too large to represent. */
L_overflow:
	addw	EXTENDED_Ebias,EXP(%edi)	/* Set for unmasked response. */
	push	%edi
	call	arith_overflow
	pop	%edi
	jmp	fpu_reg_round_signed_special_exit


Signal_underflow:
	/* The number may have been changed to a non-denormal */
	/* by the rounding operations. */
	cmpw	EXP_UNDER,EXP(%edi)
	jle	Do_unmasked_underflow

	jmp	L_Normalised

Do_unmasked_underflow:
	/* Increase the exponent by the magic number */
	addw	$(3*(1<<13)),EXP(%edi)
	push	%eax
	pushl	EX_Underflow
	call	EXCEPTION
	popl	%eax
	popl	%eax
	jmp	L_Normalised


#ifdef PARANOID
#ifdef PECULIAR_486
L_bugged_denorm_486:
	pushl	EX_INTERNAL|0x236
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit
#else
L_bugged_denorm:
	pushl	EX_INTERNAL|0x230
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit
#endif /* PECULIAR_486 */

L_bugged_round24:
	pushl	EX_INTERNAL|0x231
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit

L_bugged_round53:
	pushl	EX_INTERNAL|0x232
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit

L_bugged_round64:
	pushl	EX_INTERNAL|0x233
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit

L_norm_bugged:
	pushl	EX_INTERNAL|0x234
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit

L_entry_bugged:
	pushl	EX_INTERNAL|0x235
	call	EXCEPTION
	popl	%ebx
L_exception_exit:
	mov	$-1,%eax	/* -1 on internal error */
	jmp	fpu_reg_round_special_exit
#endif /* PARANOID */

SYM_FUNC_END(FPU_round)