|
|	res_func.sa 3.9 7/29/91
|
| Normalizes denormalized numbers if necessary and updates the
| stack frame.  The function is then restored back into the
| machine and the 040 completes the operation.  This routine
| is only used by the unsupported data type/format handler.
| (Exception vector 55).
|
| For packed move out (fmove.p fpm,<ea>) the operation is
| completed here; data is packed and moved to user memory.
| The stack is restored to the 040 only in the case of a
| reportable exception in the conversion.
|
|
|		Copyright (C) Motorola, Inc. 1990
|			All Rights Reserved
|
|	For details on the license for this file, please see the
|	file, README, in this same directory.

RES_FUNC:	|idnt 2,1 | Motorola 040 Floating Point Software Package

	|section 8

#include "fpsp.h"

| Exponent bound tables for single and double precision.
sp_bnds:
	.short	0x3f81,0x407e
	.short	0x3f6a,0x0000
dp_bnds:
	.short	0x3c01,0x43fe
	.short	0x3bcd,0x0000

	|xref	mem_write
	|xref	bindec
	|xref	get_fline
	|xref	round
	|xref	denorm
	|xref	dest_ext
	|xref	dest_dbl
	|xref	dest_sgl
	|xref	unf_sub
	|xref	nrm_set
	|xref	dnrm_lp
	|xref	ovf_res
	|xref	reg_dest
	|xref	t_ovfl
	|xref	t_unfl

	.global	res_func
	.global	p_move

|
| res_func
|
| Entry point.  All state is addressed through %a6 (frame offsets
| such as DNRM_FLG, STAG, ETEMP are presumably supplied by fpsp.h).
| The three status bytes DNRM_FLG, RES_FLG and CU_ONLY are cleared
| first.  DY_MO_FLG selects the path: zero goes straight to the
| source operand (monadic), non-zero examines the destination first.
|
res_func:
	clrb	DNRM_FLG(%a6)
	clrb	RES_FLG(%a6)
	clrb	CU_ONLY(%a6)
	tstb	DY_MO_FLG(%a6)
	beqs	monadic
|
| Dyadic: bit 7 of DTAG is clear for norm=000, zero=001, inf=010
| and nan=011; it is set only for a denormalized destination.
|
dyadic:
	btstb	#7,DTAG(%a6)
	beqs	monadic			|dest is not a denorm
|
| Destination is a denorm.  Move the sign bit out of the exponent
| word into LOCAL_SGN, normalize via nrm_set, then put the sign back.
|
	leal	FPTEMP(%a6),%a0
	bclrb	#sign_bit,LOCAL_EX(%a0)
	sne	LOCAL_SGN(%a0)		|$ff if the sign bit was set
	bsr	nrm_set			|normalize (exp will go negative)
	bclrb	#sign_bit,LOCAL_EX(%a0)	|get rid of false sign
	bfclr	LOCAL_SGN(%a0){#0:#8}	|back to IEEE ext format; Z = old sign byte was 0
	beqs	dpos
	bsetb	#sign_bit,LOCAL_EX(%a0)
dpos:
	bfclr	DTAG(%a6){#0:#4}	|tag = normalized, FPTE15 = 0
	bsetb	#4,DTAG(%a6)		|set FPTE15
	orb	#0x0f,DNRM_FLG(%a6)	|record: destination was a denorm
|
| Source operand handling.  %a0 -> ETEMP from here on.
|
monadic:
	leal	ETEMP(%a6),%a0
	btstb	#direction_bit,CMDREG1B(%a6)	|check direction
	bne	opclass3		|it is a move out
|
| Only opclass 0 and 2 remain.  Bit 7 of STAG set means the source
| is a denorm.
|
	btstb	#7,STAG(%a6)
	bne	mon_dnrm		|source is a denorm
	tstb	DY_MO_FLG(%a6)
	bne	normal			|dyadic: go to the common exit path
|
| Monadic with a non-denorm source.  Possible commands:
|	fabs  = $18  fneg   = $1a  ftst   = $3a
|	fmove = $00  fsmove = $40  fdmove = $44
|	fsqrt = $05* fssqrt = $41  fdsqrt = $45
|	(*fsqrt reencoded to $05)
| The three fsqrt encodings have bit 0 set and branch to normal;
| everything else falls into cu_norm.
|
	movew	CMDREG1B(%a6),%d0	|get command register
	andil	#0x7f,%d0		|strip to only command word
	btstl	#0,%d0
	bne	normal			|weed out fsqrt instructions
|
| cu_norm handles fmove in instructions with normalized inputs.
| The routine round is used to correctly round the input for the
| destination precision and mode.  Masking with $3b drops bits 2
| and 6, so fmove/fsmove/fdmove all compare as zero.
|
cu_norm:
	st	CU_ONLY(%a6)		|set cu-only inst flag
	movew	CMDREG1B(%a6),%d0
	andib	#0x3b,%d0		|isolate bits to select inst
	tstb	%d0
	beql	cu_nmove		|zero: fmove
	cmpib	#0x18,%d0
	beql	cu_nabs			|$18: fabs
	cmpib	#0x1a,%d0
	beql	cu_nneg			|$1a: fneg
|
| Inst is ftst.  Check the source operand and set the cc's accordingly.
| No write is done, so simply rts.
|
| cu_ntst: ftst on the operand at (%a0).  Sets N, then exactly one of
| I / NAN / Z (or none) in USER_FPSR, and returns without writing
| a result.
|
cu_ntst:
	movew	LOCAL_EX(%a0),%d0
	bclrl	#15,%d0			|strip sign; Z = sign was clear
	sne	LOCAL_SGN(%a0)
	beqs	cu_ntpo
	orl	#neg_mask,USER_FPSR(%a6)	|set N
cu_ntpo:
	cmpiw	#0x7fff,%d0		|max exponent: inf or nan
	bnes	cu_ntcz
	tstl	LOCAL_HI(%a0)
	bnes	cu_ntn
	tstl	LOCAL_LO(%a0)
	bnes	cu_ntn
	orl	#inf_mask,USER_FPSR(%a6)	|mantissa all zero: inf
	rts
cu_ntn:
	orl	#nan_mask,USER_FPSR(%a6)
	movel	ETEMP_EX(%a6),FPTEMP_EX(%a6)	|set up fptemp sign for
|						;snan handler
	rts
cu_ntcz:
	tstl	LOCAL_HI(%a0)
	bnel	cu_ntsx
	tstl	LOCAL_LO(%a0)
	bnel	cu_ntsx
	orl	#z_mask,USER_FPSR(%a6)	|mantissa all zero: set Z
cu_ntsx:
	rts
|
| Inst is fabs.  Execute the absolute value function on the input.
| Branch to the fmove code.  Bit 5 of STAG is set for the zero (001)
| and nan (011) tags; those operands are written unchanged.
|
cu_nabs:
	moveb	STAG(%a6),%d0
	btstl	#5,%d0			|test for NaN or zero
	bne	wr_etemp		|if either, simply write it
	bclrb	#7,LOCAL_EX(%a0)	|do abs
	bras	cu_nmove		|fmove code will finish
|
| Inst is fneg.  Execute the negate value function on the input.
| Fall through to the fmove code.  If the operand is NaN, do nothing.
|
| cu_nneg: zero and nan tags (STAG bit 5 set) are written unchanged;
| otherwise the sign bit is toggled and control falls into cu_nmove.
|
cu_nneg:
	moveb	STAG(%a6),%d0
	btstl	#5,%d0			|test for NaN or zero
	bne	wr_etemp		|if either, simply write it
	bchgb	#7,LOCAL_EX(%a0)	|do neg
|
| Inst is fmove.  This code also handles all result writes.
| If bit 2 is set, round is forced to double.  If it is clear,
| and bit 6 is set, round is forced to single.  If both are clear,
| the round precision is found in the fpcr.  If the rounding precision
| is double or single, round the result before the write.
|
cu_nmove:
	moveb	STAG(%a6),%d0
	andib	#0xe0,%d0		|isolate stag bits
	bne	wr_etemp		|any tag other than norm: write as is
	btstb	#2,CMDREG1B+1(%a6)	|check for rd
	bne	cu_nmrd
	btstb	#6,CMDREG1B+1(%a6)	|check for rs
	bne	cu_nmrs
|
| No forced precision: take the precision from the top two bits of
| the FPCR_MODE byte (0 = extended, 1 = single, otherwise double).
|
cu_nmnr:
	bfextu	FPCR_MODE(%a6){#0:#2},%d0
	tstb	%d0			|check for extended
	beq	cu_wrexn		|if so, just write result
	cmpib	#1,%d0			|check for single
	beq	cu_nmrs			|fall through to double
|
| The move is fdmove or round precision is double.
|
| cu_nmrd: round the operand at (%a0) to double precision.
| An exponent at or below $3c01 is handed to cu_nunfl with %d0 = 2;
| an exponent of $43ff or more after rounding goes to cu_novfl.
|
cu_nmrd:
	movel	#2,%d0			|size for denorm, used by cu_nunfl
	movew	LOCAL_EX(%a0),%d1
	andw	#0x7fff,%d1		|exponent without sign
	cmpw	#0x3c01,%d1		|compare to double threshold
	bls	cu_nunfl
	bfextu	FPCR_MODE(%a6){#2:#2},%d1	|get rmode
	orl	#0x00020000,%d1		|or in rprec (double)
	clrl	%d0			|clear g,r,s for round
	bclrb	#sign_bit,LOCAL_EX(%a0)	|convert to internal format
	sne	LOCAL_SGN(%a0)
	bsrl	round
	bfclr	LOCAL_SGN(%a0){#0:#8}	|back to external format
	beqs	cu_nmrdc
	bsetb	#sign_bit,LOCAL_EX(%a0)
cu_nmrdc:
	movew	LOCAL_EX(%a0),%d1	|check for overflow
	andw	#0x7fff,%d1
	cmpw	#0x43ff,%d1
	bge	cu_novfl		|take care of overflow case
	bra	cu_wrexn
|
| The move is fsmove or round precision is single.
|
| cu_nmrs: round the operand at (%a0) to single precision.
| An exponent at or below $3f81 is handed to cu_nunfl with %d0 = 1;
| an exponent below $407f after rounding is written directly,
| anything else falls into cu_novfl.
|
cu_nmrs:
	movel	#1,%d0			|size for denorm, used by cu_nunfl
	movew	LOCAL_EX(%a0),%d1
	andw	#0x7fff,%d1		|exponent without sign
	cmpw	#0x3f81,%d1		|compare to single threshold
	bls	cu_nunfl
	bfextu	FPCR_MODE(%a6){#2:#2},%d1	|get rmode
	orl	#0x00010000,%d1		|or in rprec (single)
	clrl	%d0			|clear g,r,s for round
	bclrb	#sign_bit,LOCAL_EX(%a0)	|convert to internal format
	sne	LOCAL_SGN(%a0)
	bsrl	round
	bfclr	LOCAL_SGN(%a0){#0:#8}	|back to external format
	beqs	cu_nmrsc
	bsetb	#sign_bit,LOCAL_EX(%a0)
cu_nmrsc:
	movew	LOCAL_EX(%a0),%d1	|check for overflow
	andw	#0x7fff,%d1
	cmpw	#0x407f,%d1
	blt	cu_wrexn
|
| The operand is above precision boundaries.  Use t_ovfl to
| generate the correct value.
|
cu_novfl:
	bsr	t_ovfl
	bra	cu_wrexn
|
| The operand is below precision boundaries.  Use denorm to
| generate the correct value.
|
| cu_nunfl: underflow path for cu_nmrd/cu_nmrs.  On entry %d0 holds
| the size code loaded by the caller (1 = single, 2 = double).
| The operand is denormalized, rounded, flagged and copied to ETEMP.
|
cu_nunfl:
	bclrb	#sign_bit,LOCAL_EX(%a0)	|convert to internal format
	sne	LOCAL_SGN(%a0)
	bsr	denorm
	bfclr	LOCAL_SGN(%a0){#0:#8}	|change back to IEEE ext format
	beqs	cu_nucont
	bsetb	#sign_bit,LOCAL_EX(%a0)
|
| Build the argument for round in %d1: rmode in the low word and
| the precision in the high word (2 = forced double, 1 = forced
| single, otherwise the top two bits of FPCR_MODE).
|
cu_nucont:
	bfextu	FPCR_MODE(%a6){#2:#2},%d1	|rmode
	btstb	#2,CMDREG1B+1(%a6)	|check for rd
	bne	inst_d
	btstb	#6,CMDREG1B+1(%a6)	|check for rs
	bne	inst_s
	swap	%d1			|work on the high word
	moveb	FPCR_MODE(%a6),%d1
	lsrb	#6,%d1			|keep the two precision bits
	swap	%d1
	bra	inst_sd
inst_d:
	orl	#0x00020000,%d1
	bra	inst_sd
inst_s:
	orl	#0x00010000,%d1
inst_sd:
	bclrb	#sign_bit,LOCAL_EX(%a0)	|convert to internal format
	sne	LOCAL_SGN(%a0)
	bsrl	round
	bfclr	LOCAL_SGN(%a0){#0:#8}	|back to external format
	beqs	cu_nuflp
	bsetb	#sign_bit,LOCAL_EX(%a0)
cu_nuflp:
	btstb	#inex2_bit,FPSR_EXCEPT(%a6)
	beqs	cu_nuninx
	orl	#aunfl_mask,USER_FPSR(%a6)	|if the round was inex, set AUNFL
cu_nuninx:
	tstl	LOCAL_HI(%a0)		|test for zero
	bnes	cu_nunzro
	tstl	LOCAL_LO(%a0)
	bnes	cu_nunzro
|
| The mantissa is zero from the denorm loop.  Check sign and rmode
| to see if rounding should have occurred which would leave the lsb.
| Isolated rmode below $20 keeps the zero, $20 is handled by cu_nrm
| and the remaining value by cu_nrp.
|
	movel	USER_FPCR(%a6),%d0
	andil	#0x30,%d0		|isolate rmode
	cmpil	#0x20,%d0
	blts	cu_nzro
	bnes	cu_nrp
cu_nrm:
	tstw	LOCAL_EX(%a0)		|if negative, set lsb
	bges	cu_nzro
	btstb	#7,FPCR_MODE(%a6)	|check for double
	beqs	cu_nincs
	bras	cu_nincd
cu_nrp:
	tstw	LOCAL_EX(%a0)		|if positive, set lsb
	blts	cu_nzro
	btstb	#7,FPCR_MODE(%a6)	|check for double
	beqs	cu_nincs
cu_nincd:
	orl	#0x800,LOCAL_LO(%a0)	|inc for double
	bra	cu_nunzro
cu_nincs:
	orl	#0x100,LOCAL_HI(%a0)	|inc for single
	bra	cu_nunzro
|
| NOTE(review): the compare below uses $40, described as "tagged
| zero"; by the tag table in this file (zero=001, inf=010) a zero tag
| would read $20 after the mask - confirm which is intended.
|
cu_nzro:
	orl	#z_mask,USER_FPSR(%a6)
	moveb	STAG(%a6),%d0
	andib	#0xe0,%d0
	cmpib	#0x40,%d0		|check if input was tagged zero
	beqs	cu_numv
cu_nunzro:
	orl	#unfl_mask,USER_FPSR(%a6)	|set unfl
cu_numv:
	movel	(%a0),ETEMP(%a6)	|copy the 12-byte operand to ETEMP
	movel	4(%a0),ETEMP_HI(%a6)
	movel	8(%a0),ETEMP_LO(%a6)
|
| Write the result to memory, setting the fpsr cc bits.  NaN and Inf
| bypass cu_wrexn.
|
| cu_wrexn: set Z when the exponent word is $0000 or $8000, set N
| when the exponent word is negative, then branch to wr_etemp.
|
cu_wrexn:
	tstw	LOCAL_EX(%a0)		|test for +zero
	beqs	cu_wrzero
	cmpw	#0x8000,LOCAL_EX(%a0)	|test for -zero
	bnes	cu_wreon
cu_wrzero:
	orl	#z_mask,USER_FPSR(%a6)	|set Z bit
cu_wreon:
	tstw	LOCAL_EX(%a0)
	bpl	wr_etemp
	orl	#neg_mask,USER_FPSR(%a6)
	bra	wr_etemp

|
| HANDLE SOURCE DENORM HERE
|
|				;clear denorm stag to norm
|				;write the new tag & ete15 to the fstack
mon_dnrm:
|
| At this point, check for the cases in which normalizing the
| denorm produces incorrect results.  Dyadic instructions always
| normalize.
|
	tstb	DY_MO_FLG(%a6)
	bnes	nrm_src
|
| Monadic instructions possible here:
|	fabs  = $18  fneg   = $1a  ftst   = $3a
|	fmove = $00  fsmove = $40  fdmove = $44
|	fsqrt = $05* fssqrt = $41  fdsqrt = $45
|	(*fsqrt reencoded to $05)
| The fsqrt encodings (bit 0 set) go to nrm_src and are normalized;
| fmove, fabs, fneg and ftst go to cu_dnrm and are not.
|
	movew	CMDREG1B(%a6),%d0	|get command register
	andil	#0x7f,%d0		|strip to only command word
	btstl	#0,%d0
	bnes	nrm_src			|weed out fsqrt instructions
	st	CU_ONLY(%a6)		|set cu-only inst flag
	bra	cu_dnrm
|
| Normalize the source: sign goes to LOCAL_SGN around the nrm_set
| call and is merged back afterwards.
|
nrm_src:
	bclrb	#sign_bit,LOCAL_EX(%a0)
	sne	LOCAL_SGN(%a0)
	bsr	nrm_set			|normalize number (exponent will go
|					; negative)
	bclrb	#sign_bit,LOCAL_EX(%a0)	|get rid of false sign
	bfclr	LOCAL_SGN(%a0){#0:#8}	|change back to IEEE ext format
	beqs	spos
	bsetb	#sign_bit,LOCAL_EX(%a0)
spos:
	bfclr	STAG(%a6){#0:#4}	|tag = normalized, ETE15 = 0
	bsetb	#4,STAG(%a6)		|set ETE15
	orb	#0xf0,DNRM_FLG(%a6)	|record: source was a denorm
|
| Common exit: if either operand was a denorm, let ck_wrap look for a
| wrap-around case; otherwise fall into fix_stk.
|
normal:
	tstb	DNRM_FLG(%a6)
	bne	ck_wrap
|
| fix_stk: prepare the frame for restore and flag it via RES_FLG.
|
fix_stk:
	moveb	#0xfe,CU_SAVEPC(%a6)
	bclrb	#E1,E_BYTE(%a6)
	clrw	NMNEXC(%a6)
	st	RES_FLG(%a6)		|indicate that a restore is needed
	rts

|
| cu_dnrm handles all cu-only instructions (fmove, fabs, fneg, and
| ftst) completely in software without an frestore to the 040.
|
| cu_dnrm: dispatch on the command with bits 2 and 6 masked off
| (mask $3b): 0 = fmove, $18 = fabs, $1a = fneg, otherwise ftst.
|
cu_dnrm:
	st	CU_ONLY(%a6)
	movew	CMDREG1B(%a6),%d0
	andib	#0x3b,%d0		|isolate bits to select inst
	tstb	%d0
	beql	cu_dmove		|zero: fmove
	cmpib	#0x18,%d0
	beql	cu_dabs			|$18: fabs
	cmpib	#0x1a,%d0
	beql	cu_dneg			|$1a: fneg
|
| Inst is ftst.  Check the source operand and set the cc's accordingly.
| No write is done, so simply rts.
|
cu_dtst:
	movew	LOCAL_EX(%a0),%d0
	bclrl	#15,%d0			|strip sign; Z = sign was clear
	sne	LOCAL_SGN(%a0)
	beqs	cu_dtpo
	orl	#neg_mask,USER_FPSR(%a6)	|set N
cu_dtpo:
	cmpiw	#0x7fff,%d0		|max exponent: inf or nan
	bnes	cu_dtcz
	tstl	LOCAL_HI(%a0)
	bnes	cu_dtn
	tstl	LOCAL_LO(%a0)
	bnes	cu_dtn
	orl	#inf_mask,USER_FPSR(%a6)	|mantissa all zero: inf
	rts
cu_dtn:
	orl	#nan_mask,USER_FPSR(%a6)
	movel	ETEMP_EX(%a6),FPTEMP_EX(%a6)	|set up fptemp sign for
|						;snan handler
	rts
cu_dtcz:
	tstl	LOCAL_HI(%a0)
	bnel	cu_dtsx
	tstl	LOCAL_LO(%a0)
	bnel	cu_dtsx
	orl	#z_mask,USER_FPSR(%a6)	|mantissa all zero: set Z
cu_dtsx:
	rts
|
| Inst is fabs.  Execute the absolute value function on the input.
| Branch to the fmove code.
|
| cu_dabs: clear the sign bit, then continue in cu_dmove.
|
cu_dabs:
	bclrb	#7,LOCAL_EX(%a0)	|do abs
	bras	cu_dmove		|fmove code will finish
|
| Inst is fneg.  Execute the negate value function on the input.
| Fall through to the fmove code.
|
cu_dneg:
	bchgb	#7,LOCAL_EX(%a0)	|do neg
|
| Inst is fmove.  This code also handles all result writes.
| If bit 2 is set, round is forced to double.  If it is clear,
| and bit 6 is set, round is forced to single.  If both are clear,
| the round precision is found in the fpcr.  If the rounding precision
| is double or single, the result is zero, and the mode is checked
| to determine if the lsb of the result should be set.
|
cu_dmove:
	btstb	#2,CMDREG1B+1(%a6)	|check for rd
	bne	cu_dmrd
	btstb	#6,CMDREG1B+1(%a6)	|check for rs
	bne	cu_dmrs
|
| No forced precision: take the precision from the top two bits of
| the FPCR_MODE byte (0 = extended, 1 = single, otherwise double).
|
cu_dmnr:
	bfextu	FPCR_MODE(%a6){#0:#2},%d0
	tstb	%d0			|check for extended
	beq	cu_wrexd		|if so, just write result
	cmpib	#1,%d0			|check for single
	beq	cu_dmrs			|fall through to double
|
| The move is fdmove or round precision is double.  Result is zero.
| Check rmode for rp or rm and set lsb accordingly.
|
| cu_dmrd: pick one of the four double-format results from the sign
| of the operand and the rounding mode (3 = rp, 2 = rm).
|
cu_dmrd:
	bfextu	FPCR_MODE(%a6){#2:#2},%d1	|get rmode
	tstw	LOCAL_EX(%a0)		|check sign
	blts	cu_dmdn
	cmpib	#3,%d1			|positive: check for rp
	bne	cu_dpd			|load double pos zero
	bra	cu_dpdr			|load double pos zero w/lsb
cu_dmdn:
	cmpib	#2,%d1			|negative: check for rm
	bne	cu_dnd			|load double neg zero
	bra	cu_dndr			|load double neg zero w/lsb
|
| The move is fsmove or round precision is single.  Result is zero.
| Check for rp or rm and set lsb accordingly.
|
cu_dmrs:
	bfextu	FPCR_MODE(%a6){#2:#2},%d1	|get rmode
	tstw	LOCAL_EX(%a0)		|check sign
	blts	cu_dmsn
	cmpib	#3,%d1			|positive: check for rp
	bne	cu_spd			|load single pos zero
	bra	cu_spdr			|load single pos zero w/lsb
cu_dmsn:
	cmpib	#2,%d1			|negative: check for rm
	bne	cu_snd			|load single neg zero
	bra	cu_sndr			|load single neg zero w/lsb
|
| The precision is extended, so the result in etemp is correct.
| Simply set unfl (not inex2 or aunfl) and write the result to
| the correct fp register.  N is set when the exponent word is
| non-zero.
|
cu_wrexd:
	orl	#unfl_mask,USER_FPSR(%a6)
	tstw	LOCAL_EX(%a0)
	beq	wr_etemp
	orl	#neg_mask,USER_FPSR(%a6)
	bra	wr_etemp
|
| These routines write +/- zero in double format.  The routines
| cu_dpdr and cu_dndr set the double lsb.
|
| Each routine stores exponent $3c01 (sign bit set for the negative
| forms) and a mantissa that is either zero or has only bit $800 of
| the low longword set, ORs the matching bits into USER_FPSR and
| branches to wr_etemp.
|
cu_dpd:
	movel	#0x3c010000,LOCAL_EX(%a0)	|force pos double zero
	clrl	LOCAL_HI(%a0)
	clrl	LOCAL_LO(%a0)
	orl	#z_mask,USER_FPSR(%a6)
	orl	#unfinx_mask,USER_FPSR(%a6)
	bra	wr_etemp
cu_dpdr:
	movel	#0x3c010000,LOCAL_EX(%a0)	|force pos double zero
	clrl	LOCAL_HI(%a0)
	movel	#0x800,LOCAL_LO(%a0)		|with lsb set
	orl	#unfinx_mask,USER_FPSR(%a6)
	bra	wr_etemp
cu_dnd:
	movel	#0xbc010000,LOCAL_EX(%a0)	|force neg double zero
	clrl	LOCAL_HI(%a0)
	clrl	LOCAL_LO(%a0)
	orl	#z_mask,USER_FPSR(%a6)
	orl	#neg_mask,USER_FPSR(%a6)
	orl	#unfinx_mask,USER_FPSR(%a6)
	bra	wr_etemp
cu_dndr:
	movel	#0xbc010000,LOCAL_EX(%a0)	|force neg double zero
	clrl	LOCAL_HI(%a0)
	movel	#0x800,LOCAL_LO(%a0)		|with lsb set
	orl	#neg_mask,USER_FPSR(%a6)
	orl	#unfinx_mask,USER_FPSR(%a6)
	bra	wr_etemp
|
| These routines write +/- zero in single format.  The routines
| cu_spdr and cu_sndr set the single lsb.
|
| Each routine stores exponent $3f81 (sign bit set for the negative
| forms) and a mantissa that is either zero or has only bit $100 of
| the high longword set, ORs the matching bits into USER_FPSR and
| branches to wr_etemp.
|
cu_spd:
	movel	#0x3f810000,LOCAL_EX(%a0)	|force pos single zero
	clrl	LOCAL_HI(%a0)
	clrl	LOCAL_LO(%a0)
	orl	#z_mask,USER_FPSR(%a6)
	orl	#unfinx_mask,USER_FPSR(%a6)
	bra	wr_etemp
cu_spdr:
	movel	#0x3f810000,LOCAL_EX(%a0)	|force pos single zero
	movel	#0x100,LOCAL_HI(%a0)		|with lsb set
	clrl	LOCAL_LO(%a0)
	orl	#unfinx_mask,USER_FPSR(%a6)
	bra	wr_etemp
cu_snd:
	movel	#0xbf810000,LOCAL_EX(%a0)	|force neg single zero
	clrl	LOCAL_HI(%a0)
	clrl	LOCAL_LO(%a0)
	orl	#z_mask,USER_FPSR(%a6)
	orl	#neg_mask,USER_FPSR(%a6)
	orl	#unfinx_mask,USER_FPSR(%a6)
	bra	wr_etemp
cu_sndr:
	movel	#0xbf810000,LOCAL_EX(%a0)	|force neg single zero
	movel	#0x100,LOCAL_HI(%a0)		|with lsb set
	clrl	LOCAL_LO(%a0)
	orl	#neg_mask,USER_FPSR(%a6)
	orl	#unfinx_mask,USER_FPSR(%a6)
	bra	wr_etemp

|
| This code checks for 16-bit overflow conditions on dyadic
| operations which are not restorable into the floating-point
| unit and must be completed in software.  Basically, this
| condition exists with a very large norm and a denorm.  One
| of the operands must be denormalized to enter this code.
|
| Flags used:
|	DY_MO_FLG contains 0 for monadic op, $ff for dyadic
|	DNRM_FLG contains $00 for neither op denormalized
|	                  $0f for the destination op denormalized
|	                  $f0 for the source op denormalized
|	                  $ff for both ops denormalized
|
| The wrap-around condition occurs for add, sub, div, and cmp
| when
|
|	abs(dest_exp - src_exp) >= $8000
|
| and for mul when
|
|	(dest_exp + src_exp) < $0
|
| we must process the operation here if this case is true.
|
| The rts following the frcfpn routine is the exit from res_func
| for this condition.  The restore flag (RES_FLG) is left clear.
| No frestore is done unless an exception is to be reported.
|
| For fadd:
|	if(sign_of(dest) != sign_of(src))
|		replace exponent of src with $3fff (keep sign)
|		use fpu to perform dest+new_src (user's rmode and X)
|		clr sticky
|	else
|		set sticky
|	call round with user's precision and mode
|	move result to fpn and wbtemp
|
| For fsub:
|	if(sign_of(dest) == sign_of(src))
|		replace exponent of src with $3fff (keep sign)
|		use fpu to perform dest+new_src (user's rmode and X)
|		clr sticky
|	else
|		set sticky
|	call round with user's precision and mode
|	move result to fpn and wbtemp
|
| For fdiv/fsgldiv:
|	if(both operands are denorm)
|		restore_to_fpu;
|	if(dest is norm)
|		force_ovf;
|	else(dest is denorm)
|		force_unf:
|
| For fcmp:
|	if(dest is norm)
|		N = sign_of(dest);
|	else(dest is denorm)
|		N = sign_of(src);
|
| For fmul:
|	if(both operands are denorm)
|		force_unf;
|	if((dest_exp + src_exp) < 0)
|		force_unf:
|	else
|		restore_to_fpu;
|
| Local equates: command values as they read after masking
| CMDREG1B with $3b.
|
	.set	addcode,0x22
	.set	subcode,0x28
	.set	mulcode,0x23
	.set	divcode,0x20
676*4882a593Smuzhiyun .set cmpcode,0x38 677*4882a593Smuzhiyunck_wrap: 678*4882a593Smuzhiyun | tstb DY_MO_FLG(%a6) ;check for fsqrt 679*4882a593Smuzhiyun beq fix_stk |if zero, it is fsqrt 680*4882a593Smuzhiyun movew CMDREG1B(%a6),%d0 681*4882a593Smuzhiyun andiw #0x3b,%d0 |strip to command bits 682*4882a593Smuzhiyun cmpiw #addcode,%d0 683*4882a593Smuzhiyun beq wrap_add 684*4882a593Smuzhiyun cmpiw #subcode,%d0 685*4882a593Smuzhiyun beq wrap_sub 686*4882a593Smuzhiyun cmpiw #mulcode,%d0 687*4882a593Smuzhiyun beq wrap_mul 688*4882a593Smuzhiyun cmpiw #cmpcode,%d0 689*4882a593Smuzhiyun beq wrap_cmp 690*4882a593Smuzhiyun| 691*4882a593Smuzhiyun| Inst is fdiv. 692*4882a593Smuzhiyun| 693*4882a593Smuzhiyunwrap_div: 694*4882a593Smuzhiyun cmpb #0xff,DNRM_FLG(%a6) |if both ops denorm, 695*4882a593Smuzhiyun beq fix_stk |restore to fpu 696*4882a593Smuzhiyun| 697*4882a593Smuzhiyun| One of the ops is denormalized. Test for wrap condition 698*4882a593Smuzhiyun| and force the result. 699*4882a593Smuzhiyun| 700*4882a593Smuzhiyun cmpb #0x0f,DNRM_FLG(%a6) |check for dest denorm 701*4882a593Smuzhiyun bnes div_srcd 702*4882a593Smuzhiyundiv_destd: 703*4882a593Smuzhiyun bsrl ckinf_ns 704*4882a593Smuzhiyun bne fix_stk 705*4882a593Smuzhiyun bfextu ETEMP_EX(%a6){#1:#15},%d0 |get src exp (always pos) 706*4882a593Smuzhiyun bfexts FPTEMP_EX(%a6){#1:#15},%d1 |get dest exp (always neg) 707*4882a593Smuzhiyun subl %d1,%d0 |subtract dest from src 708*4882a593Smuzhiyun cmpl #0x7fff,%d0 709*4882a593Smuzhiyun blt fix_stk |if less, not wrap case 710*4882a593Smuzhiyun clrb WBTEMP_SGN(%a6) 711*4882a593Smuzhiyun movew ETEMP_EX(%a6),%d0 |find the sign of the result 712*4882a593Smuzhiyun movew FPTEMP_EX(%a6),%d1 713*4882a593Smuzhiyun eorw %d1,%d0 714*4882a593Smuzhiyun andiw #0x8000,%d0 715*4882a593Smuzhiyun beq force_unf 716*4882a593Smuzhiyun st WBTEMP_SGN(%a6) 717*4882a593Smuzhiyun bra force_unf 718*4882a593Smuzhiyun 719*4882a593Smuzhiyunckinf_ns: 720*4882a593Smuzhiyun moveb STAG(%a6),%d0 |check source tag for 
inf or nan 721*4882a593Smuzhiyun bra ck_in_com 722*4882a593Smuzhiyunckinf_nd: 723*4882a593Smuzhiyun moveb DTAG(%a6),%d0 |check destination tag for inf or nan 724*4882a593Smuzhiyunck_in_com: 725*4882a593Smuzhiyun andib #0x60,%d0 |isolate tag bits 726*4882a593Smuzhiyun cmpb #0x40,%d0 |is it inf? 727*4882a593Smuzhiyun beq nan_or_inf |not wrap case 728*4882a593Smuzhiyun cmpb #0x60,%d0 |is it nan? 729*4882a593Smuzhiyun beq nan_or_inf |yes, not wrap case? 730*4882a593Smuzhiyun cmpb #0x20,%d0 |is it a zero? 731*4882a593Smuzhiyun beq nan_or_inf |yes 732*4882a593Smuzhiyun clrl %d0 733*4882a593Smuzhiyun rts |then ; it is either a zero of norm, 734*4882a593Smuzhiyun| ;check wrap case 735*4882a593Smuzhiyunnan_or_inf: 736*4882a593Smuzhiyun moveql #-1,%d0 737*4882a593Smuzhiyun rts 738*4882a593Smuzhiyun 739*4882a593Smuzhiyun 740*4882a593Smuzhiyun 741*4882a593Smuzhiyundiv_srcd: 742*4882a593Smuzhiyun bsrl ckinf_nd 743*4882a593Smuzhiyun bne fix_stk 744*4882a593Smuzhiyun bfextu FPTEMP_EX(%a6){#1:#15},%d0 |get dest exp (always pos) 745*4882a593Smuzhiyun bfexts ETEMP_EX(%a6){#1:#15},%d1 |get src exp (always neg) 746*4882a593Smuzhiyun subl %d1,%d0 |subtract src from dest 747*4882a593Smuzhiyun cmpl #0x8000,%d0 748*4882a593Smuzhiyun blt fix_stk |if less, not wrap case 749*4882a593Smuzhiyun clrb WBTEMP_SGN(%a6) 750*4882a593Smuzhiyun movew ETEMP_EX(%a6),%d0 |find the sign of the result 751*4882a593Smuzhiyun movew FPTEMP_EX(%a6),%d1 752*4882a593Smuzhiyun eorw %d1,%d0 753*4882a593Smuzhiyun andiw #0x8000,%d0 754*4882a593Smuzhiyun beqs force_ovf 755*4882a593Smuzhiyun st WBTEMP_SGN(%a6) 756*4882a593Smuzhiyun| 757*4882a593Smuzhiyun| This code handles the case of the instruction resulting in 758*4882a593Smuzhiyun| an overflow condition. 
|
force_ovf:
	bclrb	#E1,E_BYTE(%a6)
	orl	#ovfl_inx_mask,USER_FPSR(%a6)
	clrw	NMNEXC(%a6)
	leal	WBTEMP(%a6),%a0		|a0 -> result slot in the frame
|
| Pick the rounding precision handed to ovf_res in d0: taken from the
| opcode when the instruction forces it, else from the user's FPCR.
|
	movew	CMDREG1B(%a6),%d0
	btstl	#6,%d0			|forced-precision opcode?
	beqs	frcovf_fpcr
	btstl	#2,%d0			|forced double?
	bnes	frcovf_dbl
	movel	#0x1,%d0		|forced single
	bras	frcovf_rnd
frcovf_dbl:
	movel	#0x2,%d0		|forced double
	bras	frcovf_rnd
frcovf_fpcr:
	bfextu	FPCR_MODE(%a6){#0:#2},%d0	|not forced - use fpcr prec
frcovf_rnd:

| The 881/882 does not set inex2 for the following case, so the
| line is commented out to be compatible with 881/882
|	tst.b	%d0
|	beq.b	frcovf_x
|	or.l	#inex2_mask,USER_FPSR(%a6) ;if prec is s or d, set inex2

|frcovf_x:
	bsrl	ovf_res			|get correct result based on
|					;round precision/mode.  This
|					;sets FPSR_CC correctly
|					;returns in external format
	bfclr	WBTEMP_SGN(%a6){#0:#8}	|fold the sign byte back into the
	beq	frcfpn			|exponent word, then write fpn
	bsetb	#sign_bit,WBTEMP_EX(%a6)
	bra	frcfpn
|
| Inst is fadd.
|
wrap_add:
	cmpb	#0xff,DNRM_FLG(%a6)	|both operands denorm?
	beq	fix_stk			|yes: restore to fpu
|
| One of the ops is denormalized.  Test for wrap condition
| and complete the instruction.
|
	cmpb	#0x0f,DNRM_FLG(%a6)	|is the dest the denorm?
	bnes	add_srcd
add_destd:
	bsrl	ckinf_ns		|src must be a norm
	bne	fix_stk
	bfextu	ETEMP_EX(%a6){#1:#15},%d0	|src exp (always pos)
	bfexts	FPTEMP_EX(%a6){#1:#15},%d1	|dest exp (always neg)
	subl	%d1,%d0			|d0 = src exp - dest exp
	cmpl	#0x8000,%d0
	blt	fix_stk			|below threshold: not a wrap case
	bra	add_wrap
add_srcd:
	bsrl	ckinf_nd		|dest must be a norm
	bne	fix_stk
	bfextu	FPTEMP_EX(%a6){#1:#15},%d0	|dest exp (always pos)
	bfexts	ETEMP_EX(%a6){#1:#15},%d1	|src exp (always neg)
	subl	%d1,%d0			|d0 = dest exp - src exp
	cmpl	#0x8000,%d0
	blt	fix_stk			|below threshold: not a wrap case
|
| Check the signs of the operands.  If they are unlike, the fpu
| can be used to add the norm and 1.0 with the sign of the
| denorm and it will correctly generate the result in extended
| precision.  We can then call round with no sticky and the result
| will be correct for the user's rounding mode and precision.  If
| the signs are the same, we call round with the sticky bit set
| and the result will be correct for the user's rounding mode and
| precision.
|
add_wrap:
	movew	ETEMP_EX(%a6),%d0	|compare operand signs
	movew	FPTEMP_EX(%a6),%d1
	eorw	%d1,%d0
	andiw	#0x8000,%d0
	beq	add_same
|
| The signs are unlike.
|
	cmpb	#0x0f,DNRM_FLG(%a6)	|is dest the denorm?
	bnes	add_u_srcd
	movew	FPTEMP_EX(%a6),%d0	|dest is the denorm:
	andiw	#0x8000,%d0		|keep its sign and
	orw	#0x3fff,%d0		|force the exponent to +/- 1
	movew	%d0,FPTEMP_EX(%a6)	|in the denorm
	movel	USER_FPCR(%a6),%d0
	andil	#0x30,%d0
	fmovel	%d0,%fpcr		|user's rmode, extended precision
	fmovex	ETEMP(%a6),%fp0
	faddx	FPTEMP(%a6),%fp0
	leal	WBTEMP(%a6),%a0		|point a0 to wbtemp in frame
	fmovel	%fpsr,%d1
	orl	%d1,USER_FPSR(%a6)	|capture cc's and inex from fadd
	fmovex	%fp0,WBTEMP(%a6)	|write result to memory
	lsrl	#4,%d0			|put rmode in lower 2 bits
	movel	USER_FPCR(%a6),%d1
	andil	#0xc0,%d1
	lsrl	#6,%d1			|put precision in upper word
	swap	%d1
	orl	%d0,%d1			|set up for round call
	clrl	%d0			|force sticky to zero
	bclrb	#sign_bit,WBTEMP_EX(%a6)	|split the sign out into
	sne	WBTEMP_SGN(%a6)		|the sign byte for round
	bsrl	round			|round result to users rmode & prec
	bfclr	WBTEMP_SGN(%a6){#0:#8}	|convert back to IEEE ext format
	beq	frcfpnr
	bsetb	#sign_bit,WBTEMP_EX(%a6)
	bra	frcfpnr
add_u_srcd:
	movew	ETEMP_EX(%a6),%d0	|src is the denorm:
	andiw	#0x8000,%d0		|keep its sign and
	orw	#0x3fff,%d0		|force the exponent to +/- 1
	movew	%d0,ETEMP_EX(%a6)	|in the denorm
	movel	USER_FPCR(%a6),%d0
	andil	#0x30,%d0
	fmovel	%d0,%fpcr		|user's rmode, extended precision
	fmovex	ETEMP(%a6),%fp0
	faddx	FPTEMP(%a6),%fp0
	fmovel	%fpsr,%d1
	orl	%d1,USER_FPSR(%a6)	|capture cc's and inex from fadd
	leal	WBTEMP(%a6),%a0		|point a0 to wbtemp in frame
	fmovex	%fp0,WBTEMP(%a6)	|write result to memory
	lsrl	#4,%d0			|put rmode in lower 2 bits
	movel	USER_FPCR(%a6),%d1
	andil	#0xc0,%d1
	lsrl	#6,%d1			|put precision in upper word
	swap	%d1
	orl	%d0,%d1			|set up for round call
	clrl	%d0			|force sticky to zero
	bclrb	#sign_bit,WBTEMP_EX(%a6)
	sne	WBTEMP_SGN(%a6)		|use internal format for round
	bsrl	round			|round result to users rmode & prec
	bfclr	WBTEMP_SGN(%a6){#0:#8}	|convert back to IEEE ext format
	beq	frcfpnr
	bsetb	#sign_bit,WBTEMP_EX(%a6)
	bra	frcfpnr
|
| Signs are alike: the result is the norm operand, rounded with the
| sticky bit set to account for the denorm.
|
add_same:
	cmpb	#0x0f,DNRM_FLG(%a6)	|is dest the denorm?
	bnes	add_s_srcd
add_s_destd:
	leal	ETEMP(%a6),%a0		|dest is denorm: round the src
	movel	USER_FPCR(%a6),%d0
	andil	#0x30,%d0
	lsrl	#4,%d0			|put rmode in lower 2 bits
	movel	USER_FPCR(%a6),%d1
	andil	#0xc0,%d1
	lsrl	#6,%d1			|put precision in upper word
	swap	%d1
	orl	%d0,%d1			|set up for round call
	movel	#0x20000000,%d0		|set sticky for round
	bclrb	#sign_bit,ETEMP_EX(%a6)
	sne	ETEMP_SGN(%a6)
	bsrl	round			|round result to users rmode & prec
	bfclr	ETEMP_SGN(%a6){#0:#8}	|convert back to IEEE ext format
	beqs	add_s_dclr
	bsetb	#sign_bit,ETEMP_EX(%a6)
add_s_dclr:
	leal	WBTEMP(%a6),%a0
	movel	ETEMP(%a6),(%a0)	|write result to wbtemp
	movel	ETEMP_HI(%a6),4(%a0)
	movel	ETEMP_LO(%a6),8(%a0)
	tstw	ETEMP_EX(%a6)
	bgt	add_ckovf		|positive: N stays clear
	orl	#neg_mask,USER_FPSR(%a6)
	bra	add_ckovf
add_s_srcd:
	leal	FPTEMP(%a6),%a0		|src is denorm: round the dest
	movel	USER_FPCR(%a6),%d0
	andil	#0x30,%d0
	lsrl	#4,%d0			|put rmode in lower 2 bits
	movel	USER_FPCR(%a6),%d1
	andil	#0xc0,%d1
	lsrl	#6,%d1			|put precision in upper word
	swap	%d1
	orl	%d0,%d1			|set up for round call
	movel	#0x20000000,%d0		|set sticky for round
	bclrb	#sign_bit,FPTEMP_EX(%a6)
	sne	FPTEMP_SGN(%a6)
	bsrl	round			|round result to users rmode & prec
	bfclr	FPTEMP_SGN(%a6){#0:#8}	|convert back to IEEE ext format
	beqs	add_s_sclr
	bsetb	#sign_bit,FPTEMP_EX(%a6)
add_s_sclr:
	leal	WBTEMP(%a6),%a0
	movel	FPTEMP(%a6),(%a0)	|write result to wbtemp
	movel	FPTEMP_HI(%a6),4(%a0)
	movel	FPTEMP_LO(%a6),8(%a0)
	tstw	FPTEMP_EX(%a6)
	bgt	add_ckovf		|positive: N stays clear
	orl	#neg_mask,USER_FPSR(%a6)
add_ckovf:
	movew	WBTEMP_EX(%a6),%d0	|did rounding carry the exponent
	andiw	#0x7fff,%d0		|up to $7fff?
	cmpiw	#0x7fff,%d0
	bne	frcfpnr
|
| The result has overflowed to $7fff exponent.  Set I, ovfl,
| and aovfl, and clr the mantissa (incorrectly set by the
| round routine.)
|
	orl	#inf_mask+ovfl_inx_mask,USER_FPSR(%a6)
	clrl	4(%a0)			|a0 still points at wbtemp
	bra	frcfpnr
|
| Inst is fsub.
|
wrap_sub:
	cmpb	#0xff,DNRM_FLG(%a6)	|both operands denorm?
	beq	fix_stk			|yes: restore to fpu
|
| One of the ops is denormalized.  Test for wrap condition
| and complete the instruction.
|
	cmpb	#0x0f,DNRM_FLG(%a6)	|is the dest the denorm?
	bnes	sub_srcd
sub_destd:
	bsrl	ckinf_ns		|src must be a norm
	bne	fix_stk
	bfextu	ETEMP_EX(%a6){#1:#15},%d0	|src exp (always pos)
	bfexts	FPTEMP_EX(%a6){#1:#15},%d1	|dest exp (always neg)
	subl	%d1,%d0			|d0 = src exp - dest exp
	cmpl	#0x8000,%d0
	blt	fix_stk			|below threshold: not a wrap case
	bra	sub_wrap
sub_srcd:
	bsrl	ckinf_nd		|dest must be a norm
	bne	fix_stk
	bfextu	FPTEMP_EX(%a6){#1:#15},%d0	|dest exp (always pos)
	bfexts	ETEMP_EX(%a6){#1:#15},%d1	|src exp (always neg)
	subl	%d1,%d0			|d0 = dest exp - src exp
	cmpl	#0x8000,%d0
	blt	fix_stk			|below threshold: not a wrap case
|
| Check the signs of the operands.  If they are alike, the fpu
| can be used to subtract from the norm 1.0 with the sign of the
| denorm and it will correctly generate the result in extended
| precision.  We can then call round with no sticky and the result
| will be correct for the user's rounding mode and precision.  If
| the signs are unlike, we call round with the sticky bit set
| and the result will be correct for the user's rounding mode and
| precision.
|
sub_wrap:
	movew	ETEMP_EX(%a6),%d0	|compare operand signs
	movew	FPTEMP_EX(%a6),%d1
	eorw	%d1,%d0
	andiw	#0x8000,%d0
	bne	sub_diff
|
| The signs are alike.
|
	cmpb	#0x0f,DNRM_FLG(%a6)	|is dest the denorm?
	bnes	sub_u_srcd
	movew	FPTEMP_EX(%a6),%d0	|dest is the denorm:
	andiw	#0x8000,%d0		|keep its sign and
	orw	#0x3fff,%d0		|force the exponent to +/- 1
	movew	%d0,FPTEMP_EX(%a6)	|in the denorm
	movel	USER_FPCR(%a6),%d0
	andil	#0x30,%d0
	fmovel	%d0,%fpcr		|user's rmode, extended precision
	fmovex	FPTEMP(%a6),%fp0
	fsubx	ETEMP(%a6),%fp0		|fp0 = dest - src
	fmovel	%fpsr,%d1
	orl	%d1,USER_FPSR(%a6)	|capture cc's and inex from fsub
	leal	WBTEMP(%a6),%a0		|point a0 to wbtemp in frame
	fmovex	%fp0,WBTEMP(%a6)	|write result to memory
	lsrl	#4,%d0			|put rmode in lower 2 bits
	movel	USER_FPCR(%a6),%d1
	andil	#0xc0,%d1
	lsrl	#6,%d1			|put precision in upper word
	swap	%d1
	orl	%d0,%d1			|set up for round call
	clrl	%d0			|force sticky to zero
	bclrb	#sign_bit,WBTEMP_EX(%a6)	|split the sign out into
	sne	WBTEMP_SGN(%a6)		|the sign byte for round
	bsrl	round			|round result to users rmode & prec
	bfclr	WBTEMP_SGN(%a6){#0:#8}	|convert back to IEEE ext format
	beq	frcfpnr
	bsetb	#sign_bit,WBTEMP_EX(%a6)
	bra	frcfpnr
sub_u_srcd:
	movew	ETEMP_EX(%a6),%d0	|src is the denorm:
	andiw	#0x8000,%d0		|keep its sign and
	orw	#0x3fff,%d0		|force the exponent to +/- 1
	movew	%d0,ETEMP_EX(%a6)	|in the denorm
	movel	USER_FPCR(%a6),%d0
	andil	#0x30,%d0
	fmovel	%d0,%fpcr		|user's rmode, extended precision
	fmovex	FPTEMP(%a6),%fp0
	fsubx	ETEMP(%a6),%fp0		|fp0 = dest - src
	fmovel	%fpsr,%d1
	orl	%d1,USER_FPSR(%a6)	|capture cc's and inex from fsub
	leal	WBTEMP(%a6),%a0		|point a0 to wbtemp in frame
	fmovex	%fp0,WBTEMP(%a6)	|write result to memory
	lsrl	#4,%d0			|put rmode in lower 2 bits
	movel	USER_FPCR(%a6),%d1
	andil	#0xc0,%d1
	lsrl	#6,%d1			|put precision in upper word
	swap	%d1
	orl	%d0,%d1			|set up for round call
	clrl	%d0			|force sticky to zero
	bclrb	#sign_bit,WBTEMP_EX(%a6)
	sne	WBTEMP_SGN(%a6)
	bsrl	round			|round result to users rmode & prec
	bfclr	WBTEMP_SGN(%a6){#0:#8}	|convert back to IEEE ext format
	beq	frcfpnr
	bsetb	#sign_bit,WBTEMP_EX(%a6)
	bra	frcfpnr
|
| Signs are unlike:
|
sub_diff:
	cmpb	#0x0f,DNRM_FLG(%a6)	|is dest the denorm?
	bnes	sub_s_srcd
sub_s_destd:
	leal	ETEMP(%a6),%a0		|dest is denorm: round the src
	movel	USER_FPCR(%a6),%d0
	andil	#0x30,%d0
	lsrl	#4,%d0			|put rmode in lower 2 bits
	movel	USER_FPCR(%a6),%d1
	andil	#0xc0,%d1
	lsrl	#6,%d1			|put precision in upper word
	swap	%d1
	orl	%d0,%d1			|set up for round call
	movel	#0x20000000,%d0		|set sticky for round
|
| Since the dest is the denorm, the sign is the opposite of the
| norm sign.
|
	eoriw	#0x8000,ETEMP_EX(%a6)	|flip sign on result
	tstw	ETEMP_EX(%a6)
	bgts	sub_s_dwr		|positive: N stays clear
	orl	#neg_mask,USER_FPSR(%a6)
sub_s_dwr:
	bclrb	#sign_bit,ETEMP_EX(%a6)
	sne	ETEMP_SGN(%a6)
	bsrl	round			|round result to users rmode & prec
	bfclr	ETEMP_SGN(%a6){#0:#8}	|convert back to IEEE ext format
	beqs	sub_s_dclr
	bsetb	#sign_bit,ETEMP_EX(%a6)
sub_s_dclr:
	leal	WBTEMP(%a6),%a0
	movel	ETEMP(%a6),(%a0)	|write result to wbtemp
	movel	ETEMP_HI(%a6),4(%a0)
	movel	ETEMP_LO(%a6),8(%a0)
	bra	sub_ckovf
sub_s_srcd:
	leal	FPTEMP(%a6),%a0		|src is denorm: round the dest
	movel	USER_FPCR(%a6),%d0
	andil	#0x30,%d0
	lsrl	#4,%d0			|put rmode in lower 2 bits
	movel	USER_FPCR(%a6),%d1
	andil	#0xc0,%d1
	lsrl	#6,%d1			|put precision in upper word
	swap	%d1
	orl	%d0,%d1			|set up for round call
	movel	#0x20000000,%d0		|set sticky for round
	bclrb	#sign_bit,FPTEMP_EX(%a6)
	sne	FPTEMP_SGN(%a6)
	bsrl	round			|round result to users rmode & prec
	bfclr	FPTEMP_SGN(%a6){#0:#8}	|convert back to IEEE ext format
	beqs	sub_s_sclr
	bsetb	#sign_bit,FPTEMP_EX(%a6)
sub_s_sclr:
	leal	WBTEMP(%a6),%a0
	movel	FPTEMP(%a6),(%a0)	|write result to wbtemp
	movel	FPTEMP_HI(%a6),4(%a0)
	movel	FPTEMP_LO(%a6),8(%a0)
	tstw	FPTEMP_EX(%a6)
	bgt	sub_ckovf		|positive: N stays clear
	orl	#neg_mask,USER_FPSR(%a6)
sub_ckovf:
	movew	WBTEMP_EX(%a6),%d0	|did rounding carry the exponent
	andiw	#0x7fff,%d0		|up to $7fff?
	cmpiw	#0x7fff,%d0
	bne	frcfpnr
|
| The result has overflowed to $7fff exponent.  Set I, ovfl,
| and aovfl, and clr the mantissa (incorrectly set by the
| round routine.)
|
	orl	#inf_mask+ovfl_inx_mask,USER_FPSR(%a6)
	clrl	4(%a0)			|a0 still points at wbtemp
	bra	frcfpnr
|
| Inst is fcmp.
|
wrap_cmp:
	cmpb	#0xff,DNRM_FLG(%a6)	|both operands denorm?
	beq	fix_stk			|yes: restore to fpu
|
| One of the ops is denormalized.  Test for wrap condition
| and complete the instruction.
|
	cmpb	#0x0f,DNRM_FLG(%a6)	|is the dest the denorm?
	bnes	cmp_srcd
cmp_destd:
	bsrl	ckinf_ns		|src must be a norm
	bne	fix_stk
	bfextu	ETEMP_EX(%a6){#1:#15},%d0	|src exp (always pos)
	bfexts	FPTEMP_EX(%a6){#1:#15},%d1	|dest exp (always neg)
	subl	%d1,%d0			|d0 = src exp - dest exp
	cmpl	#0x8000,%d0
	blt	fix_stk			|below threshold: not a wrap case
	tstw	ETEMP_EX(%a6)		|set N to ~sign_of(src)
	bge	cmp_setn		|src positive: result is negative
	rts
cmp_srcd:
	bsrl	ckinf_nd		|dest must be a norm
	bne	fix_stk
	bfextu	FPTEMP_EX(%a6){#1:#15},%d0	|dest exp (always pos)
	bfexts	ETEMP_EX(%a6){#1:#15},%d1	|src exp (always neg)
	subl	%d1,%d0			|d0 = dest exp - src exp
	cmpl	#0x8000,%d0
	blt	fix_stk			|below threshold: not a wrap case
	tstw	FPTEMP_EX(%a6)		|set N to sign_of(dest)
	blt	cmp_setn
	rts
cmp_setn:
	orl	#neg_mask,USER_FPSR(%a6)
	rts

|
| Inst is fmul.
|
wrap_mul:
	cmpb	#0xff,DNRM_FLG(%a6)	|both operands denorm?
	beq	force_unf		|force an underflow (really!)
|
| One of the ops is denormalized.  Test for wrap condition
| and complete the instruction.
|
	cmpb	#0x0f,DNRM_FLG(%a6)	|is the dest the denorm?
	bnes	mul_srcd
mul_destd:
	bsrl	ckinf_ns		|src must be a norm
	bne	fix_stk
	bfextu	ETEMP_EX(%a6){#1:#15},%d0	|src exp (always pos)
	bfexts	FPTEMP_EX(%a6){#1:#15},%d1	|dest exp (always neg)
	addl	%d1,%d0			|d0 = src exp + dest exp
	bgt	fix_stk			|sum > 0: let the fpu do it
	bra	force_unf
mul_srcd:
	bsrl	ckinf_nd		|dest must be a norm
	bne	fix_stk
	bfextu	FPTEMP_EX(%a6){#1:#15},%d0	|dest exp (always pos)
	bfexts	ETEMP_EX(%a6){#1:#15},%d1	|src exp (always neg)
	addl	%d1,%d0			|d0 = dest exp + src exp
	bgt	fix_stk			|sum > 0: let the fpu do it
|					;else fall into force_unf

|
| This code handles the case of the instruction resulting in
| an underflow condition.
|
force_unf:
	bclrb	#E1,E_BYTE(%a6)
	orl	#unfinx_mask,USER_FPSR(%a6)
	clrw	NMNEXC(%a6)
	clrb	WBTEMP_SGN(%a6)		|assume a positive result
	movew	ETEMP_EX(%a6),%d0	|result sign = src sign ^ dest sign
	movew	FPTEMP_EX(%a6),%d1
	eorw	%d1,%d0
	andiw	#0x8000,%d0
	beqs	frcunfcont
	st	WBTEMP_SGN(%a6)		|signs differ: negative result
frcunfcont:
	lea	WBTEMP(%a6),%a0		|a0 -> result slot in the frame
|
| Pick the rounding precision handed to unf_sub in d0: taken from the
| opcode when the instruction forces it, else from the user's FPCR.
|
	movew	CMDREG1B(%a6),%d0
	btstl	#6,%d0			|forced-precision opcode?
	beqs	frcunf_fpcr
	btstl	#2,%d0			|forced double?
	bnes	frcunf_dbl
	movel	#0x1,%d0		|forced single
	bras	frcunf_rnd
frcunf_dbl:
	movel	#0x2,%d0		|forced double
	bras	frcunf_rnd
frcunf_fpcr:
	bfextu	FPCR_MODE(%a6){#0:#2},%d0	|not forced - use fpcr prec
frcunf_rnd:
	bsrl	unf_sub			|get correct result based on
|					;round precision/mode.  This
|					;sets FPSR_CC correctly
	bfclr	WBTEMP_SGN(%a6){#0:#8}	|convert back to IEEE ext format
	beqs	frcfpn
	bsetb	#sign_bit,WBTEMP_EX(%a6)
	bra	frcfpn

|
| Write the result to the user's fpn.  All results must be HUGE to be
| written; otherwise the results would have overflowed or underflowed.
| If the rounding precision is single or double, the ovf_res routine
| is needed to correctly supply the max value.
|
frcfpnr:
	movew	CMDREG1B(%a6),%d0
	btstl	#6,%d0			|forced-precision opcode?
	beqs	frcfpn_fpcr
	btstl	#2,%d0			|forced double?
	bnes	frcfpn_dbl
	movel	#0x1,%d0		|forced single
	bras	frcfpn_rnd
frcfpn_dbl:
	movel	#0x2,%d0		|forced double
	bras	frcfpn_rnd
frcfpn_fpcr:
	bfextu	FPCR_MODE(%a6){#0:#2},%d0	|not forced - use fpcr prec
	tstb	%d0
	beqs	frcfpn			|if extended, write what you got
frcfpn_rnd:
	bclrb	#sign_bit,WBTEMP_EX(%a6)	|split the sign out into
	sne	WBTEMP_SGN(%a6)		|the sign byte for ovf_res
	bsrl	ovf_res			|get correct result based on
|					;round precision/mode.  This
|					;sets FPSR_CC correctly
	bfclr	WBTEMP_SGN(%a6){#0:#8}	|convert back to IEEE ext format
	beqs	frcfpn_clr
	bsetb	#sign_bit,WBTEMP_EX(%a6)
frcfpn_clr:
	orl	#ovfinx_mask,USER_FPSR(%a6)
|
| Perform the write.
|
frcfpn:
	bfextu	CMDREG1B(%a6){#6:#3},%d0	|extract fp destination register
	cmpib	#3,%d0
	bles	frc0123			|fp0-fp3 are written to the frame
|
| fp4-fp7: build a one-bit dynamic register list, bit (7 - n) for fpn,
| and load the register directly from wbtemp.
|
	movel	#7,%d1
	subl	%d0,%d1
	clrl	%d0
	bsetl	%d1,%d0
	fmovemx	WBTEMP(%a6),%d0
	rts
|
| fp0-fp3: copy wbtemp into the matching USER_FPn slot of the frame
| (presumably reloaded into the register on exit -- confirm).
|
frc0123:
	cmpib	#0,%d0
	beqs	frc0_dst
	cmpib	#1,%d0
	beqs	frc1_dst
	cmpib	#2,%d0
	beqs	frc2_dst
frc3_dst:
	movel	WBTEMP_EX(%a6),USER_FP3(%a6)
	movel	WBTEMP_HI(%a6),USER_FP3+4(%a6)
	movel	WBTEMP_LO(%a6),USER_FP3+8(%a6)
	rts
frc2_dst:
	movel	WBTEMP_EX(%a6),USER_FP2(%a6)
	movel	WBTEMP_HI(%a6),USER_FP2+4(%a6)
	movel	WBTEMP_LO(%a6),USER_FP2+8(%a6)
	rts
frc1_dst:
	movel	WBTEMP_EX(%a6),USER_FP1(%a6)
	movel	WBTEMP_HI(%a6),USER_FP1+4(%a6)
	movel	WBTEMP_LO(%a6),USER_FP1+8(%a6)
	rts
frc0_dst:
	movel	WBTEMP_EX(%a6),USER_FP0(%a6)
	movel	WBTEMP_HI(%a6),USER_FP0+4(%a6)
	movel	WBTEMP_LO(%a6),USER_FP0+8(%a6)
	rts

|
| Write etemp to fpn.
| A check is made on enabled and signalled snan exceptions,
| and the destination is not overwritten if this condition exists.
| This code is designed to make fmoveins of unsupported data types
| faster.
|
wr_etemp:
	btstb	#snan_bit,FPSR_EXCEPT(%a6)	|snan signalled?
	beqs	fmoveinc		|no: complete the move
	btstb	#snan_bit,FPCR_ENABLE(%a6)	|snan enabled?
	beqs	fmoveinc		|no: complete the move
|
| snan is both signalled and enabled: leave the destination alone.
|
	movel	ETEMP_EX(%a6),FPTEMP_EX(%a6)	|set up fptemp sign for
|						;snan handler
	tstb	ETEMP(%a6)		|check for negative
	blts	snan_neg
	rts
snan_neg:
| N is set with the longword mask, as everywhere else in this file.
| The earlier "orl #neg_bit" ORed in what appears to be a bit number
| (the *_bit names are otherwise only used with btst/bset/bclr), so N
| was never set.  NOTE(review): confirm the values in fpsp.h.
	orl	#neg_mask,USER_FPSR(%a6)	|snan is negative; set N
	rts
|
| fmoveinc: complete the move.  Set the FPSR condition codes that the
| source tag calls for, then write etemp to the destination register.
| NOTE(review): a0 is not loaded in this routine; the sign tests below
| presumably rely on the caller leaving a0 pointing at the source
| operand -- confirm.
|
fmoveinc:
	clrw	NMNEXC(%a6)
	bclrb	#E1,E_BYTE(%a6)
	moveb	STAG(%a6),%d0		|check if stag is inf
	andib	#0xe0,%d0
	cmpib	#0x40,%d0
	bnes	fminc_cnan
	orl	#inf_mask,USER_FPSR(%a6)	|if inf, nothing yet has set I
	tstw	LOCAL_EX(%a0)		|check sign
	bges	fminc_con
	orl	#neg_mask,USER_FPSR(%a6)
	bra	fminc_con
fminc_cnan:
	cmpib	#0x60,%d0		|check if stag is NaN
	bnes	fminc_czero
	orl	#nan_mask,USER_FPSR(%a6)	|if nan, nothing yet has set NaN
	movel	ETEMP_EX(%a6),FPTEMP_EX(%a6)	|set up fptemp sign for
|						;snan handler
	tstw	LOCAL_EX(%a0)		|check sign
	bges	fminc_con
	orl	#neg_mask,USER_FPSR(%a6)
	bra	fminc_con
fminc_czero:
	cmpib	#0x20,%d0		|check if zero
	bnes	fminc_con
	orl	#z_mask,USER_FPSR(%a6)	|if zero, set Z
	tstw	LOCAL_EX(%a0)		|check sign
	bges	fminc_con
	orl	#neg_mask,USER_FPSR(%a6)
fminc_con:
	bfextu	CMDREG1B(%a6){#6:#3},%d0	|extract fp destination register
	cmpib	#3,%d0
	bles	fp0123			|fp0-fp3 are written to the frame
|
| fp4-fp7: build a one-bit dynamic register list, bit (7 - n) for fpn,
| and load the register directly from etemp.
|
	movel	#7,%d1
	subl	%d0,%d1
	clrl	%d0
	bsetl	%d1,%d0
	fmovemx	ETEMP(%a6),%d0
	rts

|
| fp0-fp3: copy etemp into the matching USER_FPn slot of the frame.
|
fp0123:
	cmpib	#0,%d0
	beqs	fp0_dst
	cmpib	#1,%d0
	beqs	fp1_dst
	cmpib	#2,%d0
	beqs	fp2_dst
fp3_dst:
	movel	ETEMP_EX(%a6),USER_FP3(%a6)
	movel	ETEMP_HI(%a6),USER_FP3+4(%a6)
	movel	ETEMP_LO(%a6),USER_FP3+8(%a6)
	rts
fp2_dst:
	movel	ETEMP_EX(%a6),USER_FP2(%a6)
	movel	ETEMP_HI(%a6),USER_FP2+4(%a6)
	movel	ETEMP_LO(%a6),USER_FP2+8(%a6)
	rts
fp1_dst:
	movel	ETEMP_EX(%a6),USER_FP1(%a6)
	movel	ETEMP_HI(%a6),USER_FP1+4(%a6)
	movel	ETEMP_LO(%a6),USER_FP1+8(%a6)
	rts
fp0_dst:
	movel	ETEMP_EX(%a6),USER_FP0(%a6)
	movel	ETEMP_HI(%a6),USER_FP0+4(%a6)
	movel	ETEMP_LO(%a6),USER_FP0+8(%a6)
	rts

opclass3:
	st	CU_ONLY(%a6)
movew CMDREG1B(%a6),%d0 |check if packed moveout 1418*4882a593Smuzhiyun andiw #0x0c00,%d0 |isolate last 2 bits of size field 1419*4882a593Smuzhiyun cmpiw #0x0c00,%d0 |if size is 011 or 111, it is packed 1420*4882a593Smuzhiyun beq pack_out |else it is norm or denorm 1421*4882a593Smuzhiyun bra mv_out 1422*4882a593Smuzhiyun 1423*4882a593Smuzhiyun 1424*4882a593Smuzhiyun| 1425*4882a593Smuzhiyun| MOVE OUT 1426*4882a593Smuzhiyun| 1427*4882a593Smuzhiyun 1428*4882a593Smuzhiyunmv_tbl: 1429*4882a593Smuzhiyun .long li 1430*4882a593Smuzhiyun .long sgp 1431*4882a593Smuzhiyun .long xp 1432*4882a593Smuzhiyun .long mvout_end |should never be taken 1433*4882a593Smuzhiyun .long wi 1434*4882a593Smuzhiyun .long dp 1435*4882a593Smuzhiyun .long bi 1436*4882a593Smuzhiyun .long mvout_end |should never be taken 1437*4882a593Smuzhiyunmv_out: 1438*4882a593Smuzhiyun bfextu CMDREG1B(%a6){#3:#3},%d1 |put source specifier in d1 1439*4882a593Smuzhiyun leal mv_tbl,%a0 1440*4882a593Smuzhiyun movel %a0@(%d1:l:4),%a0 1441*4882a593Smuzhiyun jmp (%a0) 1442*4882a593Smuzhiyun 1443*4882a593Smuzhiyun| 1444*4882a593Smuzhiyun| This exit is for move-out to memory. The aunfl bit is 1445*4882a593Smuzhiyun| set if the result is inex and unfl is signalled. 1446*4882a593Smuzhiyun| 1447*4882a593Smuzhiyunmvout_end: 1448*4882a593Smuzhiyun btstb #inex2_bit,FPSR_EXCEPT(%a6) 1449*4882a593Smuzhiyun beqs no_aufl 1450*4882a593Smuzhiyun btstb #unfl_bit,FPSR_EXCEPT(%a6) 1451*4882a593Smuzhiyun beqs no_aufl 1452*4882a593Smuzhiyun bsetb #aunfl_bit,FPSR_AEXCEPT(%a6) 1453*4882a593Smuzhiyunno_aufl: 1454*4882a593Smuzhiyun clrw NMNEXC(%a6) 1455*4882a593Smuzhiyun bclrb #E1,E_BYTE(%a6) 1456*4882a593Smuzhiyun fmovel #0,%FPSR |clear any cc bits from res_func 1457*4882a593Smuzhiyun| 1458*4882a593Smuzhiyun| Return ETEMP to extended format from internal extended format so 1459*4882a593Smuzhiyun| that gen_except will have a correctly signed value for ovfl/unfl 1460*4882a593Smuzhiyun| handlers. 
1461*4882a593Smuzhiyun| 1462*4882a593Smuzhiyun bfclr ETEMP_SGN(%a6){#0:#8} 1463*4882a593Smuzhiyun beqs mvout_con 1464*4882a593Smuzhiyun bsetb #sign_bit,ETEMP_EX(%a6) 1465*4882a593Smuzhiyunmvout_con: 1466*4882a593Smuzhiyun rts 1467*4882a593Smuzhiyun| 1468*4882a593Smuzhiyun| This exit is for move-out to int register. The aunfl bit is 1469*4882a593Smuzhiyun| not set in any case for this move. 1470*4882a593Smuzhiyun| 1471*4882a593Smuzhiyunmvouti_end: 1472*4882a593Smuzhiyun clrw NMNEXC(%a6) 1473*4882a593Smuzhiyun bclrb #E1,E_BYTE(%a6) 1474*4882a593Smuzhiyun fmovel #0,%FPSR |clear any cc bits from res_func 1475*4882a593Smuzhiyun| 1476*4882a593Smuzhiyun| Return ETEMP to extended format from internal extended format so 1477*4882a593Smuzhiyun| that gen_except will have a correctly signed value for ovfl/unfl 1478*4882a593Smuzhiyun| handlers. 1479*4882a593Smuzhiyun| 1480*4882a593Smuzhiyun bfclr ETEMP_SGN(%a6){#0:#8} 1481*4882a593Smuzhiyun beqs mvouti_con 1482*4882a593Smuzhiyun bsetb #sign_bit,ETEMP_EX(%a6) 1483*4882a593Smuzhiyunmvouti_con: 1484*4882a593Smuzhiyun rts 1485*4882a593Smuzhiyun| 1486*4882a593Smuzhiyun| li is used to handle a long integer source specifier 1487*4882a593Smuzhiyun| 1488*4882a593Smuzhiyun 1489*4882a593Smuzhiyunli: 1490*4882a593Smuzhiyun moveql #4,%d0 |set byte count 1491*4882a593Smuzhiyun 1492*4882a593Smuzhiyun btstb #7,STAG(%a6) |check for extended denorm 1493*4882a593Smuzhiyun bne int_dnrm |if so, branch 1494*4882a593Smuzhiyun 1495*4882a593Smuzhiyun fmovemx ETEMP(%a6),%fp0-%fp0 1496*4882a593Smuzhiyun fcmpd #0x41dfffffffc00000,%fp0 1497*4882a593Smuzhiyun| 41dfffffffc00000 in dbl prec = 401d0000fffffffe00000000 in ext prec 1498*4882a593Smuzhiyun fbge lo_plrg 1499*4882a593Smuzhiyun fcmpd #0xc1e0000000000000,%fp0 1500*4882a593Smuzhiyun| c1e0000000000000 in dbl prec = c01e00008000000000000000 in ext prec 1501*4882a593Smuzhiyun fble lo_nlrg 1502*4882a593Smuzhiyun| 1503*4882a593Smuzhiyun| at this point, the answer is between the largest pos and neg values 
1504*4882a593Smuzhiyun| 1505*4882a593Smuzhiyun movel USER_FPCR(%a6),%d1 |use user's rounding mode 1506*4882a593Smuzhiyun andil #0x30,%d1 1507*4882a593Smuzhiyun fmovel %d1,%fpcr 1508*4882a593Smuzhiyun fmovel %fp0,L_SCR1(%a6) |let the 040 perform conversion 1509*4882a593Smuzhiyun fmovel %fpsr,%d1 1510*4882a593Smuzhiyun orl %d1,USER_FPSR(%a6) |capture inex2/ainex if set 1511*4882a593Smuzhiyun bra int_wrt 1512*4882a593Smuzhiyun 1513*4882a593Smuzhiyun 1514*4882a593Smuzhiyunlo_plrg: 1515*4882a593Smuzhiyun movel #0x7fffffff,L_SCR1(%a6) |answer is largest positive int 1516*4882a593Smuzhiyun fbeq int_wrt |exact answer 1517*4882a593Smuzhiyun fcmpd #0x41dfffffffe00000,%fp0 1518*4882a593Smuzhiyun| 41dfffffffe00000 in dbl prec = 401d0000ffffffff00000000 in ext prec 1519*4882a593Smuzhiyun fbge int_operr |set operr 1520*4882a593Smuzhiyun bra int_inx |set inexact 1521*4882a593Smuzhiyun 1522*4882a593Smuzhiyunlo_nlrg: 1523*4882a593Smuzhiyun movel #0x80000000,L_SCR1(%a6) 1524*4882a593Smuzhiyun fbeq int_wrt |exact answer 1525*4882a593Smuzhiyun fcmpd #0xc1e0000000100000,%fp0 1526*4882a593Smuzhiyun| c1e0000000100000 in dbl prec = c01e00008000000080000000 in ext prec 1527*4882a593Smuzhiyun fblt int_operr |set operr 1528*4882a593Smuzhiyun bra int_inx |set inexact 1529*4882a593Smuzhiyun 1530*4882a593Smuzhiyun| 1531*4882a593Smuzhiyun| wi is used to handle a word integer source specifier 1532*4882a593Smuzhiyun| 1533*4882a593Smuzhiyun 1534*4882a593Smuzhiyunwi: 1535*4882a593Smuzhiyun moveql #2,%d0 |set byte count 1536*4882a593Smuzhiyun 1537*4882a593Smuzhiyun btstb #7,STAG(%a6) |check for extended denorm 1538*4882a593Smuzhiyun bne int_dnrm |branch if so 1539*4882a593Smuzhiyun 1540*4882a593Smuzhiyun fmovemx ETEMP(%a6),%fp0-%fp0 1541*4882a593Smuzhiyun fcmps #0x46fffe00,%fp0 1542*4882a593Smuzhiyun| 46fffe00 in sgl prec = 400d0000fffe000000000000 in ext prec 1543*4882a593Smuzhiyun fbge wo_plrg 1544*4882a593Smuzhiyun fcmps #0xc7000000,%fp0 1545*4882a593Smuzhiyun| c7000000 in sgl prec = 
c00e00008000000000000000 in ext prec 1546*4882a593Smuzhiyun fble wo_nlrg 1547*4882a593Smuzhiyun 1548*4882a593Smuzhiyun| 1549*4882a593Smuzhiyun| at this point, the answer is between the largest pos and neg values 1550*4882a593Smuzhiyun| 1551*4882a593Smuzhiyun movel USER_FPCR(%a6),%d1 |use user's rounding mode 1552*4882a593Smuzhiyun andil #0x30,%d1 1553*4882a593Smuzhiyun fmovel %d1,%fpcr 1554*4882a593Smuzhiyun fmovew %fp0,L_SCR1(%a6) |let the 040 perform conversion 1555*4882a593Smuzhiyun fmovel %fpsr,%d1 1556*4882a593Smuzhiyun orl %d1,USER_FPSR(%a6) |capture inex2/ainex if set 1557*4882a593Smuzhiyun bra int_wrt 1558*4882a593Smuzhiyun 1559*4882a593Smuzhiyunwo_plrg: 1560*4882a593Smuzhiyun movew #0x7fff,L_SCR1(%a6) |answer is largest positive int 1561*4882a593Smuzhiyun fbeq int_wrt |exact answer 1562*4882a593Smuzhiyun fcmps #0x46ffff00,%fp0 1563*4882a593Smuzhiyun| 46ffff00 in sgl prec = 400d0000ffff000000000000 in ext prec 1564*4882a593Smuzhiyun fbge int_operr |set operr 1565*4882a593Smuzhiyun bra int_inx |set inexact 1566*4882a593Smuzhiyun 1567*4882a593Smuzhiyunwo_nlrg: 1568*4882a593Smuzhiyun movew #0x8000,L_SCR1(%a6) 1569*4882a593Smuzhiyun fbeq int_wrt |exact answer 1570*4882a593Smuzhiyun fcmps #0xc7000080,%fp0 1571*4882a593Smuzhiyun| c7000080 in sgl prec = c00e00008000800000000000 in ext prec 1572*4882a593Smuzhiyun fblt int_operr |set operr 1573*4882a593Smuzhiyun bra int_inx |set inexact 1574*4882a593Smuzhiyun 1575*4882a593Smuzhiyun| 1576*4882a593Smuzhiyun| bi is used to handle a byte integer source specifier 1577*4882a593Smuzhiyun| 1578*4882a593Smuzhiyun 1579*4882a593Smuzhiyunbi: 1580*4882a593Smuzhiyun moveql #1,%d0 |set byte count 1581*4882a593Smuzhiyun 1582*4882a593Smuzhiyun btstb #7,STAG(%a6) |check for extended denorm 1583*4882a593Smuzhiyun bne int_dnrm |branch if so 1584*4882a593Smuzhiyun 1585*4882a593Smuzhiyun fmovemx ETEMP(%a6),%fp0-%fp0 1586*4882a593Smuzhiyun fcmps #0x42fe0000,%fp0 1587*4882a593Smuzhiyun| 42fe0000 in sgl prec = 40050000fe00000000000000 in 
ext prec 1588*4882a593Smuzhiyun fbge by_plrg 1589*4882a593Smuzhiyun fcmps #0xc3000000,%fp0 1590*4882a593Smuzhiyun| c3000000 in sgl prec = c00600008000000000000000 in ext prec 1591*4882a593Smuzhiyun fble by_nlrg 1592*4882a593Smuzhiyun 1593*4882a593Smuzhiyun| 1594*4882a593Smuzhiyun| at this point, the answer is between the largest pos and neg values 1595*4882a593Smuzhiyun| 1596*4882a593Smuzhiyun movel USER_FPCR(%a6),%d1 |use user's rounding mode 1597*4882a593Smuzhiyun andil #0x30,%d1 1598*4882a593Smuzhiyun fmovel %d1,%fpcr 1599*4882a593Smuzhiyun fmoveb %fp0,L_SCR1(%a6) |let the 040 perform conversion 1600*4882a593Smuzhiyun fmovel %fpsr,%d1 1601*4882a593Smuzhiyun orl %d1,USER_FPSR(%a6) |capture inex2/ainex if set 1602*4882a593Smuzhiyun bra int_wrt 1603*4882a593Smuzhiyun 1604*4882a593Smuzhiyunby_plrg: 1605*4882a593Smuzhiyun moveb #0x7f,L_SCR1(%a6) |answer is largest positive int 1606*4882a593Smuzhiyun fbeq int_wrt |exact answer 1607*4882a593Smuzhiyun fcmps #0x42ff0000,%fp0 1608*4882a593Smuzhiyun| 42ff0000 in sgl prec = 40050000ff00000000000000 in ext prec 1609*4882a593Smuzhiyun fbge int_operr |set operr 1610*4882a593Smuzhiyun bra int_inx |set inexact 1611*4882a593Smuzhiyun 1612*4882a593Smuzhiyunby_nlrg: 1613*4882a593Smuzhiyun moveb #0x80,L_SCR1(%a6) 1614*4882a593Smuzhiyun fbeq int_wrt |exact answer 1615*4882a593Smuzhiyun fcmps #0xc3008000,%fp0 1616*4882a593Smuzhiyun| c3008000 in sgl prec = c00600008080000000000000 in ext prec 1617*4882a593Smuzhiyun fblt int_operr |set operr 1618*4882a593Smuzhiyun bra int_inx |set inexact 1619*4882a593Smuzhiyun 1620*4882a593Smuzhiyun| 1621*4882a593Smuzhiyun| Common integer routines 1622*4882a593Smuzhiyun| 1623*4882a593Smuzhiyun| int_drnrm---account for possible nonzero result for round up with positive 1624*4882a593Smuzhiyun| operand and round down for negative answer. In the first case (result = 1) 1625*4882a593Smuzhiyun| byte-width (store in d0) of result must be honored. 
In the second case, 1626*4882a593Smuzhiyun| -1 in L_SCR1(a6) will cover all contingencies (FMOVE.B/W/L out). 1627*4882a593Smuzhiyun 1628*4882a593Smuzhiyunint_dnrm: 1629*4882a593Smuzhiyun movel #0,L_SCR1(%a6) | initialize result to 0 1630*4882a593Smuzhiyun bfextu FPCR_MODE(%a6){#2:#2},%d1 | d1 is the rounding mode 1631*4882a593Smuzhiyun cmpb #2,%d1 1632*4882a593Smuzhiyun bmis int_inx | if RN or RZ, done 1633*4882a593Smuzhiyun bnes int_rp | if RP, continue below 1634*4882a593Smuzhiyun tstw ETEMP(%a6) | RM: store -1 in L_SCR1 if src is negative 1635*4882a593Smuzhiyun bpls int_inx | otherwise result is 0 1636*4882a593Smuzhiyun movel #-1,L_SCR1(%a6) 1637*4882a593Smuzhiyun bras int_inx 1638*4882a593Smuzhiyunint_rp: 1639*4882a593Smuzhiyun tstw ETEMP(%a6) | RP: store +1 of proper width in L_SCR1 if 1640*4882a593Smuzhiyun| ; source is greater than 0 1641*4882a593Smuzhiyun bmis int_inx | otherwise, result is 0 1642*4882a593Smuzhiyun lea L_SCR1(%a6),%a1 | a1 is address of L_SCR1 1643*4882a593Smuzhiyun addal %d0,%a1 | offset by destination width -1 1644*4882a593Smuzhiyun subal #1,%a1 1645*4882a593Smuzhiyun bsetb #0,(%a1) | set low bit at a1 address 1646*4882a593Smuzhiyunint_inx: 1647*4882a593Smuzhiyun oril #inx2a_mask,USER_FPSR(%a6) 1648*4882a593Smuzhiyun bras int_wrt 1649*4882a593Smuzhiyunint_operr: 1650*4882a593Smuzhiyun fmovemx %fp0-%fp0,FPTEMP(%a6) |FPTEMP must contain the extended 1651*4882a593Smuzhiyun| ;precision source that needs to be 1652*4882a593Smuzhiyun| ;converted to integer this is required 1653*4882a593Smuzhiyun| ;if the operr exception is enabled. 
1654*4882a593Smuzhiyun| ;set operr/aiop (no inex2 on int ovfl) 1655*4882a593Smuzhiyun 1656*4882a593Smuzhiyun oril #opaop_mask,USER_FPSR(%a6) 1657*4882a593Smuzhiyun| ;fall through to perform int_wrt 1658*4882a593Smuzhiyunint_wrt: 1659*4882a593Smuzhiyun movel EXC_EA(%a6),%a1 |load destination address 1660*4882a593Smuzhiyun tstl %a1 |check to see if it is a dest register 1661*4882a593Smuzhiyun beqs wrt_dn |write data register 1662*4882a593Smuzhiyun lea L_SCR1(%a6),%a0 |point to supervisor source address 1663*4882a593Smuzhiyun bsrl mem_write 1664*4882a593Smuzhiyun bra mvouti_end 1665*4882a593Smuzhiyun 1666*4882a593Smuzhiyunwrt_dn: 1667*4882a593Smuzhiyun movel %d0,-(%sp) |d0 currently contains the size to write 1668*4882a593Smuzhiyun bsrl get_fline |get_fline returns Dn in d0 1669*4882a593Smuzhiyun andiw #0x7,%d0 |isolate register 1670*4882a593Smuzhiyun movel (%sp)+,%d1 |get size 1671*4882a593Smuzhiyun cmpil #4,%d1 |most frequent case 1672*4882a593Smuzhiyun beqs sz_long 1673*4882a593Smuzhiyun cmpil #2,%d1 1674*4882a593Smuzhiyun bnes sz_con 1675*4882a593Smuzhiyun orl #8,%d0 |add 'word' size to register# 1676*4882a593Smuzhiyun bras sz_con 1677*4882a593Smuzhiyunsz_long: 1678*4882a593Smuzhiyun orl #0x10,%d0 |add 'long' size to register# 1679*4882a593Smuzhiyunsz_con: 1680*4882a593Smuzhiyun movel %d0,%d1 |reg_dest expects size:reg in d1 1681*4882a593Smuzhiyun bsrl reg_dest |load proper data register 1682*4882a593Smuzhiyun bra mvouti_end 1683*4882a593Smuzhiyunxp: 1684*4882a593Smuzhiyun lea ETEMP(%a6),%a0 1685*4882a593Smuzhiyun bclrb #sign_bit,LOCAL_EX(%a0) 1686*4882a593Smuzhiyun sne LOCAL_SGN(%a0) 1687*4882a593Smuzhiyun btstb #7,STAG(%a6) |check for extended denorm 1688*4882a593Smuzhiyun bne xdnrm 1689*4882a593Smuzhiyun clrl %d0 1690*4882a593Smuzhiyun bras do_fp |do normal case 1691*4882a593Smuzhiyunsgp: 1692*4882a593Smuzhiyun lea ETEMP(%a6),%a0 1693*4882a593Smuzhiyun bclrb #sign_bit,LOCAL_EX(%a0) 1694*4882a593Smuzhiyun sne LOCAL_SGN(%a0) 1695*4882a593Smuzhiyun btstb 
#7,STAG(%a6) |check for extended denorm 1696*4882a593Smuzhiyun bne sp_catas |branch if so 1697*4882a593Smuzhiyun movew LOCAL_EX(%a0),%d0 1698*4882a593Smuzhiyun lea sp_bnds,%a1 1699*4882a593Smuzhiyun cmpw (%a1),%d0 1700*4882a593Smuzhiyun blt sp_under 1701*4882a593Smuzhiyun cmpw 2(%a1),%d0 1702*4882a593Smuzhiyun bgt sp_over 1703*4882a593Smuzhiyun movel #1,%d0 |set destination format to single 1704*4882a593Smuzhiyun bras do_fp |do normal case 1705*4882a593Smuzhiyundp: 1706*4882a593Smuzhiyun lea ETEMP(%a6),%a0 1707*4882a593Smuzhiyun bclrb #sign_bit,LOCAL_EX(%a0) 1708*4882a593Smuzhiyun sne LOCAL_SGN(%a0) 1709*4882a593Smuzhiyun 1710*4882a593Smuzhiyun btstb #7,STAG(%a6) |check for extended denorm 1711*4882a593Smuzhiyun bne dp_catas |branch if so 1712*4882a593Smuzhiyun 1713*4882a593Smuzhiyun movew LOCAL_EX(%a0),%d0 1714*4882a593Smuzhiyun lea dp_bnds,%a1 1715*4882a593Smuzhiyun 1716*4882a593Smuzhiyun cmpw (%a1),%d0 1717*4882a593Smuzhiyun blt dp_under 1718*4882a593Smuzhiyun cmpw 2(%a1),%d0 1719*4882a593Smuzhiyun bgt dp_over 1720*4882a593Smuzhiyun 1721*4882a593Smuzhiyun movel #2,%d0 |set destination format to double 1722*4882a593Smuzhiyun| ;fall through to do_fp 1723*4882a593Smuzhiyun| 1724*4882a593Smuzhiyundo_fp: 1725*4882a593Smuzhiyun bfextu FPCR_MODE(%a6){#2:#2},%d1 |rnd mode in d1 1726*4882a593Smuzhiyun swap %d0 |rnd prec in upper word 1727*4882a593Smuzhiyun addl %d0,%d1 |d1 has PREC/MODE info 1728*4882a593Smuzhiyun 1729*4882a593Smuzhiyun clrl %d0 |clear g,r,s 1730*4882a593Smuzhiyun 1731*4882a593Smuzhiyun bsrl round |round 1732*4882a593Smuzhiyun 1733*4882a593Smuzhiyun movel %a0,%a1 1734*4882a593Smuzhiyun movel EXC_EA(%a6),%a0 1735*4882a593Smuzhiyun 1736*4882a593Smuzhiyun bfextu CMDREG1B(%a6){#3:#3},%d1 |extract destination format 1737*4882a593Smuzhiyun| ;at this point only the dest 1738*4882a593Smuzhiyun| ;formats sgl, dbl, ext are 1739*4882a593Smuzhiyun| ;possible 1740*4882a593Smuzhiyun cmpb #2,%d1 1741*4882a593Smuzhiyun bgts ddbl |double=5, extended=2, single=1 
1742*4882a593Smuzhiyun bnes dsgl 1743*4882a593Smuzhiyun| ;fall through to dext 1744*4882a593Smuzhiyundext: 1745*4882a593Smuzhiyun bsrl dest_ext 1746*4882a593Smuzhiyun bra mvout_end 1747*4882a593Smuzhiyundsgl: 1748*4882a593Smuzhiyun bsrl dest_sgl 1749*4882a593Smuzhiyun bra mvout_end 1750*4882a593Smuzhiyunddbl: 1751*4882a593Smuzhiyun bsrl dest_dbl 1752*4882a593Smuzhiyun bra mvout_end 1753*4882a593Smuzhiyun 1754*4882a593Smuzhiyun| 1755*4882a593Smuzhiyun| Handle possible denorm or catastrophic underflow cases here 1756*4882a593Smuzhiyun| 1757*4882a593Smuzhiyunxdnrm: 1758*4882a593Smuzhiyun bsr set_xop |initialize WBTEMP 1759*4882a593Smuzhiyun bsetb #wbtemp15_bit,WB_BYTE(%a6) |set wbtemp15 1760*4882a593Smuzhiyun 1761*4882a593Smuzhiyun movel %a0,%a1 1762*4882a593Smuzhiyun movel EXC_EA(%a6),%a0 |a0 has the destination pointer 1763*4882a593Smuzhiyun bsrl dest_ext |store to memory 1764*4882a593Smuzhiyun bsetb #unfl_bit,FPSR_EXCEPT(%a6) 1765*4882a593Smuzhiyun bra mvout_end 1766*4882a593Smuzhiyun 1767*4882a593Smuzhiyunsp_under: 1768*4882a593Smuzhiyun bsetb #etemp15_bit,STAG(%a6) 1769*4882a593Smuzhiyun 1770*4882a593Smuzhiyun cmpw 4(%a1),%d0 1771*4882a593Smuzhiyun blts sp_catas |catastrophic underflow case 1772*4882a593Smuzhiyun 1773*4882a593Smuzhiyun movel #1,%d0 |load in round precision 1774*4882a593Smuzhiyun movel #sgl_thresh,%d1 |load in single denorm threshold 1775*4882a593Smuzhiyun bsrl dpspdnrm |expects d1 to have the proper 1776*4882a593Smuzhiyun| ;denorm threshold 1777*4882a593Smuzhiyun bsrl dest_sgl |stores value to destination 1778*4882a593Smuzhiyun bsetb #unfl_bit,FPSR_EXCEPT(%a6) 1779*4882a593Smuzhiyun bra mvout_end |exit 1780*4882a593Smuzhiyun 1781*4882a593Smuzhiyundp_under: 1782*4882a593Smuzhiyun bsetb #etemp15_bit,STAG(%a6) 1783*4882a593Smuzhiyun 1784*4882a593Smuzhiyun cmpw 4(%a1),%d0 1785*4882a593Smuzhiyun blts dp_catas |catastrophic underflow case 1786*4882a593Smuzhiyun 1787*4882a593Smuzhiyun movel #dbl_thresh,%d1 |load in double precision threshold 
1788*4882a593Smuzhiyun movel #2,%d0 1789*4882a593Smuzhiyun bsrl dpspdnrm |expects d1 to have proper 1790*4882a593Smuzhiyun| ;denorm threshold 1791*4882a593Smuzhiyun| ;expects d0 to have round precision 1792*4882a593Smuzhiyun bsrl dest_dbl |store value to destination 1793*4882a593Smuzhiyun bsetb #unfl_bit,FPSR_EXCEPT(%a6) 1794*4882a593Smuzhiyun bra mvout_end |exit 1795*4882a593Smuzhiyun 1796*4882a593Smuzhiyun| 1797*4882a593Smuzhiyun| Handle catastrophic underflow cases here 1798*4882a593Smuzhiyun| 1799*4882a593Smuzhiyunsp_catas: 1800*4882a593Smuzhiyun| Temp fix for z bit set in unf_sub 1801*4882a593Smuzhiyun movel USER_FPSR(%a6),-(%a7) 1802*4882a593Smuzhiyun 1803*4882a593Smuzhiyun movel #1,%d0 |set round precision to sgl 1804*4882a593Smuzhiyun 1805*4882a593Smuzhiyun bsrl unf_sub |a0 points to result 1806*4882a593Smuzhiyun 1807*4882a593Smuzhiyun movel (%a7)+,USER_FPSR(%a6) 1808*4882a593Smuzhiyun 1809*4882a593Smuzhiyun movel #1,%d0 1810*4882a593Smuzhiyun subw %d0,LOCAL_EX(%a0) |account for difference between 1811*4882a593Smuzhiyun| ;denorm/norm bias 1812*4882a593Smuzhiyun 1813*4882a593Smuzhiyun movel %a0,%a1 |a1 has the operand input 1814*4882a593Smuzhiyun movel EXC_EA(%a6),%a0 |a0 has the destination pointer 1815*4882a593Smuzhiyun 1816*4882a593Smuzhiyun bsrl dest_sgl |store the result 1817*4882a593Smuzhiyun oril #unfinx_mask,USER_FPSR(%a6) 1818*4882a593Smuzhiyun bra mvout_end 1819*4882a593Smuzhiyun 1820*4882a593Smuzhiyundp_catas: 1821*4882a593Smuzhiyun| Temp fix for z bit set in unf_sub 1822*4882a593Smuzhiyun movel USER_FPSR(%a6),-(%a7) 1823*4882a593Smuzhiyun 1824*4882a593Smuzhiyun movel #2,%d0 |set round precision to dbl 1825*4882a593Smuzhiyun bsrl unf_sub |a0 points to result 1826*4882a593Smuzhiyun 1827*4882a593Smuzhiyun movel (%a7)+,USER_FPSR(%a6) 1828*4882a593Smuzhiyun 1829*4882a593Smuzhiyun movel #1,%d0 1830*4882a593Smuzhiyun subw %d0,LOCAL_EX(%a0) |account for difference between 1831*4882a593Smuzhiyun| ;denorm/norm bias 1832*4882a593Smuzhiyun 
1833*4882a593Smuzhiyun movel %a0,%a1 |a1 has the operand input 1834*4882a593Smuzhiyun movel EXC_EA(%a6),%a0 |a0 has the destination pointer 1835*4882a593Smuzhiyun 1836*4882a593Smuzhiyun bsrl dest_dbl |store the result 1837*4882a593Smuzhiyun oril #unfinx_mask,USER_FPSR(%a6) 1838*4882a593Smuzhiyun bra mvout_end 1839*4882a593Smuzhiyun 1840*4882a593Smuzhiyun| 1841*4882a593Smuzhiyun| Handle catastrophic overflow cases here 1842*4882a593Smuzhiyun| 1843*4882a593Smuzhiyunsp_over: 1844*4882a593Smuzhiyun| Temp fix for z bit set in unf_sub 1845*4882a593Smuzhiyun movel USER_FPSR(%a6),-(%a7) 1846*4882a593Smuzhiyun 1847*4882a593Smuzhiyun movel #1,%d0 1848*4882a593Smuzhiyun leal FP_SCR1(%a6),%a0 |use FP_SCR1 for creating result 1849*4882a593Smuzhiyun movel ETEMP_EX(%a6),(%a0) 1850*4882a593Smuzhiyun movel ETEMP_HI(%a6),4(%a0) 1851*4882a593Smuzhiyun movel ETEMP_LO(%a6),8(%a0) 1852*4882a593Smuzhiyun bsrl ovf_res 1853*4882a593Smuzhiyun 1854*4882a593Smuzhiyun movel (%a7)+,USER_FPSR(%a6) 1855*4882a593Smuzhiyun 1856*4882a593Smuzhiyun movel %a0,%a1 1857*4882a593Smuzhiyun movel EXC_EA(%a6),%a0 1858*4882a593Smuzhiyun bsrl dest_sgl 1859*4882a593Smuzhiyun orl #ovfinx_mask,USER_FPSR(%a6) 1860*4882a593Smuzhiyun bra mvout_end 1861*4882a593Smuzhiyun 1862*4882a593Smuzhiyundp_over: 1863*4882a593Smuzhiyun| Temp fix for z bit set in ovf_res 1864*4882a593Smuzhiyun movel USER_FPSR(%a6),-(%a7) 1865*4882a593Smuzhiyun 1866*4882a593Smuzhiyun movel #2,%d0 1867*4882a593Smuzhiyun leal FP_SCR1(%a6),%a0 |use FP_SCR1 for creating result 1868*4882a593Smuzhiyun movel ETEMP_EX(%a6),(%a0) 1869*4882a593Smuzhiyun movel ETEMP_HI(%a6),4(%a0) 1870*4882a593Smuzhiyun movel ETEMP_LO(%a6),8(%a0) 1871*4882a593Smuzhiyun bsrl ovf_res 1872*4882a593Smuzhiyun 1873*4882a593Smuzhiyun movel (%a7)+,USER_FPSR(%a6) 1874*4882a593Smuzhiyun 1875*4882a593Smuzhiyun movel %a0,%a1 1876*4882a593Smuzhiyun movel EXC_EA(%a6),%a0 1877*4882a593Smuzhiyun bsrl dest_dbl 1878*4882a593Smuzhiyun orl #ovfinx_mask,USER_FPSR(%a6) 1879*4882a593Smuzhiyun bra 
mvout_end 1880*4882a593Smuzhiyun 1881*4882a593Smuzhiyun| 1882*4882a593Smuzhiyun| DPSPDNRM 1883*4882a593Smuzhiyun| 1884*4882a593Smuzhiyun| This subroutine takes an extended normalized number and denormalizes 1885*4882a593Smuzhiyun| it to the given round precision. This subroutine also decrements 1886*4882a593Smuzhiyun| the input operand's exponent by 1 to account for the fact that 1887*4882a593Smuzhiyun| dest_sgl or dest_dbl expects a normalized number's bias. 1888*4882a593Smuzhiyun| 1889*4882a593Smuzhiyun| Input: a0 points to a normalized number in internal extended format 1890*4882a593Smuzhiyun| d0 is the round precision (=1 for sgl; =2 for dbl) 1891*4882a593Smuzhiyun| d1 is the single precision or double precision 1892*4882a593Smuzhiyun| denorm threshold 1893*4882a593Smuzhiyun| 1894*4882a593Smuzhiyun| Output: (In the format for dest_sgl or dest_dbl) 1895*4882a593Smuzhiyun| a0 points to the destination 1896*4882a593Smuzhiyun| a1 points to the operand 1897*4882a593Smuzhiyun| 1898*4882a593Smuzhiyun| Exceptions: Reports inexact 2 exception by setting USER_FPSR bits 1899*4882a593Smuzhiyun| 1900*4882a593Smuzhiyundpspdnrm: 1901*4882a593Smuzhiyun movel %d0,-(%a7) |save round precision 1902*4882a593Smuzhiyun clrl %d0 |clear initial g,r,s 1903*4882a593Smuzhiyun bsrl dnrm_lp |careful with d0, it's needed by round 1904*4882a593Smuzhiyun 1905*4882a593Smuzhiyun bfextu FPCR_MODE(%a6){#2:#2},%d1 |get rounding mode 1906*4882a593Smuzhiyun swap %d1 1907*4882a593Smuzhiyun movew 2(%a7),%d1 |set rounding precision 1908*4882a593Smuzhiyun swap %d1 |at this point d1 has PREC/MODE info 1909*4882a593Smuzhiyun bsrl round |round result, sets the inex bit in 1910*4882a593Smuzhiyun| ;USER_FPSR if needed 1911*4882a593Smuzhiyun 1912*4882a593Smuzhiyun movew #1,%d0 1913*4882a593Smuzhiyun subw %d0,LOCAL_EX(%a0) |account for difference in denorm 1914*4882a593Smuzhiyun| ;vs norm bias 1915*4882a593Smuzhiyun 1916*4882a593Smuzhiyun movel %a0,%a1 |a1 has the operand input 1917*4882a593Smuzhiyun movel 
EXC_EA(%a6),%a0 |a0 has the destination pointer 1918*4882a593Smuzhiyun addw #4,%a7 |pop stack 1919*4882a593Smuzhiyun rts 1920*4882a593Smuzhiyun| 1921*4882a593Smuzhiyun| SET_XOP initialized WBTEMP with the value pointed to by a0 1922*4882a593Smuzhiyun| input: a0 points to input operand in the internal extended format 1923*4882a593Smuzhiyun| 1924*4882a593Smuzhiyunset_xop: 1925*4882a593Smuzhiyun movel LOCAL_EX(%a0),WBTEMP_EX(%a6) 1926*4882a593Smuzhiyun movel LOCAL_HI(%a0),WBTEMP_HI(%a6) 1927*4882a593Smuzhiyun movel LOCAL_LO(%a0),WBTEMP_LO(%a6) 1928*4882a593Smuzhiyun bfclr WBTEMP_SGN(%a6){#0:#8} 1929*4882a593Smuzhiyun beqs sxop 1930*4882a593Smuzhiyun bsetb #sign_bit,WBTEMP_EX(%a6) 1931*4882a593Smuzhiyunsxop: 1932*4882a593Smuzhiyun bfclr STAG(%a6){#5:#4} |clear wbtm66,wbtm1,wbtm0,sbit 1933*4882a593Smuzhiyun rts 1934*4882a593Smuzhiyun| 1935*4882a593Smuzhiyun| P_MOVE 1936*4882a593Smuzhiyun| 1937*4882a593Smuzhiyunp_movet: 1938*4882a593Smuzhiyun .long p_move 1939*4882a593Smuzhiyun .long p_movez 1940*4882a593Smuzhiyun .long p_movei 1941*4882a593Smuzhiyun .long p_moven 1942*4882a593Smuzhiyun .long p_move 1943*4882a593Smuzhiyunp_regd: 1944*4882a593Smuzhiyun .long p_dyd0 1945*4882a593Smuzhiyun .long p_dyd1 1946*4882a593Smuzhiyun .long p_dyd2 1947*4882a593Smuzhiyun .long p_dyd3 1948*4882a593Smuzhiyun .long p_dyd4 1949*4882a593Smuzhiyun .long p_dyd5 1950*4882a593Smuzhiyun .long p_dyd6 1951*4882a593Smuzhiyun .long p_dyd7 1952*4882a593Smuzhiyun 1953*4882a593Smuzhiyunpack_out: 1954*4882a593Smuzhiyun leal p_movet,%a0 |load jmp table address 1955*4882a593Smuzhiyun movew STAG(%a6),%d0 |get source tag 1956*4882a593Smuzhiyun bfextu %d0{#16:#3},%d0 |isolate source bits 1957*4882a593Smuzhiyun movel (%a0,%d0.w*4),%a0 |load a0 with routine label for tag 1958*4882a593Smuzhiyun jmp (%a0) |go to the routine 1959*4882a593Smuzhiyun 1960*4882a593Smuzhiyunp_write: 1961*4882a593Smuzhiyun movel #0x0c,%d0 |get byte count 1962*4882a593Smuzhiyun movel EXC_EA(%a6),%a1 |get the destination address 
1963*4882a593Smuzhiyun bsr mem_write |write the user's destination 1964*4882a593Smuzhiyun moveb #0,CU_SAVEPC(%a6) |set the cu save pc to all 0's 1965*4882a593Smuzhiyun 1966*4882a593Smuzhiyun| 1967*4882a593Smuzhiyun| Also note that the dtag must be set to norm here - this is because 1968*4882a593Smuzhiyun| the 040 uses the dtag to execute the correct microcode. 1969*4882a593Smuzhiyun| 1970*4882a593Smuzhiyun bfclr DTAG(%a6){#0:#3} |set dtag to norm 1971*4882a593Smuzhiyun 1972*4882a593Smuzhiyun rts 1973*4882a593Smuzhiyun 1974*4882a593Smuzhiyun| Notes on handling of special case (zero, inf, and nan) inputs: 1975*4882a593Smuzhiyun| 1. Operr is not signalled if the k-factor is greater than 18. 1976*4882a593Smuzhiyun| 2. Per the manual, status bits are not set. 1977*4882a593Smuzhiyun| 1978*4882a593Smuzhiyun 1979*4882a593Smuzhiyunp_move: 1980*4882a593Smuzhiyun movew CMDREG1B(%a6),%d0 1981*4882a593Smuzhiyun btstl #kfact_bit,%d0 |test for dynamic k-factor 1982*4882a593Smuzhiyun beqs statick |if clear, k-factor is static 1983*4882a593Smuzhiyundynamick: 1984*4882a593Smuzhiyun bfextu %d0{#25:#3},%d0 |isolate register for dynamic k-factor 1985*4882a593Smuzhiyun lea p_regd,%a0 1986*4882a593Smuzhiyun movel %a0@(%d0:l:4),%a0 1987*4882a593Smuzhiyun jmp (%a0) 1988*4882a593Smuzhiyunstatick: 1989*4882a593Smuzhiyun andiw #0x007f,%d0 |get k-factor 1990*4882a593Smuzhiyun bfexts %d0{#25:#7},%d0 |sign extend d0 for bindec 1991*4882a593Smuzhiyun leal ETEMP(%a6),%a0 |a0 will point to the packed decimal 1992*4882a593Smuzhiyun bsrl bindec |perform the convert; data at a6 1993*4882a593Smuzhiyun leal FP_SCR1(%a6),%a0 |load a0 with result address 1994*4882a593Smuzhiyun bral p_write 1995*4882a593Smuzhiyunp_movez: 1996*4882a593Smuzhiyun leal ETEMP(%a6),%a0 |a0 will point to the packed decimal 1997*4882a593Smuzhiyun clrw 2(%a0) |clear lower word of exp 1998*4882a593Smuzhiyun clrl 4(%a0) |load second lword of ZERO 1999*4882a593Smuzhiyun clrl 8(%a0) |load third lword of ZERO 2000*4882a593Smuzhiyun bra 
p_write |go write results 2001*4882a593Smuzhiyunp_movei: 2002*4882a593Smuzhiyun fmovel #0,%FPSR |clear aiop 2003*4882a593Smuzhiyun leal ETEMP(%a6),%a0 |a0 will point to the packed decimal 2004*4882a593Smuzhiyun clrw 2(%a0) |clear lower word of exp 2005*4882a593Smuzhiyun bra p_write |go write the result 2006*4882a593Smuzhiyunp_moven: 2007*4882a593Smuzhiyun leal ETEMP(%a6),%a0 |a0 will point to the packed decimal 2008*4882a593Smuzhiyun clrw 2(%a0) |clear lower word of exp 2009*4882a593Smuzhiyun bra p_write |go write the result 2010*4882a593Smuzhiyun 2011*4882a593Smuzhiyun| 2012*4882a593Smuzhiyun| Routines to read the dynamic k-factor from Dn. 2013*4882a593Smuzhiyun| 2014*4882a593Smuzhiyunp_dyd0: 2015*4882a593Smuzhiyun movel USER_D0(%a6),%d0 2016*4882a593Smuzhiyun bras statick 2017*4882a593Smuzhiyunp_dyd1: 2018*4882a593Smuzhiyun movel USER_D1(%a6),%d0 2019*4882a593Smuzhiyun bras statick 2020*4882a593Smuzhiyunp_dyd2: 2021*4882a593Smuzhiyun movel %d2,%d0 2022*4882a593Smuzhiyun bras statick 2023*4882a593Smuzhiyunp_dyd3: 2024*4882a593Smuzhiyun movel %d3,%d0 2025*4882a593Smuzhiyun bras statick 2026*4882a593Smuzhiyunp_dyd4: 2027*4882a593Smuzhiyun movel %d4,%d0 2028*4882a593Smuzhiyun bras statick 2029*4882a593Smuzhiyunp_dyd5: 2030*4882a593Smuzhiyun movel %d5,%d0 2031*4882a593Smuzhiyun bras statick 2032*4882a593Smuzhiyunp_dyd6: 2033*4882a593Smuzhiyun movel %d6,%d0 2034*4882a593Smuzhiyun bra statick 2035*4882a593Smuzhiyunp_dyd7: 2036*4882a593Smuzhiyun movel %d7,%d0 2037*4882a593Smuzhiyun bra statick 2038*4882a593Smuzhiyun 2039*4882a593Smuzhiyun |end 2040