1*4882a593Smuzhiyun~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 2*4882a593SmuzhiyunMOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP 3*4882a593SmuzhiyunM68000 Hi-Performance Microprocessor Division 4*4882a593SmuzhiyunM68060 Software Package 5*4882a593SmuzhiyunProduction Release P1.00 -- October 10, 1994 6*4882a593Smuzhiyun 7*4882a593SmuzhiyunM68060 Software Package Copyright © 1993, 1994 Motorola Inc. All rights reserved. 8*4882a593Smuzhiyun 9*4882a593SmuzhiyunTHE SOFTWARE is provided on an "AS IS" basis and without warranty. 10*4882a593SmuzhiyunTo the maximum extent permitted by applicable law, 11*4882a593SmuzhiyunMOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, 12*4882a593SmuzhiyunINCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE 13*4882a593Smuzhiyunand any warranty against infringement with regard to the SOFTWARE 14*4882a593Smuzhiyun(INCLUDING ANY MODIFIED VERSIONS THEREOF) and any accompanying written materials. 15*4882a593Smuzhiyun 16*4882a593SmuzhiyunTo the maximum extent permitted by applicable law, 17*4882a593SmuzhiyunIN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER 18*4882a593Smuzhiyun(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, 19*4882a593SmuzhiyunBUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS) 20*4882a593SmuzhiyunARISING OF THE USE OR INABILITY TO USE THE SOFTWARE. 21*4882a593SmuzhiyunMotorola assumes no responsibility for the maintenance and support of the SOFTWARE. 22*4882a593Smuzhiyun 23*4882a593SmuzhiyunYou are hereby granted a copyright license to use, modify, and distribute the SOFTWARE 24*4882a593Smuzhiyunso long as this entire notice is retained without alteration in any modified and/or 25*4882a593Smuzhiyunredistributed versions, and that such modified versions are clearly identified as such. 
26*4882a593SmuzhiyunNo licenses are granted by implication, estoppel or otherwise under any patents 27*4882a593Smuzhiyunor trademarks of Motorola, Inc. 28*4882a593Smuzhiyun~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 29*4882a593Smuzhiyun# freal.s: 30*4882a593Smuzhiyun# This file is appended to the top of the 060FPSP package 31*4882a593Smuzhiyun# and contains the entry points into the package. The user, in 32*4882a593Smuzhiyun# effect, branches to one of the branch table entries located 33*4882a593Smuzhiyun# after _060FPSP_TABLE. 34*4882a593Smuzhiyun# Also, subroutine stubs exist in this file (_fpsp_done for 35*4882a593Smuzhiyun# example) that are referenced by the FPSP package itself in order 36*4882a593Smuzhiyun# to call a given routine. The stub routine actually performs the 37*4882a593Smuzhiyun# callout. The FPSP code does a "bsr" to the stub routine. This 38*4882a593Smuzhiyun# extra layer of hierarchy adds a slight performance penalty but 39*4882a593Smuzhiyun# it makes the FPSP code easier to read and more mainatinable. 
40*4882a593Smuzhiyun# 41*4882a593Smuzhiyun 42*4882a593Smuzhiyunset _off_bsun, 0x00 43*4882a593Smuzhiyunset _off_snan, 0x04 44*4882a593Smuzhiyunset _off_operr, 0x08 45*4882a593Smuzhiyunset _off_ovfl, 0x0c 46*4882a593Smuzhiyunset _off_unfl, 0x10 47*4882a593Smuzhiyunset _off_dz, 0x14 48*4882a593Smuzhiyunset _off_inex, 0x18 49*4882a593Smuzhiyunset _off_fline, 0x1c 50*4882a593Smuzhiyunset _off_fpu_dis, 0x20 51*4882a593Smuzhiyunset _off_trap, 0x24 52*4882a593Smuzhiyunset _off_trace, 0x28 53*4882a593Smuzhiyunset _off_access, 0x2c 54*4882a593Smuzhiyunset _off_done, 0x30 55*4882a593Smuzhiyun 56*4882a593Smuzhiyunset _off_imr, 0x40 57*4882a593Smuzhiyunset _off_dmr, 0x44 58*4882a593Smuzhiyunset _off_dmw, 0x48 59*4882a593Smuzhiyunset _off_irw, 0x4c 60*4882a593Smuzhiyunset _off_irl, 0x50 61*4882a593Smuzhiyunset _off_drb, 0x54 62*4882a593Smuzhiyunset _off_drw, 0x58 63*4882a593Smuzhiyunset _off_drl, 0x5c 64*4882a593Smuzhiyunset _off_dwb, 0x60 65*4882a593Smuzhiyunset _off_dww, 0x64 66*4882a593Smuzhiyunset _off_dwl, 0x68 67*4882a593Smuzhiyun 68*4882a593Smuzhiyun_060FPSP_TABLE: 69*4882a593Smuzhiyun 70*4882a593Smuzhiyun############################################################### 71*4882a593Smuzhiyun 72*4882a593Smuzhiyun# Here's the table of ENTRY POINTS for those linking the package. 
73*4882a593Smuzhiyun bra.l _fpsp_snan 74*4882a593Smuzhiyun short 0x0000 75*4882a593Smuzhiyun bra.l _fpsp_operr 76*4882a593Smuzhiyun short 0x0000 77*4882a593Smuzhiyun bra.l _fpsp_ovfl 78*4882a593Smuzhiyun short 0x0000 79*4882a593Smuzhiyun bra.l _fpsp_unfl 80*4882a593Smuzhiyun short 0x0000 81*4882a593Smuzhiyun bra.l _fpsp_dz 82*4882a593Smuzhiyun short 0x0000 83*4882a593Smuzhiyun bra.l _fpsp_inex 84*4882a593Smuzhiyun short 0x0000 85*4882a593Smuzhiyun bra.l _fpsp_fline 86*4882a593Smuzhiyun short 0x0000 87*4882a593Smuzhiyun bra.l _fpsp_unsupp 88*4882a593Smuzhiyun short 0x0000 89*4882a593Smuzhiyun bra.l _fpsp_effadd 90*4882a593Smuzhiyun short 0x0000 91*4882a593Smuzhiyun 92*4882a593Smuzhiyun space 56 93*4882a593Smuzhiyun 94*4882a593Smuzhiyun############################################################### 95*4882a593Smuzhiyun global _fpsp_done 96*4882a593Smuzhiyun_fpsp_done: 97*4882a593Smuzhiyun mov.l %d0,-(%sp) 98*4882a593Smuzhiyun mov.l (_060FPSP_TABLE-0x80+_off_done,%pc),%d0 99*4882a593Smuzhiyun pea.l (_060FPSP_TABLE-0x80,%pc,%d0) 100*4882a593Smuzhiyun mov.l 0x4(%sp),%d0 101*4882a593Smuzhiyun rtd &0x4 102*4882a593Smuzhiyun 103*4882a593Smuzhiyun global _real_ovfl 104*4882a593Smuzhiyun_real_ovfl: 105*4882a593Smuzhiyun mov.l %d0,-(%sp) 106*4882a593Smuzhiyun mov.l (_060FPSP_TABLE-0x80+_off_ovfl,%pc),%d0 107*4882a593Smuzhiyun pea.l (_060FPSP_TABLE-0x80,%pc,%d0) 108*4882a593Smuzhiyun mov.l 0x4(%sp),%d0 109*4882a593Smuzhiyun rtd &0x4 110*4882a593Smuzhiyun 111*4882a593Smuzhiyun global _real_unfl 112*4882a593Smuzhiyun_real_unfl: 113*4882a593Smuzhiyun mov.l %d0,-(%sp) 114*4882a593Smuzhiyun mov.l (_060FPSP_TABLE-0x80+_off_unfl,%pc),%d0 115*4882a593Smuzhiyun pea.l (_060FPSP_TABLE-0x80,%pc,%d0) 116*4882a593Smuzhiyun mov.l 0x4(%sp),%d0 117*4882a593Smuzhiyun rtd &0x4 118*4882a593Smuzhiyun 119*4882a593Smuzhiyun global _real_inex 120*4882a593Smuzhiyun_real_inex: 121*4882a593Smuzhiyun mov.l %d0,-(%sp) 122*4882a593Smuzhiyun mov.l (_060FPSP_TABLE-0x80+_off_inex,%pc),%d0 
123*4882a593Smuzhiyun pea.l (_060FPSP_TABLE-0x80,%pc,%d0) 124*4882a593Smuzhiyun mov.l 0x4(%sp),%d0 125*4882a593Smuzhiyun rtd &0x4 126*4882a593Smuzhiyun 127*4882a593Smuzhiyun global _real_bsun 128*4882a593Smuzhiyun_real_bsun: 129*4882a593Smuzhiyun mov.l %d0,-(%sp) 130*4882a593Smuzhiyun mov.l (_060FPSP_TABLE-0x80+_off_bsun,%pc),%d0 131*4882a593Smuzhiyun pea.l (_060FPSP_TABLE-0x80,%pc,%d0) 132*4882a593Smuzhiyun mov.l 0x4(%sp),%d0 133*4882a593Smuzhiyun rtd &0x4 134*4882a593Smuzhiyun 135*4882a593Smuzhiyun global _real_operr 136*4882a593Smuzhiyun_real_operr: 137*4882a593Smuzhiyun mov.l %d0,-(%sp) 138*4882a593Smuzhiyun mov.l (_060FPSP_TABLE-0x80+_off_operr,%pc),%d0 139*4882a593Smuzhiyun pea.l (_060FPSP_TABLE-0x80,%pc,%d0) 140*4882a593Smuzhiyun mov.l 0x4(%sp),%d0 141*4882a593Smuzhiyun rtd &0x4 142*4882a593Smuzhiyun 143*4882a593Smuzhiyun global _real_snan 144*4882a593Smuzhiyun_real_snan: 145*4882a593Smuzhiyun mov.l %d0,-(%sp) 146*4882a593Smuzhiyun mov.l (_060FPSP_TABLE-0x80+_off_snan,%pc),%d0 147*4882a593Smuzhiyun pea.l (_060FPSP_TABLE-0x80,%pc,%d0) 148*4882a593Smuzhiyun mov.l 0x4(%sp),%d0 149*4882a593Smuzhiyun rtd &0x4 150*4882a593Smuzhiyun 151*4882a593Smuzhiyun global _real_dz 152*4882a593Smuzhiyun_real_dz: 153*4882a593Smuzhiyun mov.l %d0,-(%sp) 154*4882a593Smuzhiyun mov.l (_060FPSP_TABLE-0x80+_off_dz,%pc),%d0 155*4882a593Smuzhiyun pea.l (_060FPSP_TABLE-0x80,%pc,%d0) 156*4882a593Smuzhiyun mov.l 0x4(%sp),%d0 157*4882a593Smuzhiyun rtd &0x4 158*4882a593Smuzhiyun 159*4882a593Smuzhiyun global _real_fline 160*4882a593Smuzhiyun_real_fline: 161*4882a593Smuzhiyun mov.l %d0,-(%sp) 162*4882a593Smuzhiyun mov.l (_060FPSP_TABLE-0x80+_off_fline,%pc),%d0 163*4882a593Smuzhiyun pea.l (_060FPSP_TABLE-0x80,%pc,%d0) 164*4882a593Smuzhiyun mov.l 0x4(%sp),%d0 165*4882a593Smuzhiyun rtd &0x4 166*4882a593Smuzhiyun 167*4882a593Smuzhiyun global _real_fpu_disabled 168*4882a593Smuzhiyun_real_fpu_disabled: 169*4882a593Smuzhiyun mov.l %d0,-(%sp) 170*4882a593Smuzhiyun mov.l 
(_060FPSP_TABLE-0x80+_off_fpu_dis,%pc),%d0 171*4882a593Smuzhiyun pea.l (_060FPSP_TABLE-0x80,%pc,%d0) 172*4882a593Smuzhiyun mov.l 0x4(%sp),%d0 173*4882a593Smuzhiyun rtd &0x4 174*4882a593Smuzhiyun 175*4882a593Smuzhiyun global _real_trap 176*4882a593Smuzhiyun_real_trap: 177*4882a593Smuzhiyun mov.l %d0,-(%sp) 178*4882a593Smuzhiyun mov.l (_060FPSP_TABLE-0x80+_off_trap,%pc),%d0 179*4882a593Smuzhiyun pea.l (_060FPSP_TABLE-0x80,%pc,%d0) 180*4882a593Smuzhiyun mov.l 0x4(%sp),%d0 181*4882a593Smuzhiyun rtd &0x4 182*4882a593Smuzhiyun 183*4882a593Smuzhiyun global _real_trace 184*4882a593Smuzhiyun_real_trace: 185*4882a593Smuzhiyun mov.l %d0,-(%sp) 186*4882a593Smuzhiyun mov.l (_060FPSP_TABLE-0x80+_off_trace,%pc),%d0 187*4882a593Smuzhiyun pea.l (_060FPSP_TABLE-0x80,%pc,%d0) 188*4882a593Smuzhiyun mov.l 0x4(%sp),%d0 189*4882a593Smuzhiyun rtd &0x4 190*4882a593Smuzhiyun 191*4882a593Smuzhiyun global _real_access 192*4882a593Smuzhiyun_real_access: 193*4882a593Smuzhiyun mov.l %d0,-(%sp) 194*4882a593Smuzhiyun mov.l (_060FPSP_TABLE-0x80+_off_access,%pc),%d0 195*4882a593Smuzhiyun pea.l (_060FPSP_TABLE-0x80,%pc,%d0) 196*4882a593Smuzhiyun mov.l 0x4(%sp),%d0 197*4882a593Smuzhiyun rtd &0x4 198*4882a593Smuzhiyun 199*4882a593Smuzhiyun####################################### 200*4882a593Smuzhiyun 201*4882a593Smuzhiyun global _imem_read 202*4882a593Smuzhiyun_imem_read: 203*4882a593Smuzhiyun mov.l %d0,-(%sp) 204*4882a593Smuzhiyun mov.l (_060FPSP_TABLE-0x80+_off_imr,%pc),%d0 205*4882a593Smuzhiyun pea.l (_060FPSP_TABLE-0x80,%pc,%d0) 206*4882a593Smuzhiyun mov.l 0x4(%sp),%d0 207*4882a593Smuzhiyun rtd &0x4 208*4882a593Smuzhiyun 209*4882a593Smuzhiyun global _dmem_read 210*4882a593Smuzhiyun_dmem_read: 211*4882a593Smuzhiyun mov.l %d0,-(%sp) 212*4882a593Smuzhiyun mov.l (_060FPSP_TABLE-0x80+_off_dmr,%pc),%d0 213*4882a593Smuzhiyun pea.l (_060FPSP_TABLE-0x80,%pc,%d0) 214*4882a593Smuzhiyun mov.l 0x4(%sp),%d0 215*4882a593Smuzhiyun rtd &0x4 216*4882a593Smuzhiyun 217*4882a593Smuzhiyun global _dmem_write 
218*4882a593Smuzhiyun_dmem_write: 219*4882a593Smuzhiyun mov.l %d0,-(%sp) 220*4882a593Smuzhiyun mov.l (_060FPSP_TABLE-0x80+_off_dmw,%pc),%d0 221*4882a593Smuzhiyun pea.l (_060FPSP_TABLE-0x80,%pc,%d0) 222*4882a593Smuzhiyun mov.l 0x4(%sp),%d0 223*4882a593Smuzhiyun rtd &0x4 224*4882a593Smuzhiyun 225*4882a593Smuzhiyun global _imem_read_word 226*4882a593Smuzhiyun_imem_read_word: 227*4882a593Smuzhiyun mov.l %d0,-(%sp) 228*4882a593Smuzhiyun mov.l (_060FPSP_TABLE-0x80+_off_irw,%pc),%d0 229*4882a593Smuzhiyun pea.l (_060FPSP_TABLE-0x80,%pc,%d0) 230*4882a593Smuzhiyun mov.l 0x4(%sp),%d0 231*4882a593Smuzhiyun rtd &0x4 232*4882a593Smuzhiyun 233*4882a593Smuzhiyun global _imem_read_long 234*4882a593Smuzhiyun_imem_read_long: 235*4882a593Smuzhiyun mov.l %d0,-(%sp) 236*4882a593Smuzhiyun mov.l (_060FPSP_TABLE-0x80+_off_irl,%pc),%d0 237*4882a593Smuzhiyun pea.l (_060FPSP_TABLE-0x80,%pc,%d0) 238*4882a593Smuzhiyun mov.l 0x4(%sp),%d0 239*4882a593Smuzhiyun rtd &0x4 240*4882a593Smuzhiyun 241*4882a593Smuzhiyun global _dmem_read_byte 242*4882a593Smuzhiyun_dmem_read_byte: 243*4882a593Smuzhiyun mov.l %d0,-(%sp) 244*4882a593Smuzhiyun mov.l (_060FPSP_TABLE-0x80+_off_drb,%pc),%d0 245*4882a593Smuzhiyun pea.l (_060FPSP_TABLE-0x80,%pc,%d0) 246*4882a593Smuzhiyun mov.l 0x4(%sp),%d0 247*4882a593Smuzhiyun rtd &0x4 248*4882a593Smuzhiyun 249*4882a593Smuzhiyun global _dmem_read_word 250*4882a593Smuzhiyun_dmem_read_word: 251*4882a593Smuzhiyun mov.l %d0,-(%sp) 252*4882a593Smuzhiyun mov.l (_060FPSP_TABLE-0x80+_off_drw,%pc),%d0 253*4882a593Smuzhiyun pea.l (_060FPSP_TABLE-0x80,%pc,%d0) 254*4882a593Smuzhiyun mov.l 0x4(%sp),%d0 255*4882a593Smuzhiyun rtd &0x4 256*4882a593Smuzhiyun 257*4882a593Smuzhiyun global _dmem_read_long 258*4882a593Smuzhiyun_dmem_read_long: 259*4882a593Smuzhiyun mov.l %d0,-(%sp) 260*4882a593Smuzhiyun mov.l (_060FPSP_TABLE-0x80+_off_drl,%pc),%d0 261*4882a593Smuzhiyun pea.l (_060FPSP_TABLE-0x80,%pc,%d0) 262*4882a593Smuzhiyun mov.l 0x4(%sp),%d0 263*4882a593Smuzhiyun rtd &0x4 264*4882a593Smuzhiyun 
265*4882a593Smuzhiyun global _dmem_write_byte 266*4882a593Smuzhiyun_dmem_write_byte: 267*4882a593Smuzhiyun mov.l %d0,-(%sp) 268*4882a593Smuzhiyun mov.l (_060FPSP_TABLE-0x80+_off_dwb,%pc),%d0 269*4882a593Smuzhiyun pea.l (_060FPSP_TABLE-0x80,%pc,%d0) 270*4882a593Smuzhiyun mov.l 0x4(%sp),%d0 271*4882a593Smuzhiyun rtd &0x4 272*4882a593Smuzhiyun 273*4882a593Smuzhiyun global _dmem_write_word 274*4882a593Smuzhiyun_dmem_write_word: 275*4882a593Smuzhiyun mov.l %d0,-(%sp) 276*4882a593Smuzhiyun mov.l (_060FPSP_TABLE-0x80+_off_dww,%pc),%d0 277*4882a593Smuzhiyun pea.l (_060FPSP_TABLE-0x80,%pc,%d0) 278*4882a593Smuzhiyun mov.l 0x4(%sp),%d0 279*4882a593Smuzhiyun rtd &0x4 280*4882a593Smuzhiyun 281*4882a593Smuzhiyun global _dmem_write_long 282*4882a593Smuzhiyun_dmem_write_long: 283*4882a593Smuzhiyun mov.l %d0,-(%sp) 284*4882a593Smuzhiyun mov.l (_060FPSP_TABLE-0x80+_off_dwl,%pc),%d0 285*4882a593Smuzhiyun pea.l (_060FPSP_TABLE-0x80,%pc,%d0) 286*4882a593Smuzhiyun mov.l 0x4(%sp),%d0 287*4882a593Smuzhiyun rtd &0x4 288*4882a593Smuzhiyun 289*4882a593Smuzhiyun# 290*4882a593Smuzhiyun# This file contains a set of define statements for constants 291*4882a593Smuzhiyun# in order to promote readability within the corecode itself. 
292*4882a593Smuzhiyun# 293*4882a593Smuzhiyun 294*4882a593Smuzhiyunset LOCAL_SIZE, 192 # stack frame size(bytes) 295*4882a593Smuzhiyunset LV, -LOCAL_SIZE # stack offset 296*4882a593Smuzhiyun 297*4882a593Smuzhiyunset EXC_SR, 0x4 # stack status register 298*4882a593Smuzhiyunset EXC_PC, 0x6 # stack pc 299*4882a593Smuzhiyunset EXC_VOFF, 0xa # stacked vector offset 300*4882a593Smuzhiyunset EXC_EA, 0xc # stacked <ea> 301*4882a593Smuzhiyun 302*4882a593Smuzhiyunset EXC_FP, 0x0 # frame pointer 303*4882a593Smuzhiyun 304*4882a593Smuzhiyunset EXC_AREGS, -68 # offset of all address regs 305*4882a593Smuzhiyunset EXC_DREGS, -100 # offset of all data regs 306*4882a593Smuzhiyunset EXC_FPREGS, -36 # offset of all fp regs 307*4882a593Smuzhiyun 308*4882a593Smuzhiyunset EXC_A7, EXC_AREGS+(7*4) # offset of saved a7 309*4882a593Smuzhiyunset OLD_A7, EXC_AREGS+(6*4) # extra copy of saved a7 310*4882a593Smuzhiyunset EXC_A6, EXC_AREGS+(6*4) # offset of saved a6 311*4882a593Smuzhiyunset EXC_A5, EXC_AREGS+(5*4) 312*4882a593Smuzhiyunset EXC_A4, EXC_AREGS+(4*4) 313*4882a593Smuzhiyunset EXC_A3, EXC_AREGS+(3*4) 314*4882a593Smuzhiyunset EXC_A2, EXC_AREGS+(2*4) 315*4882a593Smuzhiyunset EXC_A1, EXC_AREGS+(1*4) 316*4882a593Smuzhiyunset EXC_A0, EXC_AREGS+(0*4) 317*4882a593Smuzhiyunset EXC_D7, EXC_DREGS+(7*4) 318*4882a593Smuzhiyunset EXC_D6, EXC_DREGS+(6*4) 319*4882a593Smuzhiyunset EXC_D5, EXC_DREGS+(5*4) 320*4882a593Smuzhiyunset EXC_D4, EXC_DREGS+(4*4) 321*4882a593Smuzhiyunset EXC_D3, EXC_DREGS+(3*4) 322*4882a593Smuzhiyunset EXC_D2, EXC_DREGS+(2*4) 323*4882a593Smuzhiyunset EXC_D1, EXC_DREGS+(1*4) 324*4882a593Smuzhiyunset EXC_D0, EXC_DREGS+(0*4) 325*4882a593Smuzhiyun 326*4882a593Smuzhiyunset EXC_FP0, EXC_FPREGS+(0*12) # offset of saved fp0 327*4882a593Smuzhiyunset EXC_FP1, EXC_FPREGS+(1*12) # offset of saved fp1 328*4882a593Smuzhiyunset EXC_FP2, EXC_FPREGS+(2*12) # offset of saved fp2 (not used) 329*4882a593Smuzhiyun 330*4882a593Smuzhiyunset FP_SCR1, LV+80 # fp scratch 1 331*4882a593Smuzhiyunset 
FP_SCR1_EX, FP_SCR1+0 332*4882a593Smuzhiyunset FP_SCR1_SGN, FP_SCR1+2 333*4882a593Smuzhiyunset FP_SCR1_HI, FP_SCR1+4 334*4882a593Smuzhiyunset FP_SCR1_LO, FP_SCR1+8 335*4882a593Smuzhiyun 336*4882a593Smuzhiyunset FP_SCR0, LV+68 # fp scratch 0 337*4882a593Smuzhiyunset FP_SCR0_EX, FP_SCR0+0 338*4882a593Smuzhiyunset FP_SCR0_SGN, FP_SCR0+2 339*4882a593Smuzhiyunset FP_SCR0_HI, FP_SCR0+4 340*4882a593Smuzhiyunset FP_SCR0_LO, FP_SCR0+8 341*4882a593Smuzhiyun 342*4882a593Smuzhiyunset FP_DST, LV+56 # fp destination operand 343*4882a593Smuzhiyunset FP_DST_EX, FP_DST+0 344*4882a593Smuzhiyunset FP_DST_SGN, FP_DST+2 345*4882a593Smuzhiyunset FP_DST_HI, FP_DST+4 346*4882a593Smuzhiyunset FP_DST_LO, FP_DST+8 347*4882a593Smuzhiyun 348*4882a593Smuzhiyunset FP_SRC, LV+44 # fp source operand 349*4882a593Smuzhiyunset FP_SRC_EX, FP_SRC+0 350*4882a593Smuzhiyunset FP_SRC_SGN, FP_SRC+2 351*4882a593Smuzhiyunset FP_SRC_HI, FP_SRC+4 352*4882a593Smuzhiyunset FP_SRC_LO, FP_SRC+8 353*4882a593Smuzhiyun 354*4882a593Smuzhiyunset USER_FPIAR, LV+40 # FP instr address register 355*4882a593Smuzhiyun 356*4882a593Smuzhiyunset USER_FPSR, LV+36 # FP status register 357*4882a593Smuzhiyunset FPSR_CC, USER_FPSR+0 # FPSR condition codes 358*4882a593Smuzhiyunset FPSR_QBYTE, USER_FPSR+1 # FPSR qoutient byte 359*4882a593Smuzhiyunset FPSR_EXCEPT, USER_FPSR+2 # FPSR exception status byte 360*4882a593Smuzhiyunset FPSR_AEXCEPT, USER_FPSR+3 # FPSR accrued exception byte 361*4882a593Smuzhiyun 362*4882a593Smuzhiyunset USER_FPCR, LV+32 # FP control register 363*4882a593Smuzhiyunset FPCR_ENABLE, USER_FPCR+2 # FPCR exception enable 364*4882a593Smuzhiyunset FPCR_MODE, USER_FPCR+3 # FPCR rounding mode control 365*4882a593Smuzhiyun 366*4882a593Smuzhiyunset L_SCR3, LV+28 # integer scratch 3 367*4882a593Smuzhiyunset L_SCR2, LV+24 # integer scratch 2 368*4882a593Smuzhiyunset L_SCR1, LV+20 # integer scratch 1 369*4882a593Smuzhiyun 370*4882a593Smuzhiyunset STORE_FLG, LV+19 # flag: operand store (ie. 
not fcmp/ftst) 371*4882a593Smuzhiyun 372*4882a593Smuzhiyunset EXC_TEMP2, LV+24 # temporary space 373*4882a593Smuzhiyunset EXC_TEMP, LV+16 # temporary space 374*4882a593Smuzhiyun 375*4882a593Smuzhiyunset DTAG, LV+15 # destination operand type 376*4882a593Smuzhiyunset STAG, LV+14 # source operand type 377*4882a593Smuzhiyun 378*4882a593Smuzhiyunset SPCOND_FLG, LV+10 # flag: special case (see below) 379*4882a593Smuzhiyun 380*4882a593Smuzhiyunset EXC_CC, LV+8 # saved condition codes 381*4882a593Smuzhiyunset EXC_EXTWPTR, LV+4 # saved current PC (active) 382*4882a593Smuzhiyunset EXC_EXTWORD, LV+2 # saved extension word 383*4882a593Smuzhiyunset EXC_CMDREG, LV+2 # saved extension word 384*4882a593Smuzhiyunset EXC_OPWORD, LV+0 # saved operation word 385*4882a593Smuzhiyun 386*4882a593Smuzhiyun################################ 387*4882a593Smuzhiyun 388*4882a593Smuzhiyun# Helpful macros 389*4882a593Smuzhiyun 390*4882a593Smuzhiyunset FTEMP, 0 # offsets within an 391*4882a593Smuzhiyunset FTEMP_EX, 0 # extended precision 392*4882a593Smuzhiyunset FTEMP_SGN, 2 # value saved in memory. 393*4882a593Smuzhiyunset FTEMP_HI, 4 394*4882a593Smuzhiyunset FTEMP_LO, 8 395*4882a593Smuzhiyunset FTEMP_GRS, 12 396*4882a593Smuzhiyun 397*4882a593Smuzhiyunset LOCAL, 0 # offsets within an 398*4882a593Smuzhiyunset LOCAL_EX, 0 # extended precision 399*4882a593Smuzhiyunset LOCAL_SGN, 2 # value saved in memory. 400*4882a593Smuzhiyunset LOCAL_HI, 4 401*4882a593Smuzhiyunset LOCAL_LO, 8 402*4882a593Smuzhiyunset LOCAL_GRS, 12 403*4882a593Smuzhiyun 404*4882a593Smuzhiyunset DST, 0 # offsets within an 405*4882a593Smuzhiyunset DST_EX, 0 # extended precision 406*4882a593Smuzhiyunset DST_HI, 4 # value saved in memory. 407*4882a593Smuzhiyunset DST_LO, 8 408*4882a593Smuzhiyun 409*4882a593Smuzhiyunset SRC, 0 # offsets within an 410*4882a593Smuzhiyunset SRC_EX, 0 # extended precision 411*4882a593Smuzhiyunset SRC_HI, 4 # value saved in memory. 
412*4882a593Smuzhiyunset SRC_LO, 8 413*4882a593Smuzhiyun 414*4882a593Smuzhiyunset SGL_LO, 0x3f81 # min sgl prec exponent 415*4882a593Smuzhiyunset SGL_HI, 0x407e # max sgl prec exponent 416*4882a593Smuzhiyunset DBL_LO, 0x3c01 # min dbl prec exponent 417*4882a593Smuzhiyunset DBL_HI, 0x43fe # max dbl prec exponent 418*4882a593Smuzhiyunset EXT_LO, 0x0 # min ext prec exponent 419*4882a593Smuzhiyunset EXT_HI, 0x7ffe # max ext prec exponent 420*4882a593Smuzhiyun 421*4882a593Smuzhiyunset EXT_BIAS, 0x3fff # extended precision bias 422*4882a593Smuzhiyunset SGL_BIAS, 0x007f # single precision bias 423*4882a593Smuzhiyunset DBL_BIAS, 0x03ff # double precision bias 424*4882a593Smuzhiyun 425*4882a593Smuzhiyunset NORM, 0x00 # operand type for STAG/DTAG 426*4882a593Smuzhiyunset ZERO, 0x01 # operand type for STAG/DTAG 427*4882a593Smuzhiyunset INF, 0x02 # operand type for STAG/DTAG 428*4882a593Smuzhiyunset QNAN, 0x03 # operand type for STAG/DTAG 429*4882a593Smuzhiyunset DENORM, 0x04 # operand type for STAG/DTAG 430*4882a593Smuzhiyunset SNAN, 0x05 # operand type for STAG/DTAG 431*4882a593Smuzhiyunset UNNORM, 0x06 # operand type for STAG/DTAG 432*4882a593Smuzhiyun 433*4882a593Smuzhiyun################## 434*4882a593Smuzhiyun# FPSR/FPCR bits # 435*4882a593Smuzhiyun################## 436*4882a593Smuzhiyunset neg_bit, 0x3 # negative result 437*4882a593Smuzhiyunset z_bit, 0x2 # zero result 438*4882a593Smuzhiyunset inf_bit, 0x1 # infinite result 439*4882a593Smuzhiyunset nan_bit, 0x0 # NAN result 440*4882a593Smuzhiyun 441*4882a593Smuzhiyunset q_sn_bit, 0x7 # sign bit of quotient byte 442*4882a593Smuzhiyun 443*4882a593Smuzhiyunset bsun_bit, 7 # branch on unordered 444*4882a593Smuzhiyunset snan_bit, 6 # signalling NAN 445*4882a593Smuzhiyunset operr_bit, 5 # operand error 446*4882a593Smuzhiyunset ovfl_bit, 4 # overflow 447*4882a593Smuzhiyunset unfl_bit, 3 # underflow 448*4882a593Smuzhiyunset dz_bit, 2 # divide by zero 449*4882a593Smuzhiyunset inex2_bit, 1 # inexact result 2 
450*4882a593Smuzhiyunset inex1_bit, 0 # inexact result 1 451*4882a593Smuzhiyun 452*4882a593Smuzhiyunset aiop_bit, 7 # accrued inexact operation bit 453*4882a593Smuzhiyunset aovfl_bit, 6 # accrued overflow bit 454*4882a593Smuzhiyunset aunfl_bit, 5 # accrued underflow bit 455*4882a593Smuzhiyunset adz_bit, 4 # accrued dz bit 456*4882a593Smuzhiyunset ainex_bit, 3 # accrued inexact bit 457*4882a593Smuzhiyun 458*4882a593Smuzhiyun############################# 459*4882a593Smuzhiyun# FPSR individual bit masks # 460*4882a593Smuzhiyun############################# 461*4882a593Smuzhiyunset neg_mask, 0x08000000 # negative bit mask (lw) 462*4882a593Smuzhiyunset inf_mask, 0x02000000 # infinity bit mask (lw) 463*4882a593Smuzhiyunset z_mask, 0x04000000 # zero bit mask (lw) 464*4882a593Smuzhiyunset nan_mask, 0x01000000 # nan bit mask (lw) 465*4882a593Smuzhiyun 466*4882a593Smuzhiyunset neg_bmask, 0x08 # negative bit mask (byte) 467*4882a593Smuzhiyunset inf_bmask, 0x02 # infinity bit mask (byte) 468*4882a593Smuzhiyunset z_bmask, 0x04 # zero bit mask (byte) 469*4882a593Smuzhiyunset nan_bmask, 0x01 # nan bit mask (byte) 470*4882a593Smuzhiyun 471*4882a593Smuzhiyunset bsun_mask, 0x00008000 # bsun exception mask 472*4882a593Smuzhiyunset snan_mask, 0x00004000 # snan exception mask 473*4882a593Smuzhiyunset operr_mask, 0x00002000 # operr exception mask 474*4882a593Smuzhiyunset ovfl_mask, 0x00001000 # overflow exception mask 475*4882a593Smuzhiyunset unfl_mask, 0x00000800 # underflow exception mask 476*4882a593Smuzhiyunset dz_mask, 0x00000400 # dz exception mask 477*4882a593Smuzhiyunset inex2_mask, 0x00000200 # inex2 exception mask 478*4882a593Smuzhiyunset inex1_mask, 0x00000100 # inex1 exception mask 479*4882a593Smuzhiyun 480*4882a593Smuzhiyunset aiop_mask, 0x00000080 # accrued illegal operation 481*4882a593Smuzhiyunset aovfl_mask, 0x00000040 # accrued overflow 482*4882a593Smuzhiyunset aunfl_mask, 0x00000020 # accrued underflow 483*4882a593Smuzhiyunset adz_mask, 0x00000010 # accrued divide by 
zero 484*4882a593Smuzhiyunset ainex_mask, 0x00000008 # accrued inexact 485*4882a593Smuzhiyun 486*4882a593Smuzhiyun###################################### 487*4882a593Smuzhiyun# FPSR combinations used in the FPSP # 488*4882a593Smuzhiyun###################################### 489*4882a593Smuzhiyunset dzinf_mask, inf_mask+dz_mask+adz_mask 490*4882a593Smuzhiyunset opnan_mask, nan_mask+operr_mask+aiop_mask 491*4882a593Smuzhiyunset nzi_mask, 0x01ffffff #clears N, Z, and I 492*4882a593Smuzhiyunset unfinx_mask, unfl_mask+inex2_mask+aunfl_mask+ainex_mask 493*4882a593Smuzhiyunset unf2inx_mask, unfl_mask+inex2_mask+ainex_mask 494*4882a593Smuzhiyunset ovfinx_mask, ovfl_mask+inex2_mask+aovfl_mask+ainex_mask 495*4882a593Smuzhiyunset inx1a_mask, inex1_mask+ainex_mask 496*4882a593Smuzhiyunset inx2a_mask, inex2_mask+ainex_mask 497*4882a593Smuzhiyunset snaniop_mask, nan_mask+snan_mask+aiop_mask 498*4882a593Smuzhiyunset snaniop2_mask, snan_mask+aiop_mask 499*4882a593Smuzhiyunset naniop_mask, nan_mask+aiop_mask 500*4882a593Smuzhiyunset neginf_mask, neg_mask+inf_mask 501*4882a593Smuzhiyunset infaiop_mask, inf_mask+aiop_mask 502*4882a593Smuzhiyunset negz_mask, neg_mask+z_mask 503*4882a593Smuzhiyunset opaop_mask, operr_mask+aiop_mask 504*4882a593Smuzhiyunset unfl_inx_mask, unfl_mask+aunfl_mask+ainex_mask 505*4882a593Smuzhiyunset ovfl_inx_mask, ovfl_mask+aovfl_mask+ainex_mask 506*4882a593Smuzhiyun 507*4882a593Smuzhiyun######### 508*4882a593Smuzhiyun# misc. 
# 509*4882a593Smuzhiyun######### 510*4882a593Smuzhiyunset rnd_stky_bit, 29 # stky bit pos in longword 511*4882a593Smuzhiyun 512*4882a593Smuzhiyunset sign_bit, 0x7 # sign bit 513*4882a593Smuzhiyunset signan_bit, 0x6 # signalling nan bit 514*4882a593Smuzhiyun 515*4882a593Smuzhiyunset sgl_thresh, 0x3f81 # minimum sgl exponent 516*4882a593Smuzhiyunset dbl_thresh, 0x3c01 # minimum dbl exponent 517*4882a593Smuzhiyun 518*4882a593Smuzhiyunset x_mode, 0x0 # extended precision 519*4882a593Smuzhiyunset s_mode, 0x4 # single precision 520*4882a593Smuzhiyunset d_mode, 0x8 # double precision 521*4882a593Smuzhiyun 522*4882a593Smuzhiyunset rn_mode, 0x0 # round-to-nearest 523*4882a593Smuzhiyunset rz_mode, 0x1 # round-to-zero 524*4882a593Smuzhiyunset rm_mode, 0x2 # round-tp-minus-infinity 525*4882a593Smuzhiyunset rp_mode, 0x3 # round-to-plus-infinity 526*4882a593Smuzhiyun 527*4882a593Smuzhiyunset mantissalen, 64 # length of mantissa in bits 528*4882a593Smuzhiyun 529*4882a593Smuzhiyunset BYTE, 1 # len(byte) == 1 byte 530*4882a593Smuzhiyunset WORD, 2 # len(word) == 2 bytes 531*4882a593Smuzhiyunset LONG, 4 # len(longword) == 2 bytes 532*4882a593Smuzhiyun 533*4882a593Smuzhiyunset BSUN_VEC, 0xc0 # bsun vector offset 534*4882a593Smuzhiyunset INEX_VEC, 0xc4 # inexact vector offset 535*4882a593Smuzhiyunset DZ_VEC, 0xc8 # dz vector offset 536*4882a593Smuzhiyunset UNFL_VEC, 0xcc # unfl vector offset 537*4882a593Smuzhiyunset OPERR_VEC, 0xd0 # operr vector offset 538*4882a593Smuzhiyunset OVFL_VEC, 0xd4 # ovfl vector offset 539*4882a593Smuzhiyunset SNAN_VEC, 0xd8 # snan vector offset 540*4882a593Smuzhiyun 541*4882a593Smuzhiyun########################### 542*4882a593Smuzhiyun# SPecial CONDition FLaGs # 543*4882a593Smuzhiyun########################### 544*4882a593Smuzhiyunset ftrapcc_flg, 0x01 # flag bit: ftrapcc exception 545*4882a593Smuzhiyunset fbsun_flg, 0x02 # flag bit: bsun exception 546*4882a593Smuzhiyunset mia7_flg, 0x04 # flag bit: (a7)+ <ea> 547*4882a593Smuzhiyunset mda7_flg, 0x08 # flag 
bit: -(a7) <ea> 548*4882a593Smuzhiyunset fmovm_flg, 0x40 # flag bit: fmovm instruction 549*4882a593Smuzhiyunset immed_flg, 0x80 # flag bit: &<data> <ea> 550*4882a593Smuzhiyun 551*4882a593Smuzhiyunset ftrapcc_bit, 0x0 552*4882a593Smuzhiyunset fbsun_bit, 0x1 553*4882a593Smuzhiyunset mia7_bit, 0x2 554*4882a593Smuzhiyunset mda7_bit, 0x3 555*4882a593Smuzhiyunset immed_bit, 0x7 556*4882a593Smuzhiyun 557*4882a593Smuzhiyun################################## 558*4882a593Smuzhiyun# TRANSCENDENTAL "LAST-OP" FLAGS # 559*4882a593Smuzhiyun################################## 560*4882a593Smuzhiyunset FMUL_OP, 0x0 # fmul instr performed last 561*4882a593Smuzhiyunset FDIV_OP, 0x1 # fdiv performed last 562*4882a593Smuzhiyunset FADD_OP, 0x2 # fadd performed last 563*4882a593Smuzhiyunset FMOV_OP, 0x3 # fmov performed last 564*4882a593Smuzhiyun 565*4882a593Smuzhiyun############# 566*4882a593Smuzhiyun# CONSTANTS # 567*4882a593Smuzhiyun############# 568*4882a593SmuzhiyunT1: long 0x40C62D38,0xD3D64634 # 16381 LOG2 LEAD 569*4882a593SmuzhiyunT2: long 0x3D6F90AE,0xB1E75CC7 # 16381 LOG2 TRAIL 570*4882a593Smuzhiyun 571*4882a593SmuzhiyunPI: long 0x40000000,0xC90FDAA2,0x2168C235,0x00000000 572*4882a593SmuzhiyunPIBY2: long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000 573*4882a593Smuzhiyun 574*4882a593SmuzhiyunTWOBYPI: 575*4882a593Smuzhiyun long 0x3FE45F30,0x6DC9C883 576*4882a593Smuzhiyun 577*4882a593Smuzhiyun######################################################################### 578*4882a593Smuzhiyun# XDEF **************************************************************** # 579*4882a593Smuzhiyun# _fpsp_ovfl(): 060FPSP entry point for FP Overflow exception. # 580*4882a593Smuzhiyun# # 581*4882a593Smuzhiyun# This handler should be the first code executed upon taking the # 582*4882a593Smuzhiyun# FP Overflow exception in an operating system. 
#									#
# XREF **************************************************************** #
#	_imem_read_long() - read instruction longword			#
#	fix_skewed_ops() - adjust src operand in fsave frame		#
#	set_tag_x() - determine optype of src/dst operands		#
#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
#	load_fpn2() - load dst operand from FP regfile			#
#	fout() - emulate an opclass 3 instruction			#
#	tbl_unsupp - add of table of emulation routines for opclass 0,2	#
#	_fpsp_done() - "callout" for 060FPSP exit (all work done!)	#
#	_real_ovfl() - "callout" for Overflow exception enabled code	#
#	_real_inex() - "callout" for Inexact exception enabled code	#
#	_real_trace() - "callout" for Trace exception code		#
#									#
# INPUT *************************************************************** #
#	- The system stack contains the FP Ovfl exception stack frame	#
#	- The fsave frame contains the source operand			#
#									#
# OUTPUT ************************************************************** #
#	Overflow Exception enabled:					#
#	- The system stack is unchanged					#
#	- The fsave frame contains the adjusted src op for opclass 0,2	#
#	Overflow Exception disabled:					#
#	- The system stack is unchanged					#
#	- The "exception present" flag in the fsave frame is cleared	#
#									#
# ALGORITHM *********************************************************** #
#	On the 060, if an FP overflow is present as the result of any	#
# instruction, the 060 will take an overflow exception whether the	#
# exception is enabled or disabled in the FPCR. For the disabled case,	#
# this handler emulates the instruction to determine what the correct	#
# default result should be for the operation. This default result is	#
# then stored in either the FP regfile, data regfile, or memory.	#
# Finally, the handler exits through the "callout" _fpsp_done()		#
# denoting that no exceptional conditions exist within the machine.	#
#	If the exception is enabled, then this handler must create the	#
# exceptional operand and place it in the fsave state frame, and store	#
# the default result (only if the instruction is opclass 3). For	#
# exceptions enabled, this handler must exit through the "callout"	#
# _real_ovfl() so that the operating system enabled overflow handler	#
# can handle this case.							#
#	Two other conditions exist. First, if overflow was disabled	#
# but the inexact exception was enabled, this handler must exit		#
# through the "callout" _real_inex() regardless of whether the result	#
# was inexact.								#
#	Also, in the case of an opclass three instruction where		#
# overflow was disabled and the trace exception was enabled, this	#
# handler must exit through the "callout" _real_trace().		#
#									#
#########################################################################
# _fpsp_ovfl(): FP Overflow exception handler entry point.		#
# Flow: save user regs/ctrl regs in the link.w frame, re-fetch the	#
# faulting instruction via the FPIAR, then either re-emulate the op	#
# (opclass 0/2 via the tbl_unsupp dispatch table) or move it out	#
# (opclass 3 via fout()), and exit through _fpsp_done(),		#
# _real_ovfl(), _real_inex(), or _real_trace() as appropriate.		#
#########################################################################

	global		_fpsp_ovfl
_fpsp_ovfl:

#$#	sub.l		&24,%sp			# make room for src/dst

	link.w		%a6,&-LOCAL_SIZE	# init stack frame

	fsave		FP_SRC(%a6)		# grab the "busy" frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

# the FPIAR holds the "current PC" of the faulting instruction
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)	# store opword + extension word

##############################################################################

# opclass 3 (fmove out) uses a different stack frame/protocol; split it off.
	btst		&0x5,EXC_CMDREG(%a6)	# is instr an fmove out?
	bne.w		fovfl_out


	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		fix_skewed_ops		# fix src op

# since, I believe, only NORMs and DENORMs can come through here,
# maybe we can avoid the subroutine call.
	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		set_tag_x		# tag the operand type
	mov.b		%d0,STAG(%a6)		# maybe NORM,DENORM

# bit five of the fp extension word separates the monadic and dyadic operations
# that can pass through fpsp_ovfl(). remember that fcmp, ftst, and fsincos
# will never take this exception.
	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
	beq.b		fovfl_extract		# monadic

	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg number
	bsr.l		load_fpn2		# load dst into FP_DST

	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
	bsr.l		set_tag_x		# tag the operand type
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
	bne.b		fovfl_op2_done		# no
	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
fovfl_op2_done:
	mov.b		%d0,DTAG(%a6)		# save dst optype tag

fovfl_extract:

#$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
#$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
#$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
#$#	mov.l		FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
#$#	mov.l		FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
#$#	mov.l		FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode

	mov.b		1+EXC_CMDREG(%a6),%d1
	andi.w		&0x007f,%d1		# extract extension (emulation table index)

	andi.l		&0x00ff01ff,USER_FPSR(%a6) # zero all but accrued field

	fmov.l		&0x0,%fpcr		# zero current control regs
	fmov.l		&0x0,%fpsr

	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	lea		FP_DST(%a6),%a1		# pass: ptr to dst op

# maybe we can make these entry points ONLY the OVFL entry points of each routine.
# tbl_unsupp holds longword offsets relative to tbl_unsupp itself,
# so fetch the offset (scaled index) then jump through it.
	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
	jsr		(tbl_unsupp.l,%pc,%d1.l*1)

# the operation has been emulated. the result is in fp0.
# the EXOP, if an exception occurred, is in fp1.
# we must save the default result regardless of whether
# traps are enabled or disabled.
	bfextu		EXC_CMDREG(%a6){&6:&3},%d0
	bsr.l		store_fpreg

# the exceptional possibilities we have left ourselves with are ONLY overflow
# and inexact. and, the inexact is such that overflow occurred and was disabled
# but inexact was enabled.
	btst		&ovfl_bit,FPCR_ENABLE(%a6)
	bne.b		fovfl_ovfl_on

	btst		&inex2_bit,FPCR_ENABLE(%a6)
	bne.b		fovfl_inex_on

# no enabled exceptions: restore user state and exit cleanly.
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6
#$#	add.l		&24,%sp
	bra.l		_fpsp_done

# overflow is enabled AND overflow, of course, occurred. so, we have the EXOP
# in fp1. now, simply jump to _real_ovfl()!
fovfl_ovfl_on:
	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP (fp1) to stack

	mov.w		&0xe005,2+FP_SRC(%a6)	# save exc status

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!

	unlk		%a6

	bra.l		_real_ovfl

# overflow occurred but is disabled. meanwhile, inexact is enabled. Therefore,
# we must jump to real_inex().
fovfl_inex_on:

	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP (fp1) to stack

	mov.b		&0xc4,1+EXC_VOFF(%a6)	# vector offset = 0xc4 (inexact)
	mov.w		&0xe001,2+FP_SRC(%a6)	# save exc status

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!

	unlk		%a6

	bra.l		_real_inex

########################################################################
# opclass 3: fmove from an FP register out to memory/data register.
fovfl_out:


#$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
#$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
#$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)

# the src operand is definitely a NORM(!), so tag it as such
	mov.b		&NORM,STAG(%a6)		# set src optype tag

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode

	and.l		&0xffff00ff,USER_FPSR(%a6) # zero all but accrued field

	fmov.l		&0x0,%fpcr		# zero current control regs
	fmov.l		&0x0,%fpsr

	lea		FP_SRC(%a6),%a0		# pass ptr to src operand

	bsr.l		fout			# emulate the move-out

	btst		&ovfl_bit,FPCR_ENABLE(%a6)
	bne.w		fovfl_ovfl_on

	btst		&inex2_bit,FPCR_ENABLE(%a6)
	bne.w		fovfl_inex_on

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6
#$#	add.l		&24,%sp

# if the Trace bit of the stacked SR is set, convert the frame into a
# Trace exception frame before leaving.
	btst		&0x7,(%sp)		# is trace on?
	beq.l		_fpsp_done		# no

	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR
	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
	bra.l		_real_trace

#########################################################################
# XDEF ****************************************************************	#
#	_fpsp_unfl(): 060FPSP entry point for FP Underflow exception.	#
#									#
#	This handler should be the first code executed upon taking the	#
#	FP Underflow exception in an operating system.			#
#									#
# XREF ****************************************************************	#
#	_imem_read_long() - read instruction longword			#
#	fix_skewed_ops() - adjust src operand in fsave frame		#
#	set_tag_x() - determine optype of src/dst operands		#
#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
#	load_fpn2() - load dst operand from FP regfile			#
#	fout() - emulate an opclass 3 instruction			#
#	tbl_unsupp - addr of a table of emulation routines for		#
#		opclass 0,2						#
#	_fpsp_done() - "callout" for 060FPSP exit (all work done!)
#	_real_unfl() - "callout" for Underflow exception enabled code	#
#	_real_inex() - "callout" for Inexact exception enabled code	#
#	_real_trace() - "callout" for Trace exception code		#
#									#
# INPUT ***************************************************************	#
#	- The system stack contains the FP Unfl exception stack frame	#
#	- The fsave frame contains the source operand			#
#									#
# OUTPUT **************************************************************	#
#	Underflow Exception enabled:					#
#	- The system stack is unchanged					#
#	- The fsave frame contains the adjusted src op for opclass 0,2	#
#	Underflow Exception disabled:					#
#	- The system stack is unchanged					#
#	- The "exception present" flag in the fsave frame is cleared	#
#									#
# ALGORITHM ***********************************************************	#
#	On the 060, if an FP underflow is present as the result of any	#
# instruction, the 060 will take an underflow exception whether the	#
# exception is enabled or disabled in the FPCR. For the disabled case,	#
# this handler emulates the instruction to determine what the correct	#
# default result should be for the operation. This default result is	#
# then stored in either the FP regfile, data regfile, or memory.	#
# Finally, the handler exits through the "callout" _fpsp_done()		#
# denoting that no exceptional conditions exist within the machine.
#	If the exception is enabled, then this handler must create the	#
# exceptional operand and place it in the fsave state frame, and store	#
# the default result (only if the instruction is opclass 3). For	#
# exceptions enabled, this handler must exit through the "callout"	#
# _real_unfl() so that the operating system enabled underflow handler	#
# can handle this case.							#
#	Two other conditions exist. First, if underflow was disabled	#
# but the inexact exception was enabled and the result was inexact,	#
# this handler must exit through the "callout" _real_inex().		#
#	Also, in the case of an opclass three instruction where		#
# underflow was disabled and the trace exception was enabled, this	#
# handler must exit through the "callout" _real_trace().
#									#
#########################################################################
# _fpsp_unfl(): FP Underflow exception handler entry point.		#
# Same structure as _fpsp_ovfl above: save user state, re-fetch the	#
# faulting instruction via the FPIAR, re-emulate (opclass 0/2 through	#
# tbl_unsupp; opclass 3 through fout()), then exit via _fpsp_done(),	#
# _real_unfl(), _real_inex(), or _real_trace().				#
#########################################################################

	global		_fpsp_unfl
_fpsp_unfl:

#$#	sub.l		&24,%sp			# make room for src/dst

	link.w		%a6,&-LOCAL_SIZE	# init stack frame

	fsave		FP_SRC(%a6)		# grab the "busy" frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

# the FPIAR holds the "current PC" of the faulting instruction
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)	# store opword + extension word

##############################################################################

# opclass 3 (fmove out) uses a different stack frame/protocol; split it off.
	btst		&0x5,EXC_CMDREG(%a6)	# is instr an fmove out?
	bne.w		funfl_out


	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		fix_skewed_ops		# fix src op

	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		set_tag_x		# tag the operand type
	mov.b		%d0,STAG(%a6)		# maybe NORM,DENORM

# bit five of the fp ext word separates the monadic and dyadic operations
# that can pass through fpsp_unfl(). remember that fcmp, and ftst
# will never take this exception.
	btst		&0x5,1+EXC_CMDREG(%a6)	# is op monadic or dyadic?
	beq.b		funfl_extract		# monadic

# now, what's left that's not dyadic is fsincos. we can distinguish it
# from all dyadics by the '0110xxx pattern
	btst		&0x4,1+EXC_CMDREG(%a6)	# is op an fsincos?
	bne.b		funfl_extract		# yes

	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg number
	bsr.l		load_fpn2		# load dst into FP_DST

	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
	bsr.l		set_tag_x		# tag the operand type
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
	bne.b		funfl_op2_done		# no
	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
funfl_op2_done:
	mov.b		%d0,DTAG(%a6)		# save dst optype tag

funfl_extract:

#$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
#$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
#$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
#$#	mov.l		FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
#$#	mov.l		FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
#$#	mov.l		FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode

	mov.b		1+EXC_CMDREG(%a6),%d1
	andi.w		&0x007f,%d1		# extract extension (emulation table index)

	andi.l		&0x00ff01ff,USER_FPSR(%a6) # zero all but accrued field

	fmov.l		&0x0,%fpcr		# zero current control regs
	fmov.l		&0x0,%fpsr

	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	lea		FP_DST(%a6),%a1		# pass: ptr to dst op

# maybe we can make these entry points ONLY the OVFL entry points of each routine.
# tbl_unsupp holds longword offsets relative to tbl_unsupp itself,
# so fetch the offset (scaled index) then jump through it.
	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
	jsr		(tbl_unsupp.l,%pc,%d1.l*1)

# the emulated result is in fp0 (EXOP, if any, in fp1); store the
# default result regardless of whether traps are enabled or disabled.
	bfextu		EXC_CMDREG(%a6){&6:&3},%d0
	bsr.l		store_fpreg

# The `060 FPU multiplier hardware is such that if the result of a
# multiply operation is the smallest possible normalized number
# (0x00000000_80000000_00000000), then the machine will take an
# underflow exception. Since this is incorrect, we need to check
# if our emulation, after re-doing the operation, decided that
# no underflow was called for. We do these checks only in
# funfl_{unfl,inex}_on() because w/ both exceptions disabled, this
# special case will simply exit gracefully with the correct result.

# the exceptional possibilities we have left ourselves with are ONLY underflow
# and inexact. and, the inexact is such that underflow occurred and was disabled
# but inexact was enabled.
	btst		&unfl_bit,FPCR_ENABLE(%a6)
	bne.b		funfl_unfl_on

funfl_chkinex:
	btst		&inex2_bit,FPCR_ENABLE(%a6)
	bne.b		funfl_inex_on

funfl_exit:
# no enabled exceptions: restore user state and exit cleanly.
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6
#$#	add.l		&24,%sp
	bra.l		_fpsp_done

# underflow is enabled AND underflow, of course, occurred. so, we have the EXOP
# in fp1 (don't forget to save fp0). what to do now?
# well, we simply have to get to go to _real_unfl()!
funfl_unfl_on:

# The `060 FPU multiplier hardware is such that if the result of a
# multiply operation is the smallest possible normalized number
# (0x00000000_80000000_00000000), then the machine will take an
# underflow exception. Since this is incorrect, we check here to see
# if our emulation, after re-doing the operation, decided that
# no underflow was called for.
	btst		&unfl_bit,FPSR_EXCEPT(%a6)
	beq.w		funfl_chkinex

funfl_unfl_on2:
	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP (fp1) to stack

	mov.w		&0xe003,2+FP_SRC(%a6)	# save exc status

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!

	unlk		%a6

	bra.l		_real_unfl

# underflow occurred but is disabled. meanwhile, inexact is enabled. Therefore,
# we must jump to real_inex().
funfl_inex_on:

# The `060 FPU multiplier hardware is such that if the result of a
# multiply operation is the smallest possible normalized number
# (0x00000000_80000000_00000000), then the machine will take an
# underflow exception.
# But, whether bogus or not, if inexact is enabled AND it occurred,
# then we have to branch to real_inex.

	btst		&inex2_bit,FPSR_EXCEPT(%a6)
	beq.w		funfl_exit

funfl_inex_on2:

	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to stack

	mov.b		&0xc4,1+EXC_VOFF(%a6)	# vector offset = 0xc4 (inexact)
	mov.w		&0xe001,2+FP_SRC(%a6)	# save exc status

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!

	unlk		%a6

	bra.l		_real_inex

#######################################################################
# opclass 3: fmove from an FP register out to memory/data register.
funfl_out:


#$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
#$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
#$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)

# the src operand is definitely a NORM(!), so tag it as such
	mov.b		&NORM,STAG(%a6)		# set src optype tag

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode

	and.l		&0xffff00ff,USER_FPSR(%a6) # zero all but accrued field

	fmov.l		&0x0,%fpcr		# zero current control regs
	fmov.l		&0x0,%fpsr

	lea		FP_SRC(%a6),%a0		# pass ptr to src operand

	bsr.l		fout			# emulate the move-out

	btst		&unfl_bit,FPCR_ENABLE(%a6)
	bne.w		funfl_unfl_on2

	btst		&inex2_bit,FPCR_ENABLE(%a6)
	bne.w		funfl_inex_on2

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6
#$#	add.l		&24,%sp

# if the Trace bit of the stacked SR is set, convert the frame into a
# Trace exception frame before leaving.
	btst		&0x7,(%sp)		# is trace on?
	beq.l		_fpsp_done		# no

	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR
	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
	bra.l		_real_trace

#########################################################################
# XDEF ****************************************************************	#
#	_fpsp_unsupp(): 060FPSP entry point for FP "Unimplemented	#
#		Data Type" exception.					#
#									#
#	This handler should be the first code executed upon taking the	#
#	FP Unimplemented Data Type exception in an operating system.
#									#
# XREF ****************************************************************	#
#	_imem_read_{word,long}() - read instruction word/longword	#
#	fix_skewed_ops() - adjust src operand in fsave frame		#
#	set_tag_x() - determine optype of src/dst operands		#
#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
#	load_fpn2() - load dst operand from FP regfile			#
#	load_fpn1() - load src operand from FP regfile			#
#	fout() - emulate an opclass 3 instruction			#
#	tbl_unsupp - addr of a table of emulation routines for		#
#		opclass 0,2						#
#	_real_inex() - "callout" to operating system inexact handler	#
#	_fpsp_done() - "callout" for exit; work all done		#
#	_real_trace() - "callout" for Trace enabled exception		#
#	funimp_skew() - adjust fsave src ops to "incorrect" value	#
#	_real_snan() - "callout" for SNAN exception			#
#	_real_operr() - "callout" for OPERR exception			#
#	_real_ovfl() - "callout" for OVFL exception			#
#	_real_unfl() - "callout" for UNFL exception			#
#	get_packed() - fetch packed operand from memory			#
#									#
# INPUT ***************************************************************	#
#	- The system stack contains the "Unimp Data Type" stk frame	#
#	- The fsave frame contains the src op (for UNNORM/DENORM)	#
#									#
# OUTPUT **************************************************************	#
#	If Inexact exception (opclass 3):				#
1125*4882a593Smuzhiyun# - The system stack is changed to an Inexact exception stk frame # 1126*4882a593Smuzhiyun# If SNAN exception (opclass 3): # 1127*4882a593Smuzhiyun# - The system stack is changed to an SNAN exception stk frame # 1128*4882a593Smuzhiyun# If OPERR exception (opclass 3): # 1129*4882a593Smuzhiyun# - The system stack is changed to an OPERR exception stk frame # 1130*4882a593Smuzhiyun# If OVFL exception (opclass 3): # 1131*4882a593Smuzhiyun# - The system stack is changed to an OVFL exception stk frame # 1132*4882a593Smuzhiyun# If UNFL exception (opclass 3): # 1133*4882a593Smuzhiyun# - The system stack is changed to an UNFL exception stack frame # 1134*4882a593Smuzhiyun# If Trace exception enabled: # 1135*4882a593Smuzhiyun# - The system stack is changed to a Trace exception stack frame # 1136*4882a593Smuzhiyun# Else: (normal case) # 1137*4882a593Smuzhiyun# - Correct result has been stored as appropriate # 1138*4882a593Smuzhiyun# # 1139*4882a593Smuzhiyun# ALGORITHM *********************************************************** # 1140*4882a593Smuzhiyun# Two main instruction types can enter here: (1) DENORM or UNNORM # 1141*4882a593Smuzhiyun# unimplemented data types. These can be either opclass 0,2 or 3 # 1142*4882a593Smuzhiyun# instructions, and (2) PACKED unimplemented data format instructions # 1143*4882a593Smuzhiyun# also of opclasses 0,2, or 3. # 1144*4882a593Smuzhiyun# For UNNORM/DENORM opclass 0 and 2, the handler fetches the src # 1145*4882a593Smuzhiyun# operand from the fsave state frame and the dst operand (if dyadic) # 1146*4882a593Smuzhiyun# from the FP register file. The instruction is then emulated by # 1147*4882a593Smuzhiyun# choosing an emulation routine from a table of routines indexed by # 1148*4882a593Smuzhiyun# instruction type. Once the instruction has been emulated and result # 1149*4882a593Smuzhiyun# saved, then we check to see if any enabled exceptions resulted from # 1150*4882a593Smuzhiyun# instruction emulation. 
If none, then we exit through the "callout" # 1151*4882a593Smuzhiyun# _fpsp_done(). If there is an enabled FP exception, then we insert # 1152*4882a593Smuzhiyun# this exception into the FPU in the fsave state frame and then exit # 1153*4882a593Smuzhiyun# through _fpsp_done(). # 1154*4882a593Smuzhiyun# PACKED opclass 0 and 2 is similar in how the instruction is # 1155*4882a593Smuzhiyun# emulated and exceptions handled. The differences occur in how the # 1156*4882a593Smuzhiyun# handler loads the packed op (by calling get_packed() routine) and # 1157*4882a593Smuzhiyun# by the fact that a Trace exception could be pending for PACKED ops. # 1158*4882a593Smuzhiyun# If a Trace exception is pending, then the current exception stack # 1159*4882a593Smuzhiyun# frame is changed to a Trace exception stack frame and an exit is # 1160*4882a593Smuzhiyun# made through _real_trace(). # 1161*4882a593Smuzhiyun# For UNNORM/DENORM opclass 3, the actual move out to memory is # 1162*4882a593Smuzhiyun# performed by calling the routine fout(). If no exception should occur # 1163*4882a593Smuzhiyun# as the result of emulation, then an exit either occurs through # 1164*4882a593Smuzhiyun# _fpsp_done() or through _real_trace() if a Trace exception is pending # 1165*4882a593Smuzhiyun# (a Trace stack frame must be created here, too). If an FP exception # 1166*4882a593Smuzhiyun# should occur, then we must create an exception stack frame of that # 1167*4882a593Smuzhiyun# type and jump to either _real_snan(), _real_operr(), _real_inex(), # 1168*4882a593Smuzhiyun# _real_unfl(), or _real_ovfl() as appropriate. PACKED opclass 3 # 1169*4882a593Smuzhiyun# emulation is performed in a similar manner. 
#
#########################################################################

#
# (1) DENORM and UNNORM (unimplemented) data types:
#
#				post-instruction
#				*****************
#				*      EA	*
#	 pre-instruction	*		*
#	*****************	*****************
#	* 0x0 *  0x0dc	*	* 0x3 *  0x0dc	*
#	*****************	*****************
#	*    Next	*	*    Next	*
#	*     PC	*	*     PC	*
#	*****************	*****************
#	*     SR	*	*     SR	*
#	*****************	*****************
#
# (2) PACKED format (unsupported) opclasses two and three:
#	*****************
#	*      EA	*
#	*		*
#	*****************
#	* 0x2 *  0x0dc	*
#	*****************
#	*    Next	*
#	*     PC	*
#	*****************
#	*     SR	*
#	*****************
#
	global		_fpsp_unsupp
_fpsp_unsupp:

	link.w		%a6,&-LOCAL_SIZE	# init stack frame

	fsave		FP_SRC(%a6)		# save fp state

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode?
	bne.b		fu_s
fu_u:
	mov.l		%usp,%a0		# fetch user stack pointer
	mov.l		%a0,EXC_A7(%a6)		# save on stack
	bra.b		fu_cont
# if the exception is an opclass zero or two unimplemented data type
# exception, then the a7' calculated here is wrong since it doesn't
# stack an ea. however, we don't need an a7' for this case anyways.
fu_s:
	lea		0x4+EXC_EA(%a6),%a0	# load old a7'
	mov.l		%a0,EXC_A7(%a6)		# save on stack

fu_cont:

# the FPIAR holds the "current PC" of the faulting instruction.
# the FPIAR should be set correctly for ALL exceptions passing through
# this point.
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD

############################

	clr.b		SPCOND_FLG(%a6)		# clear special condition flag

# Separate opclass three (fpn-to-mem) ops since they have a different
# stack frame and protocol.
	btst		&0x5,EXC_CMDREG(%a6)	# is it an fmove out?
	bne.w		fu_out			# yes

# Separate packed opclass two instructions.
1247*4882a593Smuzhiyun bfextu EXC_CMDREG(%a6){&0:&6},%d0 1248*4882a593Smuzhiyun cmpi.b %d0,&0x13 1249*4882a593Smuzhiyun beq.w fu_in_pack 1250*4882a593Smuzhiyun 1251*4882a593Smuzhiyun 1252*4882a593Smuzhiyun# I'm not sure at this point what FPSR bits are valid for this instruction. 1253*4882a593Smuzhiyun# so, since the emulation routines re-create them anyways, zero exception field 1254*4882a593Smuzhiyun andi.l &0x00ff00ff,USER_FPSR(%a6) # zero exception field 1255*4882a593Smuzhiyun 1256*4882a593Smuzhiyun fmov.l &0x0,%fpcr # zero current control regs 1257*4882a593Smuzhiyun fmov.l &0x0,%fpsr 1258*4882a593Smuzhiyun 1259*4882a593Smuzhiyun# Opclass two w/ memory-to-fpn operation will have an incorrect extended 1260*4882a593Smuzhiyun# precision format if the src format was single or double and the 1261*4882a593Smuzhiyun# source data type was an INF, NAN, DENORM, or UNNORM 1262*4882a593Smuzhiyun lea FP_SRC(%a6),%a0 # pass ptr to input 1263*4882a593Smuzhiyun bsr.l fix_skewed_ops 1264*4882a593Smuzhiyun 1265*4882a593Smuzhiyun# we don't know whether the src operand or the dst operand (or both) is the 1266*4882a593Smuzhiyun# UNNORM or DENORM. call the function that tags the operand type. if the 1267*4882a593Smuzhiyun# input is an UNNORM, then convert it to a NORM, DENORM, or ZERO. 1268*4882a593Smuzhiyun lea FP_SRC(%a6),%a0 # pass: ptr to src op 1269*4882a593Smuzhiyun bsr.l set_tag_x # tag the operand type 1270*4882a593Smuzhiyun cmpi.b %d0,&UNNORM # is operand an UNNORM? 
1271*4882a593Smuzhiyun bne.b fu_op2 # no 1272*4882a593Smuzhiyun bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO 1273*4882a593Smuzhiyun 1274*4882a593Smuzhiyunfu_op2: 1275*4882a593Smuzhiyun mov.b %d0,STAG(%a6) # save src optype tag 1276*4882a593Smuzhiyun 1277*4882a593Smuzhiyun bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg 1278*4882a593Smuzhiyun 1279*4882a593Smuzhiyun# bit five of the fp extension word separates the monadic and dyadic operations 1280*4882a593Smuzhiyun# at this point 1281*4882a593Smuzhiyun btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic? 1282*4882a593Smuzhiyun beq.b fu_extract # monadic 1283*4882a593Smuzhiyun cmpi.b 1+EXC_CMDREG(%a6),&0x3a # is operation an ftst? 1284*4882a593Smuzhiyun beq.b fu_extract # yes, so it's monadic, too 1285*4882a593Smuzhiyun 1286*4882a593Smuzhiyun bsr.l load_fpn2 # load dst into FP_DST 1287*4882a593Smuzhiyun 1288*4882a593Smuzhiyun lea FP_DST(%a6),%a0 # pass: ptr to dst op 1289*4882a593Smuzhiyun bsr.l set_tag_x # tag the operand type 1290*4882a593Smuzhiyun cmpi.b %d0,&UNNORM # is operand an UNNORM? 
1291*4882a593Smuzhiyun bne.b fu_op2_done # no 1292*4882a593Smuzhiyun bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO 1293*4882a593Smuzhiyunfu_op2_done: 1294*4882a593Smuzhiyun mov.b %d0,DTAG(%a6) # save dst optype tag 1295*4882a593Smuzhiyun 1296*4882a593Smuzhiyunfu_extract: 1297*4882a593Smuzhiyun clr.l %d0 1298*4882a593Smuzhiyun mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec 1299*4882a593Smuzhiyun 1300*4882a593Smuzhiyun bfextu 1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension 1301*4882a593Smuzhiyun 1302*4882a593Smuzhiyun lea FP_SRC(%a6),%a0 1303*4882a593Smuzhiyun lea FP_DST(%a6),%a1 1304*4882a593Smuzhiyun 1305*4882a593Smuzhiyun mov.l (tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr 1306*4882a593Smuzhiyun jsr (tbl_unsupp.l,%pc,%d1.l*1) 1307*4882a593Smuzhiyun 1308*4882a593Smuzhiyun# 1309*4882a593Smuzhiyun# Exceptions in order of precedence: 1310*4882a593Smuzhiyun# BSUN : none 1311*4882a593Smuzhiyun# SNAN : all dyadic ops 1312*4882a593Smuzhiyun# OPERR : fsqrt(-NORM) 1313*4882a593Smuzhiyun# OVFL : all except ftst,fcmp 1314*4882a593Smuzhiyun# UNFL : all except ftst,fcmp 1315*4882a593Smuzhiyun# DZ : fdiv 1316*4882a593Smuzhiyun# INEX2 : all except ftst,fcmp 1317*4882a593Smuzhiyun# INEX1 : none (packed doesn't go through here) 1318*4882a593Smuzhiyun# 1319*4882a593Smuzhiyun 1320*4882a593Smuzhiyun# we determine the highest priority exception(if any) set by the 1321*4882a593Smuzhiyun# emulation routine that has also been enabled by the user. 1322*4882a593Smuzhiyun mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions set 1323*4882a593Smuzhiyun bne.b fu_in_ena # some are enabled 1324*4882a593Smuzhiyun 1325*4882a593Smuzhiyunfu_in_cont: 1326*4882a593Smuzhiyun# fcmp and ftst do not store any result. 1327*4882a593Smuzhiyun mov.b 1+EXC_CMDREG(%a6),%d0 # fetch extension 1328*4882a593Smuzhiyun andi.b &0x38,%d0 # extract bits 3-5 1329*4882a593Smuzhiyun cmpi.b %d0,&0x38 # is instr fcmp or ftst? 
1330*4882a593Smuzhiyun beq.b fu_in_exit # yes 1331*4882a593Smuzhiyun 1332*4882a593Smuzhiyun bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg 1333*4882a593Smuzhiyun bsr.l store_fpreg # store the result 1334*4882a593Smuzhiyun 1335*4882a593Smuzhiyunfu_in_exit: 1336*4882a593Smuzhiyun 1337*4882a593Smuzhiyun fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 1338*4882a593Smuzhiyun fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 1339*4882a593Smuzhiyun movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 1340*4882a593Smuzhiyun 1341*4882a593Smuzhiyun unlk %a6 1342*4882a593Smuzhiyun 1343*4882a593Smuzhiyun bra.l _fpsp_done 1344*4882a593Smuzhiyun 1345*4882a593Smuzhiyunfu_in_ena: 1346*4882a593Smuzhiyun and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled 1347*4882a593Smuzhiyun bfffo %d0{&24:&8},%d0 # find highest priority exception 1348*4882a593Smuzhiyun bne.b fu_in_exc # there is at least one set 1349*4882a593Smuzhiyun 1350*4882a593Smuzhiyun# 1351*4882a593Smuzhiyun# No exceptions occurred that were also enabled. Now: 1352*4882a593Smuzhiyun# 1353*4882a593Smuzhiyun# if (OVFL && ovfl_disabled && inexact_enabled) { 1354*4882a593Smuzhiyun# branch to _real_inex() (even if the result was exact!); 1355*4882a593Smuzhiyun# } else { 1356*4882a593Smuzhiyun# save the result in the proper fp reg (unless the op is fcmp or ftst); 1357*4882a593Smuzhiyun# return; 1358*4882a593Smuzhiyun# } 1359*4882a593Smuzhiyun# 1360*4882a593Smuzhiyun btst &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set? 1361*4882a593Smuzhiyun beq.b fu_in_cont # no 1362*4882a593Smuzhiyun 1363*4882a593Smuzhiyunfu_in_ovflchk: 1364*4882a593Smuzhiyun btst &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled? 
1365*4882a593Smuzhiyun beq.b fu_in_cont # no 1366*4882a593Smuzhiyun bra.w fu_in_exc_ovfl # go insert overflow frame 1367*4882a593Smuzhiyun 1368*4882a593Smuzhiyun# 1369*4882a593Smuzhiyun# An exception occurred and that exception was enabled: 1370*4882a593Smuzhiyun# 1371*4882a593Smuzhiyun# shift enabled exception field into lo byte of d0; 1372*4882a593Smuzhiyun# if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) || 1373*4882a593Smuzhiyun# ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) { 1374*4882a593Smuzhiyun# /* 1375*4882a593Smuzhiyun# * this is the case where we must call _real_inex() now or else 1376*4882a593Smuzhiyun# * there will be no other way to pass it the exceptional operand 1377*4882a593Smuzhiyun# */ 1378*4882a593Smuzhiyun# call _real_inex(); 1379*4882a593Smuzhiyun# } else { 1380*4882a593Smuzhiyun# restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU; 1381*4882a593Smuzhiyun# } 1382*4882a593Smuzhiyun# 1383*4882a593Smuzhiyunfu_in_exc: 1384*4882a593Smuzhiyun subi.l &24,%d0 # fix offset to be 0-8 1385*4882a593Smuzhiyun cmpi.b %d0,&0x6 # is exception INEX? (6) 1386*4882a593Smuzhiyun bne.b fu_in_exc_exit # no 1387*4882a593Smuzhiyun 1388*4882a593Smuzhiyun# the enabled exception was inexact 1389*4882a593Smuzhiyun btst &unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur? 1390*4882a593Smuzhiyun bne.w fu_in_exc_unfl # yes 1391*4882a593Smuzhiyun btst &ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur? 1392*4882a593Smuzhiyun bne.w fu_in_exc_ovfl # yes 1393*4882a593Smuzhiyun 1394*4882a593Smuzhiyun# here, we insert the correct fsave status value into the fsave frame for the 1395*4882a593Smuzhiyun# corresponding exception. the operand in the fsave frame should be the original 1396*4882a593Smuzhiyun# src operand. 
fu_in_exc_exit:
	mov.l		%d0,-(%sp)		# save d0
	bsr.l		funimp_skew		# skew sgl or dbl inputs
	mov.l		(%sp)+,%d0		# restore d0

	mov.w		(tbl_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) # create exc status

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)		# restore src op

	unlk		%a6

	bra.l		_fpsp_done

# fsave status words, indexed by exception priority (bfffo result - 24)
tbl_except:
	short		0xe000,0xe006,0xe004,0xe005
	short		0xe003,0xe002,0xe001,0xe001

fu_in_exc_unfl:
	mov.w		&0x4,%d0		# index of UNFL status
	bra.b		fu_in_exc_exit
fu_in_exc_ovfl:
	mov.w		&0x03,%d0		# index of OVFL status
	bra.b		fu_in_exc_exit

# If the input operand to this operation was opclass two and a single
# or double precision denorm, inf, or nan, the operand needs to be
# "corrected" in order to have the proper equivalent extended precision
# number.
# In:  a0 = ptr to extended-precision operand image (LOCAL_EX/HI/LO)
# Out: operand at (a0) rewritten in true extended-precision form; d0 trashed
	global		fix_skewed_ops
fix_skewed_ops:
	bfextu		EXC_CMDREG(%a6){&0:&6},%d0 # extract opclass,src fmt
	cmpi.b		%d0,&0x11		# is class = 2 & fmt = sgl?
	beq.b		fso_sgl			# yes
	cmpi.b		%d0,&0x15		# is class = 2 & fmt = dbl?
	beq.b		fso_dbl			# yes
	rts					# no

fso_sgl:
	mov.w		LOCAL_EX(%a0),%d0	# fetch src exponent
	andi.w		&0x7fff,%d0		# strip sign
	cmpi.w		%d0,&0x3f80		# is |exp| == $3f80?
	beq.b		fso_sgl_dnrm_zero	# yes
	cmpi.w		%d0,&0x407f		# no; is |exp| == $407f?
	beq.b		fso_infnan		# yes
	rts					# no

fso_sgl_dnrm_zero:
	andi.l		&0x7fffffff,LOCAL_HI(%a0) # clear j-bit
	beq.b		fso_zero		# it's a skewed zero
fso_sgl_dnrm:
# here, we count on norm not to alter a0...
	bsr.l		norm			# normalize mantissa
	neg.w		%d0			# -shft amt
	addi.w		&0x3f81,%d0		# adjust new exponent
	andi.w		&0x8000,LOCAL_EX(%a0)	# clear old exponent
	or.w		%d0,LOCAL_EX(%a0)	# insert new exponent
	rts

fso_zero:
	andi.w		&0x8000,LOCAL_EX(%a0)	# clear bogus exponent
	rts

fso_infnan:
	andi.b		&0x7f,LOCAL_HI(%a0)	# clear j-bit
	ori.w		&0x7fff,LOCAL_EX(%a0)	# make exponent = $7fff
	rts

fso_dbl:
	mov.w		LOCAL_EX(%a0),%d0	# fetch src exponent
	andi.w		&0x7fff,%d0		# strip sign
	cmpi.w		%d0,&0x3c00		# is |exp| == $3c00?
	beq.b		fso_dbl_dnrm_zero	# yes
	cmpi.w		%d0,&0x43ff		# no; is |exp| == $43ff?
	beq.b		fso_infnan		# yes
	rts					# no

fso_dbl_dnrm_zero:
	andi.l		&0x7fffffff,LOCAL_HI(%a0) # clear j-bit
	bne.b		fso_dbl_dnrm		# it's a skewed denorm
	tst.l		LOCAL_LO(%a0)		# is it a zero?
	beq.b		fso_zero		# yes
fso_dbl_dnrm:
# here, we count on norm not to alter a0...
	bsr.l		norm			# normalize mantissa
	neg.w		%d0			# -shft amt
	addi.w		&0x3c01,%d0		# adjust new exponent
	andi.w		&0x8000,LOCAL_EX(%a0)	# clear old exponent
	or.w		%d0,LOCAL_EX(%a0)	# insert new exponent
	rts

#################################################################

# fmove out took an unimplemented data type exception.
# the src operand is in FP_SRC. Call _fout() to write out the result and
# to determine which exceptions, if any, to take.
fu_out:

# Separate packed move outs from the UNNORM and DENORM move outs.
	bfextu		EXC_CMDREG(%a6){&3:&3},%d0
	cmpi.b		%d0,&0x3
	beq.w		fu_out_pack
	cmpi.b		%d0,&0x7
	beq.w		fu_out_pack


# I'm not sure at this point what FPSR bits are valid for this instruction.
# so, since the emulation routines re-create them anyways, zero exception field.
# fmove out doesn't affect ccodes.
1509*4882a593Smuzhiyun and.l &0xffff00ff,USER_FPSR(%a6) # zero exception field 1510*4882a593Smuzhiyun 1511*4882a593Smuzhiyun fmov.l &0x0,%fpcr # zero current control regs 1512*4882a593Smuzhiyun fmov.l &0x0,%fpsr 1513*4882a593Smuzhiyun 1514*4882a593Smuzhiyun# the src can ONLY be a DENORM or an UNNORM! so, don't make any big subroutine 1515*4882a593Smuzhiyun# call here. just figure out what it is... 1516*4882a593Smuzhiyun mov.w FP_SRC_EX(%a6),%d0 # get exponent 1517*4882a593Smuzhiyun andi.w &0x7fff,%d0 # strip sign 1518*4882a593Smuzhiyun beq.b fu_out_denorm # it's a DENORM 1519*4882a593Smuzhiyun 1520*4882a593Smuzhiyun lea FP_SRC(%a6),%a0 1521*4882a593Smuzhiyun bsr.l unnorm_fix # yes; fix it 1522*4882a593Smuzhiyun 1523*4882a593Smuzhiyun mov.b %d0,STAG(%a6) 1524*4882a593Smuzhiyun 1525*4882a593Smuzhiyun bra.b fu_out_cont 1526*4882a593Smuzhiyunfu_out_denorm: 1527*4882a593Smuzhiyun mov.b &DENORM,STAG(%a6) 1528*4882a593Smuzhiyunfu_out_cont: 1529*4882a593Smuzhiyun 1530*4882a593Smuzhiyun clr.l %d0 1531*4882a593Smuzhiyun mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec 1532*4882a593Smuzhiyun 1533*4882a593Smuzhiyun lea FP_SRC(%a6),%a0 # pass ptr to src operand 1534*4882a593Smuzhiyun 1535*4882a593Smuzhiyun mov.l (%a6),EXC_A6(%a6) # in case a6 changes 1536*4882a593Smuzhiyun bsr.l fout # call fmove out routine 1537*4882a593Smuzhiyun 1538*4882a593Smuzhiyun# Exceptions in order of precedence: 1539*4882a593Smuzhiyun# BSUN : none 1540*4882a593Smuzhiyun# SNAN : none 1541*4882a593Smuzhiyun# OPERR : fmove.{b,w,l} out of large UNNORM 1542*4882a593Smuzhiyun# OVFL : fmove.{s,d} 1543*4882a593Smuzhiyun# UNFL : fmove.{s,d,x} 1544*4882a593Smuzhiyun# DZ : none 1545*4882a593Smuzhiyun# INEX2 : all 1546*4882a593Smuzhiyun# INEX1 : none (packed doesn't travel through here) 1547*4882a593Smuzhiyun 1548*4882a593Smuzhiyun# determine the highest priority exception(if any) set by the 1549*4882a593Smuzhiyun# emulation routine that has also been enabled by the user. 
1550*4882a593Smuzhiyun mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled 1551*4882a593Smuzhiyun bne.w fu_out_ena # some are enabled 1552*4882a593Smuzhiyun 1553*4882a593Smuzhiyunfu_out_done: 1554*4882a593Smuzhiyun 1555*4882a593Smuzhiyun mov.l EXC_A6(%a6),(%a6) # in case a6 changed 1556*4882a593Smuzhiyun 1557*4882a593Smuzhiyun# on extended precision opclass three instructions using pre-decrement or 1558*4882a593Smuzhiyun# post-increment addressing mode, the address register is not updated. is the 1559*4882a593Smuzhiyun# address register was the stack pointer used from user mode, then let's update 1560*4882a593Smuzhiyun# it here. if it was used from supervisor mode, then we have to handle this 1561*4882a593Smuzhiyun# as a special case. 1562*4882a593Smuzhiyun btst &0x5,EXC_SR(%a6) 1563*4882a593Smuzhiyun bne.b fu_out_done_s 1564*4882a593Smuzhiyun 1565*4882a593Smuzhiyun mov.l EXC_A7(%a6),%a0 # restore a7 1566*4882a593Smuzhiyun mov.l %a0,%usp 1567*4882a593Smuzhiyun 1568*4882a593Smuzhiyunfu_out_done_cont: 1569*4882a593Smuzhiyun fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 1570*4882a593Smuzhiyun fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 1571*4882a593Smuzhiyun movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 1572*4882a593Smuzhiyun 1573*4882a593Smuzhiyun unlk %a6 1574*4882a593Smuzhiyun 1575*4882a593Smuzhiyun btst &0x7,(%sp) # is trace on? 1576*4882a593Smuzhiyun bne.b fu_out_trace # yes 1577*4882a593Smuzhiyun 1578*4882a593Smuzhiyun bra.l _fpsp_done 1579*4882a593Smuzhiyun 1580*4882a593Smuzhiyun# is the ea mode pre-decrement of the stack pointer from supervisor mode? 1581*4882a593Smuzhiyun# ("fmov.x fpm,-(a7)") if so, 1582*4882a593Smuzhiyunfu_out_done_s: 1583*4882a593Smuzhiyun cmpi.b SPCOND_FLG(%a6),&mda7_flg 1584*4882a593Smuzhiyun bne.b fu_out_done_cont 1585*4882a593Smuzhiyun 1586*4882a593Smuzhiyun# the extended precision result is still in fp0. 
but, we need to save it 1587*4882a593Smuzhiyun# somewhere on the stack until we can copy it to its final resting place. 1588*4882a593Smuzhiyun# here, we're counting on the top of the stack to be the old place-holders 1589*4882a593Smuzhiyun# for fp0/fp1 which have already been restored. that way, we can write 1590*4882a593Smuzhiyun# over those destinations with the shifted stack frame. 1591*4882a593Smuzhiyun fmovm.x &0x80,FP_SRC(%a6) # put answer on stack 1592*4882a593Smuzhiyun 1593*4882a593Smuzhiyun fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 1594*4882a593Smuzhiyun fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 1595*4882a593Smuzhiyun movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 1596*4882a593Smuzhiyun 1597*4882a593Smuzhiyun mov.l (%a6),%a6 # restore frame pointer 1598*4882a593Smuzhiyun 1599*4882a593Smuzhiyun mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp) 1600*4882a593Smuzhiyun mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp) 1601*4882a593Smuzhiyun 1602*4882a593Smuzhiyun# now, copy the result to the proper place on the stack 1603*4882a593Smuzhiyun mov.l LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp) 1604*4882a593Smuzhiyun mov.l LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp) 1605*4882a593Smuzhiyun mov.l LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp) 1606*4882a593Smuzhiyun 1607*4882a593Smuzhiyun add.l &LOCAL_SIZE-0x8,%sp 1608*4882a593Smuzhiyun 1609*4882a593Smuzhiyun btst &0x7,(%sp) 1610*4882a593Smuzhiyun bne.b fu_out_trace 1611*4882a593Smuzhiyun 1612*4882a593Smuzhiyun bra.l _fpsp_done 1613*4882a593Smuzhiyun 1614*4882a593Smuzhiyunfu_out_ena: 1615*4882a593Smuzhiyun and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled 1616*4882a593Smuzhiyun bfffo %d0{&24:&8},%d0 # find highest priority exception 1617*4882a593Smuzhiyun bne.b fu_out_exc # there is at least one set 1618*4882a593Smuzhiyun 1619*4882a593Smuzhiyun# no exceptions were set. 
# if a disabled overflow occurred and inexact was enabled but the result
# was exact, then a branch to _real_inex() is made.
	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
	beq.w		fu_out_done		# no

fu_out_ovflchk:
	btst		&inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
	beq.w		fu_out_done		# no
	bra.w		fu_inex			# yes

#
# The fp move out that took the "Unimplemented Data Type" exception was
# being traced. Since the stack frames are similar, get the "current" PC
# from FPIAR and put it in the trace stack frame then jump to _real_trace().
#
#		 UNSUPP FRAME		    TRACE FRAME
#		*****************	*****************
#		*      EA	*	*    Current	*
#		*		*	*      PC	*
#		*****************	*****************
#		* 0x3 *  0x0dc	*	* 0x2 *  0x024	*
#		*****************	*****************
#		*    Next	*	*    Next	*
#		*     PC	*	*     PC	*
#		*****************	*****************
#		*     SR	*	*     SR	*
#		*****************	*****************
#
fu_out_trace:
	mov.w		&0x2024,0x6(%sp)	# format 0x2, vector 0x024
	fmov.l		%fpiar,0x8(%sp)		# "Current PC" = faulting instr
	bra.l		_real_trace

# an exception occurred and that exception was enabled.
fu_out_exc:
	subi.l		&24,%d0			# fix offset to be 0-8

# we don't mess with the existing fsave frame. just re-insert it and
# jump to the "_real_{}()" handler...
	mov.w		(tbl_fu_out.b,%pc,%d0.w*2),%d0
	jmp		(tbl_fu_out.b,%pc,%d0.w*1)

	swbeg		&0x8
tbl_fu_out:
	short		tbl_fu_out - tbl_fu_out	# BSUN can't happen
	short		tbl_fu_out - tbl_fu_out	# SNAN can't happen
	short		fu_operr - tbl_fu_out	# OPERR
	short		fu_ovfl - tbl_fu_out	# OVFL
	short		fu_unfl - tbl_fu_out	# UNFL
	short		tbl_fu_out - tbl_fu_out	# DZ can't happen
	short		fu_inex - tbl_fu_out	# INEX2
	short		tbl_fu_out - tbl_fu_out	# INEX1 won't make it here

# for snan,operr,ovfl,unfl, src op is still in FP_SRC so just
# frestore it.
fu_snan:
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w		&0x30d8,EXC_VOFF(%a6)	# vector offset = 0xd8
	mov.w		&0xe006,2+FP_SRC(%a6)	# set fsave status: SNAN

	frestore	FP_SRC(%a6)

	unlk		%a6


	bra.l		_real_snan

fu_operr:
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w		&0x30d0,EXC_VOFF(%a6)	# vector offset = 0xd0
	mov.w		&0xe004,2+FP_SRC(%a6)	# set fsave status: OPERR

	frestore	FP_SRC(%a6)

	unlk		%a6


	bra.l		_real_operr

fu_ovfl:
	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to the stack

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w		&0x30d4,EXC_VOFF(%a6)	# vector offset = 0xd4
	mov.w		&0xe005,2+FP_SRC(%a6)	# set fsave status: OVFL

	frestore	FP_SRC(%a6)		# restore EXOP

	unlk		%a6

	bra.l		_real_ovfl

# underflow can happen for extended precision. extended precision opclass
# three instruction exceptions don't update the stack pointer. so, if the
# exception occurred from user mode, then simply update a7 and exit normally.
# if the exception occurred from supervisor mode, check if the <ea> was
# -(a7) (handled as a special case below).
fu_unfl:
	mov.l		EXC_A6(%a6),(%a6)	# restore a6

	btst		&0x5,EXC_SR(%a6)
	bne.w		fu_unfl_s

	mov.l		EXC_A7(%a6),%a0		# restore a7 whether we need
	mov.l		%a0,%usp		# to or not...

fu_unfl_cont:
	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to the stack

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w		&0x30cc,EXC_VOFF(%a6)	# vector offset = 0xcc
	mov.w		&0xe003,2+FP_SRC(%a6)	# set fsave status: UNFL

	frestore	FP_SRC(%a6)		# restore EXOP

	unlk		%a6

	bra.l		_real_unfl

fu_unfl_s:
	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(sp)?
	bne.b		fu_unfl_cont

# the extended precision result is still in fp0. but, we need to save it
# somewhere on the stack until we can copy it to its final resting place
# (where the exc frame is currently). make sure it's not at the top of the
# frame or it will get overwritten when the exc stack frame is shifted "down".
	fmovm.x		&0x80,FP_SRC(%a6)	# put answer on stack
	fmovm.x		&0x40,FP_DST(%a6)	# put EXOP on stack

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w		&0x30cc,EXC_VOFF(%a6)	# vector offset = 0xcc
	mov.w		&0xe003,2+FP_DST(%a6)	# set fsave status: UNFL

	frestore	FP_DST(%a6)		# restore EXOP

	mov.l		(%a6),%a6		# restore frame pointer

# shift the exception frame "down" 0xc bytes over the old fp0 placeholder
	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)

# now, copy the result to the proper place on the stack
	mov.l		LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
	mov.l		LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
	mov.l		LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)

	add.l		&LOCAL_SIZE-0x8,%sp

	bra.l		_real_unfl

# fmove in and out enter here.
fu_inex:
	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to the stack

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w		&0x30c4,EXC_VOFF(%a6)	# vector offset = 0xc4
	mov.w		&0xe001,2+FP_SRC(%a6)	# set fsave status: INEX

	frestore	FP_SRC(%a6)		# restore EXOP

	unlk		%a6


	bra.l		_real_inex

#########################################################################
#########################################################################
fu_in_pack:


# I'm not sure at this point what FPSR bits are valid for this instruction.
# so, since the emulation routines re-create them anyways, zero exception field
	andi.l		&0x0ff00ff,USER_FPSR(%a6) # zero exception field

	fmov.l		&0x0,%fpcr		# zero current control regs
	fmov.l		&0x0,%fpsr

	bsr.l		get_packed		# fetch packed src operand

	lea		FP_SRC(%a6),%a0		# pass ptr to src
	bsr.l		set_tag_x		# set src optype tag

	mov.b		%d0,STAG(%a6)		# save src optype tag

	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg

# bit five of the fp extension word separates the monadic and dyadic operations
# at this point
	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
	beq.b		fu_extract_p		# monadic
	cmpi.b		1+EXC_CMDREG(%a6),&0x3a	# is operation an ftst?
	beq.b		fu_extract_p		# yes, so it's monadic, too

	bsr.l		load_fpn2		# load dst into FP_DST

	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
	bsr.l		set_tag_x		# tag the operand type
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
	bne.b		fu_op2_done_p		# no
	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
fu_op2_done_p:
	mov.b		%d0,DTAG(%a6)		# save dst optype tag

fu_extract_p:
	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec

	bfextu		1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension

	lea		FP_SRC(%a6),%a0
	lea		FP_DST(%a6),%a1

	mov.l		(tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
	jsr		(tbl_unsupp.l,%pc,%d1.l*1)

#
# Exceptions in order of precedence:
#	BSUN	: none
#	SNAN	: all dyadic ops
#	OPERR	: fsqrt(-NORM)
#	OVFL	: all except ftst,fcmp
#	UNFL	: all except ftst,fcmp
#	DZ	: fdiv
#	INEX2	: all except ftst,fcmp
#	INEX1	: all
#

# we determine the highest priority exception(if any) set by the
# emulation routine that has also been enabled by the user.
	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
	bne.w		fu_in_ena_p		# some are enabled

fu_in_cont_p:
# fcmp and ftst do not store any result.
	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch extension
	andi.b		&0x38,%d0		# extract bits 3-5
	cmpi.b		%d0,&0x38		# is instr fcmp or ftst?
	beq.b		fu_in_exit_p		# yes

	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
	bsr.l		store_fpreg		# store the result

fu_in_exit_p:

	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
	bne.w		fu_in_exit_s_p		# supervisor

	mov.l		EXC_A7(%a6),%a0		# update user a7
	mov.l		%a0,%usp

fu_in_exit_cont_p:
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6			# unravel stack frame

	btst		&0x7,(%sp)		# is trace on?
	bne.w		fu_trace_p		# yes

	bra.l		_fpsp_done		# exit to os

# the exception occurred in supervisor mode. check to see if the
# addressing mode was (a7)+. if so, we'll need to shift the
# stack frame "up".
fu_in_exit_s_p:
	btst		&mia7_bit,SPCOND_FLG(%a6) # was ea mode (a7)+
	beq.b		fu_in_exit_cont_p	# no

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6			# unravel stack frame

# shift the stack frame "up". we don't really care about the <ea> field.
1914*4882a593Smuzhiyun mov.l 0x4(%sp),0x10(%sp) 1915*4882a593Smuzhiyun mov.l 0x0(%sp),0xc(%sp) 1916*4882a593Smuzhiyun add.l &0xc,%sp 1917*4882a593Smuzhiyun 1918*4882a593Smuzhiyun btst &0x7,(%sp) # is trace on? 1919*4882a593Smuzhiyun bne.w fu_trace_p # yes 1920*4882a593Smuzhiyun 1921*4882a593Smuzhiyun bra.l _fpsp_done # exit to os 1922*4882a593Smuzhiyun 1923*4882a593Smuzhiyunfu_in_ena_p: 1924*4882a593Smuzhiyun and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled & set 1925*4882a593Smuzhiyun bfffo %d0{&24:&8},%d0 # find highest priority exception 1926*4882a593Smuzhiyun bne.b fu_in_exc_p # at least one was set 1927*4882a593Smuzhiyun 1928*4882a593Smuzhiyun# 1929*4882a593Smuzhiyun# No exceptions occurred that were also enabled. Now: 1930*4882a593Smuzhiyun# 1931*4882a593Smuzhiyun# if (OVFL && ovfl_disabled && inexact_enabled) { 1932*4882a593Smuzhiyun# branch to _real_inex() (even if the result was exact!); 1933*4882a593Smuzhiyun# } else { 1934*4882a593Smuzhiyun# save the result in the proper fp reg (unless the op is fcmp or ftst); 1935*4882a593Smuzhiyun# return; 1936*4882a593Smuzhiyun# } 1937*4882a593Smuzhiyun# 1938*4882a593Smuzhiyun btst &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set? 1939*4882a593Smuzhiyun beq.w fu_in_cont_p # no 1940*4882a593Smuzhiyun 1941*4882a593Smuzhiyunfu_in_ovflchk_p: 1942*4882a593Smuzhiyun btst &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled? 
1943*4882a593Smuzhiyun beq.w fu_in_cont_p # no 1944*4882a593Smuzhiyun bra.w fu_in_exc_ovfl_p # do _real_inex() now 1945*4882a593Smuzhiyun 1946*4882a593Smuzhiyun# 1947*4882a593Smuzhiyun# An exception occurred and that exception was enabled: 1948*4882a593Smuzhiyun# 1949*4882a593Smuzhiyun# shift enabled exception field into lo byte of d0; 1950*4882a593Smuzhiyun# if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) || 1951*4882a593Smuzhiyun# ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) { 1952*4882a593Smuzhiyun# /* 1953*4882a593Smuzhiyun# * this is the case where we must call _real_inex() now or else 1954*4882a593Smuzhiyun# * there will be no other way to pass it the exceptional operand 1955*4882a593Smuzhiyun# */ 1956*4882a593Smuzhiyun# call _real_inex(); 1957*4882a593Smuzhiyun# } else { 1958*4882a593Smuzhiyun# restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU; 1959*4882a593Smuzhiyun# } 1960*4882a593Smuzhiyun# 1961*4882a593Smuzhiyunfu_in_exc_p: 1962*4882a593Smuzhiyun subi.l &24,%d0 # fix offset to be 0-8 1963*4882a593Smuzhiyun cmpi.b %d0,&0x6 # is exception INEX? (6 or 7) 1964*4882a593Smuzhiyun blt.b fu_in_exc_exit_p # no 1965*4882a593Smuzhiyun 1966*4882a593Smuzhiyun# the enabled exception was inexact 1967*4882a593Smuzhiyun btst &unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur? 1968*4882a593Smuzhiyun bne.w fu_in_exc_unfl_p # yes 1969*4882a593Smuzhiyun btst &ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur? 1970*4882a593Smuzhiyun bne.w fu_in_exc_ovfl_p # yes 1971*4882a593Smuzhiyun 1972*4882a593Smuzhiyun# here, we insert the correct fsave status value into the fsave frame for the 1973*4882a593Smuzhiyun# corresponding exception. the operand in the fsave frame should be the original 1974*4882a593Smuzhiyun# src operand. 
1975*4882a593Smuzhiyun# as a reminder for future predicted pain and agony, we are passing in fsave the 1976*4882a593Smuzhiyun# "non-skewed" operand for cases of sgl and dbl src INFs,NANs, and DENORMs. 1977*4882a593Smuzhiyun# this is INCORRECT for enabled SNAN which would give to the user the skewed SNAN!!! 1978*4882a593Smuzhiyunfu_in_exc_exit_p: 1979*4882a593Smuzhiyun btst &0x5,EXC_SR(%a6) # user or supervisor? 1980*4882a593Smuzhiyun bne.w fu_in_exc_exit_s_p # supervisor 1981*4882a593Smuzhiyun 1982*4882a593Smuzhiyun mov.l EXC_A7(%a6),%a0 # update user a7 1983*4882a593Smuzhiyun mov.l %a0,%usp 1984*4882a593Smuzhiyun 1985*4882a593Smuzhiyunfu_in_exc_exit_cont_p: 1986*4882a593Smuzhiyun mov.w (tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6) 1987*4882a593Smuzhiyun 1988*4882a593Smuzhiyun fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 1989*4882a593Smuzhiyun fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 1990*4882a593Smuzhiyun movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 1991*4882a593Smuzhiyun 1992*4882a593Smuzhiyun frestore FP_SRC(%a6) # restore src op 1993*4882a593Smuzhiyun 1994*4882a593Smuzhiyun unlk %a6 1995*4882a593Smuzhiyun 1996*4882a593Smuzhiyun btst &0x7,(%sp) # is trace enabled? 
1997*4882a593Smuzhiyun bne.w fu_trace_p # yes 1998*4882a593Smuzhiyun 1999*4882a593Smuzhiyun bra.l _fpsp_done 2000*4882a593Smuzhiyun 2001*4882a593Smuzhiyuntbl_except_p: 2002*4882a593Smuzhiyun short 0xe000,0xe006,0xe004,0xe005 2003*4882a593Smuzhiyun short 0xe003,0xe002,0xe001,0xe001 2004*4882a593Smuzhiyun 2005*4882a593Smuzhiyunfu_in_exc_ovfl_p: 2006*4882a593Smuzhiyun mov.w &0x3,%d0 2007*4882a593Smuzhiyun bra.w fu_in_exc_exit_p 2008*4882a593Smuzhiyun 2009*4882a593Smuzhiyunfu_in_exc_unfl_p: 2010*4882a593Smuzhiyun mov.w &0x4,%d0 2011*4882a593Smuzhiyun bra.w fu_in_exc_exit_p 2012*4882a593Smuzhiyun 2013*4882a593Smuzhiyunfu_in_exc_exit_s_p: 2014*4882a593Smuzhiyun btst &mia7_bit,SPCOND_FLG(%a6) 2015*4882a593Smuzhiyun beq.b fu_in_exc_exit_cont_p 2016*4882a593Smuzhiyun 2017*4882a593Smuzhiyun mov.w (tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6) 2018*4882a593Smuzhiyun 2019*4882a593Smuzhiyun fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 2020*4882a593Smuzhiyun fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 2021*4882a593Smuzhiyun movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 2022*4882a593Smuzhiyun 2023*4882a593Smuzhiyun frestore FP_SRC(%a6) # restore src op 2024*4882a593Smuzhiyun 2025*4882a593Smuzhiyun unlk %a6 # unravel stack frame 2026*4882a593Smuzhiyun 2027*4882a593Smuzhiyun# shift stack frame "up". who cares about <ea> field. 2028*4882a593Smuzhiyun mov.l 0x4(%sp),0x10(%sp) 2029*4882a593Smuzhiyun mov.l 0x0(%sp),0xc(%sp) 2030*4882a593Smuzhiyun add.l &0xc,%sp 2031*4882a593Smuzhiyun 2032*4882a593Smuzhiyun btst &0x7,(%sp) # is trace on? 2033*4882a593Smuzhiyun bne.b fu_trace_p # yes 2034*4882a593Smuzhiyun 2035*4882a593Smuzhiyun bra.l _fpsp_done # exit to os 2036*4882a593Smuzhiyun 2037*4882a593Smuzhiyun# 2038*4882a593Smuzhiyun# The opclass two PACKED instruction that took an "Unimplemented Data Type" 2039*4882a593Smuzhiyun# exception was being traced. 
Make the "current" PC the FPIAR and put it in the 2040*4882a593Smuzhiyun# trace stack frame then jump to _real_trace(). 2041*4882a593Smuzhiyun# 2042*4882a593Smuzhiyun# UNSUPP FRAME TRACE FRAME 2043*4882a593Smuzhiyun# ***************** ***************** 2044*4882a593Smuzhiyun# * EA * * Current * 2045*4882a593Smuzhiyun# * * * PC * 2046*4882a593Smuzhiyun# ***************** ***************** 2047*4882a593Smuzhiyun# * 0x2 * 0x0dc * * 0x2 * 0x024 * 2048*4882a593Smuzhiyun# ***************** ***************** 2049*4882a593Smuzhiyun# * Next * * Next * 2050*4882a593Smuzhiyun# * PC * * PC * 2051*4882a593Smuzhiyun# ***************** ***************** 2052*4882a593Smuzhiyun# * SR * * SR * 2053*4882a593Smuzhiyun# ***************** ***************** 2054*4882a593Smuzhiyunfu_trace_p: 2055*4882a593Smuzhiyun mov.w &0x2024,0x6(%sp) 2056*4882a593Smuzhiyun fmov.l %fpiar,0x8(%sp) 2057*4882a593Smuzhiyun 2058*4882a593Smuzhiyun bra.l _real_trace 2059*4882a593Smuzhiyun 2060*4882a593Smuzhiyun######################################################### 2061*4882a593Smuzhiyun######################################################### 2062*4882a593Smuzhiyunfu_out_pack: 2063*4882a593Smuzhiyun 2064*4882a593Smuzhiyun 2065*4882a593Smuzhiyun# I'm not sure at this point what FPSR bits are valid for this instruction. 2066*4882a593Smuzhiyun# so, since the emulation routines re-create them anyways, zero exception field. 2067*4882a593Smuzhiyun# fmove out doesn't affect ccodes. 2068*4882a593Smuzhiyun and.l &0xffff00ff,USER_FPSR(%a6) # zero exception field 2069*4882a593Smuzhiyun 2070*4882a593Smuzhiyun fmov.l &0x0,%fpcr # zero current control regs 2071*4882a593Smuzhiyun fmov.l &0x0,%fpsr 2072*4882a593Smuzhiyun 2073*4882a593Smuzhiyun bfextu EXC_CMDREG(%a6){&6:&3},%d0 2074*4882a593Smuzhiyun bsr.l load_fpn1 2075*4882a593Smuzhiyun 2076*4882a593Smuzhiyun# unlike other opclass 3, unimplemented data type exceptions, packed must be 2077*4882a593Smuzhiyun# able to detect all operand types. 
# fu_out_pack (continued): tag the source operand, call fout() to
# perform the "fmove out", then report the highest-priority enabled
# exception, if any (only INEX, OPERR, and SNAN are possible here).
	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
	bsr.l		set_tag_x		# tag the operand type
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
	bne.b		fu_op2_p		# no
	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO

fu_op2_p:
	mov.b		%d0,STAG(%a6)		# save src optype tag

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec

	lea		FP_SRC(%a6),%a0		# pass ptr to src operand

	mov.l		(%a6),EXC_A6(%a6)	# in case a6 changes
	bsr.l		fout			# call fmove out routine

# Exceptions in order of precedence:
#	BSUN	: no
#	SNAN	: yes
#	OPERR	: if ((k_factor > +17) || (dec. exp exceeds 3 digits))
#	OVFL	: no
#	UNFL	: no
#	DZ	: no
#	INEX2	: yes
#	INEX1	: no

# determine the highest priority exception(if any) set by the
# emulation routine that has also been enabled by the user.
	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
	bne.w		fu_out_ena_p		# some are enabled

fu_out_exit_p:
	mov.l		EXC_A6(%a6),(%a6)	# restore a6

	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
	bne.b		fu_out_exit_s_p		# supervisor

	mov.l		EXC_A7(%a6),%a0		# update user a7
	mov.l		%a0,%usp

fu_out_exit_cont_p:
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6			# unravel stack frame

	btst		&0x7,(%sp)		# is trace on?
	bne.w		fu_trace_p		# yes

	bra.l		_fpsp_done		# exit to os

# the exception occurred in supervisor mode. check to see if the
# addressing mode was -(a7). if so, we'll need to shift the
# stack frame "down".
fu_out_exit_s_p:
	btst		&mda7_bit,SPCOND_FLG(%a6) # was ea mode -(a7)
	beq.b		fu_out_exit_cont_p	# no

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.l		(%a6),%a6		# restore frame pointer

# shift the SR/PC fields of the exception frame "down" 12 bytes
	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)

# now, copy the result to the proper place on the stack
	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)

	add.l		&LOCAL_SIZE-0x8,%sp

	btst		&0x7,(%sp)		# is trace on?
	bne.w		fu_trace_p		# yes

	bra.l		_fpsp_done		# exit to os

fu_out_ena_p:
	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled
	bfffo		%d0{&24:&8},%d0		# find highest priority exception
	beq.w		fu_out_exit_p		# none set; normal exit

	mov.l		EXC_A6(%a6),(%a6)	# restore a6

# an exception occurred and that exception was enabled.
# the only exception possible on packed move out are INEX, OPERR, and SNAN.
fu_out_exc_p:
	cmpi.b		%d0,&0x1a		# is bfffo offset OPERR (26)?
	bgt.w		fu_inex_p2		# >26: INEX
	beq.w		fu_operr_p		# =26: OPERR

fu_snan_p:
	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
	bne.b		fu_snan_s_p		# supervisor

	mov.l		EXC_A7(%a6),%a0		# update user a7
	mov.l		%a0,%usp
	bra.w		fu_snan			# use common SNAN exit

fu_snan_s_p:
	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was ea mode -(a7)?
	bne.w		fu_snan			# no; use common SNAN exit

# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
# the strategy is to move the exception frame "down" 12 bytes. then, we
# can store the default result where the exception frame was.
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w		&0x30d8,EXC_VOFF(%a6)	# vector offset = 0xd8 (FP SNAN)
	mov.w		&0xe006,2+FP_SRC(%a6)	# set fsave status

	frestore	FP_SRC(%a6)		# restore src operand

	mov.l		(%a6),%a6		# restore frame pointer

	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)

# now, we copy the default result to its proper location
	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)

	add.l		&LOCAL_SIZE-0x8,%sp

	bra.l		_real_snan

fu_operr_p:
	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
	bne.w		fu_operr_p_s		# supervisor

	mov.l		EXC_A7(%a6),%a0		# update user a7
	mov.l		%a0,%usp
	bra.w		fu_operr		# use common OPERR exit

fu_operr_p_s:
	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was ea mode -(a7)?
	bne.w		fu_operr		# no; use common OPERR exit

# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
# the strategy is to move the exception frame "down" 12 bytes. then, we
# can store the default result where the exception frame was.
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w		&0x30d0,EXC_VOFF(%a6)	# vector offset = 0xd0
	mov.w		&0xe004,2+FP_SRC(%a6)	# set fsave status

	frestore	FP_SRC(%a6)		# restore src operand

	mov.l		(%a6),%a6		# restore frame pointer

	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)

# now, we copy the default result to its proper location
	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)

	add.l		&LOCAL_SIZE-0x8,%sp

	bra.l		_real_operr

fu_inex_p2:
	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
	bne.w		fu_inex_s_p2		# supervisor

	mov.l		EXC_A7(%a6),%a0		# update user a7
	mov.l		%a0,%usp
	bra.w		fu_inex			# use common INEX exit

fu_inex_s_p2:
	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was ea mode -(a7)?
	bne.w		fu_inex			# no; use common INEX exit

# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
# the strategy is to move the exception frame "down" 12 bytes. then, we
# can store the default result where the exception frame was.
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w		&0x30c4,EXC_VOFF(%a6)	# vector offset = 0xc4
	mov.w		&0xe001,2+FP_SRC(%a6)	# set fsave status

	frestore	FP_SRC(%a6)		# restore src operand

	mov.l		(%a6),%a6		# restore frame pointer

	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)

# now, we copy the default result to its proper location
	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)

	add.l		&LOCAL_SIZE-0x8,%sp

	bra.l		_real_inex

#########################################################################

#
# if we're stuffing a source operand back into an fsave frame then we
# have to make sure that for single or double source operands that the
# format stuffed is as weird as the hardware usually makes it.
#
# funimp_skew: when a single- or double-precision source operand is
# being stuffed back into an fsave frame, warp the operand at
# FP_SRC(%a6) in place into the "skewed" format the hardware produces
# for sgl/dbl DENORMs. Extended-precision sources are left untouched.
# In:	a6 = exception frame pointer; operand at FP_SRC(%a6)
# Out:	FP_SRC(%a6) possibly rewritten in place
# Clobbers d0/d1/a0; dnrm_lp may clobber more -- confirm against its def.
#
	global		funimp_skew
funimp_skew:
	bfextu		EXC_EXTWORD(%a6){&3:&3},%d0 # extract src specifier
	cmpi.b		%d0,&0x1		# was src sgl?
	beq.b		funimp_skew_sgl		# yes
	cmpi.b		%d0,&0x5		# was src dbl?
	beq.b		funimp_skew_dbl		# yes
	rts					# ext/packed: nothing to do

# single precision: only exponents in (0, 0x3f80] need skewing
funimp_skew_sgl:
	mov.w		FP_SRC_EX(%a6),%d0	# fetch DENORM exponent
	andi.w		&0x7fff,%d0		# strip sign
	beq.b		funimp_skew_sgl_not	# zero exponent: leave as-is
	cmpi.w		%d0,&0x3f80
	bgt.b		funimp_skew_sgl_not	# too big to be a sgl DENORM
	neg.w		%d0			# make exponent negative
	addi.w		&0x3f81,%d0		# find amt to shift
	mov.l		FP_SRC_HI(%a6),%d1	# fetch DENORM hi(man)
	lsr.l		%d0,%d1			# shift it
	bset		&31,%d1			# set j-bit
	mov.l		%d1,FP_SRC_HI(%a6)	# insert new hi(man)
	andi.w		&0x8000,FP_SRC_EX(%a6)	# clear old exponent
	ori.w		&0x3f80,FP_SRC_EX(%a6)	# insert new "skewed" exponent
funimp_skew_sgl_not:
	rts

# double precision: only exponents in (0, 0x3c00] need skewing
funimp_skew_dbl:
	mov.w		FP_SRC_EX(%a6),%d0	# fetch DENORM exponent
	andi.w		&0x7fff,%d0		# strip sign
	beq.b		funimp_skew_dbl_not	# zero exponent: leave as-is
	cmpi.w		%d0,&0x3c00
	bgt.b		funimp_skew_dbl_not	# too big to be a dbl DENORM

	tst.b		FP_SRC_EX(%a6)		# make "internal format"
	smi.b		0x2+FP_SRC(%a6)		# remember sign byte
	mov.w		%d0,FP_SRC_EX(%a6)	# insert exponent with cleared sign
	clr.l		%d0			# clear g,r,s
	lea		FP_SRC(%a6),%a0		# pass ptr to src op
	mov.w		&0x3c01,%d1		# pass denorm threshold
	bsr.l		dnrm_lp			# denorm it
	mov.w		&0x3c00,%d0		# new exponent
	tst.b		0x2+FP_SRC(%a6)		# is sign set?
	beq.b		fss_dbl_denorm_done	# no
	bset		&15,%d0			# set sign
fss_dbl_denorm_done:
	bset		&0x7,FP_SRC_HI(%a6)	# set j-bit
	mov.w		%d0,FP_SRC_EX(%a6)	# insert new exponent
funimp_skew_dbl_not:
	rts

#########################################################################
#
# _mem_write2: write the operand pointed to by a0.
# In user mode (supervisor bit of the stacked SR clear), the write is
# handed off to _dmem_write unchanged. In supervisor mode, the three
# longwords at (a0) are instead stashed into FP_DST(%a6) and d1 is
# cleared (presumably _dmem_write's "no fault" return convention --
# confirm against _dmem_write).
#
	global		_mem_write2
_mem_write2:
	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode?
	beq.l		_dmem_write		# user; do the real write
	mov.l		0x0(%a0),FP_DST_EX(%a6)	# supervisor; stash operand
	mov.l		0x4(%a0),FP_DST_HI(%a6)
	mov.l		0x8(%a0),FP_DST_LO(%a6)
	clr.l		%d1			# clear status
	rts

#########################################################################
# XDEF ****************************************************************	#
#	_fpsp_effadd(): 060FPSP entry point for FP "Unimplemented	#
#			effective address" exception.			#
#									#
#	This handler should be the first code executed upon taking the	#
#	FP Unimplemented Effective Address exception in an operating	#
#	system.
# 2369*4882a593Smuzhiyun# # 2370*4882a593Smuzhiyun# XREF **************************************************************** # 2371*4882a593Smuzhiyun# _imem_read_long() - read instruction longword # 2372*4882a593Smuzhiyun# fix_skewed_ops() - adjust src operand in fsave frame # 2373*4882a593Smuzhiyun# set_tag_x() - determine optype of src/dst operands # 2374*4882a593Smuzhiyun# store_fpreg() - store opclass 0 or 2 result to FP regfile # 2375*4882a593Smuzhiyun# unnorm_fix() - change UNNORM operands to NORM or ZERO # 2376*4882a593Smuzhiyun# load_fpn2() - load dst operand from FP regfile # 2377*4882a593Smuzhiyun# tbl_unsupp - add of table of emulation routines for opclass 0,2 # 2378*4882a593Smuzhiyun# decbin() - convert packed data to FP binary data # 2379*4882a593Smuzhiyun# _real_fpu_disabled() - "callout" for "FPU disabled" exception # 2380*4882a593Smuzhiyun# _real_access() - "callout" for access error exception # 2381*4882a593Smuzhiyun# _mem_read() - read extended immediate operand from memory # 2382*4882a593Smuzhiyun# _fpsp_done() - "callout" for exit; work all done # 2383*4882a593Smuzhiyun# _real_trace() - "callout" for Trace enabled exception # 2384*4882a593Smuzhiyun# fmovm_dynamic() - emulate dynamic fmovm instruction # 2385*4882a593Smuzhiyun# fmovm_ctrl() - emulate fmovm control instruction # 2386*4882a593Smuzhiyun# # 2387*4882a593Smuzhiyun# INPUT *************************************************************** # 2388*4882a593Smuzhiyun# - The system stack contains the "Unimplemented <ea>" stk frame # 2389*4882a593Smuzhiyun# # 2390*4882a593Smuzhiyun# OUTPUT ************************************************************** # 2391*4882a593Smuzhiyun# If access error: # 2392*4882a593Smuzhiyun# - The system stack is changed to an access error stack frame # 2393*4882a593Smuzhiyun# If FPU disabled: # 2394*4882a593Smuzhiyun# - The system stack is changed to an FPU disabled stack frame # 2395*4882a593Smuzhiyun# If Trace exception enabled: # 2396*4882a593Smuzhiyun# - The system 
stack is changed to a Trace exception stack frame # 2397*4882a593Smuzhiyun# Else: (normal case) # 2398*4882a593Smuzhiyun# - None (correct result has been stored as appropriate) # 2399*4882a593Smuzhiyun# # 2400*4882a593Smuzhiyun# ALGORITHM *********************************************************** # 2401*4882a593Smuzhiyun# This exception handles 3 types of operations: # 2402*4882a593Smuzhiyun# (1) FP Instructions using extended precision or packed immediate # 2403*4882a593Smuzhiyun# addressing mode. # 2404*4882a593Smuzhiyun# (2) The "fmovm.x" instruction w/ dynamic register specification. # 2405*4882a593Smuzhiyun# (3) The "fmovm.l" instruction w/ 2 or 3 control registers. # 2406*4882a593Smuzhiyun# # 2407*4882a593Smuzhiyun# For immediate data operations, the data is read in w/ a # 2408*4882a593Smuzhiyun# _mem_read() "callout", converted to FP binary (if packed), and used # 2409*4882a593Smuzhiyun# as the source operand to the instruction specified by the instruction # 2410*4882a593Smuzhiyun# word. If no FP exception should be reported ads a result of the # 2411*4882a593Smuzhiyun# emulation, then the result is stored to the destination register and # 2412*4882a593Smuzhiyun# the handler exits through _fpsp_done(). If an enabled exc has been # 2413*4882a593Smuzhiyun# signalled as a result of emulation, then an fsave state frame # 2414*4882a593Smuzhiyun# corresponding to the FP exception type must be entered into the 060 # 2415*4882a593Smuzhiyun# FPU before exiting. In either the enabled or disabled cases, we # 2416*4882a593Smuzhiyun# must also check if a Trace exception is pending, in which case, we # 2417*4882a593Smuzhiyun# must create a Trace exception stack frame from the current exception # 2418*4882a593Smuzhiyun# stack frame. If no Trace is pending, we simply exit through # 2419*4882a593Smuzhiyun# _fpsp_done(). # 2420*4882a593Smuzhiyun# For "fmovm.x", call the routine fmovm_dynamic() which will # 2421*4882a593Smuzhiyun# decode and emulate the instruction. 
No FP exceptions can be pending # 2422*4882a593Smuzhiyun# as a result of this operation emulation. A Trace exception can be # 2423*4882a593Smuzhiyun# pending, though, which means the current stack frame must be changed # 2424*4882a593Smuzhiyun# to a Trace stack frame and an exit made through _real_trace(). # 2425*4882a593Smuzhiyun# For the case of "fmovm.x Dn,-(a7)", where the offending instruction # 2426*4882a593Smuzhiyun# was executed from supervisor mode, this handler must store the FP # 2427*4882a593Smuzhiyun# register file values to the system stack by itself since # 2428*4882a593Smuzhiyun# fmovm_dynamic() can't handle this. A normal exit is made through # 2429*4882a593Smuzhiyun# fpsp_done(). # 2430*4882a593Smuzhiyun# For "fmovm.l", fmovm_ctrl() is used to emulate the instruction. # 2431*4882a593Smuzhiyun# Again, a Trace exception may be pending and an exit made through # 2432*4882a593Smuzhiyun# _real_trace(). Else, a normal exit is made through _fpsp_done(). # 2433*4882a593Smuzhiyun# # 2434*4882a593Smuzhiyun# Before any of the above is attempted, it must be checked to # 2435*4882a593Smuzhiyun# see if the FPU is disabled. Since the "Unimp <ea>" exception is taken # 2436*4882a593Smuzhiyun# before the "FPU disabled" exception, but the "FPU disabled" exception # 2437*4882a593Smuzhiyun# has higher priority, we check the disabled bit in the PCR. If set, # 2438*4882a593Smuzhiyun# then we must create an 8 word "FPU disabled" exception stack frame # 2439*4882a593Smuzhiyun# from the current 4 word exception stack frame. This includes # 2440*4882a593Smuzhiyun# reproducing the effective address of the instruction to put on the # 2441*4882a593Smuzhiyun# new stack frame. 
# 2442*4882a593Smuzhiyun# # 2443*4882a593Smuzhiyun# In the process of all emulation work, if a _mem_read() # 2444*4882a593Smuzhiyun# "callout" returns a failing result indicating an access error, then # 2445*4882a593Smuzhiyun# we must create an access error stack frame from the current stack # 2446*4882a593Smuzhiyun# frame. This information includes a faulting address and a fault- # 2447*4882a593Smuzhiyun# status-longword. These are created within this handler. # 2448*4882a593Smuzhiyun# # 2449*4882a593Smuzhiyun######################################################################### 2450*4882a593Smuzhiyun 2451*4882a593Smuzhiyun global _fpsp_effadd 2452*4882a593Smuzhiyun_fpsp_effadd: 2453*4882a593Smuzhiyun 2454*4882a593Smuzhiyun# This exception type takes priority over the "Line F Emulator" 2455*4882a593Smuzhiyun# exception. Therefore, the FPU could be disabled when entering here. 2456*4882a593Smuzhiyun# So, we must check to see if it's disabled and handle that case separately. 2457*4882a593Smuzhiyun mov.l %d0,-(%sp) # save d0 2458*4882a593Smuzhiyun movc %pcr,%d0 # load proc cr 2459*4882a593Smuzhiyun btst &0x1,%d0 # is FPU disabled? 
2460*4882a593Smuzhiyun bne.w iea_disabled # yes 2461*4882a593Smuzhiyun mov.l (%sp)+,%d0 # restore d0 2462*4882a593Smuzhiyun 2463*4882a593Smuzhiyun link %a6,&-LOCAL_SIZE # init stack frame 2464*4882a593Smuzhiyun 2465*4882a593Smuzhiyun movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 2466*4882a593Smuzhiyun fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs 2467*4882a593Smuzhiyun fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack 2468*4882a593Smuzhiyun 2469*4882a593Smuzhiyun# PC of instruction that took the exception is the PC in the frame 2470*4882a593Smuzhiyun mov.l EXC_PC(%a6),EXC_EXTWPTR(%a6) 2471*4882a593Smuzhiyun 2472*4882a593Smuzhiyun mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 2473*4882a593Smuzhiyun addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr 2474*4882a593Smuzhiyun bsr.l _imem_read_long # fetch the instruction words 2475*4882a593Smuzhiyun mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD 2476*4882a593Smuzhiyun 2477*4882a593Smuzhiyun######################################################################### 2478*4882a593Smuzhiyun 2479*4882a593Smuzhiyun tst.w %d0 # is operation fmovem? 
2480*4882a593Smuzhiyun bmi.w iea_fmovm # yes 2481*4882a593Smuzhiyun 2482*4882a593Smuzhiyun# 2483*4882a593Smuzhiyun# here, we will have: 2484*4882a593Smuzhiyun# fabs fdabs fsabs facos fmod 2485*4882a593Smuzhiyun# fadd fdadd fsadd fasin frem 2486*4882a593Smuzhiyun# fcmp fatan fscale 2487*4882a593Smuzhiyun# fdiv fddiv fsdiv fatanh fsin 2488*4882a593Smuzhiyun# fint fcos fsincos 2489*4882a593Smuzhiyun# fintrz fcosh fsinh 2490*4882a593Smuzhiyun# fmove fdmove fsmove fetox ftan 2491*4882a593Smuzhiyun# fmul fdmul fsmul fetoxm1 ftanh 2492*4882a593Smuzhiyun# fneg fdneg fsneg fgetexp ftentox 2493*4882a593Smuzhiyun# fsgldiv fgetman ftwotox 2494*4882a593Smuzhiyun# fsglmul flog10 2495*4882a593Smuzhiyun# fsqrt flog2 2496*4882a593Smuzhiyun# fsub fdsub fssub flogn 2497*4882a593Smuzhiyun# ftst flognp1 2498*4882a593Smuzhiyun# which can all use f<op>.{x,p} 2499*4882a593Smuzhiyun# so, now it's immediate data extended precision AND PACKED FORMAT! 2500*4882a593Smuzhiyun# 2501*4882a593Smuzhiyuniea_op: 2502*4882a593Smuzhiyun andi.l &0x00ff00ff,USER_FPSR(%a6) 2503*4882a593Smuzhiyun 2504*4882a593Smuzhiyun btst &0xa,%d0 # is src fmt x or p? 2505*4882a593Smuzhiyun bne.b iea_op_pack # packed 2506*4882a593Smuzhiyun 2507*4882a593Smuzhiyun 2508*4882a593Smuzhiyun mov.l EXC_EXTWPTR(%a6),%a0 # pass: ptr to #<data> 2509*4882a593Smuzhiyun lea FP_SRC(%a6),%a1 # pass: ptr to super addr 2510*4882a593Smuzhiyun mov.l &0xc,%d0 # pass: 12 bytes 2511*4882a593Smuzhiyun bsr.l _imem_read # read extended immediate 2512*4882a593Smuzhiyun 2513*4882a593Smuzhiyun tst.l %d1 # did ifetch fail? 
2514*4882a593Smuzhiyun bne.w iea_iacc # yes 2515*4882a593Smuzhiyun 2516*4882a593Smuzhiyun bra.b iea_op_setsrc 2517*4882a593Smuzhiyun 2518*4882a593Smuzhiyuniea_op_pack: 2519*4882a593Smuzhiyun 2520*4882a593Smuzhiyun mov.l EXC_EXTWPTR(%a6),%a0 # pass: ptr to #<data> 2521*4882a593Smuzhiyun lea FP_SRC(%a6),%a1 # pass: ptr to super dst 2522*4882a593Smuzhiyun mov.l &0xc,%d0 # pass: 12 bytes 2523*4882a593Smuzhiyun bsr.l _imem_read # read packed operand 2524*4882a593Smuzhiyun 2525*4882a593Smuzhiyun tst.l %d1 # did ifetch fail? 2526*4882a593Smuzhiyun bne.w iea_iacc # yes 2527*4882a593Smuzhiyun 2528*4882a593Smuzhiyun# The packed operand is an INF or a NAN if the exponent field is all ones. 2529*4882a593Smuzhiyun bfextu FP_SRC(%a6){&1:&15},%d0 # get exp 2530*4882a593Smuzhiyun cmpi.w %d0,&0x7fff # INF or NAN? 2531*4882a593Smuzhiyun beq.b iea_op_setsrc # operand is an INF or NAN 2532*4882a593Smuzhiyun 2533*4882a593Smuzhiyun# The packed operand is a zero if the mantissa is all zero, else it's 2534*4882a593Smuzhiyun# a normal packed op. 2535*4882a593Smuzhiyun mov.b 3+FP_SRC(%a6),%d0 # get byte 4 2536*4882a593Smuzhiyun andi.b &0x0f,%d0 # clear all but last nybble 2537*4882a593Smuzhiyun bne.b iea_op_gp_not_spec # not a zero 2538*4882a593Smuzhiyun tst.l FP_SRC_HI(%a6) # is lw 2 zero? 2539*4882a593Smuzhiyun bne.b iea_op_gp_not_spec # not a zero 2540*4882a593Smuzhiyun tst.l FP_SRC_LO(%a6) # is lw 3 zero? 2541*4882a593Smuzhiyun beq.b iea_op_setsrc # operand is a ZERO 2542*4882a593Smuzhiyuniea_op_gp_not_spec: 2543*4882a593Smuzhiyun lea FP_SRC(%a6),%a0 # pass: ptr to packed op 2544*4882a593Smuzhiyun bsr.l decbin # convert to extended 2545*4882a593Smuzhiyun fmovm.x &0x80,FP_SRC(%a6) # make this the srcop 2546*4882a593Smuzhiyun 2547*4882a593Smuzhiyuniea_op_setsrc: 2548*4882a593Smuzhiyun addi.l &0xc,EXC_EXTWPTR(%a6) # update extension word pointer 2549*4882a593Smuzhiyun 2550*4882a593Smuzhiyun# FP_SRC now holds the src operand. 
2551*4882a593Smuzhiyun lea FP_SRC(%a6),%a0 # pass: ptr to src op 2552*4882a593Smuzhiyun bsr.l set_tag_x # tag the operand type 2553*4882a593Smuzhiyun mov.b %d0,STAG(%a6) # could be ANYTHING!!! 2554*4882a593Smuzhiyun cmpi.b %d0,&UNNORM # is operand an UNNORM? 2555*4882a593Smuzhiyun bne.b iea_op_getdst # no 2556*4882a593Smuzhiyun bsr.l unnorm_fix # yes; convert to NORM/DENORM/ZERO 2557*4882a593Smuzhiyun mov.b %d0,STAG(%a6) # set new optype tag 2558*4882a593Smuzhiyuniea_op_getdst: 2559*4882a593Smuzhiyun clr.b STORE_FLG(%a6) # clear "store result" boolean 2560*4882a593Smuzhiyun 2561*4882a593Smuzhiyun btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic? 2562*4882a593Smuzhiyun beq.b iea_op_extract # monadic 2563*4882a593Smuzhiyun btst &0x4,1+EXC_CMDREG(%a6) # is operation fsincos,ftst,fcmp? 2564*4882a593Smuzhiyun bne.b iea_op_spec # yes 2565*4882a593Smuzhiyun 2566*4882a593Smuzhiyuniea_op_loaddst: 2567*4882a593Smuzhiyun bfextu EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno 2568*4882a593Smuzhiyun bsr.l load_fpn2 # load dst operand 2569*4882a593Smuzhiyun 2570*4882a593Smuzhiyun lea FP_DST(%a6),%a0 # pass: ptr to dst op 2571*4882a593Smuzhiyun bsr.l set_tag_x # tag the operand type 2572*4882a593Smuzhiyun mov.b %d0,DTAG(%a6) # could be ANYTHING!!! 2573*4882a593Smuzhiyun cmpi.b %d0,&UNNORM # is operand an UNNORM? 2574*4882a593Smuzhiyun bne.b iea_op_extract # no 2575*4882a593Smuzhiyun bsr.l unnorm_fix # yes; convert to NORM/DENORM/ZERO 2576*4882a593Smuzhiyun mov.b %d0,DTAG(%a6) # set new optype tag 2577*4882a593Smuzhiyun bra.b iea_op_extract 2578*4882a593Smuzhiyun 2579*4882a593Smuzhiyun# the operation is fsincos, ftst, or fcmp. only fcmp is dyadic 2580*4882a593Smuzhiyuniea_op_spec: 2581*4882a593Smuzhiyun btst &0x3,1+EXC_CMDREG(%a6) # is operation fsincos? 2582*4882a593Smuzhiyun beq.b iea_op_extract # yes 2583*4882a593Smuzhiyun# now, we're left with ftst and fcmp. so, first let's tag them so that they don't 2584*4882a593Smuzhiyun# store a result. 
then, only fcmp will branch back and pick up a dst operand. 2585*4882a593Smuzhiyun st STORE_FLG(%a6) # don't store a final result 2586*4882a593Smuzhiyun btst &0x1,1+EXC_CMDREG(%a6) # is operation fcmp? 2587*4882a593Smuzhiyun beq.b iea_op_loaddst # yes 2588*4882a593Smuzhiyun 2589*4882a593Smuzhiyuniea_op_extract: 2590*4882a593Smuzhiyun clr.l %d0 2591*4882a593Smuzhiyun mov.b FPCR_MODE(%a6),%d0 # pass: rnd mode,prec 2592*4882a593Smuzhiyun 2593*4882a593Smuzhiyun mov.b 1+EXC_CMDREG(%a6),%d1 2594*4882a593Smuzhiyun andi.w &0x007f,%d1 # extract extension 2595*4882a593Smuzhiyun 2596*4882a593Smuzhiyun fmov.l &0x0,%fpcr 2597*4882a593Smuzhiyun fmov.l &0x0,%fpsr 2598*4882a593Smuzhiyun 2599*4882a593Smuzhiyun lea FP_SRC(%a6),%a0 2600*4882a593Smuzhiyun lea FP_DST(%a6),%a1 2601*4882a593Smuzhiyun 2602*4882a593Smuzhiyun mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr 2603*4882a593Smuzhiyun jsr (tbl_unsupp.l,%pc,%d1.l*1) 2604*4882a593Smuzhiyun 2605*4882a593Smuzhiyun# 2606*4882a593Smuzhiyun# Exceptions in order of precedence: 2607*4882a593Smuzhiyun# BSUN : none 2608*4882a593Smuzhiyun# SNAN : all operations 2609*4882a593Smuzhiyun# OPERR : all reg-reg or mem-reg operations that can normally operr 2610*4882a593Smuzhiyun# OVFL : same as OPERR 2611*4882a593Smuzhiyun# UNFL : same as OPERR 2612*4882a593Smuzhiyun# DZ : same as OPERR 2613*4882a593Smuzhiyun# INEX2 : same as OPERR 2614*4882a593Smuzhiyun# INEX1 : all packed immediate operations 2615*4882a593Smuzhiyun# 2616*4882a593Smuzhiyun 2617*4882a593Smuzhiyun# we determine the highest priority exception(if any) set by the 2618*4882a593Smuzhiyun# emulation routine that has also been enabled by the user. 2619*4882a593Smuzhiyun mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled 2620*4882a593Smuzhiyun bne.b iea_op_ena # some are enabled 2621*4882a593Smuzhiyun 2622*4882a593Smuzhiyun# now, we save the result, unless, of course, the operation was ftst or fcmp. 2623*4882a593Smuzhiyun# these don't save results. 
2624*4882a593Smuzhiyuniea_op_save: 2625*4882a593Smuzhiyun tst.b STORE_FLG(%a6) # does this op store a result? 2626*4882a593Smuzhiyun bne.b iea_op_exit1 # exit with no frestore 2627*4882a593Smuzhiyun 2628*4882a593Smuzhiyuniea_op_store: 2629*4882a593Smuzhiyun bfextu EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno 2630*4882a593Smuzhiyun bsr.l store_fpreg # store the result 2631*4882a593Smuzhiyun 2632*4882a593Smuzhiyuniea_op_exit1: 2633*4882a593Smuzhiyun mov.l EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC" 2634*4882a593Smuzhiyun mov.l EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame 2635*4882a593Smuzhiyun 2636*4882a593Smuzhiyun fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 2637*4882a593Smuzhiyun fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 2638*4882a593Smuzhiyun movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 2639*4882a593Smuzhiyun 2640*4882a593Smuzhiyun unlk %a6 # unravel the frame 2641*4882a593Smuzhiyun 2642*4882a593Smuzhiyun btst &0x7,(%sp) # is trace on? 2643*4882a593Smuzhiyun bne.w iea_op_trace # yes 2644*4882a593Smuzhiyun 2645*4882a593Smuzhiyun bra.l _fpsp_done # exit to os 2646*4882a593Smuzhiyun 2647*4882a593Smuzhiyuniea_op_ena: 2648*4882a593Smuzhiyun and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enable and set 2649*4882a593Smuzhiyun bfffo %d0{&24:&8},%d0 # find highest priority exception 2650*4882a593Smuzhiyun bne.b iea_op_exc # at least one was set 2651*4882a593Smuzhiyun 2652*4882a593Smuzhiyun# no exception occurred. now, did a disabled, exact overflow occur with inexact 2653*4882a593Smuzhiyun# enabled? if so, then we have to stuff an overflow frame into the FPU. 2654*4882a593Smuzhiyun btst &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur? 2655*4882a593Smuzhiyun beq.b iea_op_save 2656*4882a593Smuzhiyun 2657*4882a593Smuzhiyuniea_op_ovfl: 2658*4882a593Smuzhiyun btst &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled? 
2659*4882a593Smuzhiyun beq.b iea_op_store # no 2660*4882a593Smuzhiyun bra.b iea_op_exc_ovfl # yes 2661*4882a593Smuzhiyun 2662*4882a593Smuzhiyun# an enabled exception occurred. we have to insert the exception type back into 2663*4882a593Smuzhiyun# the machine. 2664*4882a593Smuzhiyuniea_op_exc: 2665*4882a593Smuzhiyun subi.l &24,%d0 # fix offset to be 0-8 2666*4882a593Smuzhiyun cmpi.b %d0,&0x6 # is exception INEX? 2667*4882a593Smuzhiyun bne.b iea_op_exc_force # no 2668*4882a593Smuzhiyun 2669*4882a593Smuzhiyun# the enabled exception was inexact. so, if it occurs with an overflow 2670*4882a593Smuzhiyun# or underflow that was disabled, then we have to force an overflow or 2671*4882a593Smuzhiyun# underflow frame. 2672*4882a593Smuzhiyun btst &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur? 2673*4882a593Smuzhiyun bne.b iea_op_exc_ovfl # yes 2674*4882a593Smuzhiyun btst &unfl_bit,FPSR_EXCEPT(%a6) # did underflow occur? 2675*4882a593Smuzhiyun bne.b iea_op_exc_unfl # yes 2676*4882a593Smuzhiyun 2677*4882a593Smuzhiyuniea_op_exc_force: 2678*4882a593Smuzhiyun mov.w (tbl_iea_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) 2679*4882a593Smuzhiyun bra.b iea_op_exit2 # exit with frestore 2680*4882a593Smuzhiyun 2681*4882a593Smuzhiyuntbl_iea_except: 2682*4882a593Smuzhiyun short 0xe002, 0xe006, 0xe004, 0xe005 2683*4882a593Smuzhiyun short 0xe003, 0xe002, 0xe001, 0xe001 2684*4882a593Smuzhiyun 2685*4882a593Smuzhiyuniea_op_exc_ovfl: 2686*4882a593Smuzhiyun mov.w &0xe005,2+FP_SRC(%a6) 2687*4882a593Smuzhiyun bra.b iea_op_exit2 2688*4882a593Smuzhiyun 2689*4882a593Smuzhiyuniea_op_exc_unfl: 2690*4882a593Smuzhiyun mov.w &0xe003,2+FP_SRC(%a6) 2691*4882a593Smuzhiyun 2692*4882a593Smuzhiyuniea_op_exit2: 2693*4882a593Smuzhiyun mov.l EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC" 2694*4882a593Smuzhiyun mov.l EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame 2695*4882a593Smuzhiyun 2696*4882a593Smuzhiyun fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 2697*4882a593Smuzhiyun fmovm.l 
USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 2698*4882a593Smuzhiyun movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 2699*4882a593Smuzhiyun 2700*4882a593Smuzhiyun frestore FP_SRC(%a6) # restore exceptional state 2701*4882a593Smuzhiyun 2702*4882a593Smuzhiyun unlk %a6 # unravel the frame 2703*4882a593Smuzhiyun 2704*4882a593Smuzhiyun btst &0x7,(%sp) # is trace on? 2705*4882a593Smuzhiyun bne.b iea_op_trace # yes 2706*4882a593Smuzhiyun 2707*4882a593Smuzhiyun bra.l _fpsp_done # exit to os 2708*4882a593Smuzhiyun 2709*4882a593Smuzhiyun# 2710*4882a593Smuzhiyun# The opclass two instruction that took an "Unimplemented Effective Address" 2711*4882a593Smuzhiyun# exception was being traced. Make the "current" PC the FPIAR and put it in 2712*4882a593Smuzhiyun# the trace stack frame then jump to _real_trace(). 2713*4882a593Smuzhiyun# 2714*4882a593Smuzhiyun# UNIMP EA FRAME TRACE FRAME 2715*4882a593Smuzhiyun# ***************** ***************** 2716*4882a593Smuzhiyun# * 0x0 * 0x0f0 * * Current * 2717*4882a593Smuzhiyun# ***************** * PC * 2718*4882a593Smuzhiyun# * Current * ***************** 2719*4882a593Smuzhiyun# * PC * * 0x2 * 0x024 * 2720*4882a593Smuzhiyun# ***************** ***************** 2721*4882a593Smuzhiyun# * SR * * Next * 2722*4882a593Smuzhiyun# ***************** * PC * 2723*4882a593Smuzhiyun# ***************** 2724*4882a593Smuzhiyun# * SR * 2725*4882a593Smuzhiyun# ***************** 2726*4882a593Smuzhiyuniea_op_trace: 2727*4882a593Smuzhiyun mov.l (%sp),-(%sp) # shift stack frame "down" 2728*4882a593Smuzhiyun mov.w 0x8(%sp),0x4(%sp) 2729*4882a593Smuzhiyun mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024 2730*4882a593Smuzhiyun fmov.l %fpiar,0x8(%sp) # "Current PC" is in FPIAR 2731*4882a593Smuzhiyun 2732*4882a593Smuzhiyun bra.l _real_trace 2733*4882a593Smuzhiyun 2734*4882a593Smuzhiyun######################################################################### 2735*4882a593Smuzhiyuniea_fmovm: 2736*4882a593Smuzhiyun btst &14,%d0 # ctrl or data reg 
2737*4882a593Smuzhiyun beq.w iea_fmovm_ctrl 2738*4882a593Smuzhiyun 2739*4882a593Smuzhiyuniea_fmovm_data: 2740*4882a593Smuzhiyun 2741*4882a593Smuzhiyun btst &0x5,EXC_SR(%a6) # user or supervisor mode 2742*4882a593Smuzhiyun bne.b iea_fmovm_data_s 2743*4882a593Smuzhiyun 2744*4882a593Smuzhiyuniea_fmovm_data_u: 2745*4882a593Smuzhiyun mov.l %usp,%a0 2746*4882a593Smuzhiyun mov.l %a0,EXC_A7(%a6) # store current a7 2747*4882a593Smuzhiyun bsr.l fmovm_dynamic # do dynamic fmovm 2748*4882a593Smuzhiyun mov.l EXC_A7(%a6),%a0 # load possibly new a7 2749*4882a593Smuzhiyun mov.l %a0,%usp # update usp 2750*4882a593Smuzhiyun bra.w iea_fmovm_exit 2751*4882a593Smuzhiyun 2752*4882a593Smuzhiyuniea_fmovm_data_s: 2753*4882a593Smuzhiyun clr.b SPCOND_FLG(%a6) 2754*4882a593Smuzhiyun lea 0x2+EXC_VOFF(%a6),%a0 2755*4882a593Smuzhiyun mov.l %a0,EXC_A7(%a6) 2756*4882a593Smuzhiyun bsr.l fmovm_dynamic # do dynamic fmovm 2757*4882a593Smuzhiyun 2758*4882a593Smuzhiyun cmpi.b SPCOND_FLG(%a6),&mda7_flg 2759*4882a593Smuzhiyun beq.w iea_fmovm_data_predec 2760*4882a593Smuzhiyun cmpi.b SPCOND_FLG(%a6),&mia7_flg 2761*4882a593Smuzhiyun bne.w iea_fmovm_exit 2762*4882a593Smuzhiyun 2763*4882a593Smuzhiyun# right now, d0 = the size. 2764*4882a593Smuzhiyun# the data has been fetched from the supervisor stack, but we have not 2765*4882a593Smuzhiyun# incremented the stack pointer by the appropriate number of bytes. 2766*4882a593Smuzhiyun# do it here. 
2767*4882a593Smuzhiyuniea_fmovm_data_postinc: 2768*4882a593Smuzhiyun btst &0x7,EXC_SR(%a6) 2769*4882a593Smuzhiyun bne.b iea_fmovm_data_pi_trace 2770*4882a593Smuzhiyun 2771*4882a593Smuzhiyun mov.w EXC_SR(%a6),(EXC_SR,%a6,%d0) 2772*4882a593Smuzhiyun mov.l EXC_EXTWPTR(%a6),(EXC_PC,%a6,%d0) 2773*4882a593Smuzhiyun mov.w &0x00f0,(EXC_VOFF,%a6,%d0) 2774*4882a593Smuzhiyun 2775*4882a593Smuzhiyun lea (EXC_SR,%a6,%d0),%a0 2776*4882a593Smuzhiyun mov.l %a0,EXC_SR(%a6) 2777*4882a593Smuzhiyun 2778*4882a593Smuzhiyun fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1 2779*4882a593Smuzhiyun fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 2780*4882a593Smuzhiyun movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 2781*4882a593Smuzhiyun 2782*4882a593Smuzhiyun unlk %a6 2783*4882a593Smuzhiyun mov.l (%sp)+,%sp 2784*4882a593Smuzhiyun bra.l _fpsp_done 2785*4882a593Smuzhiyun 2786*4882a593Smuzhiyuniea_fmovm_data_pi_trace: 2787*4882a593Smuzhiyun mov.w EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0) 2788*4882a593Smuzhiyun mov.l EXC_EXTWPTR(%a6),(EXC_PC-0x4,%a6,%d0) 2789*4882a593Smuzhiyun mov.w &0x2024,(EXC_VOFF-0x4,%a6,%d0) 2790*4882a593Smuzhiyun mov.l EXC_PC(%a6),(EXC_VOFF+0x2-0x4,%a6,%d0) 2791*4882a593Smuzhiyun 2792*4882a593Smuzhiyun lea (EXC_SR-0x4,%a6,%d0),%a0 2793*4882a593Smuzhiyun mov.l %a0,EXC_SR(%a6) 2794*4882a593Smuzhiyun 2795*4882a593Smuzhiyun fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1 2796*4882a593Smuzhiyun fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 2797*4882a593Smuzhiyun movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 2798*4882a593Smuzhiyun 2799*4882a593Smuzhiyun unlk %a6 2800*4882a593Smuzhiyun mov.l (%sp)+,%sp 2801*4882a593Smuzhiyun bra.l _real_trace 2802*4882a593Smuzhiyun 2803*4882a593Smuzhiyun# right now, d1 = size and d0 = the strg. 
2804*4882a593Smuzhiyuniea_fmovm_data_predec: 2805*4882a593Smuzhiyun mov.b %d1,EXC_VOFF(%a6) # store strg 2806*4882a593Smuzhiyun mov.b %d0,0x1+EXC_VOFF(%a6) # store size 2807*4882a593Smuzhiyun 2808*4882a593Smuzhiyun fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1 2809*4882a593Smuzhiyun fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 2810*4882a593Smuzhiyun movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 2811*4882a593Smuzhiyun 2812*4882a593Smuzhiyun mov.l (%a6),-(%sp) # make a copy of a6 2813*4882a593Smuzhiyun mov.l %d0,-(%sp) # save d0 2814*4882a593Smuzhiyun mov.l %d1,-(%sp) # save d1 2815*4882a593Smuzhiyun mov.l EXC_EXTWPTR(%a6),-(%sp) # make a copy of Next PC 2816*4882a593Smuzhiyun 2817*4882a593Smuzhiyun clr.l %d0 2818*4882a593Smuzhiyun mov.b 0x1+EXC_VOFF(%a6),%d0 # fetch size 2819*4882a593Smuzhiyun neg.l %d0 # get negative of size 2820*4882a593Smuzhiyun 2821*4882a593Smuzhiyun btst &0x7,EXC_SR(%a6) # is trace enabled? 2822*4882a593Smuzhiyun beq.b iea_fmovm_data_p2 2823*4882a593Smuzhiyun 2824*4882a593Smuzhiyun mov.w EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0) 2825*4882a593Smuzhiyun mov.l EXC_PC(%a6),(EXC_VOFF-0x2,%a6,%d0) 2826*4882a593Smuzhiyun mov.l (%sp)+,(EXC_PC-0x4,%a6,%d0) 2827*4882a593Smuzhiyun mov.w &0x2024,(EXC_VOFF-0x4,%a6,%d0) 2828*4882a593Smuzhiyun 2829*4882a593Smuzhiyun pea (%a6,%d0) # create final sp 2830*4882a593Smuzhiyun bra.b iea_fmovm_data_p3 2831*4882a593Smuzhiyun 2832*4882a593Smuzhiyuniea_fmovm_data_p2: 2833*4882a593Smuzhiyun mov.w EXC_SR(%a6),(EXC_SR,%a6,%d0) 2834*4882a593Smuzhiyun mov.l (%sp)+,(EXC_PC,%a6,%d0) 2835*4882a593Smuzhiyun mov.w &0x00f0,(EXC_VOFF,%a6,%d0) 2836*4882a593Smuzhiyun 2837*4882a593Smuzhiyun pea (0x4,%a6,%d0) # create final sp 2838*4882a593Smuzhiyun 2839*4882a593Smuzhiyuniea_fmovm_data_p3: 2840*4882a593Smuzhiyun clr.l %d1 2841*4882a593Smuzhiyun mov.b EXC_VOFF(%a6),%d1 # fetch strg 2842*4882a593Smuzhiyun 2843*4882a593Smuzhiyun tst.b %d1 2844*4882a593Smuzhiyun bpl.b fm_1 2845*4882a593Smuzhiyun fmovm.x &0x80,(0x4+0x8,%a6,%d0) 
2846*4882a593Smuzhiyun addi.l &0xc,%d0 2847*4882a593Smuzhiyunfm_1: 2848*4882a593Smuzhiyun lsl.b &0x1,%d1 2849*4882a593Smuzhiyun bpl.b fm_2 2850*4882a593Smuzhiyun fmovm.x &0x40,(0x4+0x8,%a6,%d0) 2851*4882a593Smuzhiyun addi.l &0xc,%d0 2852*4882a593Smuzhiyunfm_2: 2853*4882a593Smuzhiyun lsl.b &0x1,%d1 2854*4882a593Smuzhiyun bpl.b fm_3 2855*4882a593Smuzhiyun fmovm.x &0x20,(0x4+0x8,%a6,%d0) 2856*4882a593Smuzhiyun addi.l &0xc,%d0 2857*4882a593Smuzhiyunfm_3: 2858*4882a593Smuzhiyun lsl.b &0x1,%d1 2859*4882a593Smuzhiyun bpl.b fm_4 2860*4882a593Smuzhiyun fmovm.x &0x10,(0x4+0x8,%a6,%d0) 2861*4882a593Smuzhiyun addi.l &0xc,%d0 2862*4882a593Smuzhiyunfm_4: 2863*4882a593Smuzhiyun lsl.b &0x1,%d1 2864*4882a593Smuzhiyun bpl.b fm_5 2865*4882a593Smuzhiyun fmovm.x &0x08,(0x4+0x8,%a6,%d0) 2866*4882a593Smuzhiyun addi.l &0xc,%d0 2867*4882a593Smuzhiyunfm_5: 2868*4882a593Smuzhiyun lsl.b &0x1,%d1 2869*4882a593Smuzhiyun bpl.b fm_6 2870*4882a593Smuzhiyun fmovm.x &0x04,(0x4+0x8,%a6,%d0) 2871*4882a593Smuzhiyun addi.l &0xc,%d0 2872*4882a593Smuzhiyunfm_6: 2873*4882a593Smuzhiyun lsl.b &0x1,%d1 2874*4882a593Smuzhiyun bpl.b fm_7 2875*4882a593Smuzhiyun fmovm.x &0x02,(0x4+0x8,%a6,%d0) 2876*4882a593Smuzhiyun addi.l &0xc,%d0 2877*4882a593Smuzhiyunfm_7: 2878*4882a593Smuzhiyun lsl.b &0x1,%d1 2879*4882a593Smuzhiyun bpl.b fm_end 2880*4882a593Smuzhiyun fmovm.x &0x01,(0x4+0x8,%a6,%d0) 2881*4882a593Smuzhiyunfm_end: 2882*4882a593Smuzhiyun mov.l 0x4(%sp),%d1 2883*4882a593Smuzhiyun mov.l 0x8(%sp),%d0 2884*4882a593Smuzhiyun mov.l 0xc(%sp),%a6 2885*4882a593Smuzhiyun mov.l (%sp)+,%sp 2886*4882a593Smuzhiyun 2887*4882a593Smuzhiyun btst &0x7,(%sp) # is trace enabled? 
2888*4882a593Smuzhiyun beq.l _fpsp_done 2889*4882a593Smuzhiyun bra.l _real_trace 2890*4882a593Smuzhiyun 2891*4882a593Smuzhiyun######################################################################### 2892*4882a593Smuzhiyuniea_fmovm_ctrl: 2893*4882a593Smuzhiyun 2894*4882a593Smuzhiyun bsr.l fmovm_ctrl # load ctrl regs 2895*4882a593Smuzhiyun 2896*4882a593Smuzhiyuniea_fmovm_exit: 2897*4882a593Smuzhiyun fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 2898*4882a593Smuzhiyun fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 2899*4882a593Smuzhiyun movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 2900*4882a593Smuzhiyun 2901*4882a593Smuzhiyun btst &0x7,EXC_SR(%a6) # is trace on? 2902*4882a593Smuzhiyun bne.b iea_fmovm_trace # yes 2903*4882a593Smuzhiyun 2904*4882a593Smuzhiyun mov.l EXC_EXTWPTR(%a6),EXC_PC(%a6) # set Next PC 2905*4882a593Smuzhiyun 2906*4882a593Smuzhiyun unlk %a6 # unravel the frame 2907*4882a593Smuzhiyun 2908*4882a593Smuzhiyun bra.l _fpsp_done # exit to os 2909*4882a593Smuzhiyun 2910*4882a593Smuzhiyun# 2911*4882a593Smuzhiyun# The control reg instruction that took an "Unimplemented Effective Address" 2912*4882a593Smuzhiyun# exception was being traced. The "Current PC" for the trace frame is the 2913*4882a593Smuzhiyun# PC stacked for Unimp EA. The "Next PC" is in EXC_EXTWPTR. 2914*4882a593Smuzhiyun# After fixing the stack frame, jump to _real_trace(). 
2915*4882a593Smuzhiyun# 2916*4882a593Smuzhiyun# UNIMP EA FRAME TRACE FRAME 2917*4882a593Smuzhiyun# ***************** ***************** 2918*4882a593Smuzhiyun# * 0x0 * 0x0f0 * * Current * 2919*4882a593Smuzhiyun# ***************** * PC * 2920*4882a593Smuzhiyun# * Current * ***************** 2921*4882a593Smuzhiyun# * PC * * 0x2 * 0x024 * 2922*4882a593Smuzhiyun# ***************** ***************** 2923*4882a593Smuzhiyun# * SR * * Next * 2924*4882a593Smuzhiyun# ***************** * PC * 2925*4882a593Smuzhiyun# ***************** 2926*4882a593Smuzhiyun# * SR * 2927*4882a593Smuzhiyun# ***************** 2928*4882a593Smuzhiyun# this ain't a pretty solution, but it works: 2929*4882a593Smuzhiyun# -restore a6 (not with unlk) 2930*4882a593Smuzhiyun# -shift stack frame down over where old a6 used to be 2931*4882a593Smuzhiyun# -add LOCAL_SIZE to stack pointer 2932*4882a593Smuzhiyuniea_fmovm_trace: 2933*4882a593Smuzhiyun mov.l (%a6),%a6 # restore frame pointer 2934*4882a593Smuzhiyun mov.w EXC_SR+LOCAL_SIZE(%sp),0x0+LOCAL_SIZE(%sp) 2935*4882a593Smuzhiyun mov.l EXC_PC+LOCAL_SIZE(%sp),0x8+LOCAL_SIZE(%sp) 2936*4882a593Smuzhiyun mov.l EXC_EXTWPTR+LOCAL_SIZE(%sp),0x2+LOCAL_SIZE(%sp) 2937*4882a593Smuzhiyun mov.w &0x2024,0x6+LOCAL_SIZE(%sp) # stk fmt = 0x2; voff = 0x024 2938*4882a593Smuzhiyun add.l &LOCAL_SIZE,%sp # clear stack frame 2939*4882a593Smuzhiyun 2940*4882a593Smuzhiyun bra.l _real_trace 2941*4882a593Smuzhiyun 2942*4882a593Smuzhiyun######################################################################### 2943*4882a593Smuzhiyun# The FPU is disabled and so we should really have taken the "Line 2944*4882a593Smuzhiyun# F Emulator" exception. So, here we create an 8-word stack frame 2945*4882a593Smuzhiyun# from our 4-word stack frame. This means we must calculate the length 2946*4882a593Smuzhiyun# the faulting instruction to get the "next PC". 
This is trivial for 2947*4882a593Smuzhiyun# immediate operands but requires some extra work for fmovm dynamic 2948*4882a593Smuzhiyun# which can use most addressing modes. 2949*4882a593Smuzhiyuniea_disabled: 2950*4882a593Smuzhiyun mov.l (%sp)+,%d0 # restore d0 2951*4882a593Smuzhiyun 2952*4882a593Smuzhiyun link %a6,&-LOCAL_SIZE # init stack frame 2953*4882a593Smuzhiyun 2954*4882a593Smuzhiyun movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 2955*4882a593Smuzhiyun 2956*4882a593Smuzhiyun# PC of instruction that took the exception is the PC in the frame 2957*4882a593Smuzhiyun mov.l EXC_PC(%a6),EXC_EXTWPTR(%a6) 2958*4882a593Smuzhiyun mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 2959*4882a593Smuzhiyun addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr 2960*4882a593Smuzhiyun bsr.l _imem_read_long # fetch the instruction words 2961*4882a593Smuzhiyun mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD 2962*4882a593Smuzhiyun 2963*4882a593Smuzhiyun tst.w %d0 # is instr fmovm? 2964*4882a593Smuzhiyun bmi.b iea_dis_fmovm # yes 2965*4882a593Smuzhiyun# instruction is using an extended precision immediate operand. Therefore, 2966*4882a593Smuzhiyun# the total instruction length is 16 bytes. 2967*4882a593Smuzhiyuniea_dis_immed: 2968*4882a593Smuzhiyun mov.l &0x10,%d0 # 16 bytes of instruction 2969*4882a593Smuzhiyun bra.b iea_dis_cont 2970*4882a593Smuzhiyuniea_dis_fmovm: 2971*4882a593Smuzhiyun btst &0xe,%d0 # is instr fmovm ctrl 2972*4882a593Smuzhiyun bne.b iea_dis_fmovm_data # no 2973*4882a593Smuzhiyun# the instruction is a fmovm.l with 2 or 3 registers. 2974*4882a593Smuzhiyun bfextu %d0{&19:&3},%d1 2975*4882a593Smuzhiyun mov.l &0xc,%d0 2976*4882a593Smuzhiyun cmpi.b %d1,&0x7 # move all regs? 
2977*4882a593Smuzhiyun bne.b iea_dis_cont 2978*4882a593Smuzhiyun addq.l &0x4,%d0 2979*4882a593Smuzhiyun bra.b iea_dis_cont 2980*4882a593Smuzhiyun# the instruction is an fmovm.x dynamic which can use many addressing 2981*4882a593Smuzhiyun# modes and thus can have several different total instruction lengths. 2982*4882a593Smuzhiyun# call fmovm_calc_ea which will go through the ea calc process and, 2983*4882a593Smuzhiyun# as a by-product, will tell us how long the instruction is. 2984*4882a593Smuzhiyuniea_dis_fmovm_data: 2985*4882a593Smuzhiyun clr.l %d0 2986*4882a593Smuzhiyun bsr.l fmovm_calc_ea 2987*4882a593Smuzhiyun mov.l EXC_EXTWPTR(%a6),%d0 2988*4882a593Smuzhiyun sub.l EXC_PC(%a6),%d0 2989*4882a593Smuzhiyuniea_dis_cont: 2990*4882a593Smuzhiyun mov.w %d0,EXC_VOFF(%a6) # store stack shift value 2991*4882a593Smuzhiyun 2992*4882a593Smuzhiyun movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 2993*4882a593Smuzhiyun 2994*4882a593Smuzhiyun unlk %a6 2995*4882a593Smuzhiyun 2996*4882a593Smuzhiyun# here, we actually create the 8-word frame from the 4-word frame, 2997*4882a593Smuzhiyun# with the "next PC" as additional info. 2998*4882a593Smuzhiyun# the <ea> field is let as undefined. 
2999*4882a593Smuzhiyun subq.l &0x8,%sp # make room for new stack 3000*4882a593Smuzhiyun mov.l %d0,-(%sp) # save d0 3001*4882a593Smuzhiyun mov.w 0xc(%sp),0x4(%sp) # move SR 3002*4882a593Smuzhiyun mov.l 0xe(%sp),0x6(%sp) # move Current PC 3003*4882a593Smuzhiyun clr.l %d0 3004*4882a593Smuzhiyun mov.w 0x12(%sp),%d0 3005*4882a593Smuzhiyun mov.l 0x6(%sp),0x10(%sp) # move Current PC 3006*4882a593Smuzhiyun add.l %d0,0x6(%sp) # make Next PC 3007*4882a593Smuzhiyun mov.w &0x402c,0xa(%sp) # insert offset,frame format 3008*4882a593Smuzhiyun mov.l (%sp)+,%d0 # restore d0 3009*4882a593Smuzhiyun 3010*4882a593Smuzhiyun bra.l _real_fpu_disabled 3011*4882a593Smuzhiyun 3012*4882a593Smuzhiyun########## 3013*4882a593Smuzhiyun 3014*4882a593Smuzhiyuniea_iacc: 3015*4882a593Smuzhiyun movc %pcr,%d0 3016*4882a593Smuzhiyun btst &0x1,%d0 3017*4882a593Smuzhiyun bne.b iea_iacc_cont 3018*4882a593Smuzhiyun fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 3019*4882a593Smuzhiyun fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 on stack 3020*4882a593Smuzhiyuniea_iacc_cont: 3021*4882a593Smuzhiyun movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 3022*4882a593Smuzhiyun 3023*4882a593Smuzhiyun unlk %a6 3024*4882a593Smuzhiyun 3025*4882a593Smuzhiyun subq.w &0x8,%sp # make stack frame bigger 3026*4882a593Smuzhiyun mov.l 0x8(%sp),(%sp) # store SR,hi(PC) 3027*4882a593Smuzhiyun mov.w 0xc(%sp),0x4(%sp) # store lo(PC) 3028*4882a593Smuzhiyun mov.w &0x4008,0x6(%sp) # store voff 3029*4882a593Smuzhiyun mov.l 0x2(%sp),0x8(%sp) # store ea 3030*4882a593Smuzhiyun mov.l &0x09428001,0xc(%sp) # store fslw 3031*4882a593Smuzhiyun 3032*4882a593Smuzhiyuniea_acc_done: 3033*4882a593Smuzhiyun btst &0x5,(%sp) # user or supervisor mode? 
3034*4882a593Smuzhiyun beq.b iea_acc_done2 # user 3035*4882a593Smuzhiyun bset &0x2,0xd(%sp) # set supervisor TM bit 3036*4882a593Smuzhiyun 3037*4882a593Smuzhiyuniea_acc_done2: 3038*4882a593Smuzhiyun bra.l _real_access 3039*4882a593Smuzhiyun 3040*4882a593Smuzhiyuniea_dacc: 3041*4882a593Smuzhiyun lea -LOCAL_SIZE(%a6),%sp 3042*4882a593Smuzhiyun 3043*4882a593Smuzhiyun movc %pcr,%d1 3044*4882a593Smuzhiyun btst &0x1,%d1 3045*4882a593Smuzhiyun bne.b iea_dacc_cont 3046*4882a593Smuzhiyun fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 on stack 3047*4882a593Smuzhiyun fmovm.l LOCAL_SIZE+USER_FPCR(%sp),%fpcr,%fpsr,%fpiar # restore ctrl regs 3048*4882a593Smuzhiyuniea_dacc_cont: 3049*4882a593Smuzhiyun mov.l (%a6),%a6 3050*4882a593Smuzhiyun 3051*4882a593Smuzhiyun mov.l 0x4+LOCAL_SIZE(%sp),-0x8+0x4+LOCAL_SIZE(%sp) 3052*4882a593Smuzhiyun mov.w 0x8+LOCAL_SIZE(%sp),-0x8+0x8+LOCAL_SIZE(%sp) 3053*4882a593Smuzhiyun mov.w &0x4008,-0x8+0xa+LOCAL_SIZE(%sp) 3054*4882a593Smuzhiyun mov.l %a0,-0x8+0xc+LOCAL_SIZE(%sp) 3055*4882a593Smuzhiyun mov.w %d0,-0x8+0x10+LOCAL_SIZE(%sp) 3056*4882a593Smuzhiyun mov.w &0x0001,-0x8+0x12+LOCAL_SIZE(%sp) 3057*4882a593Smuzhiyun 3058*4882a593Smuzhiyun movm.l LOCAL_SIZE+EXC_DREGS(%sp),&0x0303 # restore d0-d1/a0-a1 3059*4882a593Smuzhiyun add.w &LOCAL_SIZE-0x4,%sp 3060*4882a593Smuzhiyun 3061*4882a593Smuzhiyun bra.b iea_acc_done 3062*4882a593Smuzhiyun 3063*4882a593Smuzhiyun######################################################################### 3064*4882a593Smuzhiyun# XDEF **************************************************************** # 3065*4882a593Smuzhiyun# _fpsp_operr(): 060FPSP entry point for FP Operr exception. # 3066*4882a593Smuzhiyun# # 3067*4882a593Smuzhiyun# This handler should be the first code executed upon taking the # 3068*4882a593Smuzhiyun# FP Operand Error exception in an operating system. 
#									#
# XREF **************************************************************** #
#	_imem_read_long() - read instruction longword			#
#	fix_skewed_ops() - adjust src operand in fsave frame		#
#	_real_operr() - "callout" to operating system operr handler	#
#	_dmem_write_{byte,word,long}() - store data to mem (opclass 3)	#
#	store_dreg_{b,w,l}() - store data to data regfile (opclass 3)	#
#	facc_out_{b,w,l}() - store to memory took access error (opcl 3)#
#									#
# INPUT *************************************************************** #
#	- The system stack contains the FP Operr exception frame	#
#	- The fsave frame contains the source operand			#
#									#
# OUTPUT ************************************************************** #
#	No access error:						#
#	- The system stack is unchanged					#
#	- The fsave frame contains the adjusted src op for opclass 0,2	#
#									#
# ALGORITHM *********************************************************** #
#	In a system where the FP Operr exception is enabled, the goal	#
# is to get to the handler specified at _real_operr(). But, on the 060,	#
# for opclass zero and two instruction taking this exception, the	#
# input operand in the fsave frame may be incorrect for some cases	#
# and needs to be corrected. This handler calls fix_skewed_ops() to	#
# do just this and then exits through _real_operr().			#
#	For opclass 3 instructions, the 060 doesn't store the default	#
# operr result out to memory or data register file as it should.	#
# This code must emulate the move out before finally exiting through	#
# _real_operr(). The move out, if to memory, is performed using		#
# _mem_write() "callout" routines that may return a failing result.	#
# In this special case, the handler must exit through facc_out()	#
# which creates an access error stack frame from the current operr	#
# stack frame.								#
#									#
#########################################################################

	global		_fpsp_operr
_fpsp_operr:

	link.w		%a6,&-LOCAL_SIZE	# init stack frame

	fsave		FP_SRC(%a6)		# grab the "busy" frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

# the FPIAR holds the "current PC" of the faulting instruction
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)	# stash opword/extension

##############################################################################

	btst		&13,%d0			# is instr an fmove out?
	bne.b		foperr_out		# fmove out


# here, we simply see if the operand in the fsave frame needs to be "unskewed".
# this would be the case for opclass two operations with a source infinity or
# denorm operand in the sgl or dbl format. NANs also become skewed, but can't
# cause an operr so we don't need to check for them here.
	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		fix_skewed_ops		# fix src op

# common exit: restore the user context, put the (possibly fixed) fsave
# frame back, and call out to the OS operand error handler.
foperr_exit:
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)		# reload (fixed) fsave frame

	unlk		%a6
	bra.l		_real_operr

########################################################################

#
# the hardware does not save the default result to memory on enabled
# operand error exceptions. we do this here before passing control to
# the user operand error handler.
#
# byte, word, and long destination format operations can pass
# through here. we simply need to test the sign of the src
# operand and save the appropriate minimum or maximum integer value
# to the effective address as pointed to by the stacked effective address.
#
# although packed opclass three operations can take operand error
# exceptions, they won't pass through here since they are caught
# first by the unsupported data format exception handler. that handler
# sends them directly to _real_operr() if necessary.
#
foperr_out:

# decide the default result: for an infinity or a real (non-zero-mantissa)
# NAN source, store the NAN mantissa; otherwise store the min/max integer
# depending on the source sign.
	mov.w		FP_SRC_EX(%a6),%d1	# fetch exponent
	andi.w		&0x7fff,%d1		# strip sign
	cmpi.w		%d1,&0x7fff		# exponent = max (INF/NAN)?
	bne.b		foperr_out_not_qnan	# no; normal number
# the operand is either an infinity or a QNAN.
	tst.l		FP_SRC_LO(%a6)		# low mantissa non-zero?
	bne.b		foperr_out_qnan		# yes; it's a NAN
	mov.l		FP_SRC_HI(%a6),%d1
	andi.l		&0x7fffffff,%d1		# hi mantissa (less j-bit) zero?
	beq.b		foperr_out_not_qnan	# yes; it's an INF
foperr_out_qnan:
	mov.l		FP_SRC_HI(%a6),L_SCR1(%a6) # default = NAN hi mantissa
	bra.b		foperr_out_jmp

foperr_out_not_qnan:
	mov.l		&0x7fffffff,%d1		# positive max integer
	tst.b		FP_SRC_EX(%a6)		# source negative?
	bpl.b		foperr_out_not_qnan2	# no
	addq.l		&0x1,%d1		# 0x7fffffff+1 = 0x80000000 (min)
foperr_out_not_qnan2:
	mov.l		%d1,L_SCR1(%a6)		# save default result

foperr_out_jmp:
	bfextu		%d0{&19:&3},%d0		# extract dst format field
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract <ea> mode,reg
	mov.w		(tbl_operr.b,%pc,%d0.w*2),%a0 # fetch table offset
	jmp		(tbl_operr.b,%pc,%a0)	# dispatch on dst format

# dispatch table indexed by destination format; entries equal to
# "tbl_operr - tbl_operr" (offset 0) are formats that cannot reach here.
tbl_operr:
	short		foperr_out_l - tbl_operr # long word integer
	short		tbl_operr - tbl_operr	# sgl prec shouldn't happen
	short		tbl_operr - tbl_operr	# ext prec shouldn't happen
	short		foperr_exit - tbl_operr	# packed won't enter here
	short		foperr_out_w - tbl_operr # word integer
	short		tbl_operr - tbl_operr	# dbl prec shouldn't happen
	short		foperr_out_b - tbl_operr # byte integer
	short		tbl_operr - tbl_operr	# packed won't enter here

# byte-sized default result: either to memory via _dmem_write_byte (exiting
# through facc_out_b on failure) or to a data register via store_dreg_b.
foperr_out_b:
	mov.b		L_SCR1(%a6),%d0		# load positive default result
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
	ble.b		foperr_out_b_save_dn	# yes
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
	bsr.l		_dmem_write_byte	# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_b		# yes

	bra.w		foperr_exit
foperr_out_b_save_dn:
	andi.w		&0x0007,%d1		# isolate register number
	bsr.l		store_dreg_b		# store result to regfile
	bra.w		foperr_exit

# word-sized default result; same structure as the byte case.
foperr_out_w:
	mov.w		L_SCR1(%a6),%d0		# load positive default result
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
	ble.b		foperr_out_w_save_dn	# yes
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
	bsr.l		_dmem_write_word	# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_w		# yes

	bra.w		foperr_exit
foperr_out_w_save_dn:
	andi.w		&0x0007,%d1		# isolate register number
	bsr.l		store_dreg_w		# store result to regfile
	bra.w		foperr_exit

# long-sized default result; same structure as the byte case.
foperr_out_l:
	mov.l		L_SCR1(%a6),%d0		# load positive default result
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
	ble.b		foperr_out_l_save_dn	# yes
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
	bsr.l		_dmem_write_long	# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_l		# yes

	bra.w		foperr_exit
foperr_out_l_save_dn:
	andi.w		&0x0007,%d1		# isolate register number
	bsr.l		store_dreg_l		# store result to regfile
	bra.w		foperr_exit

#########################################################################
# XDEF **************************************************************** #
#	_fpsp_snan(): 060FPSP entry point for FP SNAN exception.	#
#									#
#	This handler should be the first code executed upon taking the	#
#	FP Signalling NAN exception in an operating system.		#
#									#
# XREF **************************************************************** #
#	_imem_read_long() - read instruction longword			#
#	fix_skewed_ops() - adjust src operand in fsave frame		#
#	_real_snan() - "callout" to operating system SNAN handler	#
#	_dmem_write_{byte,word,long}() - store data to mem (opclass 3)	#
#	store_dreg_{b,w,l}() - store data to data regfile (opclass 3)	#
#	facc_out_{b,w,l,d,x}() - store to mem took acc error (opcl 3)	#
#	_calc_ea_fout() - fix An if <ea> is -() or ()+; also get <ea>	#
#									#
# INPUT *************************************************************** #
#	- The system stack contains the FP SNAN exception frame		#
#	- The fsave frame contains the source operand			#
#									#
# OUTPUT ************************************************************** #
#	No access error:						#
#	- The system stack is unchanged					#
#	- The fsave frame contains the adjusted src op for opclass 0,2	#
#									#
# ALGORITHM *********************************************************** #
#	In a system where the FP SNAN exception is enabled, the goal	#
# is to get to the handler specified at _real_snan(). But, on the 060,	#
# for opclass zero and two instructions taking this exception, the	#
# input operand in the fsave frame may be incorrect for some cases	#
# and needs to be corrected. This handler calls fix_skewed_ops() to	#
# do just this and then exits through _real_snan().			#
#	For opclass 3 instructions, the 060 doesn't store the default	#
# SNAN result out to memory or data register file as it should.		#
# This code must emulate the move out before finally exiting through	#
# _real_snan(). The move out, if to memory, is performed using		#
# _mem_write() "callout" routines that may return a failing result.	#
# In this special case, the handler must exit through facc_out()	#
# which creates an access error stack frame from the current SNAN	#
# stack frame.								#
#	For the case of an extended precision opclass 3 instruction,	#
# if the effective addressing mode was -() or ()+, then the address	#
# register must get updated by calling _calc_ea_fout(). If the <ea>	#
# was -(a7) from supervisor mode, then the exception frame currently	#
# on the system stack must be carefully moved "down" to make room	#
# for the operand being moved.						#
#									#
#########################################################################

	global		_fpsp_snan
_fpsp_snan:

	link.w		%a6,&-LOCAL_SIZE	# init stack frame

	fsave		FP_SRC(%a6)		# grab the "busy" frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

# the FPIAR holds the "current PC" of the faulting instruction
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)	# stash opword/extension

##############################################################################

	btst		&13,%d0			# is instr an fmove out?
	bne.w		fsnan_out		# fmove out


# here, we simply see if the operand in the fsave frame needs to be "unskewed".
# this would be the case for opclass two operations with a source infinity or
# denorm operand in the sgl or dbl format. NANs also become skewed and must be
# fixed here.
	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		fix_skewed_ops		# fix src op

# common exit: restore user context, reload the fsave frame, call out to OS.
fsnan_exit:
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)		# reload (fixed) fsave frame

	unlk		%a6
	bra.l		_real_snan

########################################################################

#
# the hardware does not save the default result to memory on enabled
# snan exceptions. we do this here before passing control to
# the user snan handler.
#
# byte, word, long, and packed destination format operations can pass
# through here. since packed format operations already were handled by
# fpsp_unsupp(), then we need to do nothing else for them here.
# for byte, word, and long, we simply need to test the sign of the src
# operand and save the appropriate minimum or maximum integer value
# to the effective address as pointed to by the stacked effective address.
#
fsnan_out:

	bfextu		%d0{&19:&3},%d0		# extract dst format field
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract <ea> mode,reg
	mov.w		(tbl_snan.b,%pc,%d0.w*2),%a0 # fetch table offset
	jmp		(tbl_snan.b,%pc,%a0)	# dispatch on dst format

# dispatch table indexed by destination format; "tbl_snan - tbl_snan"
# (offset 0) entries are the packed formats, which need no help here.
tbl_snan:
	short		fsnan_out_l - tbl_snan	# long word integer
	short		fsnan_out_s - tbl_snan	# sgl prec
	short		fsnan_out_x - tbl_snan	# ext prec
	short		tbl_snan - tbl_snan	# packed needs no help
	short		fsnan_out_w - tbl_snan	# word integer
	short		fsnan_out_d - tbl_snan	# dbl prec
	short		fsnan_out_b - tbl_snan	# byte integer
	short		tbl_snan - tbl_snan	# packed needs no help

# byte destination: default result is the top byte of the SNAN mantissa with
# the SNAN bit set (quieted); written to memory or a data register.
fsnan_out_b:
	mov.b		FP_SRC_HI(%a6),%d0	# load upper byte of SNAN
	bset		&6,%d0			# set SNAN bit
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
	ble.b		fsnan_out_b_dn		# yes
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
	bsr.l		_dmem_write_byte	# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_b		# yes

	bra.w		fsnan_exit
fsnan_out_b_dn:
	andi.w		&0x0007,%d1		# isolate register number
	bsr.l		store_dreg_b		# store result to regfile
	bra.w		fsnan_exit

# word destination; same structure as the byte case.
fsnan_out_w:
	mov.w		FP_SRC_HI(%a6),%d0	# load upper word of SNAN
	bset		&14,%d0			# set SNAN bit
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
	ble.b		fsnan_out_w_dn		# yes
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
	bsr.l		_dmem_write_word	# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_w		# yes

	bra.w		fsnan_exit
fsnan_out_w_dn:
	andi.w		&0x0007,%d1		# isolate register number
	bsr.l		store_dreg_w		# store result to regfile
	bra.w		fsnan_exit

# long destination; same structure as the byte case.
fsnan_out_l:
	mov.l		FP_SRC_HI(%a6),%d0	# load upper longword of SNAN
	bset		&30,%d0			# set SNAN bit
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
	ble.b		fsnan_out_l_dn		# yes
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
	bsr.l		_dmem_write_long	# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_l		# yes

	bra.w		fsnan_exit
fsnan_out_l_dn:
	andi.w		&0x0007,%d1		# isolate register number
	bsr.l		store_dreg_l		# store result to regfile
	bra.w		fsnan_exit

# single-precision destination: rebuild the SNAN in sgl format (sign, all-ones
# exponent, quiet bit, top 23 mantissa bits) and store it.
# NOTE(review): the data-register branch target is named fsnan_out_d_dn but is
# the sgl data-register case — the label name looks historical; verify.
fsnan_out_s:
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
	ble.b		fsnan_out_d_dn		# yes
	mov.l		FP_SRC_EX(%a6),%d0	# fetch SNAN sign
	andi.l		&0x80000000,%d0		# keep sign
	ori.l		&0x7fc00000,%d0		# insert new exponent,SNAN bit
	mov.l		FP_SRC_HI(%a6),%d1	# load mantissa
	lsr.l		&0x8,%d1		# shift mantissa for sgl
	or.l		%d1,%d0			# create sgl SNAN
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
	bsr.l		_dmem_write_long	# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_l		# yes

	bra.w		fsnan_exit
fsnan_out_d_dn:
	mov.l		FP_SRC_EX(%a6),%d0	# fetch SNAN sign
	andi.l		&0x80000000,%d0		# keep sign
	ori.l		&0x7fc00000,%d0		# insert new exponent,SNAN bit
	mov.l		%d1,-(%sp)		# save <ea> mode,reg around build
	mov.l		FP_SRC_HI(%a6),%d1	# load mantissa
	lsr.l		&0x8,%d1		# shift mantissa for sgl
	or.l		%d1,%d0			# create sgl SNAN
	mov.l		(%sp)+,%d1		# recover <ea> mode,reg
	andi.w		&0x0007,%d1		# isolate register number
	bsr.l		store_dreg_l		# store result to regfile
	bra.w		fsnan_exit

# double-precision destination: assemble the 64-bit dbl SNAN in FP_SCR0 by
# splitting the extended mantissa across the 11-bit exponent boundary, then
# write all 8 bytes at once with _dmem_write.
fsnan_out_d:
	mov.l		FP_SRC_EX(%a6),%d0	# fetch SNAN sign
	andi.l		&0x80000000,%d0		# keep sign
	ori.l		&0x7ff80000,%d0		# insert new exponent,SNAN bit
	mov.l		FP_SRC_HI(%a6),%d1	# load hi mantissa
	mov.l		%d0,FP_SCR0_EX(%a6)	# store to temp space
	mov.l		&11,%d0			# load shift amt
	lsr.l		%d0,%d1			# top mantissa bits -> low end
	or.l		%d1,FP_SCR0_EX(%a6)	# create dbl hi
	mov.l		FP_SRC_HI(%a6),%d1	# load hi mantissa
	andi.l		&0x000007ff,%d1		# keep bits shifted out above
	ror.l		%d0,%d1			# rotate them to the top
	mov.l		%d1,FP_SCR0_HI(%a6)	# store to temp space
	mov.l		FP_SRC_LO(%a6),%d1	# load lo mantissa
	lsr.l		%d0,%d1			# align with hi part
	or.l		%d1,FP_SCR0_HI(%a6)	# create dbl lo
	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
	movq.l		&0x8,%d0		# pass: size of 8 bytes
	bsr.l		_dmem_write		# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_d		# yes

	bra.w		fsnan_exit

# for extended precision, if the addressing mode is pre-decrement or
# post-increment, then the address register did not get updated.
# in addition, for pre-decrement, the stacked <ea> is incorrect.
fsnan_out_x:
	clr.b		SPCOND_FLG(%a6)		# clear special case flag

# build the quieted extended SNAN in FP_SCR0 (sign/exp word, zero pad word,
# mantissa with the SNAN bit set).
	mov.w		FP_SRC_EX(%a6),FP_SCR0_EX(%a6)
	clr.w		2+FP_SCR0(%a6)
	mov.l		FP_SRC_HI(%a6),%d0
	bset		&30,%d0			# set SNAN bit
	mov.l		%d0,FP_SCR0_HI(%a6)
	mov.l		FP_SRC_LO(%a6),FP_SCR0_LO(%a6)

	btst		&0x5,EXC_SR(%a6)	# supervisor mode exception?
	bne.b		fsnan_out_x_s		# yes

# user mode: make the user stack pointer visible to _calc_ea_fout() as
# "a7", let it fix up the address register, then restore the usp.
	mov.l		%usp,%a0		# fetch user stack pointer
	mov.l		%a0,EXC_A7(%a6)		# save on stack for calc_ea()
	mov.l		(%a6),EXC_A6(%a6)

	bsr.l		_calc_ea_fout		# find the correct ea,update An
	mov.l		%a0,%a1			# pass: dst addr
	mov.l		%a0,EXC_EA(%a6)		# stack correct <ea>

	mov.l		EXC_A7(%a6),%a0
	mov.l		%a0,%usp		# restore user stack pointer
	mov.l		EXC_A6(%a6),(%a6)

fsnan_out_x_save:
	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
	movq.l		&0xc,%d0		# pass: size of extended
	bsr.l		_dmem_write		# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_x		# yes

	bra.w		fsnan_exit

fsnan_out_x_s:
	mov.l		(%a6),EXC_A6(%a6)

	bsr.l		_calc_ea_fout		# find the correct ea,update An
	mov.l		%a0,%a1			# pass: dst addr
	mov.l		%a0,EXC_EA(%a6)		# stack correct <ea>

	mov.l		EXC_A6(%a6),(%a6)

	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
	bne.b		fsnan_out_x_save	# no

# the operation was "fmove.x SNAN,-(a7)" from supervisor mode: the 12-byte
# operand must land where the exception frame currently sits, so restore the
# user context, slide SR/PC/<ea> down 12 bytes, and deposit the operand in
# the vacated space before calling out.
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)		# reload fsave frame

	mov.l		EXC_A6(%a6),%a6		# restore frame pointer

	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
	mov.l		LOCAL_SIZE+EXC_PC+0x2(%sp),LOCAL_SIZE+EXC_PC+0x2-0xc(%sp)
	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)

	mov.l		LOCAL_SIZE+FP_SCR0_EX(%sp),LOCAL_SIZE+EXC_SR(%sp)
	mov.l		LOCAL_SIZE+FP_SCR0_HI(%sp),LOCAL_SIZE+EXC_PC+0x2(%sp)
	mov.l		LOCAL_SIZE+FP_SCR0_LO(%sp),LOCAL_SIZE+EXC_EA(%sp)

	add.l		&LOCAL_SIZE-0x8,%sp	# pop locals; keep moved frame

	bra.l		_real_snan

#########################################################################
# XDEF
# **************************************************************** #
#	_fpsp_inex(): 060FPSP entry point for FP Inexact exception.	#
#									#
#	This handler should be the first code executed upon taking the	#
#	FP Inexact exception in an operating system.			#
#									#
# XREF **************************************************************** #
#	_imem_read_long() - read instruction longword			#
#	fix_skewed_ops() - adjust src operand in fsave frame		#
#	set_tag_x() - determine optype of src/dst operands		#
#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
#	load_fpn2() - load dst operand from FP regfile			#
#	smovcr() - emulate an "fmovcr" instruction			#
#	fout() - emulate an opclass 3 instruction			#
#	tbl_unsupp - add of table of emulation routines for opclass 0,2	#
#	_real_inex() - "callout" to operating system inexact handler	#
#									#
# INPUT *************************************************************** #
#	- The system stack contains the FP Inexact exception frame	#
#	- The fsave frame contains the source operand			#
#									#
# OUTPUT ************************************************************** #
#	- The system stack is unchanged					#
#	- The fsave frame contains the adjusted src op for opclass 0,2	#
#									#
# ALGORITHM *********************************************************** #
#	In a system where the FP Inexact exception is enabled, the goal	#
# is to get to the handler specified at _real_inex(). But, on the 060,	#
# for opclass zero and two instruction taking this exception, the	#
# hardware doesn't store the correct result to the destination FP	#
# register as did the '040 and '881/2. This handler must emulate the	#
# instruction in order to get this value and then store it to the	#
# correct register before calling _real_inex().				#
#	For opclass 3 instructions, the 060 doesn't store the default	#
# inexact result out to memory or data register file as it should.	#
# This code must emulate the move out by calling fout() before finally	#
# exiting through _real_inex().						#
#									#
#########################################################################

	global		_fpsp_inex
_fpsp_inex:

	link.w		%a6,&-LOCAL_SIZE	# init stack frame

	fsave		FP_SRC(%a6)		# grab the "busy" frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

# the FPIAR holds the "current PC" of the faulting instruction
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)	# stash opword/extension

##############################################################################

	btst		&13,%d0			# is instr an fmove out?
	bne.w		finex_out		# fmove out (defined below)


# the hardware, for "fabs" and "fneg" w/ a long source format, puts the
# longword integer directly into the upper longword of the mantissa along
# w/ an exponent value of 0x401e. we convert this to extended precision here.
	bfextu		%d0{&19:&3},%d0		# fetch instr size
	bne.b		finex_cont		# instr size is not long
	cmpi.w		FP_SRC_EX(%a6),&0x401e	# is exponent 0x401e?
	bne.b		finex_cont		# no
	fmov.l		&0x0,%fpcr		# clear rounding controls
	fmov.l		FP_SRC_HI(%a6),%fp0	# load integer src
	fmov.x		%fp0,FP_SRC(%a6)	# store integer as extended precision
	mov.w		&0xe001,0x2+FP_SRC(%a6)

finex_cont:
	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		fix_skewed_ops		# fix src op

# Here, we zero the ccode and exception byte field since we're going to
# emulate the whole instruction. Notice, though, that we don't kill the
# INEX1 bit. This is because a packed op has long since been converted
# to extended before arriving here. Therefore, we need to retain the
# INEX1 bit from when the operand was first converted.
	andi.l		&0x00ff01ff,USER_FPSR(%a6) # zero all but accrued field

	fmov.l		&0x0,%fpcr		# zero current control regs
	fmov.l		&0x0,%fpsr

	bfextu		EXC_EXTWORD(%a6){&0:&6},%d1 # extract upper 6 of cmdreg
	cmpi.b		%d1,&0x17		# is op an fmovecr?
	beq.w		finex_fmovcr		# yes

	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		set_tag_x		# tag the operand type
	mov.b		%d0,STAG(%a6)		# maybe NORM,DENORM

# bits four and five of the fp extension word separate the monadic and dyadic
# operations that can pass through fpsp_inex(). remember that fcmp and ftst
# will never take this exception, but fsincos will.
	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
	beq.b		finex_extract		# monadic

	btst		&0x4,1+EXC_CMDREG(%a6)	# is operation an fsincos?
	bne.b		finex_extract		# yes

	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
	bsr.l		load_fpn2		# load dst into FP_DST

	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
	bsr.l		set_tag_x		# tag the operand type
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
	bne.b		finex_op2_done		# no
	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
finex_op2_done:
	mov.b		%d0,DTAG(%a6)		# save dst optype tag

# re-execute the faulting operation through the opclass 0/2 emulation table.
finex_extract:
	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode

	mov.b		1+EXC_CMDREG(%a6),%d1
	andi.w		&0x007f,%d1		# extract extension

	lea		FP_SRC(%a6),%a0		# pass: ptr to src
	lea		FP_DST(%a6),%a1		# pass: ptr to dst

	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
	jsr		(tbl_unsupp.l,%pc,%d1.l*1) # emulate the instruction

# the operation has been emulated. the result is in fp0.
finex_save:
	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # extract dst FP reg number
	bsr.l		store_fpreg		# store result to FP regfile

finex_exit:
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)		# reload fsave frame

	unlk		%a6
	bra.l		_real_inex

# fmovecr: emulate via smovcr() with the constant-ROM offset, then store the
# result like any other opclass 0/2 instruction.
finex_fmovcr:
	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec,mode
	mov.b		1+EXC_CMDREG(%a6),%d1
	andi.l		&0x0000007f,%d1		# pass rom offset
	bsr.l		smovcr
	bra.b		finex_save

########################################################################

#
3711*4882a593Smuzhiyun# the hardware does not save the default result to memory on enabled 3712*4882a593Smuzhiyun# inexact exceptions. we do this here before passing control to 3713*4882a593Smuzhiyun# the user inexact handler. 3714*4882a593Smuzhiyun# 3715*4882a593Smuzhiyun# byte, word, and long destination format operations can pass 3716*4882a593Smuzhiyun# through here. so can double and single precision. 3717*4882a593Smuzhiyun# although packed opclass three operations can take inexact 3718*4882a593Smuzhiyun# exceptions, they won't pass through here since they are caught 3719*4882a593Smuzhiyun# first by the unsupported data format exception handler. that handler 3720*4882a593Smuzhiyun# sends them directly to _real_inex() if necessary. 3721*4882a593Smuzhiyun# 3722*4882a593Smuzhiyunfinex_out: 3723*4882a593Smuzhiyun 3724*4882a593Smuzhiyun mov.b &NORM,STAG(%a6) # src is a NORM 3725*4882a593Smuzhiyun 3726*4882a593Smuzhiyun clr.l %d0 3727*4882a593Smuzhiyun mov.b FPCR_MODE(%a6),%d0 # pass rnd prec,mode 3728*4882a593Smuzhiyun 3729*4882a593Smuzhiyun andi.l &0xffff00ff,USER_FPSR(%a6) # zero exception field 3730*4882a593Smuzhiyun 3731*4882a593Smuzhiyun lea FP_SRC(%a6),%a0 # pass ptr to src operand 3732*4882a593Smuzhiyun 3733*4882a593Smuzhiyun bsr.l fout # store the default result 3734*4882a593Smuzhiyun 3735*4882a593Smuzhiyun bra.b finex_exit 3736*4882a593Smuzhiyun 3737*4882a593Smuzhiyun######################################################################### 3738*4882a593Smuzhiyun# XDEF **************************************************************** # 3739*4882a593Smuzhiyun# _fpsp_dz(): 060FPSP entry point for FP DZ exception. # 3740*4882a593Smuzhiyun# # 3741*4882a593Smuzhiyun# This handler should be the first code executed upon taking # 3742*4882a593Smuzhiyun# the FP DZ exception in an operating system. 
#									#
#									#
# XREF **************************************************************** #
#	_imem_read_long() - read instruction longword from memory	#
#	fix_skewed_ops() - adjust fsave operand				#
#	_real_dz() - "callout" exit point from FP DZ handler		#
#									#
# INPUT *************************************************************** #
#	- The system stack contains the FP DZ exception stack.		#
#	- The fsave frame contains the source operand.			#
#									#
# OUTPUT ************************************************************** #
#	- The system stack contains the FP DZ exception stack.		#
#	- The fsave frame contains the adjusted source operand.		#
#									#
# ALGORITHM *********************************************************** #
#	In a system where the DZ exception is enabled, the goal is to	#
# get to the handler specified at _real_dz(). But, on the 060, when the	#
# exception is taken, the input operand in the fsave state frame may	#
# be incorrect for some cases and need to be adjusted. So, this package	#
# adjusts the operand using fix_skewed_ops() and then branches to	#
# _real_dz().								#
#									#
#########################################################################

	global		_fpsp_dz
_fpsp_dz:

	link.w		%a6,&-LOCAL_SIZE	# init stack frame

	fsave		FP_SRC(%a6)		# grab the "busy" frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

# the FPIAR holds the "current PC" of the faulting instruction
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)

##############################################################################


# here, we simply see if the operand in the fsave frame needs to be "unskewed".
# this would be the case for opclass two operations with a source zero
# in the sgl or dbl format.
	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		fix_skewed_ops		# fix src op

fdz_exit:
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)

	unlk		%a6
	bra.l		_real_dz

#########################################################################
# XDEF **************************************************************** #
#	_fpsp_fline(): 060FPSP entry point for "Line F emulator"	#
#		       exception when the "reduced" version of the	#
#		       FPSP is implemented that does not emulate	#
#		       FP unimplemented instructions.			#
#									#
#	This handler should be the first code executed upon taking a	#
#	"Line F Emulator" exception in an operating system integrating	#
#	the reduced version of 060FPSP.					#
#									#
# XREF **************************************************************** #
#	_real_fpu_disabled() - Handle "FPU disabled" exceptions		#
#	_real_fline() - Handle all other cases (treated equally)	#
#									#
# INPUT *************************************************************** #
#	- The system stack contains a "Line F Emulator" exception	#
#	  stack frame.
#									#
#									#
# OUTPUT ************************************************************** #
#	- The system stack is unchanged.				#
#									#
# ALGORITHM *********************************************************** #
#	When a "Line F Emulator" exception occurs in a system where	#
# "FPU Unimplemented" instructions will not be emulated, the exception	#
# can occur because the FPU is disabled or the instruction is to be	#
# classified as "Line F". This module determines which case exists and	#
# calls the appropriate "callout".					#
#									#
#########################################################################

	global		_fpsp_fline
_fpsp_fline:

# check to see if the FPU is disabled. if so, jump to the OS entry
# point for that condition.
# (0x6(%sp) is the stacked format/vector word; 0x402c identifies the
# FP-disabled frame — TODO confirm against the 68060 User's Manual.)
	cmpi.w		0x6(%sp),&0x402c
	beq.l		_real_fpu_disabled

	bra.l		_real_fline

#########################################################################
# XDEF **************************************************************** #
#	_dcalc_ea(): calc correct <ea> from <ea> stacked on exception	#
#									#
# XREF **************************************************************** #
#	inc_areg() - increment an address register			#
#	dec_areg() - decrement an address register			#
#									#
# INPUT *************************************************************** #
#	d0 = number of bytes to adjust <ea> by				#
#									#
# OUTPUT ************************************************************** #
#	None								#
#									#
# ALGORITHM *********************************************************** #
# "Dummy" CALCulate Effective Address:					#
#	The stacked <ea> for FP unimplemented instructions and opclass	#
#	two packed instructions is correct with the exception of...	#
#									#
#	1) -(An)   : The register is not updated regardless of size.	#
#		     Also, for extended precision and packed, the	#
#		     stacked <ea> value is 8 bytes too big		#
#	2) (An)+   : The register is not updated.			#
#	3) #<data> : The upper longword of the immediate operand is	#
#		     stacked b,w,l and s sizes are completely stacked.	#
#		     d,x, and p are not.
#									#
#########################################################################

	global		_dcalc_ea
_dcalc_ea:
	mov.l		%d0, %a0		# move # bytes to %a0

	mov.b		1+EXC_OPWORD(%a6), %d0	# fetch opcode word
	mov.l		%d0, %d1		# make a copy

	andi.w		&0x38, %d0		# extract mode field
	andi.l		&0x7, %d1		# extract reg field

	cmpi.b		%d0,&0x18		# is mode (An)+ ?
	beq.b		dcea_pi			# yes

	cmpi.b		%d0,&0x20		# is mode -(An) ?
	beq.b		dcea_pd			# yes

	or.w		%d1,%d0			# concat mode,reg
	cmpi.b		%d0,&0x3c		# is mode #<data>?

	beq.b		dcea_imm		# yes

	mov.l		EXC_EA(%a6),%a0		# return <ea>
	rts

# need to set immediate data flag here since we'll need to do
# an imem_read to fetch this later.
dcea_imm:
	mov.b		&immed_flg,SPCOND_FLG(%a6)
	lea		([USER_FPIAR,%a6],0x4),%a0 # no; return <ea>
	rts

# here, the <ea> is stacked correctly. however, we must update the
# address register...
dcea_pi:
	mov.l		%a0,%d0			# pass amt to inc by
	bsr.l		inc_areg		# inc addr register

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
	rts

# the <ea> is stacked correctly for all but extended and packed which
# the <ea>s are 8 bytes too large.
# it would make no sense to have a pre-decrement to a7 in supervisor
# mode so we don't even worry about this tricky case here : )
dcea_pd:
	mov.l		%a0,%d0			# pass amt to dec by
	bsr.l		dec_areg		# dec addr register

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct

	cmpi.b		%d0,&0xc		# is opsize ext or packed?
	beq.b		dcea_pd2		# yes
	rts
dcea_pd2:
	sub.l		&0x8,%a0		# correct <ea>
	mov.l		%a0,EXC_EA(%a6)		# put correct <ea> on stack
	rts

#########################################################################
# XDEF **************************************************************** #
#	_calc_ea_fout(): calculate correct stacked <ea> for extended	#
#			 and packed data opclass 3 operations.
#									#
#									#
# XREF **************************************************************** #
#	None								#
#									#
# INPUT *************************************************************** #
#	None								#
#									#
# OUTPUT ************************************************************** #
#	a0 = return correct effective address				#
#									#
# ALGORITHM *********************************************************** #
#	For opclass 3 extended and packed data operations, the <ea>	#
# stacked for the exception is incorrect for -(an) and (an)+ addressing	#
# modes. Also, while we're at it, the index register itself must get	#
# updated.								#
#	So, for -(an), we must subtract 8 off of the stacked <ea> value	#
# and return that value as the correct <ea> and store that value in An.	#
# For (an)+, the stacked <ea> is correct but we must adjust An by +12.	#
#									#
#########################################################################

# This calc_ea is currently used to retrieve the correct <ea>
# for fmove outs of type extended and packed.
	global		_calc_ea_fout
_calc_ea_fout:
	mov.b		1+EXC_OPWORD(%a6),%d0	# fetch opcode word
	mov.l		%d0,%d1			# make a copy

	andi.w		&0x38,%d0		# extract mode field
	andi.l		&0x7,%d1		# extract reg field

	cmpi.b		%d0,&0x18		# is mode (An)+ ?
	beq.b		ceaf_pi			# yes

	cmpi.b		%d0,&0x20		# is mode -(An) ?
	beq.w		ceaf_pd			# yes

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
	rts

# (An)+ : extended and packed fmove out
#	: stacked <ea> is correct
#	: "An" not updated
ceaf_pi:
	mov.w		(tbl_ceaf_pi.b,%pc,%d1.w*2),%d1
	mov.l		EXC_EA(%a6),%a0
	jmp		(tbl_ceaf_pi.b,%pc,%d1.w*1)

	swbeg		&0x8
tbl_ceaf_pi:
	short		ceaf_pi0 - tbl_ceaf_pi
	short		ceaf_pi1 - tbl_ceaf_pi
	short		ceaf_pi2 - tbl_ceaf_pi
	short		ceaf_pi3 - tbl_ceaf_pi
	short		ceaf_pi4 - tbl_ceaf_pi
	short		ceaf_pi5 - tbl_ceaf_pi
	short		ceaf_pi6 - tbl_ceaf_pi
	short		ceaf_pi7 - tbl_ceaf_pi

# a0/a1 live in the exception frame copies; a6/a7 use the stacked values.
ceaf_pi0:
	addi.l		&0xc,EXC_DREGS+0x8(%a6)
	rts
ceaf_pi1:
	addi.l		&0xc,EXC_DREGS+0xc(%a6)
	rts
ceaf_pi2:
	add.l		&0xc,%a2
	rts
ceaf_pi3:
	add.l		&0xc,%a3
	rts
ceaf_pi4:
	add.l		&0xc,%a4
	rts
ceaf_pi5:
	add.l		&0xc,%a5
	rts
ceaf_pi6:
	addi.l		&0xc,EXC_A6(%a6)
	rts
ceaf_pi7:
	mov.b		&mia7_flg,SPCOND_FLG(%a6)
	addi.l		&0xc,EXC_A7(%a6)
	rts

# -(An) : extended and packed fmove out
#	: stacked <ea> = actual <ea> + 8
#	: "An" not updated
ceaf_pd:
	mov.w		(tbl_ceaf_pd.b,%pc,%d1.w*2),%d1
	mov.l		EXC_EA(%a6),%a0
	sub.l		&0x8,%a0
	sub.l		&0x8,EXC_EA(%a6)
	jmp		(tbl_ceaf_pd.b,%pc,%d1.w*1)

	swbeg		&0x8
tbl_ceaf_pd:
	short		ceaf_pd0 - tbl_ceaf_pd
	short		ceaf_pd1 - tbl_ceaf_pd
	short		ceaf_pd2 - tbl_ceaf_pd
	short		ceaf_pd3 - tbl_ceaf_pd
	short		ceaf_pd4 - tbl_ceaf_pd
	short		ceaf_pd5 - tbl_ceaf_pd
	short		ceaf_pd6 - tbl_ceaf_pd
	short		ceaf_pd7 - tbl_ceaf_pd

ceaf_pd0:
	mov.l		%a0,EXC_DREGS+0x8(%a6)
	rts
ceaf_pd1:
	mov.l		%a0,EXC_DREGS+0xc(%a6)
	rts
ceaf_pd2:
	mov.l		%a0,%a2
	rts
ceaf_pd3:
	mov.l		%a0,%a3
	rts
ceaf_pd4:
	mov.l		%a0,%a4
	rts
ceaf_pd5:
	mov.l		%a0,%a5
	rts
ceaf_pd6:
	mov.l		%a0,EXC_A6(%a6)
	rts
ceaf_pd7:
	mov.l		%a0,EXC_A7(%a6)
	mov.b		&mda7_flg,SPCOND_FLG(%a6)
	rts

#
# This table holds the offsets of the emulation routines for each individual
# math operation relative to the address of this table. Included are
# routines like fadd/fmul/fabs. The transcendentals ARE NOT. This is because
# this table is for the version of the 060FPSP without transcendentals.
# The location within the table is determined by the extension bits of the
# operation longword.
# (Note: "fin"/"fsin"/"fdin" below are the fmove/fsmove/fdmove emulation
# routines, not trig functions; a zero offset marks an unsupported slot.)
#

	swbeg		&109
tbl_unsupp:
	long		fin - tbl_unsupp	# 00: fmove
	long		fint - tbl_unsupp	# 01: fint
	long		tbl_unsupp - tbl_unsupp	# 02: fsinh
	long		fintrz - tbl_unsupp	# 03: fintrz
	long		fsqrt - tbl_unsupp	# 04: fsqrt
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp	# 06: flognp1
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp	# 08: fetoxm1
	long		tbl_unsupp - tbl_unsupp	# 09: ftanh
	long		tbl_unsupp - tbl_unsupp	# 0a: fatan
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp	# 0c: fasin
	long		tbl_unsupp - tbl_unsupp	# 0d: fatanh
	long		tbl_unsupp - tbl_unsupp	# 0e: fsin
	long		tbl_unsupp - tbl_unsupp	# 0f: ftan
	long		tbl_unsupp - tbl_unsupp	# 10: fetox
	long		tbl_unsupp - tbl_unsupp	# 11: ftwotox
	long		tbl_unsupp - tbl_unsupp	# 12: ftentox
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp	# 14: flogn
	long		tbl_unsupp - tbl_unsupp	# 15: flog10
	long		tbl_unsupp - tbl_unsupp	# 16: flog2
	long		tbl_unsupp - tbl_unsupp
	long		fabs - tbl_unsupp	# 18: fabs
	long		tbl_unsupp - tbl_unsupp	# 19: fcosh
	long		fneg - tbl_unsupp	# 1a: fneg
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp	# 1c: facos
	long		tbl_unsupp - tbl_unsupp	# 1d: fcos
	long		tbl_unsupp - tbl_unsupp	# 1e: fgetexp
	long		tbl_unsupp - tbl_unsupp	# 1f: fgetman
	long		fdiv - tbl_unsupp	# 20: fdiv
	long		tbl_unsupp - tbl_unsupp	# 21: fmod
	long		fadd - tbl_unsupp	# 22: fadd
	long		fmul - tbl_unsupp	# 23: fmul
	long		fsgldiv - tbl_unsupp	# 24: fsgldiv
	long		tbl_unsupp - tbl_unsupp	# 25: frem
	long		tbl_unsupp - tbl_unsupp	# 26: fscale
	long		fsglmul - tbl_unsupp	# 27: fsglmul
	long		fsub - tbl_unsupp	# 28: fsub
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp	# 30: fsincos
	long		tbl_unsupp - tbl_unsupp	# 31: fsincos
	long		tbl_unsupp - tbl_unsupp	# 32: fsincos
	long		tbl_unsupp - tbl_unsupp	# 33: fsincos
	long		tbl_unsupp - tbl_unsupp	# 34: fsincos
	long		tbl_unsupp - tbl_unsupp	# 35: fsincos
	long		tbl_unsupp - tbl_unsupp	# 36: fsincos
	long		tbl_unsupp - tbl_unsupp	# 37: fsincos
	long		fcmp - tbl_unsupp	# 38: fcmp
	long		tbl_unsupp - tbl_unsupp
	long		ftst - tbl_unsupp	# 3a: ftst
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		fsin - tbl_unsupp	# 40: fsmove
	long		fssqrt - tbl_unsupp	# 41: fssqrt
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		fdin - tbl_unsupp	# 44: fdmove
	long		fdsqrt - tbl_unsupp	# 45: fdsqrt
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		fsabs - tbl_unsupp	# 58: fsabs
	long		tbl_unsupp - tbl_unsupp
	long		fsneg - tbl_unsupp	# 5a: fsneg
	long		tbl_unsupp - tbl_unsupp
	long		fdabs - tbl_unsupp	# 5c: fdabs
	long		tbl_unsupp - tbl_unsupp
	long		fdneg - tbl_unsupp	# 5e: fdneg
	long		tbl_unsupp - tbl_unsupp
	long		fsdiv - tbl_unsupp	# 60: fsdiv
	long		tbl_unsupp - tbl_unsupp
	long		fsadd - tbl_unsupp	# 62: fsadd
	long		fsmul - tbl_unsupp	# 63: fsmul
	long		fddiv - tbl_unsupp	# 64: fddiv
	long		tbl_unsupp - tbl_unsupp
	long		fdadd - tbl_unsupp	# 66: fdadd
	long		fdmul - tbl_unsupp	# 67: fdmul
	long		fssub - tbl_unsupp	# 68: fssub
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		fdsub - tbl_unsupp	# 6c: fdsub

#################################################
# Add this here so non-fp modules can compile.
# (smovcr is called from fpsp_inex.)
	global		smovcr
smovcr:
# NOTE(review): self-branching stub -- control that reaches this entry
# spins here. Presumably the real smovcr implementation is provided
# elsewhere in the 060FPSP package; confirm against the full package.
	bra.b		smovcr

#########################################################################
# XDEF ****************************************************************	#
#	fmovm_dynamic(): emulate "fmovm" dynamic instruction		#
#									#
# XREF ****************************************************************	#
#	fetch_dreg() - fetch data register				#
#	{i,d,}mem_read() - fetch data from memory			#
#	_mem_write() - write data to memory				#
#	iea_iacc() - instruction memory access error occurred		#
#	iea_dacc() - data memory access error occurred			#
#	restore() - restore An index regs if access error occurred	#
#									#
# INPUT ***************************************************************	#
#	None								#
#									#
# OUTPUT **************************************************************	#
#	If instr is "fmovm Dn,-(A7)" from supervisor mode,		#
#		d0 = size of dump					#
#		d1 = Dn							#
#	Else if instruction access error,				#
#		d0 = FSLW						#
#	Else if data access error,					#
#		d0 = FSLW						#
#		a0 = address of fault					#
#	Else								#
#		none.							#
#									#
# ALGORITHM ***********************************************************	#
#	The effective address must be calculated since this is entered	#
# from an "Unimplemented Effective Address" exception handler. So, we	#
# have our own fcalc_ea() routine here. If an access error is flagged	#
# by a _{i,d,}mem_read() call, we must exit through the special		#
# handler.								#
#	The data register is determined and its value loaded to get the	#
# string of FP registers affected. This value is used as an index into	#
# a lookup table such that we can determine the number of bytes		#
# involved.								#
#	If the instruction is "fmovm.x <ea>,Dn", a _mem_read() is used	#
# to read in all FP values. Again, _mem_read() may fail and require a	#
# special exit.								#
#	If the instruction is "fmovm.x DN,<ea>", a _mem_write() is used	#
# to write all FP values. _mem_write() may also fail.			#
#	If the instruction is "fmovm.x DN,-(a7)" from supervisor mode,	#
# then we return the size of the dump and the string to the caller	#
# so that the move can occur outside of this routine. This special	#
# case is required so that moves to the system stack are handled	#
# correctly.								#
#									#
# DYNAMIC:								#
#	fmovm.x	dn, <ea>						#
#	fmovm.x	<ea>, dn						#
#									#
#	      <WORD 1>		     <WORD2>				#
#	1111 0010 00 |<ea>|	11@& 1000 0$$$ 0000			#
#									#
#	& = (0): predecrement addressing mode				#
#	    (1): postincrement or control addressing mode		#
#	@ = (0): move listed regs from memory to the FPU		#
#	    (1): move listed regs from the FPU to memory		#
#	$$$    : index of data register holding reg select mask		#
#									#
# NOTES:								#
#	If the data register holds a zero, then the			#
#	instruction is a nop.						#
#									#
#########################################################################

	global		fmovm_dynamic
fmovm_dynamic:

# extract the data register in which the bit string resides...
# ($$$ = bits [6:4] of the extension word; see format diagram above)
	mov.b		1+EXC_EXTWORD(%a6),%d1	# fetch extword
	andi.w		&0x70,%d1		# extract reg bits
	lsr.b		&0x4,%d1		# shift into lo bits

# fetch the bit string into d0...
# d1 holds the register number; fetch_dreg returns that register's
# value in d0.
	bsr.l		fetch_dreg		# fetch reg string

	andi.l		&0x000000ff,%d0		# keep only lo byte

	mov.l		%d0,-(%sp)		# save strg
	mov.b		(tbl_fmovm_size.w,%pc,%d0),%d0 # bytes = 12 * (# of 1's in strg)
	mov.l		%d0,-(%sp)		# save size
	bsr.l		fmovm_calc_ea		# calculate <ea>
	mov.l		(%sp)+,%d0		# restore size
	mov.l		(%sp)+,%d1		# restore strg

# if the bit string is a zero, then the operation is a no-op
# but, make sure that we've calculated ea and advanced the opword pointer
	beq.w		fmovm_data_done

# separate move ins from move outs...
# extension word bit @ (tested here): (0) memory->FPU, (1) FPU->memory
	btst		&0x5,EXC_EXTWORD(%a6)	# is it a move in or out?
	beq.w		fmovm_data_in		# bit clear: it's a move in

#############
# MOVE OUT: #
#############
fmovm_data_out:
	btst		&0x4,EXC_EXTWORD(%a6)	# control or predecrement?
	bne.w		fmovm_out_ctrl		# control

############################
fmovm_out_predec:
# for predecrement mode, the bit string is the opposite of both control
# operations and postincrement mode. (bit7 = FP7 ... bit0 = FP0)
# here, we convert it to be just like the others...
	mov.b		(tbl_fmovm_convert.w,%pc,%d1.w*1),%d1

	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode?
	beq.b		fmovm_out_ctrl		# user

fmovm_out_predec_s:
	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
	bne.b		fmovm_out_ctrl

# the operation was unfortunately an: fmovm.x dn,-(sp)
# called from supervisor mode.
# we're also passing "size" and "strg" back to the calling routine
	rts

############################
# Move out (control mode, and predecrement already converted to the
# control-order bit string): dump the selected FP registers into a
# temporary buffer on the supervisor stack, then copy that buffer out
# with _dmem_write().
fmovm_out_ctrl:
	mov.l		%a0,%a1			# move <ea> to a1

	sub.l		%d0,%sp			# subtract size of dump
	lea		(%sp),%a0

# FP0/FP1 are taken from the exception frame (EXC_FP0/EXC_FP1);
# FP2-FP7 are read directly from the FPU with fmovm.x.
# Each lsl.b below shifts the next register's select bit into the sign
# position; bpl (N clear) skips that register's move.
	tst.b		%d1			# should FP0 be moved?
	bpl.b		fmovm_out_ctrl_fp1	# no

	mov.l		0x0+EXC_FP0(%a6),(%a0)+	# yes
	mov.l		0x4+EXC_FP0(%a6),(%a0)+
	mov.l		0x8+EXC_FP0(%a6),(%a0)+

fmovm_out_ctrl_fp1:
	lsl.b		&0x1,%d1		# should FP1 be moved?
	bpl.b		fmovm_out_ctrl_fp2	# no

	mov.l		0x0+EXC_FP1(%a6),(%a0)+	# yes
	mov.l		0x4+EXC_FP1(%a6),(%a0)+
	mov.l		0x8+EXC_FP1(%a6),(%a0)+

fmovm_out_ctrl_fp2:
	lsl.b		&0x1,%d1		# should FP2 be moved?
	bpl.b		fmovm_out_ctrl_fp3	# no

	fmovm.x		&0x20,(%a0)		# yes
	add.l		&0xc,%a0

fmovm_out_ctrl_fp3:
	lsl.b		&0x1,%d1		# should FP3 be moved?
	bpl.b		fmovm_out_ctrl_fp4	# no

	fmovm.x		&0x10,(%a0)		# yes
	add.l		&0xc,%a0

fmovm_out_ctrl_fp4:
	lsl.b		&0x1,%d1		# should FP4 be moved?
	bpl.b		fmovm_out_ctrl_fp5	# no

	fmovm.x		&0x08,(%a0)		# yes
	add.l		&0xc,%a0

fmovm_out_ctrl_fp5:
	lsl.b		&0x1,%d1		# should FP5 be moved?
	bpl.b		fmovm_out_ctrl_fp6	# no

	fmovm.x		&0x04,(%a0)		# yes
	add.l		&0xc,%a0

fmovm_out_ctrl_fp6:
	lsl.b		&0x1,%d1		# should FP6 be moved?
	bpl.b		fmovm_out_ctrl_fp7	# no

	fmovm.x		&0x02,(%a0)		# yes
	add.l		&0xc,%a0

fmovm_out_ctrl_fp7:
	lsl.b		&0x1,%d1		# should FP7 be moved?
	bpl.b		fmovm_out_ctrl_done	# no

	fmovm.x		&0x01,(%a0)		# yes
	add.l		&0xc,%a0

fmovm_out_ctrl_done:
	mov.l		%a1,L_SCR1(%a6)

# _dmem_write: a0 = supervisor source, a1 = destination <ea>, d0 = byte
# count; error status comes back in d1 (NOTE(review): presumed from the
# register setup here and the d1 test below -- confirm against the
# _dmem_write callout's interface).
	lea		(%sp),%a0		# pass: supervisor src
	mov.l		%d0,-(%sp)		# save size
	bsr.l		_dmem_write		# copy data to user mem

	mov.l		(%sp)+,%d0
	add.l		%d0,%sp			# clear fpreg data from stack

	tst.l		%d1			# did dstore err?
	bne.w		fmovm_out_err		# yes

	rts

############
# MOVE IN: #
############
# Read the dump from memory into a stack buffer with _dmem_read(),
# then scatter it into the selected FP registers (FP0/FP1 land in the
# exception frame; FP2-FP7 are loaded directly with fmovm.x).
fmovm_data_in:
	mov.l		%a0,L_SCR1(%a6)

	sub.l		%d0,%sp			# make room for fpregs
	lea		(%sp),%a1

	mov.l		%d1,-(%sp)		# save bit string for later
	mov.l		%d0,-(%sp)		# save # of bytes

	bsr.l		_dmem_read		# copy data from user mem

	mov.l		(%sp)+,%d0		# retrieve # of bytes

	tst.l		%d1			# did dfetch fail?
	bne.w		fmovm_in_err		# yes

	mov.l		(%sp)+,%d1		# load bit string

	lea		(%sp),%a0		# addr of stack

# same select-bit walk as the move-out path: lsl.b pushes the next
# register's bit into the sign position, bpl skips it.
	tst.b		%d1			# should FP0 be moved?
	bpl.b		fmovm_data_in_fp1	# no

	mov.l		(%a0)+,0x0+EXC_FP0(%a6)	# yes
	mov.l		(%a0)+,0x4+EXC_FP0(%a6)
	mov.l		(%a0)+,0x8+EXC_FP0(%a6)

fmovm_data_in_fp1:
	lsl.b		&0x1,%d1		# should FP1 be moved?
	bpl.b		fmovm_data_in_fp2	# no

	mov.l		(%a0)+,0x0+EXC_FP1(%a6)	# yes
	mov.l		(%a0)+,0x4+EXC_FP1(%a6)
	mov.l		(%a0)+,0x8+EXC_FP1(%a6)

fmovm_data_in_fp2:
	lsl.b		&0x1,%d1		# should FP2 be moved?
	bpl.b		fmovm_data_in_fp3	# no

	fmovm.x		(%a0)+,&0x20		# yes

fmovm_data_in_fp3:
	lsl.b		&0x1,%d1		# should FP3 be moved?
	bpl.b		fmovm_data_in_fp4	# no

	fmovm.x		(%a0)+,&0x10		# yes

fmovm_data_in_fp4:
	lsl.b		&0x1,%d1		# should FP4 be moved?
	bpl.b		fmovm_data_in_fp5	# no

	fmovm.x		(%a0)+,&0x08		# yes

fmovm_data_in_fp5:
	lsl.b		&0x1,%d1		# should FP5 be moved?
	bpl.b		fmovm_data_in_fp6	# no

	fmovm.x		(%a0)+,&0x04		# yes

fmovm_data_in_fp6:
	lsl.b		&0x1,%d1		# should FP6 be moved?
	bpl.b		fmovm_data_in_fp7	# no

	fmovm.x		(%a0)+,&0x02		# yes

fmovm_data_in_fp7:
	lsl.b		&0x1,%d1		# should FP7 be moved?
	bpl.b		fmovm_data_in_done	# no

	fmovm.x		(%a0)+,&0x01		# yes

fmovm_data_in_done:
	add.l		%d0,%sp			# remove fpregs from stack
	rts

#####################################

fmovm_data_done:
	rts

##############################################################################

#
# table indexed by the operation's bit string that gives the number
# of bytes that will be moved.
#
# number of bytes = (# of 1's in bit string) * 12(bytes/fpreg)
#
tbl_fmovm_size:
	byte	0x00,0x0c,0x0c,0x18,0x0c,0x18,0x18,0x24
	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
	byte	0x3c,0x48,0x48,0x54,0x48,0x54,0x54,0x60

#
# table to convert a pre-decrement bit string into a post-increment
# or control bit string.
# (i.e. the bit order of each byte is reversed)
# ex:	0x00	==>	0x00
#	0x01	==>	0x80
#	0x02	==>	0x40
#	.
#	.
#	0xfd	==>	0xbf
#	0xfe	==>	0x7f
#	0xff	==>	0xff
#
tbl_fmovm_convert:
	byte	0x00,0x80,0x40,0xc0,0x20,0xa0,0x60,0xe0
	byte	0x10,0x90,0x50,0xd0,0x30,0xb0,0x70,0xf0
	byte	0x08,0x88,0x48,0xc8,0x28,0xa8,0x68,0xe8
	byte	0x18,0x98,0x58,0xd8,0x38,0xb8,0x78,0xf8
	byte	0x04,0x84,0x44,0xc4,0x24,0xa4,0x64,0xe4
	byte	0x14,0x94,0x54,0xd4,0x34,0xb4,0x74,0xf4
	byte	0x0c,0x8c,0x4c,0xcc,0x2c,0xac,0x6c,0xec
	byte	0x1c,0x9c,0x5c,0xdc,0x3c,0xbc,0x7c,0xfc
	byte	0x02,0x82,0x42,0xc2,0x22,0xa2,0x62,0xe2
	byte	0x12,0x92,0x52,0xd2,0x32,0xb2,0x72,0xf2
	byte	0x0a,0x8a,0x4a,0xca,0x2a,0xaa,0x6a,0xea
	byte	0x1a,0x9a,0x5a,0xda,0x3a,0xba,0x7a,0xfa
	byte	0x06,0x86,0x46,0xc6,0x26,0xa6,0x66,0xe6
	byte	0x16,0x96,0x56,0xd6,0x36,0xb6,0x76,0xf6
	byte	0x0e,0x8e,0x4e,0xce,0x2e,0xae,0x6e,0xee
	byte	0x1e,0x9e,0x5e,0xde,0x3e,0xbe,0x7e,0xfe
	byte	0x01,0x81,0x41,0xc1,0x21,0xa1,0x61,0xe1
	byte	0x11,0x91,0x51,0xd1,0x31,0xb1,0x71,0xf1
	byte	0x09,0x89,0x49,0xc9,0x29,0xa9,0x69,0xe9
	byte	0x19,0x99,0x59,0xd9,0x39,0xb9,0x79,0xf9
	byte	0x05,0x85,0x45,0xc5,0x25,0xa5,0x65,0xe5
	byte	0x15,0x95,0x55,0xd5,0x35,0xb5,0x75,0xf5
	byte	0x0d,0x8d,0x4d,0xcd,0x2d,0xad,0x6d,0xed
	byte	0x1d,0x9d,0x5d,0xdd,0x3d,0xbd,0x7d,0xfd
	byte	0x03,0x83,0x43,0xc3,0x23,0xa3,0x63,0xe3
	byte	0x13,0x93,0x53,0xd3,0x33,0xb3,0x73,0xf3
	byte	0x0b,0x8b,0x4b,0xcb,0x2b,0xab,0x6b,0xeb
	byte	0x1b,0x9b,0x5b,0xdb,0x3b,0xbb,0x7b,0xfb
	byte	0x07,0x87,0x47,0xc7,0x27,0xa7,0x67,0xe7
	byte	0x17,0x97,0x57,0xd7,0x37,0xb7,0x77,0xf7
	byte	0x0f,0x8f,0x4f,0xcf,0x2f,0xaf,0x6f,0xef
	byte	0x1f,0x9f,0x5f,0xdf,0x3f,0xbf,0x7f,0xff

	global		fmovm_calc_ea
###############################################
# _fmovm_calc_ea: calculate effective address #
###############################################
# In:  d0 = number of bytes to be moved
# Out: a0 = calculated <ea> (the (An)+/-(An) handlers also update the
#      saved An by the byte count, which is passed along in a0)
fmovm_calc_ea:
	mov.l		%d0,%a0			# move # bytes to a0

# currently, MODE and REG are taken from the EXC_OPWORD. this could be
# easily changed if they were inputs passed in registers.
	mov.w		EXC_OPWORD(%a6),%d0	# fetch opcode word
	mov.w		%d0,%d1			# make a copy

	andi.w		&0x3f,%d0		# extract mode field
	andi.l		&0x7,%d1		# extract reg  field

# jump to the corresponding function for each {MODE,REG} pair.
# d0 = 6-bit {MODE,REG} value; use it as a word index into the branch
# offset table, then jump through the table. Offsets are relative to
# tbl_fea_mode; invalid modes carry a zero offset (they point back at
# the table base).
	mov.w		(tbl_fea_mode.b,%pc,%d0.w*2),%d0 # fetch jmp distance
	jmp		(tbl_fea_mode.b,%pc,%d0.w*1) # jmp to correct ea mode

	swbeg		&64
tbl_fea_mode:
# mode 0 (Dn) and mode 1 (An): not valid here
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode

	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode

# mode 2: (An)
	short		faddr_ind_a0	-	tbl_fea_mode
	short		faddr_ind_a1	-	tbl_fea_mode
	short		faddr_ind_a2	-	tbl_fea_mode
	short		faddr_ind_a3	-	tbl_fea_mode
	short		faddr_ind_a4	-	tbl_fea_mode
	short		faddr_ind_a5	-	tbl_fea_mode
	short		faddr_ind_a6	-	tbl_fea_mode
	short		faddr_ind_a7	-	tbl_fea_mode

# mode 3: (An)+
	short		faddr_ind_p_a0	-	tbl_fea_mode
	short		faddr_ind_p_a1	-	tbl_fea_mode
	short		faddr_ind_p_a2	-	tbl_fea_mode
	short		faddr_ind_p_a3	-	tbl_fea_mode
	short		faddr_ind_p_a4	-	tbl_fea_mode
	short		faddr_ind_p_a5	-	tbl_fea_mode
	short		faddr_ind_p_a6	-	tbl_fea_mode
	short		faddr_ind_p_a7	-	tbl_fea_mode

# mode 4: -(An)
	short		faddr_ind_m_a0	-	tbl_fea_mode
	short		faddr_ind_m_a1	-	tbl_fea_mode
	short		faddr_ind_m_a2	-	tbl_fea_mode
	short		faddr_ind_m_a3	-	tbl_fea_mode
	short		faddr_ind_m_a4	-	tbl_fea_mode
	short		faddr_ind_m_a5	-	tbl_fea_mode
	short		faddr_ind_m_a6	-	tbl_fea_mode
	short		faddr_ind_m_a7	-	tbl_fea_mode

# mode 5: (d16,An)
	short		faddr_ind_disp_a0	-	tbl_fea_mode
	short		faddr_ind_disp_a1	-	tbl_fea_mode
	short		faddr_ind_disp_a2	-	tbl_fea_mode
	short		faddr_ind_disp_a3	-	tbl_fea_mode
	short		faddr_ind_disp_a4	-	tbl_fea_mode
	short		faddr_ind_disp_a5	-	tbl_fea_mode
	short		faddr_ind_disp_a6	-	tbl_fea_mode
	short		faddr_ind_disp_a7	-	tbl_fea_mode

# mode 6: indexed/extension-word forms (one handler for all An)
	short		faddr_ind_ext	-	tbl_fea_mode
	short		faddr_ind_ext	-	tbl_fea_mode
	short		faddr_ind_ext	-	tbl_fea_mode
	short		faddr_ind_ext	-	tbl_fea_mode
	short		faddr_ind_ext	-	tbl_fea_mode
	short		faddr_ind_ext	-	tbl_fea_mode
	short		faddr_ind_ext	-	tbl_fea_mode
	short		faddr_ind_ext	-	tbl_fea_mode

# mode 7: absolute / PC-relative; remaining encodings are invalid
	short		fabs_short	-	tbl_fea_mode
	short		fabs_long	-	tbl_fea_mode
	short		fpc_ind		-	tbl_fea_mode
	short		fpc_ind_ext	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode

###################################
# Address register indirect: (An) #
###################################
# Each handler returns the effective address in a0. a0/a1 are read
# from the exception frame (EXC_DREGS+0x8/+0xc) and a7 from EXC_A7;
# a2-a5 are still live in their registers. a6 is fetched through (%a6)
# (NOTE(review): presumably the caller's a6 saved by a link -- confirm
# against the frame layout).
faddr_ind_a0:
	mov.l		EXC_DREGS+0x8(%a6),%a0	# Get current a0
	rts

faddr_ind_a1:
	mov.l		EXC_DREGS+0xc(%a6),%a0	# Get current a1
	rts

faddr_ind_a2:
	mov.l		%a2,%a0			# Get current a2
	rts

faddr_ind_a3:
	mov.l		%a3,%a0			# Get current a3
	rts

faddr_ind_a4:
	mov.l		%a4,%a0			# Get current a4
	rts

faddr_ind_a5:
	mov.l		%a5,%a0			# Get current a5
	rts

faddr_ind_a6:
	mov.l		(%a6),%a0		# Get current a6
	rts

faddr_ind_a7:
	mov.l		EXC_A7(%a6),%a0		# Get current a7
	rts

#####################################################
# Address register indirect w/ postincrement: (An)+ #
#####################################################
# On entry a0 = number of bytes to be moved (set by fmovm_calc_ea);
# it is the post-increment amount. The pre-increment address is
# returned as the <ea> and the incremented value is written back.
faddr_ind_p_a0:
	mov.l		EXC_DREGS+0x8(%a6),%d0	# Get current a0
	mov.l		%d0,%d1
	add.l		%a0,%d1			# Increment
	mov.l		%d1,EXC_DREGS+0x8(%a6)	# Save incr value
	mov.l		%d0,%a0			# return pre-increment addr as <ea>
	rts

faddr_ind_p_a1:
	mov.l		EXC_DREGS+0xc(%a6),%d0	# Get current a1
	mov.l		%d0,%d1
	add.l		%a0,%d1			# Increment
	mov.l		%d1,EXC_DREGS+0xc(%a6)	# Save incr value
	mov.l		%d0,%a0
	rts

faddr_ind_p_a2:
	mov.l		%a2,%d0			# Get current a2
	mov.l		%d0,%d1
	add.l		%a0,%d1			# Increment
	mov.l		%d1,%a2			# Save incr value
	mov.l		%d0,%a0
	rts

faddr_ind_p_a3:
	mov.l		%a3,%d0			# Get current a3
	mov.l		%d0,%d1
	add.l		%a0,%d1			# Increment
	mov.l		%d1,%a3			# Save incr value
	mov.l		%d0,%a0
	rts

faddr_ind_p_a4:
	mov.l		%a4,%d0			# Get current a4
	mov.l		%d0,%d1
	add.l		%a0,%d1			# Increment
	mov.l		%d1,%a4			# Save incr value
	mov.l		%d0,%a0
	rts

faddr_ind_p_a5:
	mov.l		%a5,%d0			# Get current a5
	mov.l		%d0,%d1
	add.l		%a0,%d1			# Increment
	mov.l		%d1,%a5			# Save incr value
	mov.l		%d0,%a0
	rts

faddr_ind_p_a6:
	mov.l		(%a6),%d0		# Get current a6
	mov.l		%d0,%d1
	add.l		%a0,%d1			# Increment
	mov.l		%d1,(%a6)		# Save incr value
	mov.l		%d0,%a0
	rts

faddr_ind_p_a7:
# (a7)+ needs extra care: record the special case so the caller can
# handle the system-stack move correctly.
	mov.b		&mia7_flg,SPCOND_FLG(%a6) # set "special case" flag

	mov.l		EXC_A7(%a6),%d0		# Get current a7
	mov.l		%d0,%d1
	add.l		%a0,%d1			# Increment
	mov.l		%d1,EXC_A7(%a6)		# Save incr value
	mov.l		%d0,%a0
	rts

####################################################
# Address register indirect w/ predecrement: -(An) #
####################################################
# a0 = number of bytes on entry; the decremented address is both
# written back and returned as the <ea>.
faddr_ind_m_a0:
	mov.l		EXC_DREGS+0x8(%a6),%d0	# Get current a0
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,EXC_DREGS+0x8(%a6)	# Save decr value
	mov.l		%d0,%a0
	rts

faddr_ind_m_a1:
	mov.l		EXC_DREGS+0xc(%a6),%d0	# Get current a1
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,EXC_DREGS+0xc(%a6)	# Save decr value
	mov.l		%d0,%a0
	rts

faddr_ind_m_a2:
	mov.l		%a2,%d0			# Get current a2
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,%a2			# Save decr value
	mov.l		%d0,%a0
	rts

faddr_ind_m_a3:
	mov.l		%a3,%d0			# Get current a3
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,%a3			# Save decr value
	mov.l		%d0,%a0
	rts

faddr_ind_m_a4:
	mov.l		%a4,%d0			# Get current a4
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,%a4			# Save decr value
	mov.l		%d0,%a0
	rts

faddr_ind_m_a5:
	mov.l		%a5,%d0			# Get current a5
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,%a5			# Save decr value
	mov.l		%d0,%a0
	rts

faddr_ind_m_a6:
	mov.l		(%a6),%d0		# Get current a6
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,(%a6)		# Save decr value
	mov.l		%d0,%a0
	rts

faddr_ind_m_a7:
# -(a7) is the special case tested by fmovm_out_predec_s: record it so
# the supervisor-mode system-stack move can be handled by the caller.
	mov.b		&mda7_flg,SPCOND_FLG(%a6) # set "special case" flag

	mov.l		EXC_A7(%a6),%d0		# Get current a7
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,EXC_A7(%a6)		# Save decr value
	mov.l		%d0,%a0
	rts

########################################################
# Address register indirect w/ displacement: (d16, An) #
########################################################
# Fetch the 16-bit displacement from the instruction stream; an
# instruction-fetch error (flagged in d1) exits through iea_iacc.
faddr_ind_disp_a0:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

# word-to-address-register move sign-extends the displacement
	mov.w		%d0,%a0			# sign extend displacement

	add.l		EXC_DREGS+0x8(%a6),%a0	# a0 + d16
	rts

faddr_ind_disp_a1:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		EXC_DREGS+0xc(%a6),%a0	# a1 + d16
	rts

faddr_ind_disp_a2:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		%a2,%a0			# a2 + d16
	rts

faddr_ind_disp_a3:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		%a3,%a0			# a3 + d16
	rts

faddr_ind_disp_a4:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		%a4,%a0			# a4 + d16
	rts

faddr_ind_disp_a5:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		%a5,%a0			# a5 + d16
	rts

faddr_ind_disp_a6:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		(%a6),%a0		# a6 + d16
	rts

faddr_ind_disp_a7:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		EXC_A7(%a6),%a0		# a7 + d16
	rts

########################################################################
# Address register indirect w/ index(8-bit displacement): (d8, An, Xn) #
#    "        "         "    w/   "  (base displacement): (bd, An, Xn) #
# Memory indirect postindexed: ([bd, An], Xn, od)		       #
# Memory indirect preindexed: ([bd, An, Xn], od)		       #
########################################################################
faddr_ind_ext:
	addq.l		&0x8,%d1		# regno 8-15: address regs ("base areg" below)
	bsr.l		fetch_dreg		# fetch base areg
	mov.l		%d0,-(%sp)		# save base while fetching extword

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word		# fetch extword in d0

	tst.l		%d1			# did ifetch fail?
4949*4882a593Smuzhiyun bne.l iea_iacc # yes 4950*4882a593Smuzhiyun 4951*4882a593Smuzhiyun mov.l (%sp)+,%a0 4952*4882a593Smuzhiyun 4953*4882a593Smuzhiyun btst &0x8,%d0 4954*4882a593Smuzhiyun bne.w fcalc_mem_ind 4955*4882a593Smuzhiyun 4956*4882a593Smuzhiyun mov.l %d0,L_SCR1(%a6) # hold opword 4957*4882a593Smuzhiyun 4958*4882a593Smuzhiyun mov.l %d0,%d1 4959*4882a593Smuzhiyun rol.w &0x4,%d1 4960*4882a593Smuzhiyun andi.w &0xf,%d1 # extract index regno 4961*4882a593Smuzhiyun 4962*4882a593Smuzhiyun# count on fetch_dreg() not to alter a0... 4963*4882a593Smuzhiyun bsr.l fetch_dreg # fetch index 4964*4882a593Smuzhiyun 4965*4882a593Smuzhiyun mov.l %d2,-(%sp) # save d2 4966*4882a593Smuzhiyun mov.l L_SCR1(%a6),%d2 # fetch opword 4967*4882a593Smuzhiyun 4968*4882a593Smuzhiyun btst &0xb,%d2 # is it word or long? 4969*4882a593Smuzhiyun bne.b faii8_long 4970*4882a593Smuzhiyun ext.l %d0 # sign extend word index 4971*4882a593Smuzhiyunfaii8_long: 4972*4882a593Smuzhiyun mov.l %d2,%d1 4973*4882a593Smuzhiyun rol.w &0x7,%d1 4974*4882a593Smuzhiyun andi.l &0x3,%d1 # extract scale value 4975*4882a593Smuzhiyun 4976*4882a593Smuzhiyun lsl.l %d1,%d0 # shift index by scale 4977*4882a593Smuzhiyun 4978*4882a593Smuzhiyun extb.l %d2 # sign extend displacement 4979*4882a593Smuzhiyun add.l %d2,%d0 # index + disp 4980*4882a593Smuzhiyun add.l %d0,%a0 # An + (index + disp) 4981*4882a593Smuzhiyun 4982*4882a593Smuzhiyun mov.l (%sp)+,%d2 # restore old d2 4983*4882a593Smuzhiyun rts 4984*4882a593Smuzhiyun 4985*4882a593Smuzhiyun########################### 4986*4882a593Smuzhiyun# Absolute short: (XXX).W # 4987*4882a593Smuzhiyun########################### 4988*4882a593Smuzhiyunfabs_short: 4989*4882a593Smuzhiyun mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 4990*4882a593Smuzhiyun addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr 4991*4882a593Smuzhiyun bsr.l _imem_read_word # fetch short address 4992*4882a593Smuzhiyun 4993*4882a593Smuzhiyun tst.l %d1 # did ifetch fail? 
4994*4882a593Smuzhiyun bne.l iea_iacc # yes 4995*4882a593Smuzhiyun 4996*4882a593Smuzhiyun mov.w %d0,%a0 # return <ea> in a0 4997*4882a593Smuzhiyun rts 4998*4882a593Smuzhiyun 4999*4882a593Smuzhiyun########################## 5000*4882a593Smuzhiyun# Absolute long: (XXX).L # 5001*4882a593Smuzhiyun########################## 5002*4882a593Smuzhiyunfabs_long: 5003*4882a593Smuzhiyun mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 5004*4882a593Smuzhiyun addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr 5005*4882a593Smuzhiyun bsr.l _imem_read_long # fetch long address 5006*4882a593Smuzhiyun 5007*4882a593Smuzhiyun tst.l %d1 # did ifetch fail? 5008*4882a593Smuzhiyun bne.l iea_iacc # yes 5009*4882a593Smuzhiyun 5010*4882a593Smuzhiyun mov.l %d0,%a0 # return <ea> in a0 5011*4882a593Smuzhiyun rts 5012*4882a593Smuzhiyun 5013*4882a593Smuzhiyun####################################################### 5014*4882a593Smuzhiyun# Program counter indirect w/ displacement: (d16, PC) # 5015*4882a593Smuzhiyun####################################################### 5016*4882a593Smuzhiyunfpc_ind: 5017*4882a593Smuzhiyun mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 5018*4882a593Smuzhiyun addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr 5019*4882a593Smuzhiyun bsr.l _imem_read_word # fetch word displacement 5020*4882a593Smuzhiyun 5021*4882a593Smuzhiyun tst.l %d1 # did ifetch fail? 5022*4882a593Smuzhiyun bne.l iea_iacc # yes 5023*4882a593Smuzhiyun 5024*4882a593Smuzhiyun mov.w %d0,%a0 # sign extend displacement 5025*4882a593Smuzhiyun 5026*4882a593Smuzhiyun add.l EXC_EXTWPTR(%a6),%a0 # pc + d16 5027*4882a593Smuzhiyun 5028*4882a593Smuzhiyun# _imem_read_word() increased the extwptr by 2. need to adjust here. 
5029*4882a593Smuzhiyun subq.l &0x2,%a0 # adjust <ea> 5030*4882a593Smuzhiyun rts 5031*4882a593Smuzhiyun 5032*4882a593Smuzhiyun########################################################## 5033*4882a593Smuzhiyun# PC indirect w/ index(8-bit displacement): (d8, PC, An) # 5034*4882a593Smuzhiyun# " " w/ " (base displacement): (bd, PC, An) # 5035*4882a593Smuzhiyun# PC memory indirect postindexed: ([bd, PC], Xn, od) # 5036*4882a593Smuzhiyun# PC memory indirect preindexed: ([bd, PC, Xn], od) # 5037*4882a593Smuzhiyun########################################################## 5038*4882a593Smuzhiyunfpc_ind_ext: 5039*4882a593Smuzhiyun mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 5040*4882a593Smuzhiyun addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr 5041*4882a593Smuzhiyun bsr.l _imem_read_word # fetch ext word 5042*4882a593Smuzhiyun 5043*4882a593Smuzhiyun tst.l %d1 # did ifetch fail? 5044*4882a593Smuzhiyun bne.l iea_iacc # yes 5045*4882a593Smuzhiyun 5046*4882a593Smuzhiyun mov.l EXC_EXTWPTR(%a6),%a0 # put base in a0 5047*4882a593Smuzhiyun subq.l &0x2,%a0 # adjust base 5048*4882a593Smuzhiyun 5049*4882a593Smuzhiyun btst &0x8,%d0 # is disp only 8 bits? 5050*4882a593Smuzhiyun bne.w fcalc_mem_ind # calc memory indirect 5051*4882a593Smuzhiyun 5052*4882a593Smuzhiyun mov.l %d0,L_SCR1(%a6) # store opword 5053*4882a593Smuzhiyun 5054*4882a593Smuzhiyun mov.l %d0,%d1 # make extword copy 5055*4882a593Smuzhiyun rol.w &0x4,%d1 # rotate reg num into place 5056*4882a593Smuzhiyun andi.w &0xf,%d1 # extract register number 5057*4882a593Smuzhiyun 5058*4882a593Smuzhiyun# count on fetch_dreg() not to alter a0... 5059*4882a593Smuzhiyun bsr.l fetch_dreg # fetch index 5060*4882a593Smuzhiyun 5061*4882a593Smuzhiyun mov.l %d2,-(%sp) # save d2 5062*4882a593Smuzhiyun mov.l L_SCR1(%a6),%d2 # fetch opword 5063*4882a593Smuzhiyun 5064*4882a593Smuzhiyun btst &0xb,%d2 # is index word or long? 
5065*4882a593Smuzhiyun bne.b fpii8_long # long 5066*4882a593Smuzhiyun ext.l %d0 # sign extend word index 5067*4882a593Smuzhiyunfpii8_long: 5068*4882a593Smuzhiyun mov.l %d2,%d1 5069*4882a593Smuzhiyun rol.w &0x7,%d1 # rotate scale value into place 5070*4882a593Smuzhiyun andi.l &0x3,%d1 # extract scale value 5071*4882a593Smuzhiyun 5072*4882a593Smuzhiyun lsl.l %d1,%d0 # shift index by scale 5073*4882a593Smuzhiyun 5074*4882a593Smuzhiyun extb.l %d2 # sign extend displacement 5075*4882a593Smuzhiyun add.l %d2,%d0 # disp + index 5076*4882a593Smuzhiyun add.l %d0,%a0 # An + (index + disp) 5077*4882a593Smuzhiyun 5078*4882a593Smuzhiyun mov.l (%sp)+,%d2 # restore temp register 5079*4882a593Smuzhiyun rts 5080*4882a593Smuzhiyun 5081*4882a593Smuzhiyun# d2 = index 5082*4882a593Smuzhiyun# d3 = base 5083*4882a593Smuzhiyun# d4 = od 5084*4882a593Smuzhiyun# d5 = extword 5085*4882a593Smuzhiyunfcalc_mem_ind: 5086*4882a593Smuzhiyun btst &0x6,%d0 # is the index suppressed? 5087*4882a593Smuzhiyun beq.b fcalc_index 5088*4882a593Smuzhiyun 5089*4882a593Smuzhiyun movm.l &0x3c00,-(%sp) # save d2-d5 5090*4882a593Smuzhiyun 5091*4882a593Smuzhiyun mov.l %d0,%d5 # put extword in d5 5092*4882a593Smuzhiyun mov.l %a0,%d3 # put base in d3 5093*4882a593Smuzhiyun 5094*4882a593Smuzhiyun clr.l %d2 # yes, so index = 0 5095*4882a593Smuzhiyun bra.b fbase_supp_ck 5096*4882a593Smuzhiyun 5097*4882a593Smuzhiyun# index: 5098*4882a593Smuzhiyunfcalc_index: 5099*4882a593Smuzhiyun mov.l %d0,L_SCR1(%a6) # save d0 (opword) 5100*4882a593Smuzhiyun bfextu %d0{&16:&4},%d1 # fetch dreg index 5101*4882a593Smuzhiyun bsr.l fetch_dreg 5102*4882a593Smuzhiyun 5103*4882a593Smuzhiyun movm.l &0x3c00,-(%sp) # save d2-d5 5104*4882a593Smuzhiyun mov.l %d0,%d2 # put index in d2 5105*4882a593Smuzhiyun mov.l L_SCR1(%a6),%d5 5106*4882a593Smuzhiyun mov.l %a0,%d3 5107*4882a593Smuzhiyun 5108*4882a593Smuzhiyun btst &0xb,%d5 # is index word or long? 
5109*4882a593Smuzhiyun bne.b fno_ext 5110*4882a593Smuzhiyun ext.l %d2 5111*4882a593Smuzhiyun 5112*4882a593Smuzhiyunfno_ext: 5113*4882a593Smuzhiyun bfextu %d5{&21:&2},%d0 5114*4882a593Smuzhiyun lsl.l %d0,%d2 5115*4882a593Smuzhiyun 5116*4882a593Smuzhiyun# base address (passed as parameter in d3): 5117*4882a593Smuzhiyun# we clear the value here if it should actually be suppressed. 5118*4882a593Smuzhiyunfbase_supp_ck: 5119*4882a593Smuzhiyun btst &0x7,%d5 # is the bd suppressed? 5120*4882a593Smuzhiyun beq.b fno_base_sup 5121*4882a593Smuzhiyun clr.l %d3 5122*4882a593Smuzhiyun 5123*4882a593Smuzhiyun# base displacement: 5124*4882a593Smuzhiyunfno_base_sup: 5125*4882a593Smuzhiyun bfextu %d5{&26:&2},%d0 # get bd size 5126*4882a593Smuzhiyun# beq.l fmovm_error # if (size == 0) it's reserved 5127*4882a593Smuzhiyun 5128*4882a593Smuzhiyun cmpi.b %d0,&0x2 5129*4882a593Smuzhiyun blt.b fno_bd 5130*4882a593Smuzhiyun beq.b fget_word_bd 5131*4882a593Smuzhiyun 5132*4882a593Smuzhiyun mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 5133*4882a593Smuzhiyun addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr 5134*4882a593Smuzhiyun bsr.l _imem_read_long 5135*4882a593Smuzhiyun 5136*4882a593Smuzhiyun tst.l %d1 # did ifetch fail? 5137*4882a593Smuzhiyun bne.l fcea_iacc # yes 5138*4882a593Smuzhiyun 5139*4882a593Smuzhiyun bra.b fchk_ind 5140*4882a593Smuzhiyun 5141*4882a593Smuzhiyunfget_word_bd: 5142*4882a593Smuzhiyun mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 5143*4882a593Smuzhiyun addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr 5144*4882a593Smuzhiyun bsr.l _imem_read_word 5145*4882a593Smuzhiyun 5146*4882a593Smuzhiyun tst.l %d1 # did ifetch fail? 
5147*4882a593Smuzhiyun bne.l fcea_iacc # yes 5148*4882a593Smuzhiyun 5149*4882a593Smuzhiyun ext.l %d0 # sign extend bd 5150*4882a593Smuzhiyun 5151*4882a593Smuzhiyunfchk_ind: 5152*4882a593Smuzhiyun add.l %d0,%d3 # base += bd 5153*4882a593Smuzhiyun 5154*4882a593Smuzhiyun# outer displacement: 5155*4882a593Smuzhiyunfno_bd: 5156*4882a593Smuzhiyun bfextu %d5{&30:&2},%d0 # is od suppressed? 5157*4882a593Smuzhiyun beq.w faii_bd 5158*4882a593Smuzhiyun 5159*4882a593Smuzhiyun cmpi.b %d0,&0x2 5160*4882a593Smuzhiyun blt.b fnull_od 5161*4882a593Smuzhiyun beq.b fword_od 5162*4882a593Smuzhiyun 5163*4882a593Smuzhiyun mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 5164*4882a593Smuzhiyun addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr 5165*4882a593Smuzhiyun bsr.l _imem_read_long 5166*4882a593Smuzhiyun 5167*4882a593Smuzhiyun tst.l %d1 # did ifetch fail? 5168*4882a593Smuzhiyun bne.l fcea_iacc # yes 5169*4882a593Smuzhiyun 5170*4882a593Smuzhiyun bra.b fadd_them 5171*4882a593Smuzhiyun 5172*4882a593Smuzhiyunfword_od: 5173*4882a593Smuzhiyun mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 5174*4882a593Smuzhiyun addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr 5175*4882a593Smuzhiyun bsr.l _imem_read_word 5176*4882a593Smuzhiyun 5177*4882a593Smuzhiyun tst.l %d1 # did ifetch fail? 5178*4882a593Smuzhiyun bne.l fcea_iacc # yes 5179*4882a593Smuzhiyun 5180*4882a593Smuzhiyun ext.l %d0 # sign extend od 5181*4882a593Smuzhiyun bra.b fadd_them 5182*4882a593Smuzhiyun 5183*4882a593Smuzhiyunfnull_od: 5184*4882a593Smuzhiyun clr.l %d0 5185*4882a593Smuzhiyun 5186*4882a593Smuzhiyunfadd_them: 5187*4882a593Smuzhiyun mov.l %d0,%d4 5188*4882a593Smuzhiyun 5189*4882a593Smuzhiyun btst &0x2,%d5 # pre or post indexing? 5190*4882a593Smuzhiyun beq.b fpre_indexed 5191*4882a593Smuzhiyun 5192*4882a593Smuzhiyun mov.l %d3,%a0 5193*4882a593Smuzhiyun bsr.l _dmem_read_long 5194*4882a593Smuzhiyun 5195*4882a593Smuzhiyun tst.l %d1 # did dfetch fail? 
5196*4882a593Smuzhiyun bne.w fcea_err # yes 5197*4882a593Smuzhiyun 5198*4882a593Smuzhiyun add.l %d2,%d0 # <ea> += index 5199*4882a593Smuzhiyun add.l %d4,%d0 # <ea> += od 5200*4882a593Smuzhiyun bra.b fdone_ea 5201*4882a593Smuzhiyun 5202*4882a593Smuzhiyunfpre_indexed: 5203*4882a593Smuzhiyun add.l %d2,%d3 # preindexing 5204*4882a593Smuzhiyun mov.l %d3,%a0 5205*4882a593Smuzhiyun bsr.l _dmem_read_long 5206*4882a593Smuzhiyun 5207*4882a593Smuzhiyun tst.l %d1 # did dfetch fail? 5208*4882a593Smuzhiyun bne.w fcea_err # yes 5209*4882a593Smuzhiyun 5210*4882a593Smuzhiyun add.l %d4,%d0 # ea += od 5211*4882a593Smuzhiyun bra.b fdone_ea 5212*4882a593Smuzhiyun 5213*4882a593Smuzhiyunfaii_bd: 5214*4882a593Smuzhiyun add.l %d2,%d3 # ea = (base + bd) + index 5215*4882a593Smuzhiyun mov.l %d3,%d0 5216*4882a593Smuzhiyunfdone_ea: 5217*4882a593Smuzhiyun mov.l %d0,%a0 5218*4882a593Smuzhiyun 5219*4882a593Smuzhiyun movm.l (%sp)+,&0x003c # restore d2-d5 5220*4882a593Smuzhiyun rts 5221*4882a593Smuzhiyun 5222*4882a593Smuzhiyun######################################################### 5223*4882a593Smuzhiyunfcea_err: 5224*4882a593Smuzhiyun mov.l %d3,%a0 5225*4882a593Smuzhiyun 5226*4882a593Smuzhiyun movm.l (%sp)+,&0x003c # restore d2-d5 5227*4882a593Smuzhiyun mov.w &0x0101,%d0 5228*4882a593Smuzhiyun bra.l iea_dacc 5229*4882a593Smuzhiyun 5230*4882a593Smuzhiyunfcea_iacc: 5231*4882a593Smuzhiyun movm.l (%sp)+,&0x003c # restore d2-d5 5232*4882a593Smuzhiyun bra.l iea_iacc 5233*4882a593Smuzhiyun 5234*4882a593Smuzhiyunfmovm_out_err: 5235*4882a593Smuzhiyun bsr.l restore 5236*4882a593Smuzhiyun mov.w &0x00e1,%d0 5237*4882a593Smuzhiyun bra.b fmovm_err 5238*4882a593Smuzhiyun 5239*4882a593Smuzhiyunfmovm_in_err: 5240*4882a593Smuzhiyun bsr.l restore 5241*4882a593Smuzhiyun mov.w &0x0161,%d0 5242*4882a593Smuzhiyun 5243*4882a593Smuzhiyunfmovm_err: 5244*4882a593Smuzhiyun mov.l L_SCR1(%a6),%a0 5245*4882a593Smuzhiyun bra.l iea_dacc 5246*4882a593Smuzhiyun 
#########################################################################
# XDEF ****************************************************************	#
#	fmovm_ctrl(): emulate fmovm.l of control registers instr	#
#									#
# XREF ****************************************************************	#
#	_imem_read_long() - read longword from memory			#
#	iea_iacc() - _imem_read_long() failed; error recovery		#
#									#
# INPUT ***************************************************************	#
#	None								#
#									#
# OUTPUT **************************************************************	#
#	If _imem_read_long() doesn't fail:				#
#		USER_FPCR(a6)  = new FPCR value				#
#		USER_FPSR(a6)  = new FPSR value				#
#		USER_FPIAR(a6) = new FPIAR value			#
#									#
# ALGORITHM ***********************************************************	#
#	Decode the instruction type by looking at the extension word	#
# in order to see how many control registers to fetch from memory.	#
# Fetch them using _imem_read_long(). If this fetch fails, exit through	#
# the special access error exit handler iea_iacc().			#
#									#
# Instruction word decoding:						#
#									#
#	fmovem.l #<data>, {FPIAR&|FPCR&|FPSR}				#
#									#
#		WORD1			WORD2				#
#	1111 0010 00 111100	100$ $$00 0000 0000			#
#									#
#	$$$ (100): FPCR							#
#	    (010): FPSR							#
#	    (001): FPIAR						#
#	    (000): FPIAR						#
#########################################################################

	global		fmovm_ctrl
fmovm_ctrl:
	mov.b		EXC_EXTWORD(%a6),%d0	# fetch reg select bits
	cmpi.b		%d0,&0x9c		# fpcr & fpsr & fpiar ?
	beq.w		fctrl_in_7		# yes
	cmpi.b		%d0,&0x98		# fpcr & fpsr ?
	beq.w		fctrl_in_6		# yes
	cmpi.b		%d0,&0x94		# fpcr & fpiar ?
	beq.b		fctrl_in_5		# yes
	# fall through: remaining two-register case (fpsr & fpiar)

# fmovem.l #<data>, fpsr/fpiar
fctrl_in_3:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPSR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPSR(%a6)	# store new FPSR to stack
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPIAR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPIAR(%a6)	# store new FPIAR to stack
	rts

# fmovem.l #<data>, fpcr/fpiar
fctrl_in_5:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPCR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPCR(%a6)	# store new FPCR to stack
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPIAR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPIAR(%a6)	# store new FPIAR to stack
	rts

# fmovem.l #<data>, fpcr/fpsr
fctrl_in_6:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPCR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPCR(%a6)	# store new FPCR to mem
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPSR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPSR(%a6)	# store new FPSR to mem
	rts

# fmovem.l #<data>, fpcr/fpsr/fpiar
fctrl_in_7:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPCR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPCR(%a6)	# store new FPCR to mem
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPSR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPSR(%a6)	# store new FPSR to mem
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPIAR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPIAR(%a6)	# store new FPIAR to mem
	rts

##########################################################################

#########################################################################
# XDEF ****************************************************************	#
#	addsub_scaler2(): scale inputs to fadd/fsub such that no	#
#			  OVFL/UNFL exceptions will result		#
#									#
# XREF ****************************************************************	#
#	norm() - normalize mantissa after adjusting exponent		#
#									#
# INPUT ***************************************************************	#
#	FP_SRC(a6) = fp op1(src)					#
#	FP_DST(a6) = fp op2(dst)					#
#									#
# OUTPUT **************************************************************	#
#	FP_SRC(a6) = fp op1 scaled(src)					#
#	FP_DST(a6) = fp op2 scaled(dst)					#
#	d0         = scale amount					#
#									#
# ALGORITHM ***********************************************************	#
#	If the DST exponent is > the SRC exponent, set the DST exponent	#
# equal to 0x3fff and scale the SRC exponent by the value that the	#
# DST exponent was scaled by. If the SRC exponent is greater or equal,	#
# do the opposite. Return this scale factor in d0.
#	If the two exponents differ by > the number of mantissa bits	#
# plus two, then set the smallest exponent to a very small value as a	#
# quick shortcut.							#
#									#
#########################################################################

	global		addsub_scaler2
addsub_scaler2:
# copy both operands into the scratch area; FP_SCR0 holds SRC, FP_SCR1 holds DST
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
	mov.w		SRC_EX(%a0),%d0
	mov.w		DST_EX(%a1),%d1
	mov.w		%d0,FP_SCR0_EX(%a6)
	mov.w		%d1,FP_SCR1_EX(%a6)

	andi.w		&0x7fff,%d0		# strip sign: biased exponents only
	andi.w		&0x7fff,%d1
	mov.w		%d0,L_SCR1(%a6)		# store src exponent
	mov.w		%d1,2+L_SCR1(%a6)	# store dst exponent

	cmp.w		%d0, %d1		# is src exp >= dst exp?
	bge.l		src_exp_ge2

# dst exp is > src exp; scale dst to exp = 0x3fff
dst_exp_gt2:
	bsr.l		scale_to_zero_dst
	mov.l		%d0,-(%sp)		# save scale factor

	cmpi.b		STAG(%a6),&DENORM	# is src denormalized? (src tag)
	bne.b		cmpexp12

	lea		FP_SCR0(%a6),%a0	# FP_SCR0 = src operand
	bsr.l		norm			# normalize the denorm; result is new exp
	neg.w		%d0			# new exp = -(shft val)
	mov.w		%d0,L_SCR1(%a6)		# insert new src exp

cmpexp12:
	mov.w		2+L_SCR1(%a6),%d0
	subi.w		&mantissalen+2,%d0	# subtract mantissalen+2 from larger exp

	cmp.w		%d0,L_SCR1(%a6)		# is difference >= len(mantissa)+2?
	bge.b		quick_scale12

	mov.w		L_SCR1(%a6),%d0
	add.w		0x2(%sp),%d0		# scale src exponent by scale factor
	mov.w		FP_SCR0_EX(%a6),%d1
	and.w		&0x8000,%d1		# keep src sign bit
	or.w		%d1,%d0			# concat {sgn,new exp}
	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new src exponent

	mov.l		(%sp)+,%d0		# return SCALE factor
	rts

quick_scale12:
# exponents differ too much: force src exponent to 1 (smallest normal)
	andi.w		&0x8000,FP_SCR0_EX(%a6)	# zero src exponent
	bset		&0x0,1+FP_SCR0_EX(%a6)	# set exp = 1

	mov.l		(%sp)+,%d0		# return SCALE factor
	rts

# src exp is >= dst exp; scale src to exp = 0x3fff
src_exp_ge2:
	bsr.l		scale_to_zero_src
	mov.l		%d0,-(%sp)		# save scale factor

	cmpi.b		DTAG(%a6),&DENORM	# is dst denormalized?
	bne.b		cmpexp22
	lea		FP_SCR1(%a6),%a0	# FP_SCR1 = dst operand
	bsr.l		norm			# normalize the denorm; result is new exp
	neg.w		%d0			# new exp = -(shft val)
	mov.w		%d0,2+L_SCR1(%a6)	# insert new dst exp

cmpexp22:
	mov.w		L_SCR1(%a6),%d0
	subi.w		&mantissalen+2,%d0	# subtract mantissalen+2 from larger exp

	cmp.w		%d0,2+L_SCR1(%a6)	# is difference >= len(mantissa)+2?
	bge.b		quick_scale22

	mov.w		2+L_SCR1(%a6),%d0
	add.w		0x2(%sp),%d0		# scale dst exponent by scale factor
	mov.w		FP_SCR1_EX(%a6),%d1
	andi.w		&0x8000,%d1		# keep dst sign bit
	or.w		%d1,%d0			# concat {sgn,new exp}
	mov.w		%d0,FP_SCR1_EX(%a6)	# insert new dst exponent

	mov.l		(%sp)+,%d0		# return SCALE factor
	rts

quick_scale22:
# exponents differ too much: force dst exponent to 1 (smallest normal)
	andi.w		&0x8000,FP_SCR1_EX(%a6)	# zero dst exponent
	bset		&0x0,1+FP_SCR1_EX(%a6)	# set exp = 1

	mov.l		(%sp)+,%d0		# return SCALE factor
	rts

##########################################################################

#########################################################################
# XDEF ****************************************************************	#
#	scale_to_zero_src(): scale the exponent of extended precision	#
#			     value at FP_SCR0(a6).
#									#
# XREF ****************************************************************	#
#	norm() - normalize the mantissa if the operand was a DENORM	#
#									#
# INPUT ***************************************************************	#
#	FP_SCR0(a6) = extended precision operand to be scaled		#
#									#
# OUTPUT **************************************************************	#
#	FP_SCR0(a6) = scaled extended precision operand			#
#	d0          = scale value					#
#									#
# ALGORITHM ***********************************************************	#
#	Set the exponent of the input operand to 0x3fff. Save the value	#
# of the difference between the original and new exponent. Then,	#
# normalize the operand if it was a DENORM. Add this normalization	#
# value to the previous value. Return the result.			#
#									#
#########################################################################

	global		scale_to_zero_src
scale_to_zero_src:
	mov.w		FP_SCR0_EX(%a6),%d1	# extract operand's {sgn,exp}
	mov.w		%d1,%d0			# make a copy

	andi.l		&0x7fff,%d1		# extract operand's exponent

	andi.w		&0x8000,%d0		# extract operand's sgn
	or.w		&0x3fff,%d0		# insert new operand's exponent(=0)

	mov.w		%d0,FP_SCR0_EX(%a6)	# insert biased exponent

	cmpi.b		STAG(%a6),&DENORM	# is operand a denorm?
	beq.b		stzs_denorm		# normalize the DENORM

stzs_norm:
	mov.l		&0x3fff,%d0
	sub.l		%d1,%d0			# scale = BIAS + (-exp)

	rts

stzs_denorm:
	lea		FP_SCR0(%a6),%a0	# pass ptr to src op
	bsr.l		norm			# normalize denorm
	neg.l		%d0			# new exponent = -(shft val)
	mov.l		%d0,%d1			# prepare for op_norm call
	bra.b		stzs_norm		# finish scaling

###

#########################################################################
# XDEF ****************************************************************	#
#	scale_sqrt(): scale the input operand exponent so a subsequent	#
#		      fsqrt operation won't take an exception.		#
#									#
# XREF ****************************************************************	#
#	norm() - normalize the mantissa if the operand was a DENORM	#
#									#
# INPUT ***************************************************************	#
#	FP_SCR0(a6) = extended precision operand to be scaled		#
#									#
# OUTPUT **************************************************************	#
#	FP_SCR0(a6) = scaled extended precision operand			#
#	d0          = scale value					#
#									#
# ALGORITHM ***********************************************************	#
#	If the input operand is a DENORM, normalize it.
#	If the exponent of the input operand is even, set the exponent	#
# to 0x3ffe and return a scale factor of "(exp-0x3ffe)/2". If the	#
# exponent of the input operand is odd, set the exponent to 0x3fff and	#
# return a scale factor of "(exp-0x3fff)/2".				#
#									#
#########################################################################

	global		scale_sqrt
scale_sqrt:
	cmpi.b		STAG(%a6),&DENORM	# is operand a denorm?
	beq.b		ss_denorm		# normalize the DENORM

	mov.w		FP_SCR0_EX(%a6),%d1	# extract operand's {sgn,exp}
	andi.l		&0x7fff,%d1		# extract operand's exponent

	andi.w		&0x8000,FP_SCR0_EX(%a6)	# keep only operand's sgn

	btst		&0x0,%d1		# is exp even or odd?
	beq.b		ss_norm_even

# odd exponent: rebase to 0x3fff so (old - new) is even before halving
	ori.w		&0x3fff,FP_SCR0_EX(%a6)	# insert new exponent = 0x3fff

	mov.l		&0x3fff,%d0
	sub.l		%d1,%d0			# scale = BIAS + (-exp)
	asr.l		&0x1,%d0		# divide scale factor by 2
	rts

ss_norm_even:
	ori.w		&0x3ffe,FP_SCR0_EX(%a6)	# insert new exponent = 0x3ffe

	mov.l		&0x3ffe,%d0
	sub.l		%d1,%d0			# scale = BIAS + (-exp)
	asr.l		&0x1,%d0		# divide scale factor by 2
	rts

ss_denorm:
	lea		FP_SCR0(%a6),%a0	# pass ptr to src op
	bsr.l		norm			# normalize denorm; d0 = shift count

	btst		&0x0,%d0		# is exp even or odd?
	beq.b		ss_denorm_even

	ori.w		&0x3fff,FP_SCR0_EX(%a6)	# insert new exponent = 0x3fff

	add.l		&0x3fff,%d0
	asr.l		&0x1,%d0		# divide scale factor by 2
	rts

ss_denorm_even:
	ori.w		&0x3ffe,FP_SCR0_EX(%a6)	# insert new exponent = 0x3ffe

	add.l		&0x3ffe,%d0
	asr.l		&0x1,%d0		# divide scale factor by 2
	rts

###

#########################################################################
# XDEF ****************************************************************	#
#	scale_to_zero_dst(): scale the exponent of extended precision	#
#			     value at
FP_SCR1(a6).						#
#									#
# XREF ****************************************************************	#
#	norm() - normalize the mantissa if the operand was a DENORM	#
#									#
# INPUT ***************************************************************	#
#	FP_SCR1(a6) = extended precision operand to be scaled		#
#									#
# OUTPUT **************************************************************	#
#	FP_SCR1(a6) = scaled extended precision operand			#
#	d0	    = scale value					#
#									#
# ALGORITHM ***********************************************************	#
#	Set the exponent of the input operand to 0x3fff. Save the value #
# of the difference between the original and new exponent. Then,	#
# normalize the operand if it was a DENORM. Add this normalization	#
# value to the previous value. Return the result.			#
#									#
#########################################################################

	global		scale_to_zero_dst
scale_to_zero_dst:
	mov.w		FP_SCR1_EX(%a6),%d1	# extract operand's {sgn,exp}
	mov.w		%d1,%d0			# make a copy

	andi.l		&0x7fff,%d1		# extract operand's exponent

	andi.w		&0x8000,%d0		# keep only operand's sign bit
	or.w		&0x3fff,%d0		# set biased exponent to 0x3fff

	mov.w		%d0,FP_SCR1_EX(%a6)	# insert new {sgn,exp} word

	cmpi.b		DTAG(%a6),&DENORM	# is operand a DENORM?
	beq.b		stzd_denorm		# yes; normalize the DENORM first

# normalized input: scale factor is simply the exponent displacement.
stzd_norm:
	mov.l		&0x3fff,%d0
	sub.l		%d1,%d0			# scale = BIAS - exp
	rts

# DENORM input: normalize the mantissa, then fold the normalization
# shift count into the scale computation via %d1.
stzd_denorm:
	lea		FP_SCR1(%a6),%a0	# pass ptr to dst op
	bsr.l		norm			# normalize denorm
	neg.l		%d0			# new exponent = -(shft val)
	mov.l		%d0,%d1			# use it as the "exp" above
	bra.b		stzd_norm		# finish scaling

##########################################################################

#########################################################################
# XDEF ****************************************************************	#
#	res_qnan(): return default result w/ QNAN operand for dyadic	#
#	res_snan(): return default result w/ SNAN operand for dyadic	#
#	res_qnan_1op(): return dflt result w/ QNAN operand for monadic	#
#	res_snan_1op(): return dflt result w/ SNAN operand for monadic	#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	FP_SRC(a6) = pointer to extended precision src operand		#
#	FP_DST(a6) = pointer to extended precision dst operand		#
#									#
# OUTPUT **************************************************************	#
#	fp0 = default result						#
#									#
# ALGORITHM
***********************************************************	#
#	If either operand (but not both operands) of an operation is a	#
# nonsignalling NAN, then that NAN is returned as the result. If both	#
# operands are nonsignalling NANs, then the destination operand		#
# nonsignalling NAN is returned as the result.				#
#	If either operand to an operation is a signalling NAN (SNAN),	#
# then, the SNAN bit is set in the FPSR EXC byte. If the SNAN trap	#
# enable bit is set in the FPCR, then the trap is taken and the		#
# destination is not modified. If the SNAN trap enable bit is not set,	#
# then the SNAN is converted to a nonsignalling NAN (by setting the	#
# SNAN bit in the operand to one), and the operation continues as	#
# described in the preceding paragraph, for nonsignalling NANs.		#
#	Make sure the appropriate FPSR bits are set before exiting.	#
#									#
#########################################################################

# dyadic entry: dst operand takes precedence over src (SNAN before QNAN).
	global		res_qnan
	global		res_snan
res_qnan:
res_snan:
	cmp.b		DTAG(%a6), &SNAN	# is the dst an SNAN?
	beq.b		dst_snan2
	cmp.b		DTAG(%a6), &QNAN	# is the dst a QNAN?
	beq.b		dst_qnan2
# dst is not a NAN, so the src must be; falls through to the monadic entry.
src_nan:
	cmp.b		STAG(%a6), &QNAN
	beq.b		src_qnan2
# monadic entry for an SNAN src: quiet it, set SNAN/AIOP/NAN status bits.
	global		res_snan_1op
res_snan_1op:
src_snan2:
	bset		&0x6, FP_SRC_HI(%a6)	# set SNAN bit (quiet the NAN)
	or.l		&nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6)
	lea		FP_SRC(%a6), %a0	# result comes from src
	bra.b		nan_comp
# monadic entry for a QNAN src: only the NAN status bit is set.
	global		res_qnan_1op
res_qnan_1op:
src_qnan2:
	or.l		&nan_mask, USER_FPSR(%a6)
	lea		FP_SRC(%a6), %a0	# result comes from src
	bra.b		nan_comp
dst_snan2:
	or.l		&nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6)
	bset		&0x6, FP_DST_HI(%a6)	# set SNAN bit (quiet the NAN)
	lea		FP_DST(%a6), %a0	# result comes from dst
	bra.b		nan_comp
dst_qnan2:
	lea		FP_DST(%a6), %a0	# result comes from dst
	cmp.b		STAG(%a6), &SNAN	# src may still be an SNAN
	bne		nan_done
	or.l		&aiop_mask+snan_mask, USER_FPSR(%a6)
nan_done:
	or.l		&nan_mask, USER_FPSR(%a6)
# common exit: %a0 -> NAN to return; mirror its sign into FPSR and
# load it into fp0 as the default result.
nan_comp:
	btst		&0x7, FTEMP_EX(%a0)	# is NAN neg?
	beq.b		nan_not_neg
	or.l		&neg_mask, USER_FPSR(%a6)
nan_not_neg:
	fmovm.x		(%a0), &0x80		# return the NAN in fp0
	rts

#########################################################################
# XDEF ****************************************************************	#
#	res_operr(): return default result during operand error		#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	None								#
#									#
# OUTPUT **************************************************************	#
#	fp0 = default operand error result				#
#									#
# ALGORITHM ***********************************************************	#
#	A nonsignalling NAN is returned as the default result when	#
# an operand error occurs for the following cases:			#
#									#
#	Multiply: (Infinity x Zero)					#
#	Divide  : (Zero / Zero) || (Infinity / Infinity)		#
#									#
#########################################################################

	global		res_operr
res_operr:
	or.l		&nan_mask+operr_mask+aiop_mask, USER_FPSR(%a6)
	fmovm.x		nan_return(%pc), &0x80	# fp0 = default QNAN
	rts

# default nonsignalling NAN returned on operand error (all-ones mantissa).
nan_return:
	long		0x7fff0000, 0xffffffff, 0xffffffff
#########################################################################
# XDEF ****************************************************************	#
#	_denorm(): denormalize an intermediate result			#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	a0 = points to the operand to be denormalized			#
#		(in the internal extended format)			#
#									#
#	d0 = rounding precision						#
#									#
# OUTPUT **************************************************************	#
#	a0 = pointer to the denormalized result				#
#		(in the internal extended format)			#
#									#
#	d0 = guard,round,sticky						#
#									#
# ALGORITHM ***********************************************************	#
#	According to the exponent underflow threshold for the given	#
# precision, shift the mantissa bits to the right in order to raise the	#
# exponent of the operand to the threshold value. While shifting the	#
# mantissa bits right, maintain the value of the guard, round, and	#
# sticky bits.								#
# other notes:								#
#	(1) _denorm() is called by the underflow routines		#
#	(2) _denorm() does NOT affect the status register		#
#									#
#########################################################################

#
# table of exponent threshold values for each precision
# (indexed by rounding precision: ext=0, sgl=1, dbl=2)
#
tbl_thresh:
	short		0x0
	short		sgl_thresh
	short		dbl_thresh

	global		_denorm
_denorm:
#
# Load the exponent threshold for the precision selected and check
# to see if (threshold - exponent) is > 65 in which case we can
# simply calculate the sticky bit and zero the mantissa. otherwise
# we have to call the denormalization routine.
#
	lsr.b		&0x2, %d0		# shift prec to lo bits
	mov.w		(tbl_thresh.b,%pc,%d0.w*2), %d1 # load prec threshold
	mov.w		%d1, %d0		# copy d1 into d0
	sub.w		FTEMP_EX(%a0), %d0	# diff = threshold - exp
	cmpi.w		%d0, &66		# is diff > 65? (mant + g,r bits)
	bpl.b		denorm_set_stky		# yes; just calc sticky

	clr.l		%d0			# clear g,r,s
	btst		&inex2_bit, FPSR_EXCEPT(%a6) # was INEX2 already set?
	beq.b		denorm_call		# no; don't change anything
	bset		&29, %d0		# yes; carry it in as the sticky bit

denorm_call:
	bsr.l		dnrm_lp			# denormalize the number
	rts

#
# all bits would have been shifted off during the denorm so simply
# calculate if the sticky should be set and clear the entire mantissa.
#
denorm_set_stky:
	mov.l		&0x20000000, %d0	# set sticky bit in return value
	mov.w		%d1, FTEMP_EX(%a0)	# load exp with threshold
	clr.l		FTEMP_HI(%a0)		# zero hi(mantissa)
	clr.l		FTEMP_LO(%a0)		# zero lo(mantissa)
	rts

#									#
# dnrm_lp(): denormalize exponent/mantissa to specified threshold	#
#									#
# INPUT:								#
#	%a0	   : points to the operand to be denormalized		#
#	%d0{31:29} : initial guard,round,sticky				#
#	%d1{15:0}  : denormalization threshold				#
# OUTPUT:								#
#	%a0	   : points to the denormalized operand			#
#	%d0{31:29} : final guard,round,sticky				#
#									#

# *** Local Equates *** #
# NOTE(review): the bfextu's below read past FTEMP_LO2 into GRS, so this
# assumes L_SCR2 immediately follows L_SCR1 in the stack frame — confirm
# against the frame layout declared elsewhere in the package.
set	GRS,		L_SCR2			# g,r,s temp storage
set	FTEMP_LO2,	L_SCR1			# FTEMP_LO copy

	global		dnrm_lp
dnrm_lp:

#
# make a copy of FTEMP_LO and place the g,r,s bits directly after it
# in memory so as to make the bitfield extraction for denormalization easier.
#
	mov.l		FTEMP_LO(%a0), FTEMP_LO2(%a6) # make FTEMP_LO copy
	mov.l		%d0, GRS(%a6)		# place g,r,s after it

#
# check to see how much less than the underflow threshold the operand
# exponent is.
#
	mov.l		%d1, %d0		# copy the denorm threshold
	sub.w		FTEMP_EX(%a0), %d1	# d1 = threshold - uns exponent
	ble.b		dnrm_no_lp		# d1 <= 0; nothing to shift
	cmpi.w		%d1, &0x20		# is ( 0 <= d1 < 32) ?
	blt.b		case_1			# yes
	cmpi.w		%d1, &0x40		# is (32 <= d1 < 64) ?
	blt.b		case_2			# yes
	bra.w		case_3			# (d1 >= 64)

#
# No denormalization necessary
#
dnrm_no_lp:
	mov.l		GRS(%a6), %d0		# restore original g,r,s
	rts

#
# case (0<d1<32)
#
# %d0 = denorm threshold
# %d1 = "n" = amt to shift
#
#	---------------------------------------------------------
#	|     FTEMP_HI    |     FTEMP_LO     |grs000.........000|
#	---------------------------------------------------------
#	<-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
#	\          \           \          \
#	 \          \           \          \
#	  \          \           \          \
#	   \          \           \          \
#	    \          \           \          \
#	     \          \           \          \
#	      \          \           \          \
#	       \          \           \          \
#	<-(n)-><-(32 - n)-><------(32)-------><------(32)------->
#	---------------------------------------------------------
#	|0.....0| NEW_HI  |   NEW_FTEMP_LO    |grs              |
#	---------------------------------------------------------
#
case_1:
	mov.l		%d2, -(%sp)		# create temp storage

	mov.w		%d0, FTEMP_EX(%a0)	# exponent = denorm threshold
	mov.l		&32, %d0
	sub.w		%d1, %d0		# %d0 = 32 - %d1

	cmpi.w		%d1, &29		# is shft amt >= 29?
	blt.b		case1_extract		# no; no fix needed
# shift >= 29: g,r,s would overlap the extraction window, so fold the
# original g,r,s into the low byte of the FTEMP_LO copy first.
	mov.b		GRS(%a6), %d2
	or.b		%d2, 3+FTEMP_LO2(%a6)

case1_extract:
	bfextu		FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_HI
	bfextu		FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new FTEMP_LO
	bfextu		FTEMP_LO2(%a6){%d0:&32}, %d0 # %d0 = new G,R,S

	mov.l		%d2, FTEMP_HI(%a0)	# store new FTEMP_HI
	mov.l		%d1, FTEMP_LO(%a0)	# store new FTEMP_LO

	bftst		%d0{&2:&30}		# were bits shifted off?
	beq.b		case1_sticky_clear	# no; go finish
	bset		&rnd_stky_bit, %d0	# yes; set sticky bit

case1_sticky_clear:
	and.l		&0xe0000000, %d0	# clear all but G,R,S
	mov.l		(%sp)+, %d2		# restore temp register
	rts

#
# case (32<=d1<64)
#
# %d0 = denorm threshold
# %d1 = "n" = amt to shift
#
#	---------------------------------------------------------
#	|     FTEMP_HI    |     FTEMP_LO     |grs000.........000|
#	---------------------------------------------------------
#	<-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
#	\          \            \
#	 \          \            \
#	  \          \            -------------------
#	   \          --------------------           \
#	    -------------------           \           \
#	                       \           \           \
#	                        \           \           \
#	                         \           \           \
#	<-------(32)------><-(n)-><-(32 - n)-><------(32)------->
#	---------------------------------------------------------
#	|0...............0|0....0|   NEW_LO   |grs              |
#	---------------------------------------------------------
#
case_2:
	mov.l		%d2, -(%sp)		# create temp storage

	mov.w		%d0, FTEMP_EX(%a0)	# exponent = denorm threshold
	subi.w		&0x20, %d1		# %d1 now between 0 and 32
	mov.l		&0x20, %d0
	sub.w		%d1, %d0		# %d0 = 32 - %d1

# subtle step here; or in the g,r,s at the bottom of FTEMP_LO to minimize
# the number of bits to check for the sticky detect.
# it only plays a role in shift amounts of 61-63.
	mov.b		GRS(%a6), %d2
	or.b		%d2, 3+FTEMP_LO2(%a6)

	bfextu		FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_LO
	bfextu		FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new G,R,S

	bftst		%d1{&2:&30}		# were any bits shifted off?
	bne.b		case2_set_sticky	# yes; set sticky bit
	bftst		FTEMP_LO2(%a6){%d0:&31} # were any bits shifted off?
	bne.b		case2_set_sticky	# yes; set sticky bit

	mov.l		%d1, %d0		# move new G,R,S to %d0
	bra.b		case2_end

case2_set_sticky:
	mov.l		%d1, %d0		# move new G,R,S to %d0
	bset		&rnd_stky_bit, %d0	# set sticky bit

case2_end:
	clr.l		FTEMP_HI(%a0)		# store FTEMP_HI = 0
	mov.l		%d2, FTEMP_LO(%a0)	# store FTEMP_LO
	and.l		&0xe0000000, %d0	# clear all but G,R,S

	mov.l		(%sp)+,%d2		# restore temp register
	rts

#
# case (d1>=64)
#
# %d0 = denorm threshold
# %d1 = amt to shift
#
case_3:
	mov.w		%d0, FTEMP_EX(%a0)	# insert denorm threshold

	cmpi.w		%d1, &65		# is shift amt > 65?
	blt.b		case3_64		# no; it's == 64
	beq.b		case3_65		# no; it's == 65

#
# case (d1>65)
#
# Shift value is > 65 and out of range. All bits are shifted off.
# Return a zero mantissa with the sticky bit set.
#
	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
	mov.l		&0x20000000, %d0	# set sticky bit
	rts

#
# case (d1 == 64)
#
#	---------------------------------------------------------
#	|     FTEMP_HI    |     FTEMP_LO     |grs000.........000|
#	---------------------------------------------------------
#	<-------(32)------>
#	\                          \
#	 \                          \
#	  \                          \
#	   \                          ------------------------------
#	    -------------------------------                         \
#	                                   \                         \
#	                                    \                         \
#	                                     \                         \
#	                                      <-------(32)------>
#	---------------------------------------------------------
#	|0...............0|0................0|grs               |
#	---------------------------------------------------------
#
case3_64:
	mov.l		FTEMP_HI(%a0), %d0	# fetch hi(mantissa)
	mov.l		%d0, %d1		# make a copy
	and.l		&0xc0000000, %d0	# extract G,R
	and.l		&0x3fffffff, %d1	# extract other bits

	bra.b		case3_complete

#
# case (d1 == 65)
#
#	---------------------------------------------------------
#	|     FTEMP_HI    |     FTEMP_LO     |grs000.........000|
#	---------------------------------------------------------
#	<-------(32)------>
#	\                          \
#	 \                          \
#	  \                          \
#	   \                          ------------------------------
#	    --------------------------------                        \
#	                                    \                        \
#	                                     \                        \
#	                                      \                        \
#	                                       <-------(31)----->
#	---------------------------------------------------------
#	|0...............0|0................0|0rs               |
#	---------------------------------------------------------
#
case3_65:
	mov.l		FTEMP_HI(%a0), %d0	# fetch hi(mantissa)
	and.l		&0x80000000, %d0	# extract R bit
	lsr.l		&0x1, %d0		# shift high bit into R bit
# NOTE(review): %d1 still holds the shift amount (65) here, not a copy of
# FTEMP_HI as in case3_64 (which does "mov.l %d0,%d1" before masking).
# As written this and.l leaves %d1 nonzero, so the bne below always sets
# sticky on the ==65 path — looks like a missing copy; confirm against
# Motorola 060SP errata before changing.
	and.l		&0x7fffffff, %d1	# extract other bits

case3_complete:
# last operation done was an "and" of the bits shifted off so the condition
# codes are already set so branch accordingly.
	bne.b		case3_set_sticky	# yes; go set new sticky
	tst.l		FTEMP_LO(%a0)		# were any bits shifted off?
	bne.b		case3_set_sticky	# yes; go set new sticky
	tst.b		GRS(%a6)		# were any bits shifted off?
	bne.b		case3_set_sticky	# yes; go set new sticky

#
# no bits were shifted off so don't set the sticky bit.
# the guard and the entire mantissa is zero.
#
	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
	rts

#
# some bits were shifted off so set the sticky bit.
# the entire mantissa is zero.
#
case3_set_sticky:
	bset		&rnd_stky_bit,%d0	# set new sticky bit
	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
	rts

#########################################################################
# XDEF ****************************************************************	#
#	_round(): round result according to precision/mode		#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	a0	  = ptr to input operand in internal extended format	#
#	d1(hi)	  = contains rounding precision:			#
#			ext = $0000xxxx					#
#			sgl = $0004xxxx					#
#			dbl = $0008xxxx					#
#	d1(lo)	  = contains rounding mode:				#
#			RN  = $xxxx0000					#
#			RZ  = $xxxx0001					#
#			RM  = $xxxx0002					#
#			RP  = $xxxx0003					#
#	d0{31:29} = contains the g,r,s bits (extended)			#
#									#
# OUTPUT **************************************************************	#
#	a0 = pointer to rounded result					#
#									#
# ALGORITHM ***********************************************************	#
#	On return the value pointed to by a0 is correctly rounded,	#
#	a0 is preserved and the g-r-s bits in d0 are cleared.		#
#	The result is not typed - the tag field is invalid. The		#
#	result is still in the internal extended format.		#
#									#
#	The INEX bit of USER_FPSR will be set if the rounded result was	#
#	inexact (i.e. if any of the g-r-s bits were set).		#
#									#
#########################################################################

	global		_round
_round:
#
# ext_grs() looks at the rounding precision and sets the appropriate
# G,R,S bits.
# If (G,R,S == 0) then result is exact and round is done, else set
# the inex flag in status reg and continue.
#
	bsr.l		ext_grs			# extract G,R,S

	tst.l		%d0			# are G,R,S zero?
	beq.w		truncate		# yes; result exact, round is complete

	or.w		&inx2a_mask, 2+USER_FPSR(%a6) # set inex2/ainex

#
# Use rounding mode as an index into a jump table for these modes.
# All of the following assumes grs != 0.
#
	mov.w		(tbl_mode.b,%pc,%d1.w*2), %a1 # load jump offset
	jmp		(tbl_mode.b,%pc,%a1)	# jmp to rnd mode handler

tbl_mode:
	short		rnd_near - tbl_mode
	short		truncate - tbl_mode	# RZ always truncates
	short		rnd_mnus - tbl_mode
	short		rnd_plus - tbl_mode

#################################################################
#	ROUND PLUS INFINITY					#
#								#
#	If sign of fp number = 0 (positive), then add 1 to l.	#
#################################################################
rnd_plus:
	tst.b		FTEMP_SGN(%a0)		# check for sign
	bmi.w		truncate		# negative: round toward +inf = truncate

	mov.l		&0xffffffff, %d0	# force g,r,s to be all f's
	swap		%d1			# set up d1 for round prec.

	cmpi.b		%d1, &s_mode		# is prec = sgl?
	beq.w		add_sgl			# yes
	bgt.w		add_dbl			# no; it's dbl
	bra.w		add_ext			# no; it's ext

#################################################################
#	ROUND MINUS INFINITY					#
#								#
#	If sign of fp number = 1 (negative), then add 1 to l.	#
#################################################################
rnd_mnus:
	tst.b		FTEMP_SGN(%a0)		# check for sign
	bpl.w		truncate		# positive: round toward -inf = truncate

	mov.l		&0xffffffff, %d0	# force g,r,s to be all f's
	swap		%d1			# set up d1 for round prec.

	cmpi.b		%d1, &s_mode		# is prec = sgl?
	beq.w		add_sgl			# yes
	bgt.w		add_dbl			# no; it's dbl
	bra.w		add_ext			# no; it's ext

#################################################################
#	ROUND NEAREST						#
#								#
#	If (g=1), then add 1 to l and if (r=s=0), then clear l	#
#	Note that this will round to even in case of a tie.	#
#################################################################
rnd_near:
	asl.l		&0x1, %d0		# shift g-bit to c-bit
	bcc.w		truncate		# g=0: round down = truncate

	swap		%d1			# set up d1 for round prec.

	cmpi.b		%d1, &s_mode		# is prec = sgl?
	beq.w		add_sgl			# yes
	bgt.w		add_dbl			# no; it's dbl
	bra.w		add_ext			# no; it's ext

# *** LOCAL EQUATES ***
set	ad_1_sgl,	0x00000100	# constant to add 1 to l-bit in sgl prec
set	ad_1_dbl,	0x00000800	# constant to add 1 to l-bit in dbl prec

#########################
#	ADD SINGLE	#
#########################
add_sgl:
	add.l		&ad_1_sgl, FTEMP_HI(%a0)
	bcc.b		scc_clr			# no mantissa overflow
	roxr.w		FTEMP_HI(%a0)		# shift v-bit back in
	roxr.w		FTEMP_HI+2(%a0)		# shift v-bit back in
	add.w		&0x1, FTEMP_EX(%a0)	# and incr exponent
scc_clr:
	tst.l		%d0			# test for rs = 0
	bne.b		sgl_done
	and.w		&0xfe00, FTEMP_HI+2(%a0) # tie case: clear the l-bit (round to even)
sgl_done:
	and.l		&0xffffff00, FTEMP_HI(%a0) # truncate bits beyond sgl limit
	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
	rts

#########################
#	ADD EXTENDED	#
#########################
add_ext:
	addq.l		&1,FTEMP_LO(%a0)	# add 1 to l-bit
	bcc.b		xcc_clr			# test for carry out
	addq.l		&1,FTEMP_HI(%a0)	# propagate carry
	bcc.b		xcc_clr
	roxr.w		FTEMP_HI(%a0)		# mant is 0 so restore v-bit
	roxr.w		FTEMP_HI+2(%a0)		# mant is 0 so restore v-bit
	roxr.w		FTEMP_LO(%a0)
	roxr.w		FTEMP_LO+2(%a0)
	add.w		&0x1,FTEMP_EX(%a0)	# and inc exp
xcc_clr:
	tst.l		%d0			# test rs = 0
	bne.b		add_ext_done
	and.b		&0xfe,FTEMP_LO+3(%a0)	# tie case: clear the l bit (round to even)
add_ext_done:
	rts

#########################
#	ADD DOUBLE	#
#########################
add_dbl:
	add.l		&ad_1_dbl, FTEMP_LO(%a0) # add 1 to lsb
	bcc.b		dcc_clr			# no carry
	addq.l		&0x1, FTEMP_HI(%a0)	# propagate carry
	bcc.b		dcc_clr			# no carry

	roxr.w		FTEMP_HI(%a0)		# mant is 0 so restore v-bit
	roxr.w		FTEMP_HI+2(%a0)		# mant is 0 so restore v-bit
	roxr.w		FTEMP_LO(%a0)
	roxr.w		FTEMP_LO+2(%a0)
	addq.w		&0x1, FTEMP_EX(%a0)	# incr exponent
dcc_clr:
	tst.l		%d0			# test for rs = 0
	bne.b		dbl_done
	and.w		&0xf000, FTEMP_LO+2(%a0) # tie case: clear the l-bit (round to even)

dbl_done:
	and.l		&0xfffff800,FTEMP_LO(%a0) # truncate bits beyond dbl limit
	rts

###########################
# Truncate all other bits #
###########################
truncate:
	swap		%d1			# select rnd prec

	cmpi.b		%d1, &s_mode		# is prec sgl?
	beq.w		sgl_done		# yes
	bgt.b		dbl_done		# no; it's dbl
	rts					# no; it's ext

#
# ext_grs(): extract guard, round and sticky bits according to
#	     rounding precision.
#
# INPUT
#	d0	   = extended precision g,r,s (in d0{31:29})
#	d1	   = {PREC,ROUND}
# OUTPUT
#	d0{31:29}  = guard, round, sticky
#
# The ext_grs extract the guard/round/sticky bits according to the
# selected rounding precision. It is called by the round subroutine
# only. All registers except d0 are kept intact. d0 becomes an
# updated guard,round,sticky in d0{31:29}
#
# Notes: the ext_grs uses the round PREC, and therefore has to swap d1
#	 prior to usage, and needs to restore d1 to original. this
#	 routine is tightly tied to the round routine and not meant to
#	 uphold standard subroutine calling practices.
#

ext_grs:
	swap		%d1			# have d1.w point to round precision
	tst.b		%d1			# is rnd prec = extended?
	bne.b		ext_grs_not_ext		# no; go handle sgl or dbl

#
# %d0 actually already hold g,r,s since _round() had it before calling
# this function. so, as long as we don't disturb it, we are "returning" it.
6350*4882a593Smuzhiyun# 6351*4882a593Smuzhiyunext_grs_ext: 6352*4882a593Smuzhiyun swap %d1 # yes; return to correct positions 6353*4882a593Smuzhiyun rts 6354*4882a593Smuzhiyun 6355*4882a593Smuzhiyunext_grs_not_ext: 6356*4882a593Smuzhiyun movm.l &0x3000, -(%sp) # make some temp registers {d2/d3} 6357*4882a593Smuzhiyun 6358*4882a593Smuzhiyun cmpi.b %d1, &s_mode # is rnd prec = sgl? 6359*4882a593Smuzhiyun bne.b ext_grs_dbl # no; go handle dbl 6360*4882a593Smuzhiyun 6361*4882a593Smuzhiyun# 6362*4882a593Smuzhiyun# sgl: 6363*4882a593Smuzhiyun# 96 64 40 32 0 6364*4882a593Smuzhiyun# ----------------------------------------------------- 6365*4882a593Smuzhiyun# | EXP |XXXXXXX| |xx | |grs| 6366*4882a593Smuzhiyun# ----------------------------------------------------- 6367*4882a593Smuzhiyun# <--(24)--->nn\ / 6368*4882a593Smuzhiyun# ee --------------------- 6369*4882a593Smuzhiyun# ww | 6370*4882a593Smuzhiyun# v 6371*4882a593Smuzhiyun# gr new sticky 6372*4882a593Smuzhiyun# 6373*4882a593Smuzhiyunext_grs_sgl: 6374*4882a593Smuzhiyun bfextu FTEMP_HI(%a0){&24:&2}, %d3 # sgl prec. g-r are 2 bits right 6375*4882a593Smuzhiyun mov.l &30, %d2 # of the sgl prec. 
limits 6376*4882a593Smuzhiyun lsl.l %d2, %d3 # shift g-r bits to MSB of d3 6377*4882a593Smuzhiyun mov.l FTEMP_HI(%a0), %d2 # get word 2 for s-bit test 6378*4882a593Smuzhiyun and.l &0x0000003f, %d2 # s bit is the or of all other 6379*4882a593Smuzhiyun bne.b ext_grs_st_stky # bits to the right of g-r 6380*4882a593Smuzhiyun tst.l FTEMP_LO(%a0) # test lower mantissa 6381*4882a593Smuzhiyun bne.b ext_grs_st_stky # if any are set, set sticky 6382*4882a593Smuzhiyun tst.l %d0 # test original g,r,s 6383*4882a593Smuzhiyun bne.b ext_grs_st_stky # if any are set, set sticky 6384*4882a593Smuzhiyun bra.b ext_grs_end_sd # if words 3 and 4 are clr, exit 6385*4882a593Smuzhiyun 6386*4882a593Smuzhiyun# 6387*4882a593Smuzhiyun# dbl: 6388*4882a593Smuzhiyun# 96 64 32 11 0 6389*4882a593Smuzhiyun# ----------------------------------------------------- 6390*4882a593Smuzhiyun# | EXP |XXXXXXX| | |xx |grs| 6391*4882a593Smuzhiyun# ----------------------------------------------------- 6392*4882a593Smuzhiyun# nn\ / 6393*4882a593Smuzhiyun# ee ------- 6394*4882a593Smuzhiyun# ww | 6395*4882a593Smuzhiyun# v 6396*4882a593Smuzhiyun# gr new sticky 6397*4882a593Smuzhiyun# 6398*4882a593Smuzhiyunext_grs_dbl: 6399*4882a593Smuzhiyun bfextu FTEMP_LO(%a0){&21:&2}, %d3 # dbl-prec. g-r are 2 bits right 6400*4882a593Smuzhiyun mov.l &30, %d2 # of the dbl prec. 
limits 6401*4882a593Smuzhiyun lsl.l %d2, %d3 # shift g-r bits to the MSB of d3 6402*4882a593Smuzhiyun mov.l FTEMP_LO(%a0), %d2 # get lower mantissa for s-bit test 6403*4882a593Smuzhiyun and.l &0x000001ff, %d2 # s bit is the or-ing of all 6404*4882a593Smuzhiyun bne.b ext_grs_st_stky # other bits to the right of g-r 6405*4882a593Smuzhiyun tst.l %d0 # test word original g,r,s 6406*4882a593Smuzhiyun bne.b ext_grs_st_stky # if any are set, set sticky 6407*4882a593Smuzhiyun bra.b ext_grs_end_sd # if clear, exit 6408*4882a593Smuzhiyun 6409*4882a593Smuzhiyunext_grs_st_stky: 6410*4882a593Smuzhiyun bset &rnd_stky_bit, %d3 # set sticky bit 6411*4882a593Smuzhiyunext_grs_end_sd: 6412*4882a593Smuzhiyun mov.l %d3, %d0 # return grs to d0 6413*4882a593Smuzhiyun 6414*4882a593Smuzhiyun movm.l (%sp)+, &0xc # restore scratch registers {d2/d3} 6415*4882a593Smuzhiyun 6416*4882a593Smuzhiyun swap %d1 # restore d1 to original 6417*4882a593Smuzhiyun rts 6418*4882a593Smuzhiyun 6419*4882a593Smuzhiyun######################################################################### 6420*4882a593Smuzhiyun# norm(): normalize the mantissa of an extended precision input. the # 6421*4882a593Smuzhiyun# input operand should not be normalized already. 
# 6422*4882a593Smuzhiyun# # 6423*4882a593Smuzhiyun# XDEF **************************************************************** # 6424*4882a593Smuzhiyun# norm() # 6425*4882a593Smuzhiyun# # 6426*4882a593Smuzhiyun# XREF **************************************************************** # 6427*4882a593Smuzhiyun# none # 6428*4882a593Smuzhiyun# # 6429*4882a593Smuzhiyun# INPUT *************************************************************** # 6430*4882a593Smuzhiyun# a0 = pointer fp extended precision operand to normalize # 6431*4882a593Smuzhiyun# # 6432*4882a593Smuzhiyun# OUTPUT ************************************************************** # 6433*4882a593Smuzhiyun# d0 = number of bit positions the mantissa was shifted # 6434*4882a593Smuzhiyun# a0 = the input operand's mantissa is normalized; the exponent # 6435*4882a593Smuzhiyun# is unchanged. # 6436*4882a593Smuzhiyun# # 6437*4882a593Smuzhiyun######################################################################### 6438*4882a593Smuzhiyun global norm 6439*4882a593Smuzhiyunnorm: 6440*4882a593Smuzhiyun mov.l %d2, -(%sp) # create some temp regs 6441*4882a593Smuzhiyun mov.l %d3, -(%sp) 6442*4882a593Smuzhiyun 6443*4882a593Smuzhiyun mov.l FTEMP_HI(%a0), %d0 # load hi(mantissa) 6444*4882a593Smuzhiyun mov.l FTEMP_LO(%a0), %d1 # load lo(mantissa) 6445*4882a593Smuzhiyun 6446*4882a593Smuzhiyun bfffo %d0{&0:&32}, %d2 # how many places to shift? 6447*4882a593Smuzhiyun beq.b norm_lo # hi(man) is all zeroes! 
6448*4882a593Smuzhiyun 6449*4882a593Smuzhiyunnorm_hi: 6450*4882a593Smuzhiyun lsl.l %d2, %d0 # left shift hi(man) 6451*4882a593Smuzhiyun bfextu %d1{&0:%d2}, %d3 # extract lo bits 6452*4882a593Smuzhiyun 6453*4882a593Smuzhiyun or.l %d3, %d0 # create hi(man) 6454*4882a593Smuzhiyun lsl.l %d2, %d1 # create lo(man) 6455*4882a593Smuzhiyun 6456*4882a593Smuzhiyun mov.l %d0, FTEMP_HI(%a0) # store new hi(man) 6457*4882a593Smuzhiyun mov.l %d1, FTEMP_LO(%a0) # store new lo(man) 6458*4882a593Smuzhiyun 6459*4882a593Smuzhiyun mov.l %d2, %d0 # return shift amount 6460*4882a593Smuzhiyun 6461*4882a593Smuzhiyun mov.l (%sp)+, %d3 # restore temp regs 6462*4882a593Smuzhiyun mov.l (%sp)+, %d2 6463*4882a593Smuzhiyun 6464*4882a593Smuzhiyun rts 6465*4882a593Smuzhiyun 6466*4882a593Smuzhiyunnorm_lo: 6467*4882a593Smuzhiyun bfffo %d1{&0:&32}, %d2 # how many places to shift? 6468*4882a593Smuzhiyun lsl.l %d2, %d1 # shift lo(man) 6469*4882a593Smuzhiyun add.l &32, %d2 # add 32 to shft amount 6470*4882a593Smuzhiyun 6471*4882a593Smuzhiyun mov.l %d1, FTEMP_HI(%a0) # store hi(man) 6472*4882a593Smuzhiyun clr.l FTEMP_LO(%a0) # lo(man) is now zero 6473*4882a593Smuzhiyun 6474*4882a593Smuzhiyun mov.l %d2, %d0 # return shift amount 6475*4882a593Smuzhiyun 6476*4882a593Smuzhiyun mov.l (%sp)+, %d3 # restore temp regs 6477*4882a593Smuzhiyun mov.l (%sp)+, %d2 6478*4882a593Smuzhiyun 6479*4882a593Smuzhiyun rts 6480*4882a593Smuzhiyun 6481*4882a593Smuzhiyun######################################################################### 6482*4882a593Smuzhiyun# unnorm_fix(): - changes an UNNORM to one of NORM, DENORM, or ZERO # 6483*4882a593Smuzhiyun# - returns corresponding optype tag # 6484*4882a593Smuzhiyun# # 6485*4882a593Smuzhiyun# XDEF **************************************************************** # 6486*4882a593Smuzhiyun# unnorm_fix() # 6487*4882a593Smuzhiyun# # 6488*4882a593Smuzhiyun# XREF **************************************************************** # 6489*4882a593Smuzhiyun# norm() - normalize the mantissa # 
6490*4882a593Smuzhiyun# # 6491*4882a593Smuzhiyun# INPUT *************************************************************** # 6492*4882a593Smuzhiyun# a0 = pointer to unnormalized extended precision number # 6493*4882a593Smuzhiyun# # 6494*4882a593Smuzhiyun# OUTPUT ************************************************************** # 6495*4882a593Smuzhiyun# d0 = optype tag - is corrected to one of NORM, DENORM, or ZERO # 6496*4882a593Smuzhiyun# a0 = input operand has been converted to a norm, denorm, or # 6497*4882a593Smuzhiyun# zero; both the exponent and mantissa are changed. # 6498*4882a593Smuzhiyun# # 6499*4882a593Smuzhiyun######################################################################### 6500*4882a593Smuzhiyun 6501*4882a593Smuzhiyun global unnorm_fix 6502*4882a593Smuzhiyununnorm_fix: 6503*4882a593Smuzhiyun bfffo FTEMP_HI(%a0){&0:&32}, %d0 # how many shifts are needed? 6504*4882a593Smuzhiyun bne.b unnorm_shift # hi(man) is not all zeroes 6505*4882a593Smuzhiyun 6506*4882a593Smuzhiyun# 6507*4882a593Smuzhiyun# hi(man) is all zeroes so see if any bits in lo(man) are set 6508*4882a593Smuzhiyun# 6509*4882a593Smuzhiyununnorm_chk_lo: 6510*4882a593Smuzhiyun bfffo FTEMP_LO(%a0){&0:&32}, %d0 # is operand really a zero? 6511*4882a593Smuzhiyun beq.w unnorm_zero # yes 6512*4882a593Smuzhiyun 6513*4882a593Smuzhiyun add.w &32, %d0 # no; fix shift distance 6514*4882a593Smuzhiyun 6515*4882a593Smuzhiyun# 6516*4882a593Smuzhiyun# d0 = # shifts needed for complete normalization 6517*4882a593Smuzhiyun# 6518*4882a593Smuzhiyununnorm_shift: 6519*4882a593Smuzhiyun clr.l %d1 # clear top word 6520*4882a593Smuzhiyun mov.w FTEMP_EX(%a0), %d1 # extract exponent 6521*4882a593Smuzhiyun and.w &0x7fff, %d1 # strip off sgn 6522*4882a593Smuzhiyun 6523*4882a593Smuzhiyun cmp.w %d0, %d1 # will denorm push exp < 0? 6524*4882a593Smuzhiyun bgt.b unnorm_nrm_zero # yes; denorm only until exp = 0 6525*4882a593Smuzhiyun 6526*4882a593Smuzhiyun# 6527*4882a593Smuzhiyun# exponent would not go < 0. 
Therefore, number stays normalized 6528*4882a593Smuzhiyun# 6529*4882a593Smuzhiyun sub.w %d0, %d1 # shift exponent value 6530*4882a593Smuzhiyun mov.w FTEMP_EX(%a0), %d0 # load old exponent 6531*4882a593Smuzhiyun and.w &0x8000, %d0 # save old sign 6532*4882a593Smuzhiyun or.w %d0, %d1 # {sgn,new exp} 6533*4882a593Smuzhiyun mov.w %d1, FTEMP_EX(%a0) # insert new exponent 6534*4882a593Smuzhiyun 6535*4882a593Smuzhiyun bsr.l norm # normalize UNNORM 6536*4882a593Smuzhiyun 6537*4882a593Smuzhiyun mov.b &NORM, %d0 # return new optype tag 6538*4882a593Smuzhiyun rts 6539*4882a593Smuzhiyun 6540*4882a593Smuzhiyun# 6541*4882a593Smuzhiyun# exponent would go < 0, so only denormalize until exp = 0 6542*4882a593Smuzhiyun# 6543*4882a593Smuzhiyununnorm_nrm_zero: 6544*4882a593Smuzhiyun cmp.b %d1, &32 # is exp <= 32? 6545*4882a593Smuzhiyun bgt.b unnorm_nrm_zero_lrg # no; go handle large exponent 6546*4882a593Smuzhiyun 6547*4882a593Smuzhiyun bfextu FTEMP_HI(%a0){%d1:&32}, %d0 # extract new hi(man) 6548*4882a593Smuzhiyun mov.l %d0, FTEMP_HI(%a0) # save new hi(man) 6549*4882a593Smuzhiyun 6550*4882a593Smuzhiyun mov.l FTEMP_LO(%a0), %d0 # fetch old lo(man) 6551*4882a593Smuzhiyun lsl.l %d1, %d0 # extract new lo(man) 6552*4882a593Smuzhiyun mov.l %d0, FTEMP_LO(%a0) # save new lo(man) 6553*4882a593Smuzhiyun 6554*4882a593Smuzhiyun and.w &0x8000, FTEMP_EX(%a0) # set exp = 0 6555*4882a593Smuzhiyun 6556*4882a593Smuzhiyun mov.b &DENORM, %d0 # return new optype tag 6557*4882a593Smuzhiyun rts 6558*4882a593Smuzhiyun 6559*4882a593Smuzhiyun# 6560*4882a593Smuzhiyun# only mantissa bits set are in lo(man) 6561*4882a593Smuzhiyun# 6562*4882a593Smuzhiyununnorm_nrm_zero_lrg: 6563*4882a593Smuzhiyun sub.w &32, %d1 # adjust shft amt by 32 6564*4882a593Smuzhiyun 6565*4882a593Smuzhiyun mov.l FTEMP_LO(%a0), %d0 # fetch old lo(man) 6566*4882a593Smuzhiyun lsl.l %d1, %d0 # left shift lo(man) 6567*4882a593Smuzhiyun 6568*4882a593Smuzhiyun mov.l %d0, FTEMP_HI(%a0) # store new hi(man) 6569*4882a593Smuzhiyun clr.l FTEMP_LO(%a0) 
# lo(man) = 0 6570*4882a593Smuzhiyun 6571*4882a593Smuzhiyun and.w &0x8000, FTEMP_EX(%a0) # set exp = 0 6572*4882a593Smuzhiyun 6573*4882a593Smuzhiyun mov.b &DENORM, %d0 # return new optype tag 6574*4882a593Smuzhiyun rts 6575*4882a593Smuzhiyun 6576*4882a593Smuzhiyun# 6577*4882a593Smuzhiyun# whole mantissa is zero so this UNNORM is actually a zero 6578*4882a593Smuzhiyun# 6579*4882a593Smuzhiyununnorm_zero: 6580*4882a593Smuzhiyun and.w &0x8000, FTEMP_EX(%a0) # force exponent to zero 6581*4882a593Smuzhiyun 6582*4882a593Smuzhiyun mov.b &ZERO, %d0 # fix optype tag 6583*4882a593Smuzhiyun rts 6584*4882a593Smuzhiyun 6585*4882a593Smuzhiyun######################################################################### 6586*4882a593Smuzhiyun# XDEF **************************************************************** # 6587*4882a593Smuzhiyun# set_tag_x(): return the optype of the input ext fp number # 6588*4882a593Smuzhiyun# # 6589*4882a593Smuzhiyun# XREF **************************************************************** # 6590*4882a593Smuzhiyun# None # 6591*4882a593Smuzhiyun# # 6592*4882a593Smuzhiyun# INPUT *************************************************************** # 6593*4882a593Smuzhiyun# a0 = pointer to extended precision operand # 6594*4882a593Smuzhiyun# # 6595*4882a593Smuzhiyun# OUTPUT ************************************************************** # 6596*4882a593Smuzhiyun# d0 = value of type tag # 6597*4882a593Smuzhiyun# one of: NORM, INF, QNAN, SNAN, DENORM, UNNORM, ZERO # 6598*4882a593Smuzhiyun# # 6599*4882a593Smuzhiyun# ALGORITHM *********************************************************** # 6600*4882a593Smuzhiyun# Simply test the exponent, j-bit, and mantissa values to # 6601*4882a593Smuzhiyun# determine the type of operand. # 6602*4882a593Smuzhiyun# If it's an unnormalized zero, alter the operand and force it # 6603*4882a593Smuzhiyun# to be a normal zero. 
# 6604*4882a593Smuzhiyun# # 6605*4882a593Smuzhiyun######################################################################### 6606*4882a593Smuzhiyun 6607*4882a593Smuzhiyun global set_tag_x 6608*4882a593Smuzhiyunset_tag_x: 6609*4882a593Smuzhiyun mov.w FTEMP_EX(%a0), %d0 # extract exponent 6610*4882a593Smuzhiyun andi.w &0x7fff, %d0 # strip off sign 6611*4882a593Smuzhiyun cmpi.w %d0, &0x7fff # is (EXP == MAX)? 6612*4882a593Smuzhiyun beq.b inf_or_nan_x 6613*4882a593Smuzhiyunnot_inf_or_nan_x: 6614*4882a593Smuzhiyun btst &0x7,FTEMP_HI(%a0) 6615*4882a593Smuzhiyun beq.b not_norm_x 6616*4882a593Smuzhiyunis_norm_x: 6617*4882a593Smuzhiyun mov.b &NORM, %d0 6618*4882a593Smuzhiyun rts 6619*4882a593Smuzhiyunnot_norm_x: 6620*4882a593Smuzhiyun tst.w %d0 # is exponent = 0? 6621*4882a593Smuzhiyun bne.b is_unnorm_x 6622*4882a593Smuzhiyunnot_unnorm_x: 6623*4882a593Smuzhiyun tst.l FTEMP_HI(%a0) 6624*4882a593Smuzhiyun bne.b is_denorm_x 6625*4882a593Smuzhiyun tst.l FTEMP_LO(%a0) 6626*4882a593Smuzhiyun bne.b is_denorm_x 6627*4882a593Smuzhiyunis_zero_x: 6628*4882a593Smuzhiyun mov.b &ZERO, %d0 6629*4882a593Smuzhiyun rts 6630*4882a593Smuzhiyunis_denorm_x: 6631*4882a593Smuzhiyun mov.b &DENORM, %d0 6632*4882a593Smuzhiyun rts 6633*4882a593Smuzhiyun# must distinguish now "Unnormalized zeroes" which we 6634*4882a593Smuzhiyun# must convert to zero. 6635*4882a593Smuzhiyunis_unnorm_x: 6636*4882a593Smuzhiyun tst.l FTEMP_HI(%a0) 6637*4882a593Smuzhiyun bne.b is_unnorm_reg_x 6638*4882a593Smuzhiyun tst.l FTEMP_LO(%a0) 6639*4882a593Smuzhiyun bne.b is_unnorm_reg_x 6640*4882a593Smuzhiyun# it's an "unnormalized zero". let's convert it to an actual zero... 
6641*4882a593Smuzhiyun andi.w &0x8000,FTEMP_EX(%a0) # clear exponent 6642*4882a593Smuzhiyun mov.b &ZERO, %d0 6643*4882a593Smuzhiyun rts 6644*4882a593Smuzhiyunis_unnorm_reg_x: 6645*4882a593Smuzhiyun mov.b &UNNORM, %d0 6646*4882a593Smuzhiyun rts 6647*4882a593Smuzhiyuninf_or_nan_x: 6648*4882a593Smuzhiyun tst.l FTEMP_LO(%a0) 6649*4882a593Smuzhiyun bne.b is_nan_x 6650*4882a593Smuzhiyun mov.l FTEMP_HI(%a0), %d0 6651*4882a593Smuzhiyun and.l &0x7fffffff, %d0 # msb is a don't care! 6652*4882a593Smuzhiyun bne.b is_nan_x 6653*4882a593Smuzhiyunis_inf_x: 6654*4882a593Smuzhiyun mov.b &INF, %d0 6655*4882a593Smuzhiyun rts 6656*4882a593Smuzhiyunis_nan_x: 6657*4882a593Smuzhiyun btst &0x6, FTEMP_HI(%a0) 6658*4882a593Smuzhiyun beq.b is_snan_x 6659*4882a593Smuzhiyun mov.b &QNAN, %d0 6660*4882a593Smuzhiyun rts 6661*4882a593Smuzhiyunis_snan_x: 6662*4882a593Smuzhiyun mov.b &SNAN, %d0 6663*4882a593Smuzhiyun rts 6664*4882a593Smuzhiyun 6665*4882a593Smuzhiyun######################################################################### 6666*4882a593Smuzhiyun# XDEF **************************************************************** # 6667*4882a593Smuzhiyun# set_tag_d(): return the optype of the input dbl fp number # 6668*4882a593Smuzhiyun# # 6669*4882a593Smuzhiyun# XREF **************************************************************** # 6670*4882a593Smuzhiyun# None # 6671*4882a593Smuzhiyun# # 6672*4882a593Smuzhiyun# INPUT *************************************************************** # 6673*4882a593Smuzhiyun# a0 = points to double precision operand # 6674*4882a593Smuzhiyun# # 6675*4882a593Smuzhiyun# OUTPUT ************************************************************** # 6676*4882a593Smuzhiyun# d0 = value of type tag # 6677*4882a593Smuzhiyun# one of: NORM, INF, QNAN, SNAN, DENORM, ZERO # 6678*4882a593Smuzhiyun# # 6679*4882a593Smuzhiyun# ALGORITHM *********************************************************** # 6680*4882a593Smuzhiyun# Simply test the exponent, j-bit, and mantissa values to # 
6681*4882a593Smuzhiyun# determine the type of operand. # 6682*4882a593Smuzhiyun# # 6683*4882a593Smuzhiyun######################################################################### 6684*4882a593Smuzhiyun 6685*4882a593Smuzhiyun global set_tag_d 6686*4882a593Smuzhiyunset_tag_d: 6687*4882a593Smuzhiyun mov.l FTEMP(%a0), %d0 6688*4882a593Smuzhiyun mov.l %d0, %d1 6689*4882a593Smuzhiyun 6690*4882a593Smuzhiyun andi.l &0x7ff00000, %d0 6691*4882a593Smuzhiyun beq.b zero_or_denorm_d 6692*4882a593Smuzhiyun 6693*4882a593Smuzhiyun cmpi.l %d0, &0x7ff00000 6694*4882a593Smuzhiyun beq.b inf_or_nan_d 6695*4882a593Smuzhiyun 6696*4882a593Smuzhiyunis_norm_d: 6697*4882a593Smuzhiyun mov.b &NORM, %d0 6698*4882a593Smuzhiyun rts 6699*4882a593Smuzhiyunzero_or_denorm_d: 6700*4882a593Smuzhiyun and.l &0x000fffff, %d1 6701*4882a593Smuzhiyun bne is_denorm_d 6702*4882a593Smuzhiyun tst.l 4+FTEMP(%a0) 6703*4882a593Smuzhiyun bne is_denorm_d 6704*4882a593Smuzhiyunis_zero_d: 6705*4882a593Smuzhiyun mov.b &ZERO, %d0 6706*4882a593Smuzhiyun rts 6707*4882a593Smuzhiyunis_denorm_d: 6708*4882a593Smuzhiyun mov.b &DENORM, %d0 6709*4882a593Smuzhiyun rts 6710*4882a593Smuzhiyuninf_or_nan_d: 6711*4882a593Smuzhiyun and.l &0x000fffff, %d1 6712*4882a593Smuzhiyun bne is_nan_d 6713*4882a593Smuzhiyun tst.l 4+FTEMP(%a0) 6714*4882a593Smuzhiyun bne is_nan_d 6715*4882a593Smuzhiyunis_inf_d: 6716*4882a593Smuzhiyun mov.b &INF, %d0 6717*4882a593Smuzhiyun rts 6718*4882a593Smuzhiyunis_nan_d: 6719*4882a593Smuzhiyun btst &19, %d1 6720*4882a593Smuzhiyun bne is_qnan_d 6721*4882a593Smuzhiyunis_snan_d: 6722*4882a593Smuzhiyun mov.b &SNAN, %d0 6723*4882a593Smuzhiyun rts 6724*4882a593Smuzhiyunis_qnan_d: 6725*4882a593Smuzhiyun mov.b &QNAN, %d0 6726*4882a593Smuzhiyun rts 6727*4882a593Smuzhiyun 6728*4882a593Smuzhiyun######################################################################### 6729*4882a593Smuzhiyun# XDEF **************************************************************** # 6730*4882a593Smuzhiyun# set_tag_s(): return the optype of the 
input sgl fp number # 6731*4882a593Smuzhiyun# # 6732*4882a593Smuzhiyun# XREF **************************************************************** # 6733*4882a593Smuzhiyun# None # 6734*4882a593Smuzhiyun# # 6735*4882a593Smuzhiyun# INPUT *************************************************************** # 6736*4882a593Smuzhiyun# a0 = pointer to single precision operand # 6737*4882a593Smuzhiyun# # 6738*4882a593Smuzhiyun# OUTPUT ************************************************************** # 6739*4882a593Smuzhiyun# d0 = value of type tag # 6740*4882a593Smuzhiyun# one of: NORM, INF, QNAN, SNAN, DENORM, ZERO # 6741*4882a593Smuzhiyun# # 6742*4882a593Smuzhiyun# ALGORITHM *********************************************************** # 6743*4882a593Smuzhiyun# Simply test the exponent, j-bit, and mantissa values to # 6744*4882a593Smuzhiyun# determine the type of operand. # 6745*4882a593Smuzhiyun# # 6746*4882a593Smuzhiyun######################################################################### 6747*4882a593Smuzhiyun 6748*4882a593Smuzhiyun global set_tag_s 6749*4882a593Smuzhiyunset_tag_s: 6750*4882a593Smuzhiyun mov.l FTEMP(%a0), %d0 6751*4882a593Smuzhiyun mov.l %d0, %d1 6752*4882a593Smuzhiyun 6753*4882a593Smuzhiyun andi.l &0x7f800000, %d0 6754*4882a593Smuzhiyun beq.b zero_or_denorm_s 6755*4882a593Smuzhiyun 6756*4882a593Smuzhiyun cmpi.l %d0, &0x7f800000 6757*4882a593Smuzhiyun beq.b inf_or_nan_s 6758*4882a593Smuzhiyun 6759*4882a593Smuzhiyunis_norm_s: 6760*4882a593Smuzhiyun mov.b &NORM, %d0 6761*4882a593Smuzhiyun rts 6762*4882a593Smuzhiyunzero_or_denorm_s: 6763*4882a593Smuzhiyun and.l &0x007fffff, %d1 6764*4882a593Smuzhiyun bne is_denorm_s 6765*4882a593Smuzhiyunis_zero_s: 6766*4882a593Smuzhiyun mov.b &ZERO, %d0 6767*4882a593Smuzhiyun rts 6768*4882a593Smuzhiyunis_denorm_s: 6769*4882a593Smuzhiyun mov.b &DENORM, %d0 6770*4882a593Smuzhiyun rts 6771*4882a593Smuzhiyuninf_or_nan_s: 6772*4882a593Smuzhiyun and.l &0x007fffff, %d1 6773*4882a593Smuzhiyun bne is_nan_s 6774*4882a593Smuzhiyunis_inf_s: 
6775*4882a593Smuzhiyun mov.b &INF, %d0 6776*4882a593Smuzhiyun rts 6777*4882a593Smuzhiyunis_nan_s: 6778*4882a593Smuzhiyun btst &22, %d1 6779*4882a593Smuzhiyun bne is_qnan_s 6780*4882a593Smuzhiyunis_snan_s: 6781*4882a593Smuzhiyun mov.b &SNAN, %d0 6782*4882a593Smuzhiyun rts 6783*4882a593Smuzhiyunis_qnan_s: 6784*4882a593Smuzhiyun mov.b &QNAN, %d0 6785*4882a593Smuzhiyun rts 6786*4882a593Smuzhiyun 6787*4882a593Smuzhiyun######################################################################### 6788*4882a593Smuzhiyun# XDEF **************************************************************** # 6789*4882a593Smuzhiyun# unf_res(): routine to produce default underflow result of a # 6790*4882a593Smuzhiyun# scaled extended precision number; this is used by # 6791*4882a593Smuzhiyun# fadd/fdiv/fmul/etc. emulation routines. # 6792*4882a593Smuzhiyun# unf_res4(): same as above but for fsglmul/fsgldiv which use # 6793*4882a593Smuzhiyun# single round prec and extended prec mode. # 6794*4882a593Smuzhiyun# # 6795*4882a593Smuzhiyun# XREF **************************************************************** # 6796*4882a593Smuzhiyun# _denorm() - denormalize according to scale factor # 6797*4882a593Smuzhiyun# _round() - round denormalized number according to rnd prec # 6798*4882a593Smuzhiyun# # 6799*4882a593Smuzhiyun# INPUT *************************************************************** # 6800*4882a593Smuzhiyun# a0 = pointer to extended precison operand # 6801*4882a593Smuzhiyun# d0 = scale factor # 6802*4882a593Smuzhiyun# d1 = rounding precision/mode # 6803*4882a593Smuzhiyun# # 6804*4882a593Smuzhiyun# OUTPUT ************************************************************** # 6805*4882a593Smuzhiyun# a0 = pointer to default underflow result in extended precision # 6806*4882a593Smuzhiyun# d0.b = result FPSR_cc which caller may or may not want to save # 6807*4882a593Smuzhiyun# # 6808*4882a593Smuzhiyun# ALGORITHM *********************************************************** # 6809*4882a593Smuzhiyun# Convert the 
input operand to "internal format" which means the # 6810*4882a593Smuzhiyun# exponent is extended to 16 bits and the sign is stored in the unused # 6811*4882a593Smuzhiyun# portion of the extended precison operand. Denormalize the number # 6812*4882a593Smuzhiyun# according to the scale factor passed in d0. Then, round the # 6813*4882a593Smuzhiyun# denormalized result. # 6814*4882a593Smuzhiyun# Set the FPSR_exc bits as appropriate but return the cc bits in # 6815*4882a593Smuzhiyun# d0 in case the caller doesn't want to save them (as is the case for # 6816*4882a593Smuzhiyun# fmove out). # 6817*4882a593Smuzhiyun# unf_res4() for fsglmul/fsgldiv forces the denorm to extended # 6818*4882a593Smuzhiyun# precision and the rounding mode to single. # 6819*4882a593Smuzhiyun# # 6820*4882a593Smuzhiyun######################################################################### 6821*4882a593Smuzhiyun global unf_res 6822*4882a593Smuzhiyununf_res: 6823*4882a593Smuzhiyun mov.l %d1, -(%sp) # save rnd prec,mode on stack 6824*4882a593Smuzhiyun 6825*4882a593Smuzhiyun btst &0x7, FTEMP_EX(%a0) # make "internal" format 6826*4882a593Smuzhiyun sne FTEMP_SGN(%a0) 6827*4882a593Smuzhiyun 6828*4882a593Smuzhiyun mov.w FTEMP_EX(%a0), %d1 # extract exponent 6829*4882a593Smuzhiyun and.w &0x7fff, %d1 6830*4882a593Smuzhiyun sub.w %d0, %d1 6831*4882a593Smuzhiyun mov.w %d1, FTEMP_EX(%a0) # insert 16 bit exponent 6832*4882a593Smuzhiyun 6833*4882a593Smuzhiyun mov.l %a0, -(%sp) # save operand ptr during calls 6834*4882a593Smuzhiyun 6835*4882a593Smuzhiyun mov.l 0x4(%sp),%d0 # pass rnd prec. 
6836*4882a593Smuzhiyun andi.w &0x00c0,%d0 6837*4882a593Smuzhiyun lsr.w &0x4,%d0 6838*4882a593Smuzhiyun bsr.l _denorm # denorm result 6839*4882a593Smuzhiyun 6840*4882a593Smuzhiyun mov.l (%sp),%a0 6841*4882a593Smuzhiyun mov.w 0x6(%sp),%d1 # load prec:mode into %d1 6842*4882a593Smuzhiyun andi.w &0xc0,%d1 # extract rnd prec 6843*4882a593Smuzhiyun lsr.w &0x4,%d1 6844*4882a593Smuzhiyun swap %d1 6845*4882a593Smuzhiyun mov.w 0x6(%sp),%d1 6846*4882a593Smuzhiyun andi.w &0x30,%d1 6847*4882a593Smuzhiyun lsr.w &0x4,%d1 6848*4882a593Smuzhiyun bsr.l _round # round the denorm 6849*4882a593Smuzhiyun 6850*4882a593Smuzhiyun mov.l (%sp)+, %a0 6851*4882a593Smuzhiyun 6852*4882a593Smuzhiyun# result is now rounded properly. convert back to normal format 6853*4882a593Smuzhiyun bclr &0x7, FTEMP_EX(%a0) # clear sgn first; may have residue 6854*4882a593Smuzhiyun tst.b FTEMP_SGN(%a0) # is "internal result" sign set? 6855*4882a593Smuzhiyun beq.b unf_res_chkifzero # no; result is positive 6856*4882a593Smuzhiyun bset &0x7, FTEMP_EX(%a0) # set result sgn 6857*4882a593Smuzhiyun clr.b FTEMP_SGN(%a0) # clear temp sign 6858*4882a593Smuzhiyun 6859*4882a593Smuzhiyun# the number may have become zero after rounding. set ccodes accordingly. 6860*4882a593Smuzhiyununf_res_chkifzero: 6861*4882a593Smuzhiyun clr.l %d0 6862*4882a593Smuzhiyun tst.l FTEMP_HI(%a0) # is value now a zero? 6863*4882a593Smuzhiyun bne.b unf_res_cont # no 6864*4882a593Smuzhiyun tst.l FTEMP_LO(%a0) 6865*4882a593Smuzhiyun bne.b unf_res_cont # no 6866*4882a593Smuzhiyun# bset &z_bit, FPSR_CC(%a6) # yes; set zero ccode bit 6867*4882a593Smuzhiyun bset &z_bit, %d0 # yes; set zero ccode bit 6868*4882a593Smuzhiyun 6869*4882a593Smuzhiyununf_res_cont: 6870*4882a593Smuzhiyun 6871*4882a593Smuzhiyun# 6872*4882a593Smuzhiyun# can inex1 also be set along with unfl and inex2??? 6873*4882a593Smuzhiyun# 6874*4882a593Smuzhiyun# we know that underflow has occurred. aunfl should be set if INEX2 is also set. 
6875*4882a593Smuzhiyun# 6876*4882a593Smuzhiyun btst &inex2_bit, FPSR_EXCEPT(%a6) # is INEX2 set? 6877*4882a593Smuzhiyun beq.b unf_res_end # no 6878*4882a593Smuzhiyun bset &aunfl_bit, FPSR_AEXCEPT(%a6) # yes; set aunfl 6879*4882a593Smuzhiyun 6880*4882a593Smuzhiyununf_res_end: 6881*4882a593Smuzhiyun add.l &0x4, %sp # clear stack 6882*4882a593Smuzhiyun rts 6883*4882a593Smuzhiyun 6884*4882a593Smuzhiyun# unf_res() for fsglmul() and fsgldiv(). 6885*4882a593Smuzhiyun global unf_res4 6886*4882a593Smuzhiyununf_res4: 6887*4882a593Smuzhiyun mov.l %d1,-(%sp) # save rnd prec,mode on stack 6888*4882a593Smuzhiyun 6889*4882a593Smuzhiyun btst &0x7,FTEMP_EX(%a0) # make "internal" format 6890*4882a593Smuzhiyun sne FTEMP_SGN(%a0) 6891*4882a593Smuzhiyun 6892*4882a593Smuzhiyun mov.w FTEMP_EX(%a0),%d1 # extract exponent 6893*4882a593Smuzhiyun and.w &0x7fff,%d1 6894*4882a593Smuzhiyun sub.w %d0,%d1 6895*4882a593Smuzhiyun mov.w %d1,FTEMP_EX(%a0) # insert 16 bit exponent 6896*4882a593Smuzhiyun 6897*4882a593Smuzhiyun mov.l %a0,-(%sp) # save operand ptr during calls 6898*4882a593Smuzhiyun 6899*4882a593Smuzhiyun clr.l %d0 # force rnd prec = ext 6900*4882a593Smuzhiyun bsr.l _denorm # denorm result 6901*4882a593Smuzhiyun 6902*4882a593Smuzhiyun mov.l (%sp),%a0 6903*4882a593Smuzhiyun mov.w &s_mode,%d1 # force rnd prec = sgl 6904*4882a593Smuzhiyun swap %d1 6905*4882a593Smuzhiyun mov.w 0x6(%sp),%d1 # load rnd mode 6906*4882a593Smuzhiyun andi.w &0x30,%d1 # extract rnd prec 6907*4882a593Smuzhiyun lsr.w &0x4,%d1 6908*4882a593Smuzhiyun bsr.l _round # round the denorm 6909*4882a593Smuzhiyun 6910*4882a593Smuzhiyun mov.l (%sp)+,%a0 6911*4882a593Smuzhiyun 6912*4882a593Smuzhiyun# result is now rounded properly. convert back to normal format 6913*4882a593Smuzhiyun bclr &0x7,FTEMP_EX(%a0) # clear sgn first; may have residue 6914*4882a593Smuzhiyun tst.b FTEMP_SGN(%a0) # is "internal result" sign set? 
6915*4882a593Smuzhiyun beq.b unf_res4_chkifzero # no; result is positive 6916*4882a593Smuzhiyun bset &0x7,FTEMP_EX(%a0) # set result sgn 6917*4882a593Smuzhiyun clr.b FTEMP_SGN(%a0) # clear temp sign 6918*4882a593Smuzhiyun 6919*4882a593Smuzhiyun# the number may have become zero after rounding. set ccodes accordingly. 6920*4882a593Smuzhiyununf_res4_chkifzero: 6921*4882a593Smuzhiyun clr.l %d0 6922*4882a593Smuzhiyun tst.l FTEMP_HI(%a0) # is value now a zero? 6923*4882a593Smuzhiyun bne.b unf_res4_cont # no 6924*4882a593Smuzhiyun tst.l FTEMP_LO(%a0) 6925*4882a593Smuzhiyun bne.b unf_res4_cont # no 6926*4882a593Smuzhiyun# bset &z_bit,FPSR_CC(%a6) # yes; set zero ccode bit 6927*4882a593Smuzhiyun bset &z_bit,%d0 # yes; set zero ccode bit 6928*4882a593Smuzhiyun 6929*4882a593Smuzhiyununf_res4_cont: 6930*4882a593Smuzhiyun 6931*4882a593Smuzhiyun# 6932*4882a593Smuzhiyun# can inex1 also be set along with unfl and inex2??? 6933*4882a593Smuzhiyun# 6934*4882a593Smuzhiyun# we know that underflow has occurred. aunfl should be set if INEX2 is also set. 6935*4882a593Smuzhiyun# 6936*4882a593Smuzhiyun btst &inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set? 6937*4882a593Smuzhiyun beq.b unf_res4_end # no 6938*4882a593Smuzhiyun bset &aunfl_bit,FPSR_AEXCEPT(%a6) # yes; set aunfl 6939*4882a593Smuzhiyun 6940*4882a593Smuzhiyununf_res4_end: 6941*4882a593Smuzhiyun add.l &0x4,%sp # clear stack 6942*4882a593Smuzhiyun rts 6943*4882a593Smuzhiyun 6944*4882a593Smuzhiyun######################################################################### 6945*4882a593Smuzhiyun# XDEF **************************************************************** # 6946*4882a593Smuzhiyun# ovf_res(): routine to produce the default overflow result of # 6947*4882a593Smuzhiyun# an overflowing number. # 6948*4882a593Smuzhiyun# ovf_res2(): same as above but the rnd mode/prec are passed # 6949*4882a593Smuzhiyun# differently. 
# 6950*4882a593Smuzhiyun# # 6951*4882a593Smuzhiyun# XREF **************************************************************** # 6952*4882a593Smuzhiyun# none # 6953*4882a593Smuzhiyun# # 6954*4882a593Smuzhiyun# INPUT *************************************************************** # 6955*4882a593Smuzhiyun# d1.b = '-1' => (-); '0' => (+) # 6956*4882a593Smuzhiyun# ovf_res(): # 6957*4882a593Smuzhiyun# d0 = rnd mode/prec # 6958*4882a593Smuzhiyun# ovf_res2(): # 6959*4882a593Smuzhiyun# hi(d0) = rnd prec # 6960*4882a593Smuzhiyun# lo(d0) = rnd mode # 6961*4882a593Smuzhiyun# # 6962*4882a593Smuzhiyun# OUTPUT ************************************************************** # 6963*4882a593Smuzhiyun# a0 = points to extended precision result # 6964*4882a593Smuzhiyun# d0.b = condition code bits # 6965*4882a593Smuzhiyun# # 6966*4882a593Smuzhiyun# ALGORITHM *********************************************************** # 6967*4882a593Smuzhiyun# The default overflow result can be determined by the sign of # 6968*4882a593Smuzhiyun# the result and the rounding mode/prec in effect. These bits are # 6969*4882a593Smuzhiyun# concatenated together to create an index into the default result # 6970*4882a593Smuzhiyun# table. A pointer to the correct result is returned in a0. The # 6971*4882a593Smuzhiyun# resulting condition codes are returned in d0 in case the caller # 6972*4882a593Smuzhiyun# doesn't want FPSR_cc altered (as is the case for fmove out). 
# 6973*4882a593Smuzhiyun# # 6974*4882a593Smuzhiyun######################################################################### 6975*4882a593Smuzhiyun 6976*4882a593Smuzhiyun global ovf_res 6977*4882a593Smuzhiyunovf_res: 6978*4882a593Smuzhiyun andi.w &0x10,%d1 # keep result sign 6979*4882a593Smuzhiyun lsr.b &0x4,%d0 # shift prec/mode 6980*4882a593Smuzhiyun or.b %d0,%d1 # concat the two 6981*4882a593Smuzhiyun mov.w %d1,%d0 # make a copy 6982*4882a593Smuzhiyun lsl.b &0x1,%d1 # multiply d1 by 2 6983*4882a593Smuzhiyun bra.b ovf_res_load 6984*4882a593Smuzhiyun 6985*4882a593Smuzhiyun global ovf_res2 6986*4882a593Smuzhiyunovf_res2: 6987*4882a593Smuzhiyun and.w &0x10, %d1 # keep result sign 6988*4882a593Smuzhiyun or.b %d0, %d1 # insert rnd mode 6989*4882a593Smuzhiyun swap %d0 6990*4882a593Smuzhiyun or.b %d0, %d1 # insert rnd prec 6991*4882a593Smuzhiyun mov.w %d1, %d0 # make a copy 6992*4882a593Smuzhiyun lsl.b &0x1, %d1 # shift left by 1 6993*4882a593Smuzhiyun 6994*4882a593Smuzhiyun# 6995*4882a593Smuzhiyun# use the rounding mode, precision, and result sign as in index into the 6996*4882a593Smuzhiyun# two tables below to fetch the default result and the result ccodes. 
6997*4882a593Smuzhiyun# 6998*4882a593Smuzhiyunovf_res_load: 6999*4882a593Smuzhiyun mov.b (tbl_ovfl_cc.b,%pc,%d0.w*1), %d0 # fetch result ccodes 7000*4882a593Smuzhiyun lea (tbl_ovfl_result.b,%pc,%d1.w*8), %a0 # return result ptr 7001*4882a593Smuzhiyun 7002*4882a593Smuzhiyun rts 7003*4882a593Smuzhiyun 7004*4882a593Smuzhiyuntbl_ovfl_cc: 7005*4882a593Smuzhiyun byte 0x2, 0x0, 0x0, 0x2 7006*4882a593Smuzhiyun byte 0x2, 0x0, 0x0, 0x2 7007*4882a593Smuzhiyun byte 0x2, 0x0, 0x0, 0x2 7008*4882a593Smuzhiyun byte 0x0, 0x0, 0x0, 0x0 7009*4882a593Smuzhiyun byte 0x2+0x8, 0x8, 0x2+0x8, 0x8 7010*4882a593Smuzhiyun byte 0x2+0x8, 0x8, 0x2+0x8, 0x8 7011*4882a593Smuzhiyun byte 0x2+0x8, 0x8, 0x2+0x8, 0x8 7012*4882a593Smuzhiyun 7013*4882a593Smuzhiyuntbl_ovfl_result: 7014*4882a593Smuzhiyun long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN 7015*4882a593Smuzhiyun long 0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RZ 7016*4882a593Smuzhiyun long 0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RM 7017*4882a593Smuzhiyun long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP 7018*4882a593Smuzhiyun 7019*4882a593Smuzhiyun long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN 7020*4882a593Smuzhiyun long 0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RZ 7021*4882a593Smuzhiyun long 0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RM 7022*4882a593Smuzhiyun long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP 7023*4882a593Smuzhiyun 7024*4882a593Smuzhiyun long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN 7025*4882a593Smuzhiyun long 0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RZ 7026*4882a593Smuzhiyun long 0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RM 7027*4882a593Smuzhiyun long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP 7028*4882a593Smuzhiyun 7029*4882a593Smuzhiyun long 0x00000000,0x00000000,0x00000000,0x00000000 7030*4882a593Smuzhiyun long 0x00000000,0x00000000,0x00000000,0x00000000 7031*4882a593Smuzhiyun long 
0x00000000,0x00000000,0x00000000,0x00000000 7032*4882a593Smuzhiyun long 0x00000000,0x00000000,0x00000000,0x00000000 7033*4882a593Smuzhiyun 7034*4882a593Smuzhiyun long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN 7035*4882a593Smuzhiyun long 0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RZ 7036*4882a593Smuzhiyun long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM 7037*4882a593Smuzhiyun long 0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RP 7038*4882a593Smuzhiyun 7039*4882a593Smuzhiyun long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN 7040*4882a593Smuzhiyun long 0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RZ 7041*4882a593Smuzhiyun long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM 7042*4882a593Smuzhiyun long 0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RP 7043*4882a593Smuzhiyun 7044*4882a593Smuzhiyun long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN 7045*4882a593Smuzhiyun long 0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RZ 7046*4882a593Smuzhiyun long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM 7047*4882a593Smuzhiyun long 0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RP 7048*4882a593Smuzhiyun 7049*4882a593Smuzhiyun######################################################################### 7050*4882a593Smuzhiyun# XDEF **************************************************************** # 7051*4882a593Smuzhiyun# fout(): move from fp register to memory or data register # 7052*4882a593Smuzhiyun# # 7053*4882a593Smuzhiyun# XREF **************************************************************** # 7054*4882a593Smuzhiyun# _round() - needed to create EXOP for sgl/dbl precision # 7055*4882a593Smuzhiyun# norm() - needed to create EXOP for extended precision # 7056*4882a593Smuzhiyun# ovf_res() - create default overflow result for sgl/dbl precision# 7057*4882a593Smuzhiyun# unf_res() - create default underflow result for sgl/dbl prec. 
# 7058*4882a593Smuzhiyun# dst_dbl() - create rounded dbl precision result. # 7059*4882a593Smuzhiyun# dst_sgl() - create rounded sgl precision result. # 7060*4882a593Smuzhiyun# fetch_dreg() - fetch dynamic k-factor reg for packed. # 7061*4882a593Smuzhiyun# bindec() - convert FP binary number to packed number. # 7062*4882a593Smuzhiyun# _mem_write() - write data to memory. # 7063*4882a593Smuzhiyun# _mem_write2() - write data to memory unless supv mode -(a7) exc.# 7064*4882a593Smuzhiyun# _dmem_write_{byte,word,long}() - write data to memory. # 7065*4882a593Smuzhiyun# store_dreg_{b,w,l}() - store data to data register file. # 7066*4882a593Smuzhiyun# facc_out_{b,w,l,d,x}() - data access error occurred. # 7067*4882a593Smuzhiyun# # 7068*4882a593Smuzhiyun# INPUT *************************************************************** # 7069*4882a593Smuzhiyun# a0 = pointer to extended precision source operand # 7070*4882a593Smuzhiyun# d0 = round prec,mode # 7071*4882a593Smuzhiyun# # 7072*4882a593Smuzhiyun# OUTPUT ************************************************************** # 7073*4882a593Smuzhiyun# fp0 : intermediate underflow or overflow result if # 7074*4882a593Smuzhiyun# OVFL/UNFL occurred for a sgl or dbl operand # 7075*4882a593Smuzhiyun# # 7076*4882a593Smuzhiyun# ALGORITHM *********************************************************** # 7077*4882a593Smuzhiyun# This routine is accessed by many handlers that need to do an # 7078*4882a593Smuzhiyun# opclass three move of an operand out to memory. # 7079*4882a593Smuzhiyun# Decode an fmove out (opclass 3) instruction to determine if # 7080*4882a593Smuzhiyun# it's b,w,l,s,d,x, or p in size. b,w,l can be stored to either a data # 7081*4882a593Smuzhiyun# register or memory. The algorithm uses a standard "fmove" to create # 7082*4882a593Smuzhiyun# the rounded result. Also, since exceptions are disabled, this also # 7083*4882a593Smuzhiyun# create the correct OPERR default result if appropriate. 
# 7084*4882a593Smuzhiyun# For sgl or dbl precision, overflow or underflow can occur. If # 7085*4882a593Smuzhiyun# either occurs and is enabled, the EXOP. # 7086*4882a593Smuzhiyun# For extended precision, the stacked <ea> must be fixed along # 7087*4882a593Smuzhiyun# w/ the address index register as appropriate w/ _calc_ea_fout(). If # 7088*4882a593Smuzhiyun# the source is a denorm and if underflow is enabled, an EXOP must be # 7089*4882a593Smuzhiyun# created. # 7090*4882a593Smuzhiyun# For packed, the k-factor must be fetched from the instruction # 7091*4882a593Smuzhiyun# word or a data register. The <ea> must be fixed as w/ extended # 7092*4882a593Smuzhiyun# precision. Then, bindec() is called to create the appropriate # 7093*4882a593Smuzhiyun# packed result. # 7094*4882a593Smuzhiyun# If at any time an access error is flagged by one of the move- # 7095*4882a593Smuzhiyun# to-memory routines, then a special exit must be made so that the # 7096*4882a593Smuzhiyun# access error can be handled properly. 
# 7097*4882a593Smuzhiyun# # 7098*4882a593Smuzhiyun######################################################################### 7099*4882a593Smuzhiyun 7100*4882a593Smuzhiyun global fout 7101*4882a593Smuzhiyunfout: 7102*4882a593Smuzhiyun bfextu EXC_CMDREG(%a6){&3:&3},%d1 # extract dst fmt 7103*4882a593Smuzhiyun mov.w (tbl_fout.b,%pc,%d1.w*2),%a1 # use as index 7104*4882a593Smuzhiyun jmp (tbl_fout.b,%pc,%a1) # jump to routine 7105*4882a593Smuzhiyun 7106*4882a593Smuzhiyun swbeg &0x8 7107*4882a593Smuzhiyuntbl_fout: 7108*4882a593Smuzhiyun short fout_long - tbl_fout 7109*4882a593Smuzhiyun short fout_sgl - tbl_fout 7110*4882a593Smuzhiyun short fout_ext - tbl_fout 7111*4882a593Smuzhiyun short fout_pack - tbl_fout 7112*4882a593Smuzhiyun short fout_word - tbl_fout 7113*4882a593Smuzhiyun short fout_dbl - tbl_fout 7114*4882a593Smuzhiyun short fout_byte - tbl_fout 7115*4882a593Smuzhiyun short fout_pack - tbl_fout 7116*4882a593Smuzhiyun 7117*4882a593Smuzhiyun################################################################# 7118*4882a593Smuzhiyun# fmove.b out ################################################### 7119*4882a593Smuzhiyun################################################################# 7120*4882a593Smuzhiyun 7121*4882a593Smuzhiyun# Only "Unimplemented Data Type" exceptions enter here. The operand 7122*4882a593Smuzhiyun# is either a DENORM or a NORM. 7123*4882a593Smuzhiyunfout_byte: 7124*4882a593Smuzhiyun tst.b STAG(%a6) # is operand normalized? 
7125*4882a593Smuzhiyun bne.b fout_byte_denorm # no 7126*4882a593Smuzhiyun 7127*4882a593Smuzhiyun fmovm.x SRC(%a0),&0x80 # load value 7128*4882a593Smuzhiyun 7129*4882a593Smuzhiyunfout_byte_norm: 7130*4882a593Smuzhiyun fmov.l %d0,%fpcr # insert rnd prec,mode 7131*4882a593Smuzhiyun 7132*4882a593Smuzhiyun fmov.b %fp0,%d0 # exec move out w/ correct rnd mode 7133*4882a593Smuzhiyun 7134*4882a593Smuzhiyun fmov.l &0x0,%fpcr # clear FPCR 7135*4882a593Smuzhiyun fmov.l %fpsr,%d1 # fetch FPSR 7136*4882a593Smuzhiyun or.w %d1,2+USER_FPSR(%a6) # save new exc,accrued bits 7137*4882a593Smuzhiyun 7138*4882a593Smuzhiyun mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode 7139*4882a593Smuzhiyun andi.b &0x38,%d1 # is mode == 0? (Dreg dst) 7140*4882a593Smuzhiyun beq.b fout_byte_dn # must save to integer regfile 7141*4882a593Smuzhiyun 7142*4882a593Smuzhiyun mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct 7143*4882a593Smuzhiyun bsr.l _dmem_write_byte # write byte 7144*4882a593Smuzhiyun 7145*4882a593Smuzhiyun tst.l %d1 # did dstore fail? 
7146*4882a593Smuzhiyun bne.l facc_out_b # yes 7147*4882a593Smuzhiyun 7148*4882a593Smuzhiyun rts 7149*4882a593Smuzhiyun 7150*4882a593Smuzhiyunfout_byte_dn: 7151*4882a593Smuzhiyun mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn 7152*4882a593Smuzhiyun andi.w &0x7,%d1 7153*4882a593Smuzhiyun bsr.l store_dreg_b 7154*4882a593Smuzhiyun rts 7155*4882a593Smuzhiyun 7156*4882a593Smuzhiyunfout_byte_denorm: 7157*4882a593Smuzhiyun mov.l SRC_EX(%a0),%d1 7158*4882a593Smuzhiyun andi.l &0x80000000,%d1 # keep DENORM sign 7159*4882a593Smuzhiyun ori.l &0x00800000,%d1 # make smallest sgl 7160*4882a593Smuzhiyun fmov.s %d1,%fp0 7161*4882a593Smuzhiyun bra.b fout_byte_norm 7162*4882a593Smuzhiyun 7163*4882a593Smuzhiyun################################################################# 7164*4882a593Smuzhiyun# fmove.w out ################################################### 7165*4882a593Smuzhiyun################################################################# 7166*4882a593Smuzhiyun 7167*4882a593Smuzhiyun# Only "Unimplemented Data Type" exceptions enter here. The operand 7168*4882a593Smuzhiyun# is either a DENORM or a NORM. 7169*4882a593Smuzhiyunfout_word: 7170*4882a593Smuzhiyun tst.b STAG(%a6) # is operand normalized? 7171*4882a593Smuzhiyun bne.b fout_word_denorm # no 7172*4882a593Smuzhiyun 7173*4882a593Smuzhiyun fmovm.x SRC(%a0),&0x80 # load value 7174*4882a593Smuzhiyun 7175*4882a593Smuzhiyunfout_word_norm: 7176*4882a593Smuzhiyun fmov.l %d0,%fpcr # insert rnd prec:mode 7177*4882a593Smuzhiyun 7178*4882a593Smuzhiyun fmov.w %fp0,%d0 # exec move out w/ correct rnd mode 7179*4882a593Smuzhiyun 7180*4882a593Smuzhiyun fmov.l &0x0,%fpcr # clear FPCR 7181*4882a593Smuzhiyun fmov.l %fpsr,%d1 # fetch FPSR 7182*4882a593Smuzhiyun or.w %d1,2+USER_FPSR(%a6) # save new exc,accrued bits 7183*4882a593Smuzhiyun 7184*4882a593Smuzhiyun mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode 7185*4882a593Smuzhiyun andi.b &0x38,%d1 # is mode == 0? 
(Dreg dst) 7186*4882a593Smuzhiyun beq.b fout_word_dn # must save to integer regfile 7187*4882a593Smuzhiyun 7188*4882a593Smuzhiyun mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct 7189*4882a593Smuzhiyun bsr.l _dmem_write_word # write word 7190*4882a593Smuzhiyun 7191*4882a593Smuzhiyun tst.l %d1 # did dstore fail? 7192*4882a593Smuzhiyun bne.l facc_out_w # yes 7193*4882a593Smuzhiyun 7194*4882a593Smuzhiyun rts 7195*4882a593Smuzhiyun 7196*4882a593Smuzhiyunfout_word_dn: 7197*4882a593Smuzhiyun mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn 7198*4882a593Smuzhiyun andi.w &0x7,%d1 7199*4882a593Smuzhiyun bsr.l store_dreg_w 7200*4882a593Smuzhiyun rts 7201*4882a593Smuzhiyun 7202*4882a593Smuzhiyunfout_word_denorm: 7203*4882a593Smuzhiyun mov.l SRC_EX(%a0),%d1 7204*4882a593Smuzhiyun andi.l &0x80000000,%d1 # keep DENORM sign 7205*4882a593Smuzhiyun ori.l &0x00800000,%d1 # make smallest sgl 7206*4882a593Smuzhiyun fmov.s %d1,%fp0 7207*4882a593Smuzhiyun bra.b fout_word_norm 7208*4882a593Smuzhiyun 7209*4882a593Smuzhiyun################################################################# 7210*4882a593Smuzhiyun# fmove.l out ################################################### 7211*4882a593Smuzhiyun################################################################# 7212*4882a593Smuzhiyun 7213*4882a593Smuzhiyun# Only "Unimplemented Data Type" exceptions enter here. The operand 7214*4882a593Smuzhiyun# is either a DENORM or a NORM. 7215*4882a593Smuzhiyunfout_long: 7216*4882a593Smuzhiyun tst.b STAG(%a6) # is operand normalized? 
7217*4882a593Smuzhiyun bne.b fout_long_denorm # no 7218*4882a593Smuzhiyun 7219*4882a593Smuzhiyun fmovm.x SRC(%a0),&0x80 # load value 7220*4882a593Smuzhiyun 7221*4882a593Smuzhiyunfout_long_norm: 7222*4882a593Smuzhiyun fmov.l %d0,%fpcr # insert rnd prec:mode 7223*4882a593Smuzhiyun 7224*4882a593Smuzhiyun fmov.l %fp0,%d0 # exec move out w/ correct rnd mode 7225*4882a593Smuzhiyun 7226*4882a593Smuzhiyun fmov.l &0x0,%fpcr # clear FPCR 7227*4882a593Smuzhiyun fmov.l %fpsr,%d1 # fetch FPSR 7228*4882a593Smuzhiyun or.w %d1,2+USER_FPSR(%a6) # save new exc,accrued bits 7229*4882a593Smuzhiyun 7230*4882a593Smuzhiyunfout_long_write: 7231*4882a593Smuzhiyun mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode 7232*4882a593Smuzhiyun andi.b &0x38,%d1 # is mode == 0? (Dreg dst) 7233*4882a593Smuzhiyun beq.b fout_long_dn # must save to integer regfile 7234*4882a593Smuzhiyun 7235*4882a593Smuzhiyun mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct 7236*4882a593Smuzhiyun bsr.l _dmem_write_long # write long 7237*4882a593Smuzhiyun 7238*4882a593Smuzhiyun tst.l %d1 # did dstore fail? 
7239*4882a593Smuzhiyun bne.l facc_out_l # yes 7240*4882a593Smuzhiyun 7241*4882a593Smuzhiyun rts 7242*4882a593Smuzhiyun 7243*4882a593Smuzhiyunfout_long_dn: 7244*4882a593Smuzhiyun mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn 7245*4882a593Smuzhiyun andi.w &0x7,%d1 7246*4882a593Smuzhiyun bsr.l store_dreg_l 7247*4882a593Smuzhiyun rts 7248*4882a593Smuzhiyun 7249*4882a593Smuzhiyunfout_long_denorm: 7250*4882a593Smuzhiyun mov.l SRC_EX(%a0),%d1 7251*4882a593Smuzhiyun andi.l &0x80000000,%d1 # keep DENORM sign 7252*4882a593Smuzhiyun ori.l &0x00800000,%d1 # make smallest sgl 7253*4882a593Smuzhiyun fmov.s %d1,%fp0 7254*4882a593Smuzhiyun bra.b fout_long_norm 7255*4882a593Smuzhiyun 7256*4882a593Smuzhiyun################################################################# 7257*4882a593Smuzhiyun# fmove.x out ################################################### 7258*4882a593Smuzhiyun################################################################# 7259*4882a593Smuzhiyun 7260*4882a593Smuzhiyun# Only "Unimplemented Data Type" exceptions enter here. The operand 7261*4882a593Smuzhiyun# is either a DENORM or a NORM. 7262*4882a593Smuzhiyun# The DENORM causes an Underflow exception. 7263*4882a593Smuzhiyunfout_ext: 7264*4882a593Smuzhiyun 7265*4882a593Smuzhiyun# we copy the extended precision result to FP_SCR0 so that the reserved 7266*4882a593Smuzhiyun# 16-bit field gets zeroed. we do this since we promise not to disturb 7267*4882a593Smuzhiyun# what's at SRC(a0). 
7268*4882a593Smuzhiyun mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) 7269*4882a593Smuzhiyun clr.w 2+FP_SCR0_EX(%a6) # clear reserved field 7270*4882a593Smuzhiyun mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 7271*4882a593Smuzhiyun mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 7272*4882a593Smuzhiyun 7273*4882a593Smuzhiyun fmovm.x SRC(%a0),&0x80 # return result 7274*4882a593Smuzhiyun 7275*4882a593Smuzhiyun bsr.l _calc_ea_fout # fix stacked <ea> 7276*4882a593Smuzhiyun 7277*4882a593Smuzhiyun mov.l %a0,%a1 # pass: dst addr 7278*4882a593Smuzhiyun lea FP_SCR0(%a6),%a0 # pass: src addr 7279*4882a593Smuzhiyun mov.l &0xc,%d0 # pass: opsize is 12 bytes 7280*4882a593Smuzhiyun 7281*4882a593Smuzhiyun# we must not yet write the extended precision data to the stack 7282*4882a593Smuzhiyun# in the pre-decrement case from supervisor mode or else we'll corrupt 7283*4882a593Smuzhiyun# the stack frame. so, leave it in FP_SRC for now and deal with it later... 7284*4882a593Smuzhiyun cmpi.b SPCOND_FLG(%a6),&mda7_flg 7285*4882a593Smuzhiyun beq.b fout_ext_a7 7286*4882a593Smuzhiyun 7287*4882a593Smuzhiyun bsr.l _dmem_write # write ext prec number to memory 7288*4882a593Smuzhiyun 7289*4882a593Smuzhiyun tst.l %d1 # did dstore fail? 7290*4882a593Smuzhiyun bne.w fout_ext_err # yes 7291*4882a593Smuzhiyun 7292*4882a593Smuzhiyun tst.b STAG(%a6) # is operand normalized? 7293*4882a593Smuzhiyun bne.b fout_ext_denorm # no 7294*4882a593Smuzhiyun rts 7295*4882a593Smuzhiyun 7296*4882a593Smuzhiyun# the number is a DENORM. must set the underflow exception bit 7297*4882a593Smuzhiyunfout_ext_denorm: 7298*4882a593Smuzhiyun bset &unfl_bit,FPSR_EXCEPT(%a6) # set underflow exc bit 7299*4882a593Smuzhiyun 7300*4882a593Smuzhiyun mov.b FPCR_ENABLE(%a6),%d0 7301*4882a593Smuzhiyun andi.b &0x0a,%d0 # is UNFL or INEX enabled? 
7302*4882a593Smuzhiyun bne.b fout_ext_exc # yes 7303*4882a593Smuzhiyun rts 7304*4882a593Smuzhiyun 7305*4882a593Smuzhiyun# we don't want to do the write if the exception occurred in supervisor mode 7306*4882a593Smuzhiyun# so _mem_write2() handles this for us. 7307*4882a593Smuzhiyunfout_ext_a7: 7308*4882a593Smuzhiyun bsr.l _mem_write2 # write ext prec number to memory 7309*4882a593Smuzhiyun 7310*4882a593Smuzhiyun tst.l %d1 # did dstore fail? 7311*4882a593Smuzhiyun bne.w fout_ext_err # yes 7312*4882a593Smuzhiyun 7313*4882a593Smuzhiyun tst.b STAG(%a6) # is operand normalized? 7314*4882a593Smuzhiyun bne.b fout_ext_denorm # no 7315*4882a593Smuzhiyun rts 7316*4882a593Smuzhiyun 7317*4882a593Smuzhiyunfout_ext_exc: 7318*4882a593Smuzhiyun lea FP_SCR0(%a6),%a0 7319*4882a593Smuzhiyun bsr.l norm # normalize the mantissa 7320*4882a593Smuzhiyun neg.w %d0 # new exp = -(shft amt) 7321*4882a593Smuzhiyun andi.w &0x7fff,%d0 7322*4882a593Smuzhiyun andi.w &0x8000,FP_SCR0_EX(%a6) # keep only old sign 7323*4882a593Smuzhiyun or.w %d0,FP_SCR0_EX(%a6) # insert new exponent 7324*4882a593Smuzhiyun fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1 7325*4882a593Smuzhiyun rts 7326*4882a593Smuzhiyun 7327*4882a593Smuzhiyunfout_ext_err: 7328*4882a593Smuzhiyun mov.l EXC_A6(%a6),(%a6) # fix stacked a6 7329*4882a593Smuzhiyun bra.l facc_out_x 7330*4882a593Smuzhiyun 7331*4882a593Smuzhiyun######################################################################### 7332*4882a593Smuzhiyun# fmove.s out ########################################################### 7333*4882a593Smuzhiyun######################################################################### 7334*4882a593Smuzhiyunfout_sgl: 7335*4882a593Smuzhiyun andi.b &0x30,%d0 # clear rnd prec 7336*4882a593Smuzhiyun ori.b &s_mode*0x10,%d0 # insert sgl prec 7337*4882a593Smuzhiyun mov.l %d0,L_SCR3(%a6) # save rnd prec,mode on stack 7338*4882a593Smuzhiyun 7339*4882a593Smuzhiyun# 7340*4882a593Smuzhiyun# operand is a normalized number. 
first, we check to see if the move out 7341*4882a593Smuzhiyun# would cause either an underflow or overflow. these cases are handled 7342*4882a593Smuzhiyun# separately. otherwise, set the FPCR to the proper rounding mode and 7343*4882a593Smuzhiyun# execute the move. 7344*4882a593Smuzhiyun# 7345*4882a593Smuzhiyun mov.w SRC_EX(%a0),%d0 # extract exponent 7346*4882a593Smuzhiyun andi.w &0x7fff,%d0 # strip sign 7347*4882a593Smuzhiyun 7348*4882a593Smuzhiyun cmpi.w %d0,&SGL_HI # will operand overflow? 7349*4882a593Smuzhiyun bgt.w fout_sgl_ovfl # yes; go handle OVFL 7350*4882a593Smuzhiyun beq.w fout_sgl_may_ovfl # maybe; go handle possible OVFL 7351*4882a593Smuzhiyun cmpi.w %d0,&SGL_LO # will operand underflow? 7352*4882a593Smuzhiyun blt.w fout_sgl_unfl # yes; go handle underflow 7353*4882a593Smuzhiyun 7354*4882a593Smuzhiyun# 7355*4882a593Smuzhiyun# NORMs(in range) can be stored out by a simple "fmov.s" 7356*4882a593Smuzhiyun# Unnormalized inputs can come through this point. 7357*4882a593Smuzhiyun# 7358*4882a593Smuzhiyunfout_sgl_exg: 7359*4882a593Smuzhiyun fmovm.x SRC(%a0),&0x80 # fetch fop from stack 7360*4882a593Smuzhiyun 7361*4882a593Smuzhiyun fmov.l L_SCR3(%a6),%fpcr # set FPCR 7362*4882a593Smuzhiyun fmov.l &0x0,%fpsr # clear FPSR 7363*4882a593Smuzhiyun 7364*4882a593Smuzhiyun fmov.s %fp0,%d0 # store does convert and round 7365*4882a593Smuzhiyun 7366*4882a593Smuzhiyun fmov.l &0x0,%fpcr # clear FPCR 7367*4882a593Smuzhiyun fmov.l %fpsr,%d1 # save FPSR 7368*4882a593Smuzhiyun 7369*4882a593Smuzhiyun or.w %d1,2+USER_FPSR(%a6) # set possible inex2/ainex 7370*4882a593Smuzhiyun 7371*4882a593Smuzhiyunfout_sgl_exg_write: 7372*4882a593Smuzhiyun mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode 7373*4882a593Smuzhiyun andi.b &0x38,%d1 # is mode == 0? 
(Dreg dst) 7374*4882a593Smuzhiyun beq.b fout_sgl_exg_write_dn # must save to integer regfile 7375*4882a593Smuzhiyun 7376*4882a593Smuzhiyun mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct 7377*4882a593Smuzhiyun bsr.l _dmem_write_long # write long 7378*4882a593Smuzhiyun 7379*4882a593Smuzhiyun tst.l %d1 # did dstore fail? 7380*4882a593Smuzhiyun bne.l facc_out_l # yes 7381*4882a593Smuzhiyun 7382*4882a593Smuzhiyun rts 7383*4882a593Smuzhiyun 7384*4882a593Smuzhiyunfout_sgl_exg_write_dn: 7385*4882a593Smuzhiyun mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn 7386*4882a593Smuzhiyun andi.w &0x7,%d1 7387*4882a593Smuzhiyun bsr.l store_dreg_l 7388*4882a593Smuzhiyun rts 7389*4882a593Smuzhiyun 7390*4882a593Smuzhiyun# 7391*4882a593Smuzhiyun# here, we know that the operand would UNFL if moved out to single prec, 7392*4882a593Smuzhiyun# so, denorm and round and then use generic store single routine to 7393*4882a593Smuzhiyun# write the value to memory. 7394*4882a593Smuzhiyun# 7395*4882a593Smuzhiyunfout_sgl_unfl: 7396*4882a593Smuzhiyun bset &unfl_bit,FPSR_EXCEPT(%a6) # set UNFL 7397*4882a593Smuzhiyun 7398*4882a593Smuzhiyun mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) 7399*4882a593Smuzhiyun mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 7400*4882a593Smuzhiyun mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 7401*4882a593Smuzhiyun mov.l %a0,-(%sp) 7402*4882a593Smuzhiyun 7403*4882a593Smuzhiyun clr.l %d0 # pass: S.F. 
= 0 7404*4882a593Smuzhiyun 7405*4882a593Smuzhiyun cmpi.b STAG(%a6),&DENORM # fetch src optype tag 7406*4882a593Smuzhiyun bne.b fout_sgl_unfl_cont # let DENORMs fall through 7407*4882a593Smuzhiyun 7408*4882a593Smuzhiyun lea FP_SCR0(%a6),%a0 7409*4882a593Smuzhiyun bsr.l norm # normalize the DENORM 7410*4882a593Smuzhiyun 7411*4882a593Smuzhiyunfout_sgl_unfl_cont: 7412*4882a593Smuzhiyun lea FP_SCR0(%a6),%a0 # pass: ptr to operand 7413*4882a593Smuzhiyun mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode 7414*4882a593Smuzhiyun bsr.l unf_res # calc default underflow result 7415*4882a593Smuzhiyun 7416*4882a593Smuzhiyun lea FP_SCR0(%a6),%a0 # pass: ptr to fop 7417*4882a593Smuzhiyun bsr.l dst_sgl # convert to single prec 7418*4882a593Smuzhiyun 7419*4882a593Smuzhiyun mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode 7420*4882a593Smuzhiyun andi.b &0x38,%d1 # is mode == 0? (Dreg dst) 7421*4882a593Smuzhiyun beq.b fout_sgl_unfl_dn # must save to integer regfile 7422*4882a593Smuzhiyun 7423*4882a593Smuzhiyun mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct 7424*4882a593Smuzhiyun bsr.l _dmem_write_long # write long 7425*4882a593Smuzhiyun 7426*4882a593Smuzhiyun tst.l %d1 # did dstore fail? 7427*4882a593Smuzhiyun bne.l facc_out_l # yes 7428*4882a593Smuzhiyun 7429*4882a593Smuzhiyun bra.b fout_sgl_unfl_chkexc 7430*4882a593Smuzhiyun 7431*4882a593Smuzhiyunfout_sgl_unfl_dn: 7432*4882a593Smuzhiyun mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn 7433*4882a593Smuzhiyun andi.w &0x7,%d1 7434*4882a593Smuzhiyun bsr.l store_dreg_l 7435*4882a593Smuzhiyun 7436*4882a593Smuzhiyunfout_sgl_unfl_chkexc: 7437*4882a593Smuzhiyun mov.b FPCR_ENABLE(%a6),%d1 7438*4882a593Smuzhiyun andi.b &0x0a,%d1 # is UNFL or INEX enabled? 
7439*4882a593Smuzhiyun bne.w fout_sd_exc_unfl # yes 7440*4882a593Smuzhiyun addq.l &0x4,%sp 7441*4882a593Smuzhiyun rts 7442*4882a593Smuzhiyun 7443*4882a593Smuzhiyun# 7444*4882a593Smuzhiyun# it's definitely an overflow so call ovf_res to get the correct answer 7445*4882a593Smuzhiyun# 7446*4882a593Smuzhiyunfout_sgl_ovfl: 7447*4882a593Smuzhiyun tst.b 3+SRC_HI(%a0) # is result inexact? 7448*4882a593Smuzhiyun bne.b fout_sgl_ovfl_inex2 7449*4882a593Smuzhiyun tst.l SRC_LO(%a0) # is result inexact? 7450*4882a593Smuzhiyun bne.b fout_sgl_ovfl_inex2 7451*4882a593Smuzhiyun ori.w &ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex 7452*4882a593Smuzhiyun bra.b fout_sgl_ovfl_cont 7453*4882a593Smuzhiyunfout_sgl_ovfl_inex2: 7454*4882a593Smuzhiyun ori.w &ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2 7455*4882a593Smuzhiyun 7456*4882a593Smuzhiyunfout_sgl_ovfl_cont: 7457*4882a593Smuzhiyun mov.l %a0,-(%sp) 7458*4882a593Smuzhiyun 7459*4882a593Smuzhiyun# call ovf_res() w/ sgl prec and the correct rnd mode to create the default 7460*4882a593Smuzhiyun# overflow result. DON'T save the returned ccodes from ovf_res() since 7461*4882a593Smuzhiyun# fmove out doesn't alter them. 7462*4882a593Smuzhiyun tst.b SRC_EX(%a0) # is operand negative? 7463*4882a593Smuzhiyun smi %d1 # set if so 7464*4882a593Smuzhiyun mov.l L_SCR3(%a6),%d0 # pass: sgl prec,rnd mode 7465*4882a593Smuzhiyun bsr.l ovf_res # calc OVFL result 7466*4882a593Smuzhiyun fmovm.x (%a0),&0x80 # load default overflow result 7467*4882a593Smuzhiyun fmov.s %fp0,%d0 # store to single 7468*4882a593Smuzhiyun 7469*4882a593Smuzhiyun mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode 7470*4882a593Smuzhiyun andi.b &0x38,%d1 # is mode == 0? 
(Dreg dst) 7471*4882a593Smuzhiyun beq.b fout_sgl_ovfl_dn # must save to integer regfile 7472*4882a593Smuzhiyun 7473*4882a593Smuzhiyun mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct 7474*4882a593Smuzhiyun bsr.l _dmem_write_long # write long 7475*4882a593Smuzhiyun 7476*4882a593Smuzhiyun tst.l %d1 # did dstore fail? 7477*4882a593Smuzhiyun bne.l facc_out_l # yes 7478*4882a593Smuzhiyun 7479*4882a593Smuzhiyun bra.b fout_sgl_ovfl_chkexc 7480*4882a593Smuzhiyun 7481*4882a593Smuzhiyunfout_sgl_ovfl_dn: 7482*4882a593Smuzhiyun mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn 7483*4882a593Smuzhiyun andi.w &0x7,%d1 7484*4882a593Smuzhiyun bsr.l store_dreg_l 7485*4882a593Smuzhiyun 7486*4882a593Smuzhiyunfout_sgl_ovfl_chkexc: 7487*4882a593Smuzhiyun mov.b FPCR_ENABLE(%a6),%d1 7488*4882a593Smuzhiyun andi.b &0x0a,%d1 # is UNFL or INEX enabled? 7489*4882a593Smuzhiyun bne.w fout_sd_exc_ovfl # yes 7490*4882a593Smuzhiyun addq.l &0x4,%sp 7491*4882a593Smuzhiyun rts 7492*4882a593Smuzhiyun 7493*4882a593Smuzhiyun# 7494*4882a593Smuzhiyun# move out MAY overflow: 7495*4882a593Smuzhiyun# (1) force the exp to 0x3fff 7496*4882a593Smuzhiyun# (2) do a move w/ appropriate rnd mode 7497*4882a593Smuzhiyun# (3) if exp still equals zero, then insert original exponent 7498*4882a593Smuzhiyun# for the correct result. 7499*4882a593Smuzhiyun# if exp now equals one, then it overflowed so call ovf_res. 
#
# fmove.s out, "may overflow" case: the exponent was forced to 0x3fff and
# the mantissa rounded to single precision; compare |result| against 2.0
# to see whether rounding bumped the exponent (i.e. a real overflow).
#
fout_sgl_may_ovfl:
	mov.w		SRC_EX(%a0),%d1		# fetch current sign
	andi.w		&0x8000,%d1		# keep it,clear exp
	ori.w		&0x3fff,%d1		# insert exp = 0
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert scaled exp
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fmov.x		FP_SCR0(%a6),%fp0	# force fop to be rounded
	fmov.l		&0x0,%fpcr		# clear FPCR

	fabs.x		%fp0			# need absolute value
	fcmp.b		%fp0,&0x2		# did exponent increase?
	fblt.w		fout_sgl_exg		# no; go finish NORM
	bra.w		fout_sgl_ovfl		# yes; go handle overflow

################

#
# fmove.s/d out underflow exception path: build the EXOP (exceptional
# operand) in FP_SCR0 and return it in fp1. a0 (ptr to src operand) was
# pushed by the caller and is popped here.
#
fout_sd_exc_unfl:
	mov.l		(%sp)+,%a0		# restore src operand ptr

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	cmpi.b		STAG(%a6),&DENORM	# was src a DENORM?
	bne.b		fout_sd_exc_cont	# no

# src was a DENORM: normalize it and insert the (negated) shift count
# into the 15-bit exponent field of FP_SCR0.
	lea		FP_SCR0(%a6),%a0
	bsr.l		norm
	neg.l		%d0
	andi.w		&0x7fff,%d0
	bfins		%d0,FP_SCR0_EX(%a6){&1:&15}
	bra.b		fout_sd_exc_cont

fout_sd_exc:
fout_sd_exc_ovfl:
	mov.l		(%sp)+,%a0		# restore a0

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

fout_sd_exc_cont:
	bclr		&0x7,FP_SCR0_EX(%a6)	# clear sign bit
	sne.b		2+FP_SCR0_EX(%a6)	# set internal sign bit

	lea		FP_SCR0(%a6),%a0	# pass: ptr to DENORM

# extract the rounding precision (hi word of d1) and rounding mode
# (lo word of d1) from the saved control word in L_SCR3.
	mov.b		3+L_SCR3(%a6),%d1
	lsr.b		&0x4,%d1
	andi.w		&0x0c,%d1
	swap		%d1
	mov.b		3+L_SCR3(%a6),%d1
	lsr.b		&0x4,%d1
	andi.w		&0x03,%d1
	clr.l		%d0			# pass: zero g,r,s
	bsr.l		_round			# round the DENORM

	tst.b		2+FP_SCR0_EX(%a6)	# is EXOP negative?
	beq.b		fout_sd_exc_done	# no
	bset		&0x7,FP_SCR0_EX(%a6)	# yes; restore sign bit

fout_sd_exc_done:
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	rts

#################################################################
# fmove.d out ###################################################
#################################################################

#
# fout_dbl: store the source operand out to memory as a double,
# dispatching to the overflow/underflow handlers when the extended-
# precision exponent lies outside the representable double range.
#
fout_dbl:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&d_mode*0x10,%d0	# insert dbl prec
	mov.l		%d0,L_SCR3(%a6)		# save rnd prec,mode on stack

#
# operand is a normalized number. first, we check to see if the move out
# would cause either an underflow or overflow. these cases are handled
# separately. otherwise, set the FPCR to the proper rounding mode and
# execute the move.
#
	mov.w		SRC_EX(%a0),%d0		# extract exponent
	andi.w		&0x7fff,%d0		# strip sign

	cmpi.w		%d0,&DBL_HI		# will operand overflow?
	bgt.w		fout_dbl_ovfl		# yes; go handle OVFL
	beq.w		fout_dbl_may_ovfl	# maybe; go handle possible OVFL

	cmpi.w		%d0,&DBL_LO		# will operand underflow?
	blt.w		fout_dbl_unfl		# yes; go handle underflow

#
# NORMs(in range) can be stored out by a simple "fmov.d"
# Unnormalized inputs can come through this point.
#
# in-range case: let the FPU's "fmov.d" do the convert-and-round, then
# write the 8-byte result to the user's effective address.
#
fout_dbl_exg:
	fmovm.x		SRC(%a0),&0x80		# fetch fop from stack

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fmov.d		%fp0,L_SCR1(%a6)	# store does convert and round

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d0		# save FPSR

	or.w		%d0,2+USER_FPSR(%a6)	# set possible inex2/ainex

	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
	lea		L_SCR1(%a6),%a0		# pass: src addr
	movq.l		&0x8,%d0		# pass: opsize is 8 bytes
	bsr.l		_dmem_write		# store dbl fop to memory

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_d		# yes

	rts					# no; so we're finished

#
# here, we know that the operand would UNFL if moved out to double prec,
# so, denorm and round and then use generic store double routine to
# write the value to memory.
#
fout_dbl_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set UNFL

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	mov.l		%a0,-(%sp)		# save src ptr for exc path

	clr.l		%d0			# pass: S.F. = 0

	cmpi.b		STAG(%a6),&DENORM	# fetch src optype tag
	bne.b		fout_dbl_unfl_cont	# let DENORMs fall through

	lea		FP_SCR0(%a6),%a0
	bsr.l		norm			# normalize the DENORM

fout_dbl_unfl_cont:
	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# calc default underflow result

	lea		FP_SCR0(%a6),%a0	# pass: ptr to fop
	bsr.l		dst_dbl			# convert to double prec
	mov.l		%d0,L_SCR1(%a6)		# hi lword of double
	mov.l		%d1,L_SCR2(%a6)		# lo lword of double

	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
	lea		L_SCR1(%a6),%a0		# pass: src addr
	movq.l		&0x8,%d0		# pass: opsize is 8 bytes
	bsr.l		_dmem_write		# store dbl fop to memory

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_d		# yes

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0a,%d1		# is UNFL or INEX enabled?
	bne.w		fout_sd_exc_unfl	# yes; go build EXOP
	addq.l		&0x4,%sp		# discard saved src ptr
	rts

#
# it's definitely an overflow so call ovf_res to get the correct answer
#
fout_dbl_ovfl:
	mov.w		2+SRC_LO(%a0),%d0	# low 11 mantissa bits lost
	andi.w		&0x7ff,%d0		# by the dbl-prec store?
	bne.b		fout_dbl_ovfl_inex2	# yes; result is also inexact

	ori.w		&ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
	bra.b		fout_dbl_ovfl_cont
fout_dbl_ovfl_inex2:
	ori.w		&ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2

fout_dbl_ovfl_cont:
	mov.l		%a0,-(%sp)		# save src ptr for exc path

# call ovf_res() w/ dbl prec and the correct rnd mode to create the default
# overflow result. DON'T save the returned ccodes from ovf_res() since
# fmove out doesn't alter them.
	tst.b		SRC_EX(%a0)		# is operand negative?
	smi		%d1			# set if so
	mov.l		L_SCR3(%a6),%d0		# pass: dbl prec,rnd mode
	bsr.l		ovf_res			# calc OVFL result
	fmovm.x		(%a0),&0x80		# load default overflow result
	fmov.d		%fp0,L_SCR1(%a6)	# store to double

	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
	lea		L_SCR1(%a6),%a0		# pass: src addr
	movq.l		&0x8,%d0		# pass: opsize is 8 bytes
	bsr.l		_dmem_write		# store dbl fop to memory

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_d		# yes

	mov.b		FPCR_ENABLE(%a6),%d1
# NOTE(review): mask 0x0a selects the UNFL+INEX2 enable bits even though
# this is the overflow path (branch target is fout_sd_exc_ovfl) — matches
# the original 060FPSP text; verify against the Motorola sources.
	andi.b		&0x0a,%d1		# is UNFL or INEX enabled?
	bne.w		fout_sd_exc_ovfl	# yes; go build EXOP
	addq.l		&0x4,%sp		# discard saved src ptr
	rts

#
# move out MAY overflow:
# (1) force the exp to 0x3fff
# (2) do a move w/ appropriate rnd mode
# (3) if exp still equals zero, then insert original exponent
#	for the correct result.
#     if exp now equals one, then it overflowed so call ovf_res.
#
fout_dbl_may_ovfl:
	mov.w		SRC_EX(%a0),%d1		# fetch current sign
	andi.w		&0x8000,%d1		# keep it,clear exp
	ori.w		&0x3fff,%d1		# insert exp = 0
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert scaled exp
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fmov.x		FP_SCR0(%a6),%fp0	# force fop to be rounded
	fmov.l		&0x0,%fpcr		# clear FPCR

	fabs.x		%fp0			# need absolute value
	fcmp.b		%fp0,&0x2		# did exponent increase?
	fblt.w		fout_dbl_exg		# no; go finish NORM
	bra.w		fout_dbl_ovfl		# yes; go handle overflow

#########################################################################
# XDEF ****************************************************************	#
#	dst_dbl(): create double precision value from extended prec.	#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to source operand in extended precision		#
#									#
# OUTPUT **************************************************************	#
#	d0 = hi(double precision result)				#
#	d1 = lo(double precision result)				#
#									#
# ALGORITHM ***********************************************************	#
#									#
#	Changes extended precision to double precision.			#
#	Note: no attempt is made to round the extended value to double.
#	dbl_sign = ext_sign						#
#	dbl_exp = ext_exp - $3fff(ext bias) + $7ff(dbl bias)		#
#	get rid of ext integer bit					#
#	dbl_mant = ext_mant{62:12}					#
#									#
#	    ---------------   ---------------    ---------------	#
#  extended ->  |s|    exp    |   |1| ms mant   |   | ls mant      |	#
#	    ---------------   ---------------    ---------------	#
#	       95	    64 63 62	      32 31	      11    0	#
#				     |			 |		#
#				     |			 |		#
#				     |			 |		#
#				     v			 v		#
#			      ---------------   ---------------		#
#  double  ->		      |s|exp| mant  |   |  mant       |		#
#			      ---------------   ---------------		#
#			      63     51   32   31	      0		#
#									#
#########################################################################

dst_dbl:
	clr.l		%d0			# clear d0
	mov.w		FTEMP_EX(%a0),%d0	# get exponent
	subi.w		&EXT_BIAS,%d0		# subtract extended precision bias
	addi.w		&DBL_BIAS,%d0		# add double precision bias
	tst.b		FTEMP_HI(%a0)		# is number a denorm?
	bmi.b		dst_get_dupper		# no
	subq.w		&0x1,%d0		# yes; denorm bias = DBL_BIAS - 1
dst_get_dupper:
	swap		%d0			# d0 now in upper word
	lsl.l		&0x4,%d0		# d0 in proper place for dbl prec exp
	tst.b		FTEMP_EX(%a0)		# test sign
	bpl.b		dst_get_dman		# if positive, go process mantissa
	bset		&0x1f,%d0		# if negative, set sign
dst_get_dman:
	mov.l		FTEMP_HI(%a0),%d1	# get ms mantissa
	bfextu		%d1{&1:&20},%d1		# get upper 20 bits of ms
	or.l		%d1,%d0			# put these bits in ms word of double
	mov.l		%d0,L_SCR1(%a6)		# put the new exp back on the stack
	mov.l		FTEMP_HI(%a0),%d1	# get ms mantissa
	mov.l		&21,%d0			# load shift count
	lsl.l		%d0,%d1			# put lower 11 bits in upper bits
	mov.l		%d1,L_SCR2(%a6)		# build lower lword in memory
	mov.l		FTEMP_LO(%a0),%d1	# get ls mantissa
	bfextu		%d1{&0:&21},%d0		# get ls 21 bits of double
	mov.l		L_SCR2(%a6),%d1
	or.l		%d0,%d1			# put them in double result
	mov.l		L_SCR1(%a6),%d0		# return hi lword in d0
	rts

#########################################################################
# XDEF ****************************************************************	#
#	dst_sgl(): create single precision value from extended prec	#
#									#
# XREF ****************************************************************	#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to source operand
#	     in extended precision					#
#									#
# OUTPUT **************************************************************	#
#	d0 = single precision result					#
#									#
# ALGORITHM ***********************************************************	#
#									#
#	Changes extended precision to single precision.			#
#	sgl_sign = ext_sign						#
#	sgl_exp = ext_exp - $3fff(ext bias) + $7f(sgl bias)		#
#	get rid of ext integer bit					#
#	sgl_mant = ext_mant{62:12}					#
#									#
#	    ---------------   ---------------    ---------------	#
#  extended ->  |s|    exp    |   |1| ms mant   |   | ls mant      |	#
#	    ---------------   ---------------    ---------------	#
#	       95	    64 63 62	   40 32 31	   12	    0	#
#				     |						#
#				     |						#
#				     |						#
#				     v						#
#			      ---------------				#
#  single  ->		      |s|exp| mant  |				#
#			      ---------------				#
#			      31     22     0				#
#									#
#########################################################################

dst_sgl:
	clr.l		%d0
	mov.w		FTEMP_EX(%a0),%d0	# get exponent
	subi.w		&EXT_BIAS,%d0		# subtract extended precision bias
	addi.w		&SGL_BIAS,%d0		# add single precision bias
	tst.b		FTEMP_HI(%a0)		# is number a denorm?
	bmi.b		dst_get_supper		# no
	subq.w		&0x1,%d0		# yes; denorm bias = SGL_BIAS - 1
dst_get_supper:
	swap		%d0			# put exp in upper word of d0
	lsl.l		&0x7,%d0		# shift it into single exp bits
	tst.b		FTEMP_EX(%a0)		# test sign
	bpl.b		dst_get_sman		# if positive, continue
	bset		&0x1f,%d0		# if negative, put in sign first
dst_get_sman:
	mov.l		FTEMP_HI(%a0),%d1	# get ms mantissa
	andi.l		&0x7fffff00,%d1		# get upper 23 bits of ms
	lsr.l		&0x8,%d1		# and put them flush right
	or.l		%d1,%d0			# put these bits in ms word of single
	rts

##############################################################################
#
# fout_pack: store the source operand out in packed-decimal format.
# Fetches the <ea>, extracts the k-factor (static or dynamic), converts
# extended precision to packed via bindec, then writes 12 bytes out.
#
fout_pack:
	bsr.l		_calc_ea_fout		# fetch the <ea>
	mov.l		%a0,-(%sp)		# save dst addr

	mov.b		STAG(%a6),%d0		# fetch input type
	bne.w		fout_pack_not_norm	# input is not NORM

fout_pack_norm:
	btst		&0x4,EXC_CMDREG(%a6)	# static or dynamic?
	beq.b		fout_pack_s		# static

fout_pack_d:
	mov.b		1+EXC_CMDREG(%a6),%d1	# fetch dynamic reg
	lsr.b		&0x4,%d1
	andi.w		&0x7,%d1

	bsr.l		fetch_dreg		# fetch Dn w/ k-factor

	bra.b		fout_pack_type
fout_pack_s:
	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch static field

fout_pack_type:
	bfexts		%d0{&25:&7},%d0		# extract k-factor
	mov.l		%d0,-(%sp)		# save k-factor

	lea		FP_SRC(%a6),%a0		# pass: ptr to input

# bindec is currently scrambling FP_SRC for denorm inputs.
# we'll have to change this, but for now, tough luck!!!
	bsr.l		bindec			# convert xprec to packed

#	andi.l		&0xcfff000f,FP_SCR0(%a6) # clear unused fields
	andi.l		&0xcffff00f,FP_SCR0(%a6) # clear unused fields

	mov.l		(%sp)+,%d0		# restore k-factor

# if all mantissa digits are zero, consider zeroing the exponent too.
	tst.b		3+FP_SCR0_EX(%a6)
	bne.b		fout_pack_set
	tst.l		FP_SCR0_HI(%a6)
	bne.b		fout_pack_set
	tst.l		FP_SCR0_LO(%a6)
	bne.b		fout_pack_set

# add the extra condition that only if the k-factor was zero, too, should
# we zero the exponent
	tst.l		%d0
	bne.b		fout_pack_set
# "mantissa" is all zero which means that the answer is zero. but, the '040
# algorithm allows the exponent to be non-zero. the 881/2 do not. Therefore,
# if the mantissa is zero, I will zero the exponent, too.
# the question now is whether the exponents sign bit is allowed to be non-zero
# for a zero, also...
	andi.w		&0xf000,FP_SCR0(%a6)

fout_pack_set:

	lea		FP_SCR0(%a6),%a0	# pass: src addr

fout_pack_write:
	mov.l		(%sp)+,%a1		# pass: dst addr
	mov.l		&0xc,%d0		# pass: opsize is 12 bytes

	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # "-(a7)" in supervisor mode?
	beq.b		fout_pack_a7

	bsr.l		_dmem_write		# write ext prec number to memory

	tst.l		%d1			# did dstore fail?
	bne.w		fout_ext_err		# yes

	rts

# we don't want to do the write if the exception occurred in supervisor mode
# so _mem_write2() handles this for us.
fout_pack_a7:
	bsr.l		_mem_write2		# write ext prec number to memory

	tst.l		%d1			# did dstore fail?
	bne.w		fout_ext_err		# yes

	rts

#
# non-NORM inputs: DENORMs re-enter the normal path; everything else is
# written out directly (with the unused exponent bits cleared), setting
# the SNAN bit and the IOP exception for signalling NANs.
#
fout_pack_not_norm:
	cmpi.b		%d0,&DENORM		# is it a DENORM?
	beq.w		fout_pack_norm		# yes
	lea		FP_SRC(%a6),%a0
	clr.w		2+FP_SRC_EX(%a6)
	cmpi.b		%d0,&SNAN		# is it an SNAN?
	beq.b		fout_pack_snan		# yes
	bra.b		fout_pack_write		# no

fout_pack_snan:
	ori.w		&snaniop2_mask,FPSR_EXCEPT(%a6) # set SNAN/AIOP
	bset		&0x6,FP_SRC_HI(%a6)	# set snan bit
	bra.b		fout_pack_write

#########################################################################
# XDEF ****************************************************************	#
#	fmul(): emulates the fmul instruction				#
#	fsmul(): emulates the fsmul instruction				#
#	fdmul(): emulates the fdmul instruction				#
#									#
# XREF ****************************************************************	#
#	scale_to_zero_src() - scale src exponent to zero		#
#	scale_to_zero_dst() - scale dst exponent to zero		#
#	unf_res() - return default underflow result			#
#	ovf_res() - return default overflow result			#
#	res_qnan() - return QNAN result					#
#	res_snan() - return SNAN result					#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision source operand		#
#	a1 = pointer to extended precision destination operand		#
#	d0  rnd prec,mode						#
#									#
# OUTPUT **************************************************************	#
#	fp0 = result							#
#	fp1 = EXOP (if exception occurred)				#
#									#
# ALGORITHM ***********************************************************	#
#	Handle NANs, infinities,
#	and zeroes as special cases. Divide				#
#	norms/denorms into ext/sgl/dbl precision.			#
#	For norms/denorms, scale the exponents such that a multiply	#
#	instruction won't cause an exception. Use the regular fmul to	#
#	compute a result. Check if the regular operands would have	#
#	taken an exception. If so, return the default overflow/	#
#	underflow result and return the EXOP if exceptions are enabled.	#
#	Else, scale the result operand to the proper exponent.		#
#									#
#########################################################################

	align		0x10
# scale-factor thresholds, indexed by rounding precision (ext/sgl/dbl):
tbl_fmul_ovfl:
	long		0x3fff - 0x7ffe		# ext_max
	long		0x3fff - 0x407e		# sgl_max
	long		0x3fff - 0x43fe		# dbl_max
tbl_fmul_unfl:
	long		0x3fff + 0x0001		# ext_unfl
	long		0x3fff - 0x3f80		# sgl_unfl
	long		0x3fff - 0x3c00		# dbl_unfl

	global		fsmul
fsmul:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&s_mode*0x10,%d0	# insert sgl prec
	bra.b		fmul

	global		fdmul
fdmul:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&d_mode*0x10,%d0	# insert dbl prec
						# fall through into fmul

	global		fmul
fmul:
	mov.l		%d0,L_SCR3(%a6)		# store rnd info

	clr.w		%d1
	mov.b		DTAG(%a6),%d1
	lsl.b		&0x3,%d1
	or.b		STAG(%a6),%d1		# combine src tags
	bne.w		fmul_not_norm		# optimize on non-norm input

fmul_norm:
	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	bsr.l		scale_to_zero_src	# scale src exponent
	mov.l		%d0,-(%sp)		# save scale factor 1

	bsr.l		scale_to_zero_dst	# scale dst exponent

	add.l		%d0,(%sp)		# SCALE_FACTOR = scale1 + scale2

	mov.w		2+L_SCR3(%a6),%d1	# fetch precision
	lsr.b		&0x6,%d1		# shift to lo bits
	mov.l		(%sp)+,%d0		# load S.F.
	cmp.l		%d0,(tbl_fmul_ovfl.w,%pc,%d1.w*4) # would result ovfl?
	beq.w		fmul_may_ovfl		# result may rnd to overflow
	blt.w		fmul_ovfl		# result will overflow

	cmp.l		%d0,(tbl_fmul_unfl.w,%pc,%d1.w*4) # would result unfl?
	beq.w		fmul_may_unfl		# result may rnd to no unfl
	bgt.w		fmul_unfl		# result will underflow

#
# NORMAL:
# - the result of the multiply operation will neither overflow nor underflow.
# - do the multiply to the proper precision and rounding mode.
# - scale the result exponent using the scale factor. if both operands were
#   normalized then we really don't need to go through this scaling. but for
#   now, this will do.
#
fmul_normal:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

fmul_normal_exit:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# add scale factor
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	rts

#
# OVERFLOW:
# - the result of the multiply operation is an overflow.
# - do the multiply to the proper precision and rounding mode in order to
#   set the inexact bits.
# - calculate the default result and return it in fp0.
# - if overflow or inexact is enabled, we need a multiply result rounded to
#   extended precision. if the original operation was extended, then we have
#   this
#	result.
#   if the original operation was single or double, we have to do another
#   multiply using extended precision and the correct rounding mode. the
#   result of this operation then has its exponent scaled by -0x6000 to
#   create the exceptional operand.
#
fmul_ovfl:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

# save setting this until now because this is where fmul_may_ovfl may jump in
fmul_ovfl_tst:
	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fmul_ovfl_ena		# yes

# calculate the default result
fmul_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass rnd prec,mode
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	rts

#
# OVFL is enabled; Create EXOP:
# - if precision is extended, then we have the EXOP. simply bias the exponent
#   with an extra -0x6000. if the precision is single or double, we need to
#   calculate a result rounded to extended precision.
#
fmul_ovfl_ena:
	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# test the rnd prec
	bne.b		fmul_ovfl_ena_sd	# it's sgl or dbl

fmul_ovfl_ena_cont:
	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack

	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.w		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# add scale factor
	subi.l		&0x6000,%d1		# subtract bias
	andi.w		&0x7fff,%d1		# clear sign bit
	andi.w		&0x8000,%d2		# keep old sign
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.b		fmul_ovfl_dis

# sgl/dbl precision: redo the multiply in extended precision (rnd mode
# preserved) so the EXOP built above is extended-precision-rounded.
fmul_ovfl_ena_sd:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0x30,%d1		# keep rnd mode only
	fmov.l		%d1,%fpcr		# set FPCR

	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply

	fmov.l		&0x0,%fpcr		# clear FPCR
	bra.b		fmul_ovfl_ena_cont

#
# may OVERFLOW:
# - the result of the multiply operation MAY overflow.
# - do the multiply to the proper precision and rounding mode in order to
#   set the inexact bits.
# - calculate the default result and return it in fp0.
#
fmul_may_ovfl:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
	fbge.w		fmul_ovfl_tst		# yes; overflow has occurred

# no, it didn't overflow; we have correct result
	bra.w		fmul_normal_exit

#
# UNDERFLOW:
# - the result of the multiply operation is an underflow.
# - do the multiply to the proper precision and rounding mode in order to
# set the inexact bits.
# - calculate the default result and return it in fp0.
# - if overflow or inexact is enabled, we need a multiply result rounded to
# extended precision. if the original operation was extended, then we have this
# result. if the original operation was single or double, we have to do another
# multiply using extended precision and the correct rounding mode. the result
# of this operation then has its exponent scaled by -0x6000 to create the
# exceptional operand.
#
fmul_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

# for fun, let's use only extended precision, round to zero. then, let
# the unf_res() routine figure out all the rest.
# will we get the correct answer.
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand

	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fmul_unfl_ena		# yes

fmul_unfl_dis:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result

	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# unf_res2 may have set 'Z'
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	rts

#
# UNFL is enabled.
#
fmul_unfl_ena:
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# is precision extended?
	bne.b		fmul_unfl_ena_sd	# no, sgl or dbl

# if the rnd mode is anything but RZ, then we have to re-do the above
# multiplication because we used RZ for all.
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

fmul_unfl_ena_cont:
	fmov.l		&0x0,%fpsr		# clear FPSR

	fmul.x		FP_SCR0(%a6),%fp1	# execute multiply

	fmov.l		&0x0,%fpcr		# clear FPCR

	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# add scale factor
	addi.l		&0x6000,%d1		# add bias
	andi.w		&0x7fff,%d1
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.w		fmul_unfl_dis

fmul_unfl_ena_sd:
	mov.l		L_SCR3(%a6),%d1
	andi.b		&0x30,%d1		# use only rnd mode
	fmov.l		%d1,%fpcr		# set FPCR

	bra.b		fmul_unfl_ena_cont

# MAY UNDERFLOW:
# -use the correct rounding mode and precision. this code favors operations
# that do not underflow.
fmul_may_unfl:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x2		# is |result| > 2.b?
	fbgt.w		fmul_normal_exit	# no; no underflow occurred
	fblt.w		fmul_unfl		# yes; underflow occurred

#
# we still don't know if underflow occurred. result is ~ equal to 2. but,
# we don't know if the result was an underflow that rounded up to a 2 or
# a normalized number that rounded down to a 2. so, redo the entire operation
# using RZ as the rounding mode to see what the pre-rounded result is.
# this case should be relatively rare.
#
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst operand

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# keep rnd prec
	ori.b		&rz_mode*0x10,%d1	# insert RZ

	fmov.l		%d1,%fpcr		# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fmul.x		FP_SCR0(%a6),%fp1	# execute multiply

	fmov.l		&0x0,%fpcr		# clear FPCR
	fabs.x		%fp1			# make absolute value
	fcmp.b		%fp1,&0x2		# is |result| < 2.b?
	fbge.w		fmul_normal_exit	# no; no underflow occurred
	bra.w		fmul_unfl		# yes, underflow occurred

################################################################################

#
# Multiply: inputs are not both normalized; what are they?
#
#
fmul_not_norm:
	mov.w		(tbl_fmul_op.b,%pc,%d1.w*2),%d1
	jmp		(tbl_fmul_op.b,%pc,%d1.w)

	swbeg		&48
tbl_fmul_op:
	short		fmul_norm	- tbl_fmul_op # NORM x NORM
	short		fmul_zero	- tbl_fmul_op # NORM x ZERO
	short		fmul_inf_src	- tbl_fmul_op # NORM x INF
	short		fmul_res_qnan	- tbl_fmul_op # NORM x QNAN
	short		fmul_norm	- tbl_fmul_op # NORM x DENORM
	short		fmul_res_snan	- tbl_fmul_op # NORM x SNAN
	short		tbl_fmul_op	- tbl_fmul_op #
	short		tbl_fmul_op	- tbl_fmul_op #

	short		fmul_zero	- tbl_fmul_op # ZERO x NORM
	short		fmul_zero	- tbl_fmul_op # ZERO x ZERO
	short		fmul_res_operr	- tbl_fmul_op # ZERO x INF
	short		fmul_res_qnan	- tbl_fmul_op # ZERO x QNAN
	short		fmul_zero	- tbl_fmul_op # ZERO x DENORM
	short		fmul_res_snan	- tbl_fmul_op # ZERO x SNAN
	short		tbl_fmul_op	- tbl_fmul_op #
	short		tbl_fmul_op	- tbl_fmul_op #

	short		fmul_inf_dst	- tbl_fmul_op # INF x NORM
	short		fmul_res_operr	- tbl_fmul_op # INF x ZERO
	short		fmul_inf_dst	- tbl_fmul_op # INF x INF
	short		fmul_res_qnan	- tbl_fmul_op # INF x QNAN
	short		fmul_inf_dst	- tbl_fmul_op # INF x DENORM
	short		fmul_res_snan	- tbl_fmul_op # INF x SNAN
	short		tbl_fmul_op	- tbl_fmul_op #
	short		tbl_fmul_op	- tbl_fmul_op #

	short		fmul_res_qnan	- tbl_fmul_op # QNAN x NORM
	short		fmul_res_qnan	- tbl_fmul_op # QNAN x ZERO
	short		fmul_res_qnan	- tbl_fmul_op # QNAN x INF
	short		fmul_res_qnan	- tbl_fmul_op # QNAN x QNAN
	short		fmul_res_qnan	- tbl_fmul_op # QNAN x DENORM
	short		fmul_res_snan	- tbl_fmul_op # QNAN x SNAN
	short		tbl_fmul_op	- tbl_fmul_op #
	short		tbl_fmul_op	- tbl_fmul_op #

# NOTE(review): this fifth row is reached for a DENORM destination tag
# (index = DTAG*8 + STAG); the targets mirror the NORM row — confirm.
	short		fmul_norm	- tbl_fmul_op # NORM x NORM
	short		fmul_zero	- tbl_fmul_op # NORM x ZERO
	short		fmul_inf_src	- tbl_fmul_op # NORM x INF
	short		fmul_res_qnan	- tbl_fmul_op # NORM x QNAN
	short		fmul_norm	- tbl_fmul_op # NORM x DENORM
	short		fmul_res_snan	- tbl_fmul_op # NORM x SNAN
	short		tbl_fmul_op	- tbl_fmul_op #
	short		tbl_fmul_op	- tbl_fmul_op #

	short		fmul_res_snan	- tbl_fmul_op # SNAN x NORM
	short		fmul_res_snan	- tbl_fmul_op # SNAN x ZERO
	short		fmul_res_snan	- tbl_fmul_op # SNAN x INF
	short		fmul_res_snan	- tbl_fmul_op # SNAN x QNAN
	short		fmul_res_snan	- tbl_fmul_op # SNAN x DENORM
	short		fmul_res_snan	- tbl_fmul_op # SNAN x SNAN
	short		tbl_fmul_op	- tbl_fmul_op #
	short		tbl_fmul_op	- tbl_fmul_op #

fmul_res_operr:
	bra.l		res_operr
fmul_res_snan:
	bra.l		res_snan
fmul_res_qnan:
	bra.l		res_qnan

#
# Multiply: (Zero x Zero) || (Zero x norm) || (Zero x denorm)
#
8399*4882a593Smuzhiyun global fmul_zero # global for fsglmul 8400*4882a593Smuzhiyunfmul_zero: 8401*4882a593Smuzhiyun mov.b SRC_EX(%a0),%d0 # exclusive or the signs 8402*4882a593Smuzhiyun mov.b DST_EX(%a1),%d1 8403*4882a593Smuzhiyun eor.b %d0,%d1 8404*4882a593Smuzhiyun bpl.b fmul_zero_p # result ZERO is pos. 8405*4882a593Smuzhiyunfmul_zero_n: 8406*4882a593Smuzhiyun fmov.s &0x80000000,%fp0 # load -ZERO 8407*4882a593Smuzhiyun mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/N 8408*4882a593Smuzhiyun rts 8409*4882a593Smuzhiyunfmul_zero_p: 8410*4882a593Smuzhiyun fmov.s &0x00000000,%fp0 # load +ZERO 8411*4882a593Smuzhiyun mov.b &z_bmask,FPSR_CC(%a6) # set Z 8412*4882a593Smuzhiyun rts 8413*4882a593Smuzhiyun 8414*4882a593Smuzhiyun# 8415*4882a593Smuzhiyun# Multiply: (inf x inf) || (inf x norm) || (inf x denorm) 8416*4882a593Smuzhiyun# 8417*4882a593Smuzhiyun# Note: The j-bit for an infinity is a don't-care. However, to be 8418*4882a593Smuzhiyun# strictly compatible w/ the 68881/882, we make sure to return an 8419*4882a593Smuzhiyun# INF w/ the j-bit set if the input INF j-bit was set. Destination 8420*4882a593Smuzhiyun# INFs take priority. 8421*4882a593Smuzhiyun# 8422*4882a593Smuzhiyun global fmul_inf_dst # global for fsglmul 8423*4882a593Smuzhiyunfmul_inf_dst: 8424*4882a593Smuzhiyun fmovm.x DST(%a1),&0x80 # return INF result in fp0 8425*4882a593Smuzhiyun mov.b SRC_EX(%a0),%d0 # exclusive or the signs 8426*4882a593Smuzhiyun mov.b DST_EX(%a1),%d1 8427*4882a593Smuzhiyun eor.b %d0,%d1 8428*4882a593Smuzhiyun bpl.b fmul_inf_dst_p # result INF is pos. 
8429*4882a593Smuzhiyunfmul_inf_dst_n: 8430*4882a593Smuzhiyun fabs.x %fp0 # clear result sign 8431*4882a593Smuzhiyun fneg.x %fp0 # set result sign 8432*4882a593Smuzhiyun mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N 8433*4882a593Smuzhiyun rts 8434*4882a593Smuzhiyunfmul_inf_dst_p: 8435*4882a593Smuzhiyun fabs.x %fp0 # clear result sign 8436*4882a593Smuzhiyun mov.b &inf_bmask,FPSR_CC(%a6) # set INF 8437*4882a593Smuzhiyun rts 8438*4882a593Smuzhiyun 8439*4882a593Smuzhiyun global fmul_inf_src # global for fsglmul 8440*4882a593Smuzhiyunfmul_inf_src: 8441*4882a593Smuzhiyun fmovm.x SRC(%a0),&0x80 # return INF result in fp0 8442*4882a593Smuzhiyun mov.b SRC_EX(%a0),%d0 # exclusive or the signs 8443*4882a593Smuzhiyun mov.b DST_EX(%a1),%d1 8444*4882a593Smuzhiyun eor.b %d0,%d1 8445*4882a593Smuzhiyun bpl.b fmul_inf_dst_p # result INF is pos. 8446*4882a593Smuzhiyun bra.b fmul_inf_dst_n 8447*4882a593Smuzhiyun 8448*4882a593Smuzhiyun######################################################################### 8449*4882a593Smuzhiyun# XDEF **************************************************************** # 8450*4882a593Smuzhiyun# fin(): emulates the fmove instruction # 8451*4882a593Smuzhiyun# fsin(): emulates the fsmove instruction # 8452*4882a593Smuzhiyun# fdin(): emulates the fdmove instruction # 8453*4882a593Smuzhiyun# # 8454*4882a593Smuzhiyun# XREF **************************************************************** # 8455*4882a593Smuzhiyun# norm() - normalize mantissa for EXOP on denorm # 8456*4882a593Smuzhiyun# scale_to_zero_src() - scale src exponent to zero # 8457*4882a593Smuzhiyun# ovf_res() - return default overflow result # 8458*4882a593Smuzhiyun# unf_res() - return default underflow result # 8459*4882a593Smuzhiyun# res_qnan_1op() - return QNAN result # 8460*4882a593Smuzhiyun# res_snan_1op() - return SNAN result # 8461*4882a593Smuzhiyun# # 8462*4882a593Smuzhiyun# INPUT *************************************************************** # 8463*4882a593Smuzhiyun# a0 = pointer to 
extended precision source operand # 8464*4882a593Smuzhiyun# d0 = round prec/mode # 8465*4882a593Smuzhiyun# # 8466*4882a593Smuzhiyun# OUTPUT ************************************************************** # 8467*4882a593Smuzhiyun# fp0 = result # 8468*4882a593Smuzhiyun# fp1 = EXOP (if exception occurred) # 8469*4882a593Smuzhiyun# # 8470*4882a593Smuzhiyun# ALGORITHM *********************************************************** # 8471*4882a593Smuzhiyun# Handle NANs, infinities, and zeroes as special cases. Divide # 8472*4882a593Smuzhiyun# norms into extended, single, and double precision. # 8473*4882a593Smuzhiyun# Norms can be emulated w/ a regular fmove instruction. For # 8474*4882a593Smuzhiyun# sgl/dbl, must scale exponent and perform an "fmove". Check to see # 8475*4882a593Smuzhiyun# if the result would have overflowed/underflowed. If so, use unf_res() # 8476*4882a593Smuzhiyun# or ovf_res() to return the default result. Also return EXOP if # 8477*4882a593Smuzhiyun# exception is enabled. If no exception, return the default result. # 8478*4882a593Smuzhiyun# Unnorms don't pass through here. 
# 8479*4882a593Smuzhiyun# # 8480*4882a593Smuzhiyun######################################################################### 8481*4882a593Smuzhiyun 8482*4882a593Smuzhiyun global fsin 8483*4882a593Smuzhiyunfsin: 8484*4882a593Smuzhiyun andi.b &0x30,%d0 # clear rnd prec 8485*4882a593Smuzhiyun ori.b &s_mode*0x10,%d0 # insert sgl precision 8486*4882a593Smuzhiyun bra.b fin 8487*4882a593Smuzhiyun 8488*4882a593Smuzhiyun global fdin 8489*4882a593Smuzhiyunfdin: 8490*4882a593Smuzhiyun andi.b &0x30,%d0 # clear rnd prec 8491*4882a593Smuzhiyun ori.b &d_mode*0x10,%d0 # insert dbl precision 8492*4882a593Smuzhiyun 8493*4882a593Smuzhiyun global fin 8494*4882a593Smuzhiyunfin: 8495*4882a593Smuzhiyun mov.l %d0,L_SCR3(%a6) # store rnd info 8496*4882a593Smuzhiyun 8497*4882a593Smuzhiyun mov.b STAG(%a6),%d1 # fetch src optype tag 8498*4882a593Smuzhiyun bne.w fin_not_norm # optimize on non-norm input 8499*4882a593Smuzhiyun 8500*4882a593Smuzhiyun# 8501*4882a593Smuzhiyun# FP MOVE IN: NORMs and DENORMs ONLY! 8502*4882a593Smuzhiyun# 8503*4882a593Smuzhiyunfin_norm: 8504*4882a593Smuzhiyun andi.b &0xc0,%d0 # is precision extended? 8505*4882a593Smuzhiyun bne.w fin_not_ext # no, so go handle dbl or sgl 8506*4882a593Smuzhiyun 8507*4882a593Smuzhiyun# 8508*4882a593Smuzhiyun# precision selected is extended. so...we cannot get an underflow 8509*4882a593Smuzhiyun# or overflow because of rounding to the correct precision. so... 8510*4882a593Smuzhiyun# skip the scaling and unscaling... 8511*4882a593Smuzhiyun# 8512*4882a593Smuzhiyun tst.b SRC_EX(%a0) # is the operand negative? 
8513*4882a593Smuzhiyun bpl.b fin_norm_done # no 8514*4882a593Smuzhiyun bset &neg_bit,FPSR_CC(%a6) # yes, so set 'N' ccode bit 8515*4882a593Smuzhiyunfin_norm_done: 8516*4882a593Smuzhiyun fmovm.x SRC(%a0),&0x80 # return result in fp0 8517*4882a593Smuzhiyun rts 8518*4882a593Smuzhiyun 8519*4882a593Smuzhiyun# 8520*4882a593Smuzhiyun# for an extended precision DENORM, the UNFL exception bit is set 8521*4882a593Smuzhiyun# the accrued bit is NOT set in this instance(no inexactness!) 8522*4882a593Smuzhiyun# 8523*4882a593Smuzhiyunfin_denorm: 8524*4882a593Smuzhiyun andi.b &0xc0,%d0 # is precision extended? 8525*4882a593Smuzhiyun bne.w fin_not_ext # no, so go handle dbl or sgl 8526*4882a593Smuzhiyun 8527*4882a593Smuzhiyun bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit 8528*4882a593Smuzhiyun tst.b SRC_EX(%a0) # is the operand negative? 8529*4882a593Smuzhiyun bpl.b fin_denorm_done # no 8530*4882a593Smuzhiyun bset &neg_bit,FPSR_CC(%a6) # yes, so set 'N' ccode bit 8531*4882a593Smuzhiyunfin_denorm_done: 8532*4882a593Smuzhiyun fmovm.x SRC(%a0),&0x80 # return result in fp0 8533*4882a593Smuzhiyun btst &unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled? 8534*4882a593Smuzhiyun bne.b fin_denorm_unfl_ena # yes 8535*4882a593Smuzhiyun rts 8536*4882a593Smuzhiyun 8537*4882a593Smuzhiyun# 8538*4882a593Smuzhiyun# the input is an extended DENORM and underflow is enabled in the FPCR. 8539*4882a593Smuzhiyun# normalize the mantissa and add the bias of 0x6000 to the resulting negative 8540*4882a593Smuzhiyun# exponent and insert back into the operand. 
#
fin_denorm_unfl_ena:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
	bsr.l		norm			# normalize result
	neg.w		%d0			# new exponent = -(shft val)
	addi.w		&0x6000,%d0		# add new bias to exponent
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
	andi.w		&0x8000,%d1		# keep old sign
	andi.w		&0x7fff,%d0		# clear sign position
	or.w		%d1,%d0			# concat new exp,old sign
	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	rts

#
# operand is to be rounded to single or double precision
#
fin_not_ext:
	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
	bne.b		fin_dbl

#
# operand is to be rounded to single precision
#
fin_sgl:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l		scale_to_zero_src	# calculate scale factor

	cmpi.l		%d0,&0x3fff-0x3f80	# will move in underflow?
	bge.w		fin_sd_unfl		# yes; go handle underflow
	cmpi.l		%d0,&0x3fff-0x407e	# will move in overflow?
	beq.w		fin_sd_may_ovfl		# maybe; go check
	blt.w		fin_sd_ovfl		# yes; go handle overflow

#
# operand will NOT overflow or underflow when moved into the fp reg file
#
fin_sd_normal:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fmov.x		FP_SCR0(%a6),%fp0	# perform move

	fmov.l		%fpsr,%d1		# save FPSR
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

fin_sd_normal_exit:
	mov.l		%d2,-(%sp)		# save d2
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
	mov.w		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# add scale factor
	andi.w		&0x8000,%d2		# keep old sign
	or.w		%d1,%d2			# concat old sign,new exponent
	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts

#
# operand is to be rounded to double precision
#
fin_dbl:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l		scale_to_zero_src	# calculate scale factor

	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
	bge.w		fin_sd_unfl		# yes; go handle underflow
	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
	beq.w		fin_sd_may_ovfl		# maybe; go check
	blt.w		fin_sd_ovfl		# yes; go handle overflow
	bra.w		fin_sd_normal		# no; go handle normalized op

#
# operand WILL underflow when moved in to the fp register file
#
fin_sd_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	tst.b		FP_SCR0_EX(%a6)		# is operand negative?
	bpl.b		fin_sd_unfl_tst
	bset		&neg_bit,FPSR_CC(%a6)	# set 'N' ccode bit

# if underflow or inexact is enabled, then go calculate the EXOP first.
fin_sd_unfl_tst:
	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fin_sd_unfl_ena		# yes

fin_sd_unfl_dis:
	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# unf_res may have set 'Z'
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	rts

#
# operand will underflow AND underflow or inexact is enabled.
# Therefore, we must return the result rounded to extended precision.
#
fin_sd_unfl_ena:
	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent

	mov.l		%d2,-(%sp)		# save d2
	mov.w		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# subtract scale factor
	andi.w		&0x8000,%d2		# extract old sign
	addi.l		&0x6000,%d1		# add new bias
	andi.w		&0x7fff,%d1
	or.w		%d1,%d2			# concat old sign,new exp
	mov.w		%d2,FP_SCR1_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
	mov.l		(%sp)+,%d2		# restore d2
	bra.b		fin_sd_unfl_dis

#
# operand WILL overflow.
#
fin_sd_ovfl:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fmov.x		FP_SCR0(%a6),%fp0	# perform move

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# save FPSR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

fin_sd_ovfl_tst:
	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fin_sd_ovfl_ena		# yes

#
# OVFL is not enabled; therefore, we must create the default result by
# calling ovf_res().
#
fin_sd_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	rts

#
# OVFL is enabled.
# the INEX2 bit has already been updated by the round to the correct precision.
# now, round to extended(and don't alter the FPSR).
#
fin_sd_ovfl_ena:
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# add scale factor
	sub.l		&0x6000,%d1		# subtract bias
	andi.w		&0x7fff,%d1
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.b		fin_sd_ovfl_dis

#
# the move in MAY overflow. so...
#
fin_sd_may_ovfl:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fmov.x		FP_SCR0(%a6),%fp0	# perform the move

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
	fbge.w		fin_sd_ovfl_tst		# yes; overflow has occurred

# no, it didn't overflow; we have correct result
	bra.w		fin_sd_normal_exit

##########################################################################

#
# operand is not a NORM: check its optype and branch accordingly
#
fin_not_norm:
	cmpi.b		%d1,&DENORM		# weed out DENORM
	beq.w		fin_denorm
	cmpi.b		%d1,&SNAN		# weed out SNANs
	beq.l		res_snan_1op
	cmpi.b		%d1,&QNAN		# weed out QNANs
	beq.l		res_qnan_1op

#
# do the fmove in; at this point, only possible ops are ZERO and INF.
# use fmov to determine ccodes.
# prec:mode should be zero at this point but it won't affect answer anyways.
#
	fmov.x		SRC(%a0),%fp0		# do fmove in
	fmov.l		%fpsr,%d0		# no exceptions possible
	rol.l		&0x8,%d0		# put ccodes in lo byte
	mov.b		%d0,FPSR_CC(%a6)	# insert correct ccodes
	rts

#########################################################################
# XDEF **************************************************************** #
#	fdiv(): emulates the fdiv instruction				#
#	fsdiv(): emulates the fsdiv instruction				#
#	fddiv(): emulates the fddiv instruction				#
#									#
# XREF **************************************************************** #
#	scale_to_zero_src() - scale src exponent to zero		#
#	scale_to_zero_dst() - scale dst exponent to zero		#
#	unf_res() - return default underflow result			#
#	ovf_res() - return default overflow result			#
#	res_qnan() - return QNAN result					#
#	res_snan() - return SNAN result					#
#									#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision source operand		#
#	a1 = pointer to extended precision destination operand		#
#	d0 = rnd prec,mode						#
#									#
# OUTPUT ************************************************************** #
#	fp0 = result							#
#	fp1 = EXOP (if exception occurred)				#
#									#
# ALGORITHM *********************************************************** #
#	Handle NANs, infinities, and zeroes as special cases. Divide	#
# norms/denorms into ext/sgl/dbl precision.				#
#	For norms/denorms, scale the exponents such that a divide	#
# instruction won't cause an exception. Use the regular fdiv to		#
# compute a result. Check if the regular operands would have taken	#
# an exception. If so, return the default overflow/underflow result	#
# and return the EXOP if exceptions are enabled. Else, scale the	#
# result operand to the proper exponent.				#
#									#
#########################################################################

	align		0x10
tbl_fdiv_unfl:
	long		0x3fff - 0x0000		# ext_unfl
	long		0x3fff - 0x3f81		# sgl_unfl
	long		0x3fff - 0x3c01		# dbl_unfl

tbl_fdiv_ovfl:
	long		0x3fff - 0x7ffe		# ext overflow exponent
	long		0x3fff - 0x407e		# sgl overflow exponent
	long		0x3fff - 0x43fe		# dbl overflow exponent

	global		fsdiv
fsdiv:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&s_mode*0x10,%d0	# insert sgl prec
	bra.b		fdiv

	global		fddiv
fddiv:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&d_mode*0x10,%d0	# insert dbl prec

	global		fdiv
fdiv:
	mov.l		%d0,L_SCR3(%a6)		# store rnd info

	clr.w		%d1
	mov.b		DTAG(%a6),%d1
	lsl.b		&0x3,%d1
	or.b		STAG(%a6),%d1		# combine src tags

	bne.w		fdiv_not_norm		# optimize on non-norm input

#
# DIVIDE: NORMs and DENORMs ONLY!
#
fdiv_norm:
	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	bsr.l		scale_to_zero_src	# scale src exponent
	mov.l		%d0,-(%sp)		# save scale factor 1

	bsr.l		scale_to_zero_dst	# scale dst exponent

	neg.l		(%sp)			# SCALE FACTOR = scale1 - scale2
	add.l		%d0,(%sp)

	mov.w		2+L_SCR3(%a6),%d1	# fetch precision
	lsr.b		&0x6,%d1		# shift to lo bits
	mov.l		(%sp)+,%d0		# load S.F.
	cmp.l		%d0,(tbl_fdiv_ovfl.b,%pc,%d1.w*4) # will result overflow?
	ble.w		fdiv_may_ovfl		# result will overflow

	cmp.l		%d0,(tbl_fdiv_unfl.w,%pc,%d1.w*4) # will result underflow?
	beq.w		fdiv_may_unfl		# maybe
	bgt.w		fdiv_unfl		# yes; go handle underflow

fdiv_normal:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fdiv.x		FP_SCR0(%a6),%fp0	# perform divide

	fmov.l		%fpsr,%d1		# save FPSR
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

fdiv_normal_exit:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store result on stack
	mov.l		%d2,-(%sp)		# store d2
	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# add scale factor
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts

tbl_fdiv_ovfl2:
	long		0x7fff
	long		0x407f
	long		0x43ff

fdiv_no_ovfl:
	mov.l		(%sp)+,%d0		# restore scale factor
	bra.b		fdiv_normal_exit

# (fdiv_may_ovfl continues past this view)
fdiv_may_ovfl:
	mov.l		%d0,-(%sp)		# save scale factor

	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
8907*4882a593Smuzhiyun 8908*4882a593Smuzhiyun fmov.l L_SCR3(%a6),%fpcr # set FPCR 8909*4882a593Smuzhiyun fmov.l &0x0,%fpsr # set FPSR 8910*4882a593Smuzhiyun 8911*4882a593Smuzhiyun fdiv.x FP_SCR0(%a6),%fp0 # execute divide 8912*4882a593Smuzhiyun 8913*4882a593Smuzhiyun fmov.l %fpsr,%d0 8914*4882a593Smuzhiyun fmov.l &0x0,%fpcr 8915*4882a593Smuzhiyun 8916*4882a593Smuzhiyun or.l %d0,USER_FPSR(%a6) # save INEX,N 8917*4882a593Smuzhiyun 8918*4882a593Smuzhiyun fmovm.x &0x01,-(%sp) # save result to stack 8919*4882a593Smuzhiyun mov.w (%sp),%d0 # fetch new exponent 8920*4882a593Smuzhiyun add.l &0xc,%sp # clear result from stack 8921*4882a593Smuzhiyun andi.l &0x7fff,%d0 # strip sign 8922*4882a593Smuzhiyun sub.l (%sp),%d0 # add scale factor 8923*4882a593Smuzhiyun cmp.l %d0,(tbl_fdiv_ovfl2.b,%pc,%d1.w*4) 8924*4882a593Smuzhiyun blt.b fdiv_no_ovfl 8925*4882a593Smuzhiyun mov.l (%sp)+,%d0 8926*4882a593Smuzhiyun 8927*4882a593Smuzhiyunfdiv_ovfl_tst: 8928*4882a593Smuzhiyun or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex 8929*4882a593Smuzhiyun 8930*4882a593Smuzhiyun mov.b FPCR_ENABLE(%a6),%d1 8931*4882a593Smuzhiyun andi.b &0x13,%d1 # is OVFL or INEX enabled? 8932*4882a593Smuzhiyun bne.b fdiv_ovfl_ena # yes 8933*4882a593Smuzhiyun 8934*4882a593Smuzhiyunfdiv_ovfl_dis: 8935*4882a593Smuzhiyun btst &neg_bit,FPSR_CC(%a6) # is result negative? 8936*4882a593Smuzhiyun sne %d1 # set sign param accordingly 8937*4882a593Smuzhiyun mov.l L_SCR3(%a6),%d0 # pass prec:rnd 8938*4882a593Smuzhiyun bsr.l ovf_res # calculate default result 8939*4882a593Smuzhiyun or.b %d0,FPSR_CC(%a6) # set INF if applicable 8940*4882a593Smuzhiyun fmovm.x (%a0),&0x80 # return default result in fp0 8941*4882a593Smuzhiyun rts 8942*4882a593Smuzhiyun 8943*4882a593Smuzhiyunfdiv_ovfl_ena: 8944*4882a593Smuzhiyun mov.l L_SCR3(%a6),%d1 8945*4882a593Smuzhiyun andi.b &0xc0,%d1 # is precision extended? 
8946*4882a593Smuzhiyun bne.b fdiv_ovfl_ena_sd # no, do sgl or dbl 8947*4882a593Smuzhiyun 8948*4882a593Smuzhiyunfdiv_ovfl_ena_cont: 8949*4882a593Smuzhiyun fmovm.x &0x80,FP_SCR0(%a6) # move result to stack 8950*4882a593Smuzhiyun 8951*4882a593Smuzhiyun mov.l %d2,-(%sp) # save d2 8952*4882a593Smuzhiyun mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp} 8953*4882a593Smuzhiyun mov.w %d1,%d2 # make a copy 8954*4882a593Smuzhiyun andi.l &0x7fff,%d1 # strip sign 8955*4882a593Smuzhiyun sub.l %d0,%d1 # add scale factor 8956*4882a593Smuzhiyun subi.l &0x6000,%d1 # subtract bias 8957*4882a593Smuzhiyun andi.w &0x7fff,%d1 # clear sign bit 8958*4882a593Smuzhiyun andi.w &0x8000,%d2 # keep old sign 8959*4882a593Smuzhiyun or.w %d2,%d1 # concat old sign,new exp 8960*4882a593Smuzhiyun mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent 8961*4882a593Smuzhiyun mov.l (%sp)+,%d2 # restore d2 8962*4882a593Smuzhiyun fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1 8963*4882a593Smuzhiyun bra.b fdiv_ovfl_dis 8964*4882a593Smuzhiyun 8965*4882a593Smuzhiyunfdiv_ovfl_ena_sd: 8966*4882a593Smuzhiyun fmovm.x FP_SCR1(%a6),&0x80 # load dst operand 8967*4882a593Smuzhiyun 8968*4882a593Smuzhiyun mov.l L_SCR3(%a6),%d1 8969*4882a593Smuzhiyun andi.b &0x30,%d1 # keep rnd mode 8970*4882a593Smuzhiyun fmov.l %d1,%fpcr # set FPCR 8971*4882a593Smuzhiyun 8972*4882a593Smuzhiyun fdiv.x FP_SCR0(%a6),%fp0 # execute divide 8973*4882a593Smuzhiyun 8974*4882a593Smuzhiyun fmov.l &0x0,%fpcr # clear FPCR 8975*4882a593Smuzhiyun bra.b fdiv_ovfl_ena_cont 8976*4882a593Smuzhiyun 8977*4882a593Smuzhiyunfdiv_unfl: 8978*4882a593Smuzhiyun bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit 8979*4882a593Smuzhiyun 8980*4882a593Smuzhiyun fmovm.x FP_SCR1(%a6),&0x80 # load dst op 8981*4882a593Smuzhiyun 8982*4882a593Smuzhiyun fmov.l &rz_mode*0x10,%fpcr # set FPCR 8983*4882a593Smuzhiyun fmov.l &0x0,%fpsr # clear FPSR 8984*4882a593Smuzhiyun 8985*4882a593Smuzhiyun fdiv.x FP_SCR0(%a6),%fp0 # execute divide 8986*4882a593Smuzhiyun 8987*4882a593Smuzhiyun fmov.l 
%fpsr,%d1 # save status 8988*4882a593Smuzhiyun fmov.l &0x0,%fpcr # clear FPCR 8989*4882a593Smuzhiyun 8990*4882a593Smuzhiyun or.l %d1,USER_FPSR(%a6) # save INEX2,N 8991*4882a593Smuzhiyun 8992*4882a593Smuzhiyun mov.b FPCR_ENABLE(%a6),%d1 8993*4882a593Smuzhiyun andi.b &0x0b,%d1 # is UNFL or INEX enabled? 8994*4882a593Smuzhiyun bne.b fdiv_unfl_ena # yes 8995*4882a593Smuzhiyun 8996*4882a593Smuzhiyunfdiv_unfl_dis: 8997*4882a593Smuzhiyun fmovm.x &0x80,FP_SCR0(%a6) # store out result 8998*4882a593Smuzhiyun 8999*4882a593Smuzhiyun lea FP_SCR0(%a6),%a0 # pass: result addr 9000*4882a593Smuzhiyun mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode 9001*4882a593Smuzhiyun bsr.l unf_res # calculate default result 9002*4882a593Smuzhiyun or.b %d0,FPSR_CC(%a6) # 'Z' may have been set 9003*4882a593Smuzhiyun fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0 9004*4882a593Smuzhiyun rts 9005*4882a593Smuzhiyun 9006*4882a593Smuzhiyun# 9007*4882a593Smuzhiyun# UNFL is enabled. 9008*4882a593Smuzhiyun# 9009*4882a593Smuzhiyunfdiv_unfl_ena: 9010*4882a593Smuzhiyun fmovm.x FP_SCR1(%a6),&0x40 # load dst op 9011*4882a593Smuzhiyun 9012*4882a593Smuzhiyun mov.l L_SCR3(%a6),%d1 9013*4882a593Smuzhiyun andi.b &0xc0,%d1 # is precision extended? 
9014*4882a593Smuzhiyun bne.b fdiv_unfl_ena_sd # no, sgl or dbl 9015*4882a593Smuzhiyun 9016*4882a593Smuzhiyun fmov.l L_SCR3(%a6),%fpcr # set FPCR 9017*4882a593Smuzhiyun 9018*4882a593Smuzhiyunfdiv_unfl_ena_cont: 9019*4882a593Smuzhiyun fmov.l &0x0,%fpsr # clear FPSR 9020*4882a593Smuzhiyun 9021*4882a593Smuzhiyun fdiv.x FP_SCR0(%a6),%fp1 # execute divide 9022*4882a593Smuzhiyun 9023*4882a593Smuzhiyun fmov.l &0x0,%fpcr # clear FPCR 9024*4882a593Smuzhiyun 9025*4882a593Smuzhiyun fmovm.x &0x40,FP_SCR0(%a6) # save result to stack 9026*4882a593Smuzhiyun mov.l %d2,-(%sp) # save d2 9027*4882a593Smuzhiyun mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp} 9028*4882a593Smuzhiyun mov.l %d1,%d2 # make a copy 9029*4882a593Smuzhiyun andi.l &0x7fff,%d1 # strip sign 9030*4882a593Smuzhiyun andi.w &0x8000,%d2 # keep old sign 9031*4882a593Smuzhiyun sub.l %d0,%d1 # add scale factoer 9032*4882a593Smuzhiyun addi.l &0x6000,%d1 # add bias 9033*4882a593Smuzhiyun andi.w &0x7fff,%d1 9034*4882a593Smuzhiyun or.w %d2,%d1 # concat old sign,new exp 9035*4882a593Smuzhiyun mov.w %d1,FP_SCR0_EX(%a6) # insert new exp 9036*4882a593Smuzhiyun mov.l (%sp)+,%d2 # restore d2 9037*4882a593Smuzhiyun fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1 9038*4882a593Smuzhiyun bra.w fdiv_unfl_dis 9039*4882a593Smuzhiyun 9040*4882a593Smuzhiyunfdiv_unfl_ena_sd: 9041*4882a593Smuzhiyun mov.l L_SCR3(%a6),%d1 9042*4882a593Smuzhiyun andi.b &0x30,%d1 # use only rnd mode 9043*4882a593Smuzhiyun fmov.l %d1,%fpcr # set FPCR 9044*4882a593Smuzhiyun 9045*4882a593Smuzhiyun bra.b fdiv_unfl_ena_cont 9046*4882a593Smuzhiyun 9047*4882a593Smuzhiyun# 9048*4882a593Smuzhiyun# the divide operation MAY underflow: 9049*4882a593Smuzhiyun# 9050*4882a593Smuzhiyunfdiv_may_unfl: 9051*4882a593Smuzhiyun fmovm.x FP_SCR1(%a6),&0x80 # load dst op 9052*4882a593Smuzhiyun 9053*4882a593Smuzhiyun fmov.l L_SCR3(%a6),%fpcr # set FPCR 9054*4882a593Smuzhiyun fmov.l &0x0,%fpsr # clear FPSR 9055*4882a593Smuzhiyun 9056*4882a593Smuzhiyun fdiv.x FP_SCR0(%a6),%fp0 # execute divide 
9057*4882a593Smuzhiyun 9058*4882a593Smuzhiyun fmov.l %fpsr,%d1 # save status 9059*4882a593Smuzhiyun fmov.l &0x0,%fpcr # clear FPCR 9060*4882a593Smuzhiyun 9061*4882a593Smuzhiyun or.l %d1,USER_FPSR(%a6) # save INEX2,N 9062*4882a593Smuzhiyun 9063*4882a593Smuzhiyun fabs.x %fp0,%fp1 # make a copy of result 9064*4882a593Smuzhiyun fcmp.b %fp1,&0x1 # is |result| > 1.b? 9065*4882a593Smuzhiyun fbgt.w fdiv_normal_exit # no; no underflow occurred 9066*4882a593Smuzhiyun fblt.w fdiv_unfl # yes; underflow occurred 9067*4882a593Smuzhiyun 9068*4882a593Smuzhiyun# 9069*4882a593Smuzhiyun# we still don't know if underflow occurred. result is ~ equal to 1. but, 9070*4882a593Smuzhiyun# we don't know if the result was an underflow that rounded up to a 1 9071*4882a593Smuzhiyun# or a normalized number that rounded down to a 1. so, redo the entire 9072*4882a593Smuzhiyun# operation using RZ as the rounding mode to see what the pre-rounded 9073*4882a593Smuzhiyun# result is. this case should be relatively rare. 9074*4882a593Smuzhiyun# 9075*4882a593Smuzhiyun fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1 9076*4882a593Smuzhiyun 9077*4882a593Smuzhiyun mov.l L_SCR3(%a6),%d1 9078*4882a593Smuzhiyun andi.b &0xc0,%d1 # keep rnd prec 9079*4882a593Smuzhiyun ori.b &rz_mode*0x10,%d1 # insert RZ 9080*4882a593Smuzhiyun 9081*4882a593Smuzhiyun fmov.l %d1,%fpcr # set FPCR 9082*4882a593Smuzhiyun fmov.l &0x0,%fpsr # clear FPSR 9083*4882a593Smuzhiyun 9084*4882a593Smuzhiyun fdiv.x FP_SCR0(%a6),%fp1 # execute divide 9085*4882a593Smuzhiyun 9086*4882a593Smuzhiyun fmov.l &0x0,%fpcr # clear FPCR 9087*4882a593Smuzhiyun fabs.x %fp1 # make absolute value 9088*4882a593Smuzhiyun fcmp.b %fp1,&0x1 # is |result| < 1.b? 
9089*4882a593Smuzhiyun fbge.w fdiv_normal_exit # no; no underflow occurred 9090*4882a593Smuzhiyun bra.w fdiv_unfl # yes; underflow occurred 9091*4882a593Smuzhiyun 9092*4882a593Smuzhiyun############################################################################ 9093*4882a593Smuzhiyun 9094*4882a593Smuzhiyun# 9095*4882a593Smuzhiyun# Divide: inputs are not both normalized; what are they? 9096*4882a593Smuzhiyun# 9097*4882a593Smuzhiyunfdiv_not_norm: 9098*4882a593Smuzhiyun mov.w (tbl_fdiv_op.b,%pc,%d1.w*2),%d1 9099*4882a593Smuzhiyun jmp (tbl_fdiv_op.b,%pc,%d1.w*1) 9100*4882a593Smuzhiyun 9101*4882a593Smuzhiyun swbeg &48 9102*4882a593Smuzhiyuntbl_fdiv_op: 9103*4882a593Smuzhiyun short fdiv_norm - tbl_fdiv_op # NORM / NORM 9104*4882a593Smuzhiyun short fdiv_inf_load - tbl_fdiv_op # NORM / ZERO 9105*4882a593Smuzhiyun short fdiv_zero_load - tbl_fdiv_op # NORM / INF 9106*4882a593Smuzhiyun short fdiv_res_qnan - tbl_fdiv_op # NORM / QNAN 9107*4882a593Smuzhiyun short fdiv_norm - tbl_fdiv_op # NORM / DENORM 9108*4882a593Smuzhiyun short fdiv_res_snan - tbl_fdiv_op # NORM / SNAN 9109*4882a593Smuzhiyun short tbl_fdiv_op - tbl_fdiv_op # 9110*4882a593Smuzhiyun short tbl_fdiv_op - tbl_fdiv_op # 9111*4882a593Smuzhiyun 9112*4882a593Smuzhiyun short fdiv_zero_load - tbl_fdiv_op # ZERO / NORM 9113*4882a593Smuzhiyun short fdiv_res_operr - tbl_fdiv_op # ZERO / ZERO 9114*4882a593Smuzhiyun short fdiv_zero_load - tbl_fdiv_op # ZERO / INF 9115*4882a593Smuzhiyun short fdiv_res_qnan - tbl_fdiv_op # ZERO / QNAN 9116*4882a593Smuzhiyun short fdiv_zero_load - tbl_fdiv_op # ZERO / DENORM 9117*4882a593Smuzhiyun short fdiv_res_snan - tbl_fdiv_op # ZERO / SNAN 9118*4882a593Smuzhiyun short tbl_fdiv_op - tbl_fdiv_op # 9119*4882a593Smuzhiyun short tbl_fdiv_op - tbl_fdiv_op # 9120*4882a593Smuzhiyun 9121*4882a593Smuzhiyun short fdiv_inf_dst - tbl_fdiv_op # INF / NORM 9122*4882a593Smuzhiyun short fdiv_inf_dst - tbl_fdiv_op # INF / ZERO 9123*4882a593Smuzhiyun short fdiv_res_operr - tbl_fdiv_op # INF / INF 
9124*4882a593Smuzhiyun short fdiv_res_qnan - tbl_fdiv_op # INF / QNAN 9125*4882a593Smuzhiyun short fdiv_inf_dst - tbl_fdiv_op # INF / DENORM 9126*4882a593Smuzhiyun short fdiv_res_snan - tbl_fdiv_op # INF / SNAN 9127*4882a593Smuzhiyun short tbl_fdiv_op - tbl_fdiv_op # 9128*4882a593Smuzhiyun short tbl_fdiv_op - tbl_fdiv_op # 9129*4882a593Smuzhiyun 9130*4882a593Smuzhiyun short fdiv_res_qnan - tbl_fdiv_op # QNAN / NORM 9131*4882a593Smuzhiyun short fdiv_res_qnan - tbl_fdiv_op # QNAN / ZERO 9132*4882a593Smuzhiyun short fdiv_res_qnan - tbl_fdiv_op # QNAN / INF 9133*4882a593Smuzhiyun short fdiv_res_qnan - tbl_fdiv_op # QNAN / QNAN 9134*4882a593Smuzhiyun short fdiv_res_qnan - tbl_fdiv_op # QNAN / DENORM 9135*4882a593Smuzhiyun short fdiv_res_snan - tbl_fdiv_op # QNAN / SNAN 9136*4882a593Smuzhiyun short tbl_fdiv_op - tbl_fdiv_op # 9137*4882a593Smuzhiyun short tbl_fdiv_op - tbl_fdiv_op # 9138*4882a593Smuzhiyun 9139*4882a593Smuzhiyun short fdiv_norm - tbl_fdiv_op # DENORM / NORM 9140*4882a593Smuzhiyun short fdiv_inf_load - tbl_fdiv_op # DENORM / ZERO 9141*4882a593Smuzhiyun short fdiv_zero_load - tbl_fdiv_op # DENORM / INF 9142*4882a593Smuzhiyun short fdiv_res_qnan - tbl_fdiv_op # DENORM / QNAN 9143*4882a593Smuzhiyun short fdiv_norm - tbl_fdiv_op # DENORM / DENORM 9144*4882a593Smuzhiyun short fdiv_res_snan - tbl_fdiv_op # DENORM / SNAN 9145*4882a593Smuzhiyun short tbl_fdiv_op - tbl_fdiv_op # 9146*4882a593Smuzhiyun short tbl_fdiv_op - tbl_fdiv_op # 9147*4882a593Smuzhiyun 9148*4882a593Smuzhiyun short fdiv_res_snan - tbl_fdiv_op # SNAN / NORM 9149*4882a593Smuzhiyun short fdiv_res_snan - tbl_fdiv_op # SNAN / ZERO 9150*4882a593Smuzhiyun short fdiv_res_snan - tbl_fdiv_op # SNAN / INF 9151*4882a593Smuzhiyun short fdiv_res_snan - tbl_fdiv_op # SNAN / QNAN 9152*4882a593Smuzhiyun short fdiv_res_snan - tbl_fdiv_op # SNAN / DENORM 9153*4882a593Smuzhiyun short fdiv_res_snan - tbl_fdiv_op # SNAN / SNAN 9154*4882a593Smuzhiyun short tbl_fdiv_op - tbl_fdiv_op # 9155*4882a593Smuzhiyun short 
tbl_fdiv_op - tbl_fdiv_op # 9156*4882a593Smuzhiyun 9157*4882a593Smuzhiyunfdiv_res_qnan: 9158*4882a593Smuzhiyun bra.l res_qnan 9159*4882a593Smuzhiyunfdiv_res_snan: 9160*4882a593Smuzhiyun bra.l res_snan 9161*4882a593Smuzhiyunfdiv_res_operr: 9162*4882a593Smuzhiyun bra.l res_operr 9163*4882a593Smuzhiyun 9164*4882a593Smuzhiyun global fdiv_zero_load # global for fsgldiv 9165*4882a593Smuzhiyunfdiv_zero_load: 9166*4882a593Smuzhiyun mov.b SRC_EX(%a0),%d0 # result sign is exclusive 9167*4882a593Smuzhiyun mov.b DST_EX(%a1),%d1 # or of input signs. 9168*4882a593Smuzhiyun eor.b %d0,%d1 9169*4882a593Smuzhiyun bpl.b fdiv_zero_load_p # result is positive 9170*4882a593Smuzhiyun fmov.s &0x80000000,%fp0 # load a -ZERO 9171*4882a593Smuzhiyun mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/N 9172*4882a593Smuzhiyun rts 9173*4882a593Smuzhiyunfdiv_zero_load_p: 9174*4882a593Smuzhiyun fmov.s &0x00000000,%fp0 # load a +ZERO 9175*4882a593Smuzhiyun mov.b &z_bmask,FPSR_CC(%a6) # set Z 9176*4882a593Smuzhiyun rts 9177*4882a593Smuzhiyun 9178*4882a593Smuzhiyun# 9179*4882a593Smuzhiyun# The destination was In Range and the source was a ZERO. The result, 9180*4882a593Smuzhiyun# Therefore, is an INF w/ the proper sign. 9181*4882a593Smuzhiyun# So, determine the sign and return a new INF (w/ the j-bit cleared). 
9182*4882a593Smuzhiyun# 9183*4882a593Smuzhiyun global fdiv_inf_load # global for fsgldiv 9184*4882a593Smuzhiyunfdiv_inf_load: 9185*4882a593Smuzhiyun ori.w &dz_mask+adz_mask,2+USER_FPSR(%a6) # no; set DZ/ADZ 9186*4882a593Smuzhiyun mov.b SRC_EX(%a0),%d0 # load both signs 9187*4882a593Smuzhiyun mov.b DST_EX(%a1),%d1 9188*4882a593Smuzhiyun eor.b %d0,%d1 9189*4882a593Smuzhiyun bpl.b fdiv_inf_load_p # result is positive 9190*4882a593Smuzhiyun fmov.s &0xff800000,%fp0 # make result -INF 9191*4882a593Smuzhiyun mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N 9192*4882a593Smuzhiyun rts 9193*4882a593Smuzhiyunfdiv_inf_load_p: 9194*4882a593Smuzhiyun fmov.s &0x7f800000,%fp0 # make result +INF 9195*4882a593Smuzhiyun mov.b &inf_bmask,FPSR_CC(%a6) # set INF 9196*4882a593Smuzhiyun rts 9197*4882a593Smuzhiyun 9198*4882a593Smuzhiyun# 9199*4882a593Smuzhiyun# The destination was an INF w/ an In Range or ZERO source, the result is 9200*4882a593Smuzhiyun# an INF w/ the proper sign. 9201*4882a593Smuzhiyun# The 68881/882 returns the destination INF w/ the new sign(if the j-bit of the 9202*4882a593Smuzhiyun# dst INF is set, then then j-bit of the result INF is also set). 
9203*4882a593Smuzhiyun# 9204*4882a593Smuzhiyun global fdiv_inf_dst # global for fsgldiv 9205*4882a593Smuzhiyunfdiv_inf_dst: 9206*4882a593Smuzhiyun mov.b DST_EX(%a1),%d0 # load both signs 9207*4882a593Smuzhiyun mov.b SRC_EX(%a0),%d1 9208*4882a593Smuzhiyun eor.b %d0,%d1 9209*4882a593Smuzhiyun bpl.b fdiv_inf_dst_p # result is positive 9210*4882a593Smuzhiyun 9211*4882a593Smuzhiyun fmovm.x DST(%a1),&0x80 # return result in fp0 9212*4882a593Smuzhiyun fabs.x %fp0 # clear sign bit 9213*4882a593Smuzhiyun fneg.x %fp0 # set sign bit 9214*4882a593Smuzhiyun mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/NEG 9215*4882a593Smuzhiyun rts 9216*4882a593Smuzhiyun 9217*4882a593Smuzhiyunfdiv_inf_dst_p: 9218*4882a593Smuzhiyun fmovm.x DST(%a1),&0x80 # return result in fp0 9219*4882a593Smuzhiyun fabs.x %fp0 # return positive INF 9220*4882a593Smuzhiyun mov.b &inf_bmask,FPSR_CC(%a6) # set INF 9221*4882a593Smuzhiyun rts 9222*4882a593Smuzhiyun 9223*4882a593Smuzhiyun######################################################################### 9224*4882a593Smuzhiyun# XDEF **************************************************************** # 9225*4882a593Smuzhiyun# fneg(): emulates the fneg instruction # 9226*4882a593Smuzhiyun# fsneg(): emulates the fsneg instruction # 9227*4882a593Smuzhiyun# fdneg(): emulates the fdneg instruction # 9228*4882a593Smuzhiyun# # 9229*4882a593Smuzhiyun# XREF **************************************************************** # 9230*4882a593Smuzhiyun# norm() - normalize a denorm to provide EXOP # 9231*4882a593Smuzhiyun# scale_to_zero_src() - scale sgl/dbl source exponent # 9232*4882a593Smuzhiyun# ovf_res() - return default overflow result # 9233*4882a593Smuzhiyun# unf_res() - return default underflow result # 9234*4882a593Smuzhiyun# res_qnan_1op() - return QNAN result # 9235*4882a593Smuzhiyun# res_snan_1op() - return SNAN result # 9236*4882a593Smuzhiyun# # 9237*4882a593Smuzhiyun# INPUT *************************************************************** # 9238*4882a593Smuzhiyun# 
#	a0 = pointer to extended precision source operand		#
#	d0 = rnd prec,mode						#
#									#
# OUTPUT **************************************************************	#
#	fp0 = result							#
#	fp1 = EXOP (if exception occurred)				#
#									#
# ALGORITHM ***********************************************************	#
#	Handle NANs, zeroes, and infinities as special cases. Separate	#
# norms/denorms into ext/sgl/dbl precisions. Extended precision can be	#
# emulated by simply setting sign bit. Sgl/dbl operands must be scaled	#
# and an actual fneg performed to see if overflow/underflow would have	#
# occurred. If so, return default underflow/overflow result. Else,	#
# scale the result exponent and return result. FPSR gets set based on	#
# the result value.							#
#									#
#########################################################################

# fsneg: force single-precision rounding, then share the fneg body.
	global		fsneg
fsneg:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&s_mode*0x10,%d0	# insert sgl precision
	bra.b		fneg

# fdneg: force double-precision rounding, then fall through into fneg.
	global		fdneg
fdneg:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&d_mode*0x10,%d0	# insert dbl prec

	global		fneg
fneg:
	mov.l		%d0,L_SCR3(%a6)		# store rnd info
	mov.b		STAG(%a6),%d1
	bne.w		fneg_not_norm		# optimize on non-norm input

#
# NEGATE SIGN : norms and denorms ONLY!
#
fneg_norm:
	andi.b		&0xc0,%d0		# is precision extended?
	bne.w		fneg_not_ext		# no; go handle sgl or dbl

#
# precision selected is extended. so...we can not get an underflow
# or overflow because of rounding to the correct precision. so...
# skip the scaling and unscaling...
#
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	mov.w		SRC_EX(%a0),%d0
	eori.w		&0x8000,%d0		# negate sign
	bpl.b		fneg_norm_load		# sign is positive
	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
fneg_norm_load:
	mov.w		%d0,FP_SCR0_EX(%a6)
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts

#
# for an extended precision DENORM, the UNFL exception bit is set
# the accrued bit is NOT set in this instance(no inexactness!)
#
fneg_denorm:
	andi.b		&0xc0,%d0		# is precision extended?
	bne.b		fneg_not_ext		# no; go handle sgl or dbl

	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	mov.w		SRC_EX(%a0),%d0
	eori.w		&0x8000,%d0		# negate sign
	bpl.b		fneg_denorm_done	# no; result is positive
	mov.b		&neg_bmask,FPSR_CC(%a6)	# yes, set 'N' ccode bit
fneg_denorm_done:
	mov.w		%d0,FP_SCR0_EX(%a6)
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0

	btst		&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
	bne.b		fneg_ext_unfl_ena	# yes
	rts

#
# the input is an extended DENORM and underflow is enabled in the FPCR.
# normalize the mantissa and add the bias of 0x6000 to the resulting negative
# exponent and insert back into the operand.
#
fneg_ext_unfl_ena:
	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
	bsr.l		norm			# normalize result
	neg.w		%d0			# new exponent = -(shft val)
	addi.w		&0x6000,%d0		# add new bias to exponent
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
	andi.w		&0x8000,%d1		# keep old sign
	andi.w		&0x7fff,%d0		# clear sign position
	or.w		%d1,%d0			# concat old sign, new exponent
	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	rts

#
# operand is either single or double
#
fneg_not_ext:
	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
	bne.b		fneg_dbl

#
# operand is to be rounded to single precision
#
fneg_sgl:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l		scale_to_zero_src	# calculate scale factor

	cmpi.l		%d0,&0x3fff-0x3f80	# will move in underflow?
	bge.w		fneg_sd_unfl		# yes; go handle underflow
	cmpi.l		%d0,&0x3fff-0x407e	# will move in overflow?
	beq.w		fneg_sd_may_ovfl	# maybe; go check
	blt.w		fneg_sd_ovfl		# yes; go handle overflow

#
# operand will NOT overflow or underflow when moved in to the fp reg file
#
fneg_sd_normal:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fneg.x		FP_SCR0(%a6),%fp0	# perform negation

	fmov.l		%fpsr,%d1		# save FPSR
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

fneg_sd_normal_exit:
# Undo the pre-negation scaling: subtract the scale factor (d0) from
# the result exponent while preserving the sign bit.
	mov.l		%d2,-(%sp)		# save d2
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
	mov.w		FP_SCR0_EX(%a6),%d1	# load sgn,exp
	mov.w		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# apply scale factor
	andi.w		&0x8000,%d2		# keep old sign
	or.w		%d1,%d2			# concat old sign,new exp
	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts

#
# operand is to be rounded to double precision
#
fneg_dbl:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l		scale_to_zero_src	# calculate scale factor

	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
	bge.b		fneg_sd_unfl		# yes; go handle underflow
	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
	beq.w		fneg_sd_may_ovfl	# maybe; go check
	blt.w		fneg_sd_ovfl		# yes; go handle overflow
	bra.w		fneg_sd_normal		# no; go handle normalized op

#
# operand WILL underflow when moved in to the fp register file
#
fneg_sd_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	eori.b		&0x80,FP_SCR0_EX(%a6)	# negate sign
	bpl.b		fneg_sd_unfl_tst
	bset		&neg_bit,FPSR_CC(%a6)	# set 'N' ccode bit

# if underflow or inexact is enabled, go calculate EXOP first.
fneg_sd_unfl_tst:
	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fneg_sd_unfl_ena	# yes

fneg_sd_unfl_dis:
	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# unf_res may have set 'Z'
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	rts

#
# operand will underflow AND underflow is enabled.
# Therefore, we must return the result rounded to extended precision.
#
fneg_sd_unfl_ena:
	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent

	mov.l		%d2,-(%sp)		# save d2
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# subtract scale factor
	addi.l		&0x6000,%d1		# add new bias
	andi.w		&0x7fff,%d1
	or.w		%d2,%d1			# concat new sign,new exp
	mov.w		%d1,FP_SCR1_EX(%a6)	# insert new exp
	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
	mov.l		(%sp)+,%d2		# restore d2
	bra.b		fneg_sd_unfl_dis

#
# operand WILL overflow.
#
fneg_sd_ovfl:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fneg.x		FP_SCR0(%a6),%fp0	# perform negation

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# save FPSR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

fneg_sd_ovfl_tst:
	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fneg_sd_ovfl_ena	# yes

#
# OVFL is not enabled; therefore, we must create the default result by
# calling ovf_res().
#
fneg_sd_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	rts

#
# OVFL is enabled.
# the INEX2 bit has already been updated by the round to the correct precision.
# now, round to extended(and don't alter the FPSR).
#
fneg_sd_ovfl_ena:
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# apply scale factor
	subi.l		&0x6000,%d1		# subtract bias
	andi.w		&0x7fff,%d1
	or.w		%d2,%d1			# concat sign,exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	mov.l		(%sp)+,%d2		# restore d2
	bra.b		fneg_sd_ovfl_dis

#
# the move in MAY overflow. so...
#
fneg_sd_may_ovfl:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fneg.x		FP_SCR0(%a6),%fp0	# perform negation

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
	fbge.w		fneg_sd_ovfl_tst	# yes; overflow has occurred

# no, it didn't overflow; we have correct result
	bra.w		fneg_sd_normal_exit

##########################################################################

#
# input is not normalized; what is it?
#
fneg_not_norm:
	cmpi.b		%d1,&DENORM		# weed out DENORM
	beq.w		fneg_denorm
	cmpi.b		%d1,&SNAN		# weed out SNAN
	beq.l		res_snan_1op
	cmpi.b		%d1,&QNAN		# weed out QNAN
	beq.l		res_qnan_1op

#
# do the fneg; at this point, only possible ops are ZERO and INF.
# use fneg to determine ccodes.
# prec:mode should be zero at this point but it won't affect answer anyways.
#
	fneg.x		SRC_EX(%a0),%fp0	# do fneg
	fmov.l		%fpsr,%d0		# fetch resulting ccodes
	rol.l		&0x8,%d0		# put ccodes in lo byte
	mov.b		%d0,FPSR_CC(%a6)	# insert correct ccodes
	rts

#########################################################################
# XDEF **************************************************************** #
#	ftst(): emulates the ftst instruction				#
#									#
# XREF **************************************************************** #
#	res_{s,q}nan_1op() - set NAN result for monadic instruction	#
#									#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision source operand		#
#									#
# OUTPUT ************************************************************** #
#	none								#
#									#
# ALGORITHM *********************************************************** #
#	Check the source operand tag (STAG) and set the FPSR condition	#
# codes according to the operand type and sign.				#
#									#
#########################################################################

	global		ftst
ftst:
	mov.b		STAG(%a6),%d1
	bne.b		ftst_not_norm		# optimize on non-norm input

#
# Norm:
#
ftst_norm:
	tst.b		SRC_EX(%a0)		# is operand negative?
	bmi.b		ftst_norm_m		# yes
	rts
ftst_norm_m:
	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
	rts

#
# input is not normalized; what is it?
#
ftst_not_norm:
	cmpi.b		%d1,&ZERO		# weed out ZERO
	beq.b		ftst_zero
	cmpi.b		%d1,&INF		# weed out INF
	beq.b		ftst_inf
	cmpi.b		%d1,&SNAN		# weed out SNAN
	beq.l		res_snan_1op
	cmpi.b		%d1,&QNAN		# weed out QNAN
	beq.l		res_qnan_1op
	# fall through: only DENORM remains

#
# Denorm:
#
ftst_denorm:
	tst.b		SRC_EX(%a0)		# is operand negative?
	bmi.b		ftst_denorm_m		# yes
	rts
ftst_denorm_m:
	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
	rts

#
# Infinity:
#
ftst_inf:
	tst.b		SRC_EX(%a0)		# is operand negative?
	bmi.b		ftst_inf_m		# yes
ftst_inf_p:
	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
	rts
ftst_inf_m:
	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'I','N' ccode bits
	rts

#
# Zero:
#
ftst_zero:
	tst.b		SRC_EX(%a0)		# is operand negative?
	bmi.b		ftst_zero_m		# yes
ftst_zero_p:
	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
	rts
ftst_zero_m:
	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
	rts

#########################################################################
# XDEF **************************************************************** #
#	fint(): emulates the fint instruction				#
#									#
# XREF **************************************************************** #
#	res_{s,q}nan_1op() - set NAN result for monadic operation	#
#									#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision source operand		#
#	d0 = round precision/mode					#
#									#
# OUTPUT ************************************************************** #
#	fp0 = result							#
#									#
# ALGORITHM *********************************************************** #
#	Separate according to operand type. Unnorms don't pass through	#
# here. For norms, load the rounding mode/prec, execute a "fint", then	#
# store the resulting FPSR bits.					#
#	For denorms, force the j-bit to a one and do the same as for	#
# norms. Denorms are so low that the answer will either be a zero or a	#
# one.									#
#	For zeroes/infs/NANs, return the same while setting the FPSR	#
# as appropriate.							#
#									#
#########################################################################

	global		fint
fint:
	mov.b		STAG(%a6),%d1
	bne.b		fint_not_norm		# optimize on non-norm input

#
# Norm:
#
fint_norm:
	andi.b		&0x30,%d0		# set prec = ext; keep rnd mode

	fmov.l		%d0,%fpcr		# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fint.x		SRC(%a0),%fp0		# execute fint

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d0		# save FPSR
	or.l		%d0,USER_FPSR(%a6)	# set exception bits

	rts

#
# input is not normalized; what is it?
#
fint_not_norm:
	cmpi.b		%d1,&ZERO		# weed out ZERO
	beq.b		fint_zero
	cmpi.b		%d1,&INF		# weed out INF
	beq.b		fint_inf
	cmpi.b		%d1,&DENORM		# weed out DENORM
	beq.b		fint_denorm
	cmpi.b		%d1,&SNAN		# weed out SNAN
	beq.l		res_snan_1op
	bra.l		res_qnan_1op		# weed out QNAN

#
# Denorm:
#
# for DENORMs, the result will be either (+/-)ZERO or (+/-)1.
# also, the INEX2 and AINEX exception bits will be set.
# so, we could either set these manually or force the DENORM
# to a very small NORM and ship it to the NORM routine.
# I do the latter.
#
fint_denorm:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
	mov.b		&0x80,FP_SCR0_HI(%a6)	# force DENORM ==> small NORM
	lea		FP_SCR0(%a6),%a0	# pass ptr to fabricated NORM
	bra.b		fint_norm

#
# Zero:
#
fint_zero:
	tst.b		SRC_EX(%a0)		# is ZERO negative?
	bmi.b		fint_zero_m		# yes
fint_zero_p:
	fmov.s		&0x00000000,%fp0	# return +ZERO in fp0
	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
	rts
fint_zero_m:
	fmov.s		&0x80000000,%fp0	# return -ZERO in fp0
	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
	rts

#
# Infinity:
#
fint_inf:
	fmovm.x		SRC(%a0),&0x80		# return result in fp0
	tst.b		SRC_EX(%a0)		# is INF negative?
	bmi.b		fint_inf_m		# yes
fint_inf_p:
	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
	rts
fint_inf_m:
	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
	rts

#########################################################################
# XDEF **************************************************************** #
#	fintrz(): emulates the fintrz instruction			#
#									#
# XREF **************************************************************** #
#	res_{s,q}nan_1op() - set NAN result for monadic operation	#
#									#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision source operand		#
#	d0 = round precision/mode					#
#									#
# OUTPUT ************************************************************** #
#	fp0 = result							#
#									#
# ALGORITHM *********************************************************** #
#	Separate according to operand type. Unnorms don't pass through	#
# here. For norms, load the rounding mode/prec, execute a "fintrz",	#
# then store the resulting FPSR bits.					#
#	For denorms, force the j-bit to a one and do the same as for	#
# norms. Denorms are so low that the answer will either be a zero or a	#
# one.									#
#	For zeroes/infs/NANs, return the same while setting the FPSR	#
# as appropriate.							#
#									#
#########################################################################

	global		fintrz
fintrz:
	mov.b		STAG(%a6),%d1
	bne.b		fintrz_not_norm		# optimize on non-norm input

#
# Norm:
#
# NOTE(review): unlike fint_norm, no FPCR is loaded here -- fintrz always
# truncates toward zero, so the user rounding mode is irrelevant.
#
fintrz_norm:
	fmov.l		&0x0,%fpsr		# clear FPSR

	fintrz.x	SRC(%a0),%fp0		# execute fintrz

	fmov.l		%fpsr,%d0		# save FPSR
	or.l		%d0,USER_FPSR(%a6)	# set exception bits

	rts

#
# input is not normalized; what is it?
#
fintrz_not_norm:
	cmpi.b		%d1,&ZERO		# weed out ZERO
	beq.b		fintrz_zero
	cmpi.b		%d1,&INF		# weed out INF
	beq.b		fintrz_inf
	cmpi.b		%d1,&DENORM		# weed out DENORM
	beq.b		fintrz_denorm
	cmpi.b		%d1,&SNAN		# weed out SNAN
	beq.l		res_snan_1op
	bra.l		res_qnan_1op		# weed out QNAN

#
# Denorm:
#
# for DENORMs, the result will be (+/-)ZERO.
# also, the INEX2 and AINEX exception bits will be set.
# so, we could either set these manually or force the DENORM
# to a very small NORM and ship it to the NORM routine.
# I do the latter.
#
fintrz_denorm:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
	mov.b		&0x80,FP_SCR0_HI(%a6)	# force DENORM ==> small NORM
	lea		FP_SCR0(%a6),%a0	# pass ptr to fabricated NORM
	bra.b		fintrz_norm

#
# Zero:
#
fintrz_zero:
	tst.b		SRC_EX(%a0)		# is ZERO negative?
	bmi.b		fintrz_zero_m		# yes
fintrz_zero_p:
	fmov.s		&0x00000000,%fp0	# return +ZERO in fp0
	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
	rts
fintrz_zero_m:
	fmov.s		&0x80000000,%fp0	# return -ZERO in fp0
	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
	rts

#
# Infinity:
#
fintrz_inf:
	fmovm.x		SRC(%a0),&0x80		# return result in fp0
	tst.b		SRC_EX(%a0)		# is INF negative?
	bmi.b		fintrz_inf_m		# yes
fintrz_inf_p:
	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
	rts
fintrz_inf_m:
	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
	rts

#########################################################################
# XDEF **************************************************************** #
#	fabs():  emulates the fabs instruction				#
#	fsabs(): emulates the fsabs instruction				#
#	fdabs(): emulates the fdabs instruction				#
#									#
# XREF **************************************************************** #
#	norm() - normalize denorm mantissa to provide EXOP		#
#	scale_to_zero_src() - make exponent = 0; get scale factor	#
#	unf_res() - calculate underflow result				#
#	ovf_res() - calculate overflow result				#
#	res_{s,q}nan_1op() - set NAN result for monadic operation	#
#									#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision source operand		#
#	d0 = rnd precision/mode						#
#									#
# OUTPUT ************************************************************** #
#	fp0 = result							#
#	fp1 = EXOP (if exception occurred)				#
#									#
# ALGORITHM *********************************************************** #
#	Handle NANs, infinities, and zeroes as special cases. Divide	#
# norms into extended, single, and double precision.			#
#	Simply clear sign for extended precision norm. Ext prec denorm	#
# gets an EXOP created for it since it's an underflow.			#
#	Double and single precision can overflow and underflow. First,	#
# scale the operand such that the exponent is zero. Perform an "fabs"	#
# using the correct rnd mode/prec. Check to see if the original		#
# exponent would take an exception. If so, use unf_res() or ovf_res()	#
# to calculate the default result. Also, create the EXOP for the	#
# exceptional case. If no exception should occur, insert the correct	#
# result exponent and return.						#
#	Unnorms don't pass through here.				#
#									#
#########################################################################

	global		fsabs
fsabs:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&s_mode*0x10,%d0	# insert sgl precision
	bra.b		fabs

	global		fdabs
fdabs:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&d_mode*0x10,%d0	# insert dbl precision
	# fall through into fabs

	global		fabs
fabs:
	mov.l		%d0,L_SCR3(%a6)		# store rnd info
	mov.b		STAG(%a6),%d1
	bne.w		fabs_not_norm		# optimize on non-norm input

#
# ABSOLUTE VALUE: norms and denorms ONLY!
#
fabs_norm:
	andi.b		&0xc0,%d0		# is precision extended?
	bne.b		fabs_not_ext		# no; go handle sgl or dbl

#
# precision selected is extended. so...we can not get an underflow
# or overflow because of rounding to the correct precision. so...
# skip the scaling and unscaling...
#
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	mov.w		SRC_EX(%a0),%d1
	bclr		&15,%d1			# force absolute value
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert exponent
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts

#
# for an extended precision DENORM, the UNFL exception bit is set
# the accrued bit is NOT set in this instance(no inexactness!)
#
fabs_denorm:
	andi.b		&0xc0,%d0		# is precision extended?
	bne.b		fabs_not_ext		# no

	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	mov.w		SRC_EX(%a0),%d0
	bclr		&15,%d0			# clear sign
	mov.w		%d0,FP_SCR0_EX(%a6)	# insert exponent

	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0

	btst		&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
	bne.b		fabs_ext_unfl_ena	# yes; build EXOP too
	rts

#
# the input is an extended DENORM and underflow is enabled in the FPCR.
# normalize the mantissa and add the bias of 0x6000 to the resulting negative
# exponent and insert back into the operand.
#
fabs_ext_unfl_ena:
	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
	bsr.l		norm			# normalize result
	neg.w		%d0			# new exponent = -(shft val)
	addi.w		&0x6000,%d0		# add new bias to exponent
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
	andi.w		&0x8000,%d1		# keep old sign
	andi.w		&0x7fff,%d0		# clear sign position
	or.w		%d1,%d0			# concat old sign, new exponent
	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	rts

#
# operand is either single or double
#
fabs_not_ext:
	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
	bne.b		fabs_dbl

#
# operand is to be rounded to single precision
#
fabs_sgl:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l		scale_to_zero_src	# calculate scale factor

	cmpi.l		%d0,&0x3fff-0x3f80	# will move in underflow?
	bge.w		fabs_sd_unfl		# yes; go handle underflow
	cmpi.l		%d0,&0x3fff-0x407e	# will move in overflow?
	beq.w		fabs_sd_may_ovfl	# maybe; go check
	blt.w		fabs_sd_ovfl		# yes; go handle overflow

#
# operand will NOT overflow or underflow when moved in to the fp reg file
#
fabs_sd_normal:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute

	fmov.l		%fpsr,%d1		# save FPSR
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

# store the rounded result, then undo the scaling by re-inserting the
# original exponent (scale factor still in d0).
fabs_sd_normal_exit:
	mov.l		%d2,-(%sp)		# save d2
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
	mov.w		FP_SCR0_EX(%a6),%d1	# load sgn,exp
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# subtract scale factor
	andi.w		&0x8000,%d2		# keep old sign
	or.w		%d1,%d2			# concat old sign,new exp
	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts

#
# operand is to be rounded to double precision
#
fabs_dbl:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l		scale_to_zero_src	# calculate scale factor

	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
	bge.b		fabs_sd_unfl		# yes; go handle underflow
	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
	beq.w		fabs_sd_may_ovfl	# maybe; go check
	blt.w		fabs_sd_ovfl		# yes; go handle overflow
	bra.w		fabs_sd_normal		# no; go handle normalized op

#
# operand WILL underflow when moved in to the fp register file
#
fabs_sd_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	bclr		&0x7,FP_SCR0_EX(%a6)	# force absolute value

# if underflow or inexact is enabled, go calculate EXOP first.
	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fabs_sd_unfl_ena	# yes

fabs_sd_unfl_dis:
	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set possible 'Z' ccode
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	rts

#
# operand will underflow AND underflow is enabled.
# Therefore, we must return the result rounded to extended precision.
#
fabs_sd_unfl_ena:
	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent

	mov.l		%d2,-(%sp)		# save d2
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# subtract scale factor
	addi.l		&0x6000,%d1		# add new bias
	andi.w		&0x7fff,%d1		# clear sign position
	or.w		%d2,%d1			# concat new sign,new exp
	mov.w		%d1,FP_SCR1_EX(%a6)	# insert new exp
	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
	mov.l		(%sp)+,%d2		# restore d2
	bra.b		fabs_sd_unfl_dis	# now create default result too

#
# operand WILL overflow.
#
fabs_sd_ovfl:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# save FPSR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

fabs_sd_ovfl_tst:
	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fabs_sd_ovfl_ena	# yes

#
# OVFL is not enabled; therefore, we must create the default result by
# calling ovf_res().
#
fabs_sd_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	rts

#
# OVFL is enabled.
# the INEX2 bit has already been updated by the round to the correct precision.
# now, round to extended(and don't alter the FPSR).
#
fabs_sd_ovfl_ena:
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# subtract scale factor
	subi.l		&0x6000,%d1		# subtract bias
	andi.w		&0x7fff,%d1		# clear sign position
	or.w		%d2,%d1			# concat sign,exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	mov.l		(%sp)+,%d2		# restore d2
	bra.b		fabs_sd_ovfl_dis	# now create default result too

#
# the move in MAY overflow. so...
#
fabs_sd_may_ovfl:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
	fbge.w		fabs_sd_ovfl_tst	# yes; overflow has occurred

# no, it didn't overflow; we have correct result
	bra.w		fabs_sd_normal_exit

##########################################################################

#
# input is not normalized; what is it?
10144*4882a593Smuzhiyun# 10145*4882a593Smuzhiyunfabs_not_norm: 10146*4882a593Smuzhiyun cmpi.b %d1,&DENORM # weed out DENORM 10147*4882a593Smuzhiyun beq.w fabs_denorm 10148*4882a593Smuzhiyun cmpi.b %d1,&SNAN # weed out SNAN 10149*4882a593Smuzhiyun beq.l res_snan_1op 10150*4882a593Smuzhiyun cmpi.b %d1,&QNAN # weed out QNAN 10151*4882a593Smuzhiyun beq.l res_qnan_1op 10152*4882a593Smuzhiyun 10153*4882a593Smuzhiyun fabs.x SRC(%a0),%fp0 # force absolute value 10154*4882a593Smuzhiyun 10155*4882a593Smuzhiyun cmpi.b %d1,&INF # weed out INF 10156*4882a593Smuzhiyun beq.b fabs_inf 10157*4882a593Smuzhiyunfabs_zero: 10158*4882a593Smuzhiyun mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit 10159*4882a593Smuzhiyun rts 10160*4882a593Smuzhiyunfabs_inf: 10161*4882a593Smuzhiyun mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit 10162*4882a593Smuzhiyun rts 10163*4882a593Smuzhiyun 10164*4882a593Smuzhiyun######################################################################### 10165*4882a593Smuzhiyun# XDEF **************************************************************** # 10166*4882a593Smuzhiyun# fcmp(): fp compare op routine # 10167*4882a593Smuzhiyun# # 10168*4882a593Smuzhiyun# XREF **************************************************************** # 10169*4882a593Smuzhiyun# res_qnan() - return QNAN result # 10170*4882a593Smuzhiyun# res_snan() - return SNAN result # 10171*4882a593Smuzhiyun# # 10172*4882a593Smuzhiyun# INPUT *************************************************************** # 10173*4882a593Smuzhiyun# a0 = pointer to extended precision source operand # 10174*4882a593Smuzhiyun# a1 = pointer to extended precision destination operand # 10175*4882a593Smuzhiyun# d0 = round prec/mode # 10176*4882a593Smuzhiyun# # 10177*4882a593Smuzhiyun# OUTPUT ************************************************************** # 10178*4882a593Smuzhiyun# None # 10179*4882a593Smuzhiyun# # 10180*4882a593Smuzhiyun# ALGORITHM *********************************************************** # 10181*4882a593Smuzhiyun# 
#	Handle NANs and denorms as special cases. For everything else,	#
#	just use the actual fcmp instruction to produce the correct	#
#	condition codes.						#
#									#
#########################################################################

        global          fcmp
fcmp:
        # build dispatch index: d1 = (DTAG << 3) | STAG; zero means both
        # operands are NORMs, which is the fast path.
        clr.w           %d1
        mov.b           DTAG(%a6),%d1
        lsl.b           &0x3,%d1
        or.b            STAG(%a6),%d1
        bne.b           fcmp_not_norm           # optimize on non-norm input

#
# COMPARE FP OPs : NORMs, ZEROs, INFs, and "corrected" DENORMs
#
fcmp_norm:
        fmovm.x         DST(%a1),&0x80          # load dst op

        fcmp.x          %fp0,SRC(%a0)           # do compare

        fmov.l          %fpsr,%d0               # save FPSR
        rol.l           &0x8,%d0                # extract ccode bits
        mov.b           %d0,FPSR_CC(%a6)        # set ccode bits(no exc bits are set)

        rts

#
# fcmp: inputs are not both normalized; what are they?
#
fcmp_not_norm:
        mov.w           (tbl_fcmp_op.b,%pc,%d1.w*2),%d1
        jmp             (tbl_fcmp_op.b,%pc,%d1.w*1)

        swbeg           &48
# dispatch table indexed by {DTAG,STAG}; rows = dst type, cols = src type.
tbl_fcmp_op:
        short           fcmp_norm       - tbl_fcmp_op # NORM - NORM
        short           fcmp_norm       - tbl_fcmp_op # NORM - ZERO
        short           fcmp_norm       - tbl_fcmp_op # NORM - INF
        short           fcmp_res_qnan   - tbl_fcmp_op # NORM - QNAN
        short           fcmp_nrm_dnrm   - tbl_fcmp_op # NORM - DENORM
        short           fcmp_res_snan   - tbl_fcmp_op # NORM - SNAN
        short           tbl_fcmp_op     - tbl_fcmp_op #
        short           tbl_fcmp_op     - tbl_fcmp_op #

        short           fcmp_norm       - tbl_fcmp_op # ZERO - NORM
        short           fcmp_norm       - tbl_fcmp_op # ZERO - ZERO
        short           fcmp_norm       - tbl_fcmp_op # ZERO - INF
        short           fcmp_res_qnan   - tbl_fcmp_op # ZERO - QNAN
        short           fcmp_dnrm_s     - tbl_fcmp_op # ZERO - DENORM
        short           fcmp_res_snan   - tbl_fcmp_op # ZERO - SNAN
        short           tbl_fcmp_op     - tbl_fcmp_op #
        short           tbl_fcmp_op     - tbl_fcmp_op #

        short           fcmp_norm       - tbl_fcmp_op # INF - NORM
        short           fcmp_norm       - tbl_fcmp_op # INF - ZERO
        short           fcmp_norm       - tbl_fcmp_op # INF - INF
        short           fcmp_res_qnan   - tbl_fcmp_op # INF - QNAN
        short           fcmp_dnrm_s     - tbl_fcmp_op # INF - DENORM
        short           fcmp_res_snan   - tbl_fcmp_op # INF - SNAN
        short           tbl_fcmp_op     - tbl_fcmp_op #
        short           tbl_fcmp_op     - tbl_fcmp_op #

        short           fcmp_res_qnan   - tbl_fcmp_op # QNAN - NORM
        short           fcmp_res_qnan   - tbl_fcmp_op # QNAN - ZERO
        short           fcmp_res_qnan   - tbl_fcmp_op # QNAN - INF
        short           fcmp_res_qnan   - tbl_fcmp_op # QNAN - QNAN
        short           fcmp_res_qnan   - tbl_fcmp_op # QNAN - DENORM
        short           fcmp_res_snan   - tbl_fcmp_op # QNAN - SNAN
        short           tbl_fcmp_op     - tbl_fcmp_op #
        short           tbl_fcmp_op     - tbl_fcmp_op #

        short           fcmp_dnrm_nrm   - tbl_fcmp_op # DENORM - NORM
        short           fcmp_dnrm_d     - tbl_fcmp_op # DENORM - ZERO
        short           fcmp_dnrm_d     - tbl_fcmp_op # DENORM - INF
        short           fcmp_res_qnan   - tbl_fcmp_op # DENORM - QNAN
        short           fcmp_dnrm_sd    - tbl_fcmp_op # DENORM - DENORM
        short           fcmp_res_snan   - tbl_fcmp_op # DENORM - SNAN
        short           tbl_fcmp_op     - tbl_fcmp_op #
        short           tbl_fcmp_op     - tbl_fcmp_op #

        short           fcmp_res_snan   - tbl_fcmp_op # SNAN - NORM
        short           fcmp_res_snan   - tbl_fcmp_op # SNAN - ZERO
        short           fcmp_res_snan   - tbl_fcmp_op # SNAN - INF
        short           fcmp_res_snan   - tbl_fcmp_op # SNAN - QNAN
        short           fcmp_res_snan   - tbl_fcmp_op # SNAN - DENORM
        short           fcmp_res_snan   - tbl_fcmp_op # SNAN - SNAN
        short           tbl_fcmp_op     - tbl_fcmp_op #
        short           tbl_fcmp_op     - tbl_fcmp_op #

# unlike all other functions for QNAN and SNAN, fcmp does NOT set the
# 'N' bit for a negative QNAN or SNAN input so we must squelch it here.
fcmp_res_qnan:
        bsr.l           res_qnan
        andi.b          &0xf7,FPSR_CC(%a6)      # clear 'N' ccode bit
        rts
fcmp_res_snan:
        bsr.l           res_snan
        andi.b          &0xf7,FPSR_CC(%a6)      # clear 'N' ccode bit
        rts

#
# DENORMs are a little more difficult.
# If you have 2 DENORMs, then you can just force the j-bit to a one
# and use the fcmp_norm routine.
# If you have a DENORM and an INF or ZERO, just force the DENORM's j-bit to a one
# and use the fcmp_norm routine.
# If you have a DENORM and a NORM with opposite signs, then use fcmp_norm, also.
# But with a DENORM and a NORM of the same sign, the neg bit is set if the
#	(1) signs are (+) and the DENORM is the dst or
#	(2) signs are (-) and the DENORM is the src
#

fcmp_dnrm_s:
        mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
        mov.l           SRC_HI(%a0),%d0
        bset            &31,%d0                 # DENORM src; make into small norm
        mov.l           %d0,FP_SCR0_HI(%a6)
        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
        lea             FP_SCR0(%a6),%a0
        bra.w           fcmp_norm

fcmp_dnrm_d:
        mov.l           DST_EX(%a1),FP_SCR0_EX(%a6)
        mov.l           DST_HI(%a1),%d0
        bset            &31,%d0                 # DENORM dst; make into small norm
        mov.l           %d0,FP_SCR0_HI(%a6)
        mov.l           DST_LO(%a1),FP_SCR0_LO(%a6)
        lea             FP_SCR0(%a6),%a1
        bra.w           fcmp_norm

fcmp_dnrm_sd:
        mov.w           DST_EX(%a1),FP_SCR1_EX(%a6)
        mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
        mov.l           DST_HI(%a1),%d0
        bset            &31,%d0                 # DENORM dst; make into small norm
        mov.l           %d0,FP_SCR1_HI(%a6)
        mov.l           SRC_HI(%a0),%d0
        bset            &31,%d0                 # DENORM src; make into small norm
        mov.l           %d0,FP_SCR0_HI(%a6)
        mov.l           DST_LO(%a1),FP_SCR1_LO(%a6)
        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
        lea             FP_SCR1(%a6),%a1
        lea             FP_SCR0(%a6),%a0
        bra.w           fcmp_norm

# dst is a NORM, src is a DENORM.
fcmp_nrm_dnrm:
        mov.b           SRC_EX(%a0),%d0         # determine if like signs
        mov.b           DST_EX(%a1),%d1
        eor.b           %d0,%d1
        bmi.w           fcmp_dnrm_s

# signs are the same, so must determine the answer ourselves.
        tst.b           %d0                     # is src op negative?
        bmi.b           fcmp_nrm_dnrm_m         # yes
        rts
fcmp_nrm_dnrm_m:
        mov.b           &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
        rts

# dst is a DENORM, src is a NORM.
fcmp_dnrm_nrm:
        mov.b           SRC_EX(%a0),%d0         # determine if like signs
        mov.b           DST_EX(%a1),%d1
        eor.b           %d0,%d1
        bmi.w           fcmp_dnrm_d

# signs are the same, so must determine the answer ourselves.
        tst.b           %d0                     # is src op negative?
        bpl.b           fcmp_dnrm_nrm_m         # no
        rts
fcmp_dnrm_nrm_m:
        mov.b           &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
        rts

#########################################################################
# XDEF ****************************************************************	#
#	fsglmul(): emulates the fsglmul instruction			#
#									#
# XREF ****************************************************************	#
#	scale_to_zero_src() - scale src exponent to zero		#
#	scale_to_zero_dst() - scale dst exponent to zero		#
#	unf_res4() - return default underflow result for sglop		#
#	ovf_res() - return default overflow result			#
#	res_qnan() - return QNAN result					#
#	res_snan() - return SNAN result					#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision source operand		#
#	a1 = pointer to extended precision destination operand		#
#	d0  rnd prec,mode						#
#									#
# OUTPUT **************************************************************	#
#	fp0 = result							#
#	fp1 = EXOP (if exception occurred)				#
#									#
# ALGORITHM ***********************************************************	#
#	Handle NANs, infinities, and zeroes as special cases. Divide	#
#	norms/denorms into ext/sgl/dbl precision.			#
#	For norms/denorms, scale the exponents such that a multiply	#
#	instruction won't cause an exception. Use the regular fsglmul	#
#	to compute a result. Check if the regular operands would have	#
#	taken an exception. If so, return the default overflow/underflow #
#	result and return the EXOP if exceptions are enabled. Else,	#
#	scale the result operand to the proper exponent.		#
#									#
#########################################################################

        global          fsglmul
fsglmul:
        mov.l           %d0,L_SCR3(%a6)         # store rnd info

        # build dispatch index: d1 = (DTAG << 3) | STAG; zero means both
        # operands are NORMs, which is the fast path.
        clr.w           %d1
        mov.b           DTAG(%a6),%d1
        lsl.b           &0x3,%d1
        or.b            STAG(%a6),%d1

        bne.w           fsglmul_not_norm        # optimize on non-norm input

fsglmul_norm:
        mov.w           DST_EX(%a1),FP_SCR1_EX(%a6)
        mov.l           DST_HI(%a1),FP_SCR1_HI(%a6)
        mov.l           DST_LO(%a1),FP_SCR1_LO(%a6)

        mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)

        bsr.l           scale_to_zero_src       # scale exponent
        mov.l           %d0,-(%sp)              # save scale factor 1

        bsr.l           scale_to_zero_dst       # scale dst exponent

        add.l           (%sp)+,%d0              # SCALE_FACTOR = scale1 + scale2

        # classify the result exponent against the representable range
        cmpi.l          %d0,&0x3fff-0x7ffe      # would result ovfl?
        beq.w           fsglmul_may_ovfl        # result may rnd to overflow
        blt.w           fsglmul_ovfl            # result will overflow

        cmpi.l          %d0,&0x3fff+0x0001      # would result unfl?
        beq.w           fsglmul_may_unfl        # result may rnd to no unfl
        bgt.w           fsglmul_unfl            # result will underflow

fsglmul_normal:
        fmovm.x         FP_SCR1(%a6),&0x80      # load dst op

        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
        fmov.l          &0x0,%fpsr              # clear FPSR

        fsglmul.x       FP_SCR0(%a6),%fp0       # execute sgl multiply

        fmov.l          %fpsr,%d1               # save status
        fmov.l          &0x0,%fpcr              # clear FPCR

        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N

fsglmul_normal_exit:
        fmovm.x         &0x80,FP_SCR0(%a6)      # store out result
        mov.l           %d2,-(%sp)              # save d2
        mov.w           FP_SCR0_EX(%a6),%d1     # load {sgn,exp}
        mov.l           %d1,%d2                 # make a copy
        andi.l          &0x7fff,%d1             # strip sign
        andi.w          &0x8000,%d2             # keep old sign
        sub.l           %d0,%d1                 # subtract scale factor
        or.w            %d2,%d1                 # concat old sign,new exp
        mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
        mov.l           (%sp)+,%d2              # restore d2
        fmovm.x         FP_SCR0(%a6),&0x80      # return result in fp0
        rts

fsglmul_ovfl:
        fmovm.x         FP_SCR1(%a6),&0x80      # load dst op

        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
        fmov.l          &0x0,%fpsr              # clear FPSR

        fsglmul.x       FP_SCR0(%a6),%fp0       # execute sgl multiply

        fmov.l          %fpsr,%d1               # save status
        fmov.l          &0x0,%fpcr              # clear FPCR

        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N

fsglmul_ovfl_tst:

# save setting this until now because this is where fsglmul_may_ovfl may jump in
        or.l            &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

        mov.b           FPCR_ENABLE(%a6),%d1
        andi.b          &0x13,%d1               # is OVFL or INEX enabled?
        bne.b           fsglmul_ovfl_ena        # yes

#
# OVFL is not enabled; create the default result via ovf_res().
#
fsglmul_ovfl_dis:
        btst            &neg_bit,FPSR_CC(%a6)   # is result negative?
        sne             %d1                     # set sign param accordingly
        mov.l           L_SCR3(%a6),%d0         # pass prec:rnd
        andi.b          &0x30,%d0               # force prec = ext
        bsr.l           ovf_res                 # calculate default result
        or.b            %d0,FPSR_CC(%a6)        # set INF,N if applicable
        fmovm.x         (%a0),&0x80             # return default result in fp0
        rts

#
# OVFL is enabled: build the EXOP by re-biasing the scaled exponent
# (-0x6000), return it in fp1, then deliver the default result too.
#
fsglmul_ovfl_ena:
        fmovm.x         &0x80,FP_SCR0(%a6)      # move result to stack

        mov.l           %d2,-(%sp)              # save d2
        mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp}
        mov.l           %d1,%d2                 # make a copy
        andi.l          &0x7fff,%d1             # strip sign
        sub.l           %d0,%d1                 # subtract scale factor
        subi.l          &0x6000,%d1             # subtract bias
        andi.w          &0x7fff,%d1
        andi.w          &0x8000,%d2             # keep old sign
        or.w            %d2,%d1                 # concat old sign,new exp
        mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
        mov.l           (%sp)+,%d2              # restore d2
        fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
        bra.b           fsglmul_ovfl_dis

fsglmul_may_ovfl:
        fmovm.x         FP_SCR1(%a6),&0x80      # load dst op

        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
        fmov.l          &0x0,%fpsr              # clear FPSR

        fsglmul.x       FP_SCR0(%a6),%fp0       # execute sgl multiply

        fmov.l          %fpsr,%d1               # save status
        fmov.l          &0x0,%fpcr              # clear FPCR

        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N

        fabs.x          %fp0,%fp1               # make a copy of result
        fcmp.b          %fp1,&0x2               # is |result| >= 2.b?
        fbge.w          fsglmul_ovfl_tst        # yes; overflow has occurred

# no, it didn't overflow; we have correct result
        bra.w           fsglmul_normal_exit

fsglmul_unfl:
        bset            &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

        fmovm.x         FP_SCR1(%a6),&0x80      # load dst op

        fmov.l          &rz_mode*0x10,%fpcr     # set FPCR
        fmov.l          &0x0,%fpsr              # clear FPSR

        fsglmul.x       FP_SCR0(%a6),%fp0       # execute sgl multiply

        fmov.l          %fpsr,%d1               # save status
        fmov.l          &0x0,%fpcr              # clear FPCR

        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N

        mov.b           FPCR_ENABLE(%a6),%d1
        andi.b          &0x0b,%d1               # is UNFL or INEX enabled?
        bne.b           fsglmul_unfl_ena        # yes

fsglmul_unfl_dis:
        fmovm.x         &0x80,FP_SCR0(%a6)      # store out result

        lea             FP_SCR0(%a6),%a0        # pass: result addr
        mov.l           L_SCR3(%a6),%d1         # pass: rnd prec,mode
        bsr.l           unf_res4                # calculate default result
        or.b            %d0,FPSR_CC(%a6)        # 'Z' bit may have been set
        fmovm.x         FP_SCR0(%a6),&0x80      # return default result in fp0
        rts

#
# UNFL is enabled.
#
fsglmul_unfl_ena:
        fmovm.x         FP_SCR1(%a6),&0x40      # load dst op into fp1

        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
        fmov.l          &0x0,%fpsr              # clear FPSR

        fsglmul.x       FP_SCR0(%a6),%fp1       # execute sgl multiply

        fmov.l          &0x0,%fpcr              # clear FPCR

        fmovm.x         &0x40,FP_SCR0(%a6)      # save result to stack
        mov.l           %d2,-(%sp)              # save d2
        mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp}
        mov.l           %d1,%d2                 # make a copy
        andi.l          &0x7fff,%d1             # strip sign
        andi.w          &0x8000,%d2             # keep old sign
        sub.l           %d0,%d1                 # subtract scale factor
        addi.l          &0x6000,%d1             # add bias
        andi.w          &0x7fff,%d1
        or.w            %d2,%d1                 # concat old sign,new exp
        mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
        mov.l           (%sp)+,%d2              # restore d2
        fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
        bra.w           fsglmul_unfl_dis

fsglmul_may_unfl:
        fmovm.x         FP_SCR1(%a6),&0x80      # load dst op

        fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
        fmov.l          &0x0,%fpsr              # clear FPSR

        fsglmul.x       FP_SCR0(%a6),%fp0       # execute sgl multiply

        fmov.l          %fpsr,%d1               # save status
        fmov.l          &0x0,%fpcr              # clear FPCR

        or.l            %d1,USER_FPSR(%a6)      # save INEX2,N

        fabs.x          %fp0,%fp1               # make a copy of result
        fcmp.b          %fp1,&0x2               # is |result| > 2.b?
        fbgt.w          fsglmul_normal_exit     # no; no underflow occurred
        fblt.w          fsglmul_unfl            # yes; underflow occurred

#
# we still don't know if underflow occurred. result is ~ equal to 2. but,
# we don't know if the result was an underflow that rounded up to a 2 or
# a normalized number that rounded down to a 2. so, redo the entire operation
# using RZ as the rounding mode to see what the pre-rounded result is.
# this case should be relatively rare.
#
        fmovm.x         FP_SCR1(%a6),&0x40      # load dst op into fp1

        mov.l           L_SCR3(%a6),%d1
        andi.b          &0xc0,%d1               # keep rnd prec
        ori.b           &rz_mode*0x10,%d1       # insert RZ

        fmov.l          %d1,%fpcr               # set FPCR
        fmov.l          &0x0,%fpsr              # clear FPSR

        fsglmul.x       FP_SCR0(%a6),%fp1       # execute sgl multiply

        fmov.l          &0x0,%fpcr              # clear FPCR
        fabs.x          %fp1                    # make absolute value
        fcmp.b          %fp1,&0x2               # is |result| < 2.b?
        fbge.w          fsglmul_normal_exit     # no; no underflow occurred
        bra.w           fsglmul_unfl            # yes, underflow occurred

##############################################################################

#
# Single Precision Multiply: inputs are not both normalized; what are they?
#
fsglmul_not_norm:
        mov.w           (tbl_fsglmul_op.b,%pc,%d1.w*2),%d1
        jmp             (tbl_fsglmul_op.b,%pc,%d1.w*1)

        swbeg           &48
# dispatch table indexed by {DTAG,STAG}; rows = dst type, cols = src type.
tbl_fsglmul_op:
        short           fsglmul_norm            - tbl_fsglmul_op # NORM x NORM
        short           fsglmul_zero            - tbl_fsglmul_op # NORM x ZERO
        short           fsglmul_inf_src         - tbl_fsglmul_op # NORM x INF
        short           fsglmul_res_qnan        - tbl_fsglmul_op # NORM x QNAN
        short           fsglmul_norm            - tbl_fsglmul_op # NORM x DENORM
        short           fsglmul_res_snan        - tbl_fsglmul_op # NORM x SNAN
        short           tbl_fsglmul_op          - tbl_fsglmul_op #
        short           tbl_fsglmul_op          - tbl_fsglmul_op #

        short           fsglmul_zero            - tbl_fsglmul_op # ZERO x NORM
        short           fsglmul_zero            - tbl_fsglmul_op # ZERO x ZERO
        short           fsglmul_res_operr       - tbl_fsglmul_op # ZERO x INF
        short           fsglmul_res_qnan        - tbl_fsglmul_op # ZERO x QNAN
        short           fsglmul_zero            - tbl_fsglmul_op # ZERO x DENORM
        short           fsglmul_res_snan        - tbl_fsglmul_op # ZERO x SNAN
        short           tbl_fsglmul_op          - tbl_fsglmul_op #
        short           tbl_fsglmul_op          - tbl_fsglmul_op #

        short           fsglmul_inf_dst         - tbl_fsglmul_op # INF x NORM
        short           fsglmul_res_operr       - tbl_fsglmul_op # INF x ZERO
        short           fsglmul_inf_dst         - tbl_fsglmul_op # INF x INF
        short           fsglmul_res_qnan        - tbl_fsglmul_op # INF x QNAN
        short           fsglmul_inf_dst         - tbl_fsglmul_op # INF x DENORM
        short           fsglmul_res_snan        - tbl_fsglmul_op # INF x SNAN
        short           tbl_fsglmul_op          - tbl_fsglmul_op #
        short           tbl_fsglmul_op          - tbl_fsglmul_op #

        short           fsglmul_res_qnan        - tbl_fsglmul_op # QNAN x NORM
        short           fsglmul_res_qnan        - tbl_fsglmul_op # QNAN x ZERO
        short           fsglmul_res_qnan        - tbl_fsglmul_op # QNAN x INF
        short           fsglmul_res_qnan        - tbl_fsglmul_op # QNAN x QNAN
        short           fsglmul_res_qnan        - tbl_fsglmul_op # QNAN x DENORM
        short           fsglmul_res_snan        - tbl_fsglmul_op # QNAN x SNAN
        short           tbl_fsglmul_op          - tbl_fsglmul_op #
        short           tbl_fsglmul_op          - tbl_fsglmul_op #

        short           fsglmul_norm            - tbl_fsglmul_op # DENORM x NORM
        short           fsglmul_zero            - tbl_fsglmul_op # DENORM x ZERO
        short           fsglmul_inf_src         - tbl_fsglmul_op # DENORM x INF
        short           fsglmul_res_qnan        - tbl_fsglmul_op # DENORM x QNAN
        short           fsglmul_norm            - tbl_fsglmul_op # DENORM x DENORM
        short           fsglmul_res_snan        - tbl_fsglmul_op # DENORM x SNAN
        short           tbl_fsglmul_op          - tbl_fsglmul_op #
        short           tbl_fsglmul_op          - tbl_fsglmul_op #

        short           fsglmul_res_snan        - tbl_fsglmul_op # SNAN x NORM
        short           fsglmul_res_snan        - tbl_fsglmul_op # SNAN x ZERO
        short           fsglmul_res_snan        - tbl_fsglmul_op # SNAN x INF
        short           fsglmul_res_snan        - tbl_fsglmul_op # SNAN x QNAN
        short           fsglmul_res_snan        - tbl_fsglmul_op # SNAN x DENORM
        short           fsglmul_res_snan        - tbl_fsglmul_op # SNAN x SNAN
        short           tbl_fsglmul_op          - tbl_fsglmul_op #
        short           tbl_fsglmul_op          - tbl_fsglmul_op #

# special-case stubs: these share the regular fmul handlers since the
# results for these operand classes are identical for fmul and fsglmul.
fsglmul_res_operr:
        bra.l           res_operr
fsglmul_res_snan:
        bra.l           res_snan
fsglmul_res_qnan:
        bra.l           res_qnan
fsglmul_zero:
        bra.l           fmul_zero
fsglmul_inf_src:
        bra.l           fmul_inf_src
fsglmul_inf_dst:
        bra.l           fmul_inf_dst

#########################################################################
# XDEF ****************************************************************	#
#	fsgldiv(): emulates the fsgldiv instruction			#
#									#
# XREF ****************************************************************	#
#	scale_to_zero_src() - scale src exponent to zero		#
#	scale_to_zero_dst() - scale dst exponent to zero		#
#	unf_res4() - return default underflow result for sglop		#
#	ovf_res() - return default overflow result			#
#	res_qnan() - return QNAN result					#
#	res_snan() - return SNAN result					#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision source operand		#
#	a1 = pointer to extended precision destination operand		#
#	d0  rnd prec,mode						#
#									#
# OUTPUT **************************************************************	#
#	fp0 = result							#
#	fp1 = EXOP (if exception occurred)				#
#									#
# ALGORITHM ***********************************************************	#
10719*4882a593Smuzhiyun# Handle NANs, infinities, and zeroes as special cases. Divide # 10720*4882a593Smuzhiyun# norms/denorms into ext/sgl/dbl precision. # 10721*4882a593Smuzhiyun# For norms/denorms, scale the exponents such that a divide # 10722*4882a593Smuzhiyun# instruction won't cause an exception. Use the regular fsgldiv to # 10723*4882a593Smuzhiyun# compute a result. Check if the regular operands would have taken # 10724*4882a593Smuzhiyun# an exception. If so, return the default overflow/underflow result # 10725*4882a593Smuzhiyun# and return the EXOP if exceptions are enabled. Else, scale the # 10726*4882a593Smuzhiyun# result operand to the proper exponent. # 10727*4882a593Smuzhiyun# # 10728*4882a593Smuzhiyun######################################################################### 10729*4882a593Smuzhiyun 10730*4882a593Smuzhiyun global fsgldiv 10731*4882a593Smuzhiyunfsgldiv: 10732*4882a593Smuzhiyun mov.l %d0,L_SCR3(%a6) # store rnd info 10733*4882a593Smuzhiyun 10734*4882a593Smuzhiyun clr.w %d1 10735*4882a593Smuzhiyun mov.b DTAG(%a6),%d1 10736*4882a593Smuzhiyun lsl.b &0x3,%d1 10737*4882a593Smuzhiyun or.b STAG(%a6),%d1 # combine src tags 10738*4882a593Smuzhiyun 10739*4882a593Smuzhiyun bne.w fsgldiv_not_norm # optimize on non-norm input 10740*4882a593Smuzhiyun 10741*4882a593Smuzhiyun# 10742*4882a593Smuzhiyun# DIVIDE: NORMs and DENORMs ONLY! 
10743*4882a593Smuzhiyun# 10744*4882a593Smuzhiyunfsgldiv_norm: 10745*4882a593Smuzhiyun mov.w DST_EX(%a1),FP_SCR1_EX(%a6) 10746*4882a593Smuzhiyun mov.l DST_HI(%a1),FP_SCR1_HI(%a6) 10747*4882a593Smuzhiyun mov.l DST_LO(%a1),FP_SCR1_LO(%a6) 10748*4882a593Smuzhiyun 10749*4882a593Smuzhiyun mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) 10750*4882a593Smuzhiyun mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 10751*4882a593Smuzhiyun mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 10752*4882a593Smuzhiyun 10753*4882a593Smuzhiyun bsr.l scale_to_zero_src # calculate scale factor 1 10754*4882a593Smuzhiyun mov.l %d0,-(%sp) # save scale factor 1 10755*4882a593Smuzhiyun 10756*4882a593Smuzhiyun bsr.l scale_to_zero_dst # calculate scale factor 2 10757*4882a593Smuzhiyun 10758*4882a593Smuzhiyun neg.l (%sp) # S.F. = scale1 - scale2 10759*4882a593Smuzhiyun add.l %d0,(%sp) 10760*4882a593Smuzhiyun 10761*4882a593Smuzhiyun mov.w 2+L_SCR3(%a6),%d1 # fetch precision,mode 10762*4882a593Smuzhiyun lsr.b &0x6,%d1 10763*4882a593Smuzhiyun mov.l (%sp)+,%d0 10764*4882a593Smuzhiyun cmpi.l %d0,&0x3fff-0x7ffe 10765*4882a593Smuzhiyun ble.w fsgldiv_may_ovfl 10766*4882a593Smuzhiyun 10767*4882a593Smuzhiyun cmpi.l %d0,&0x3fff-0x0000 # will result underflow? 
10768*4882a593Smuzhiyun beq.w fsgldiv_may_unfl # maybe 10769*4882a593Smuzhiyun bgt.w fsgldiv_unfl # yes; go handle underflow 10770*4882a593Smuzhiyun 10771*4882a593Smuzhiyunfsgldiv_normal: 10772*4882a593Smuzhiyun fmovm.x FP_SCR1(%a6),&0x80 # load dst op 10773*4882a593Smuzhiyun 10774*4882a593Smuzhiyun fmov.l L_SCR3(%a6),%fpcr # save FPCR 10775*4882a593Smuzhiyun fmov.l &0x0,%fpsr # clear FPSR 10776*4882a593Smuzhiyun 10777*4882a593Smuzhiyun fsgldiv.x FP_SCR0(%a6),%fp0 # perform sgl divide 10778*4882a593Smuzhiyun 10779*4882a593Smuzhiyun fmov.l %fpsr,%d1 # save FPSR 10780*4882a593Smuzhiyun fmov.l &0x0,%fpcr # clear FPCR 10781*4882a593Smuzhiyun 10782*4882a593Smuzhiyun or.l %d1,USER_FPSR(%a6) # save INEX2,N 10783*4882a593Smuzhiyun 10784*4882a593Smuzhiyunfsgldiv_normal_exit: 10785*4882a593Smuzhiyun fmovm.x &0x80,FP_SCR0(%a6) # store result on stack 10786*4882a593Smuzhiyun mov.l %d2,-(%sp) # save d2 10787*4882a593Smuzhiyun mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp} 10788*4882a593Smuzhiyun mov.l %d1,%d2 # make a copy 10789*4882a593Smuzhiyun andi.l &0x7fff,%d1 # strip sign 10790*4882a593Smuzhiyun andi.w &0x8000,%d2 # keep old sign 10791*4882a593Smuzhiyun sub.l %d0,%d1 # add scale factor 10792*4882a593Smuzhiyun or.w %d2,%d1 # concat old sign,new exp 10793*4882a593Smuzhiyun mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent 10794*4882a593Smuzhiyun mov.l (%sp)+,%d2 # restore d2 10795*4882a593Smuzhiyun fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0 10796*4882a593Smuzhiyun rts 10797*4882a593Smuzhiyun 10798*4882a593Smuzhiyunfsgldiv_may_ovfl: 10799*4882a593Smuzhiyun fmovm.x FP_SCR1(%a6),&0x80 # load dst op 10800*4882a593Smuzhiyun 10801*4882a593Smuzhiyun fmov.l L_SCR3(%a6),%fpcr # set FPCR 10802*4882a593Smuzhiyun fmov.l &0x0,%fpsr # set FPSR 10803*4882a593Smuzhiyun 10804*4882a593Smuzhiyun fsgldiv.x FP_SCR0(%a6),%fp0 # execute divide 10805*4882a593Smuzhiyun 10806*4882a593Smuzhiyun fmov.l %fpsr,%d1 10807*4882a593Smuzhiyun fmov.l &0x0,%fpcr 10808*4882a593Smuzhiyun 10809*4882a593Smuzhiyun 
or.l %d1,USER_FPSR(%a6) # save INEX,N 10810*4882a593Smuzhiyun 10811*4882a593Smuzhiyun fmovm.x &0x01,-(%sp) # save result to stack 10812*4882a593Smuzhiyun mov.w (%sp),%d1 # fetch new exponent 10813*4882a593Smuzhiyun add.l &0xc,%sp # clear result 10814*4882a593Smuzhiyun andi.l &0x7fff,%d1 # strip sign 10815*4882a593Smuzhiyun sub.l %d0,%d1 # add scale factor 10816*4882a593Smuzhiyun cmp.l %d1,&0x7fff # did divide overflow? 10817*4882a593Smuzhiyun blt.b fsgldiv_normal_exit 10818*4882a593Smuzhiyun 10819*4882a593Smuzhiyunfsgldiv_ovfl_tst: 10820*4882a593Smuzhiyun or.w &ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex 10821*4882a593Smuzhiyun 10822*4882a593Smuzhiyun mov.b FPCR_ENABLE(%a6),%d1 10823*4882a593Smuzhiyun andi.b &0x13,%d1 # is OVFL or INEX enabled? 10824*4882a593Smuzhiyun bne.b fsgldiv_ovfl_ena # yes 10825*4882a593Smuzhiyun 10826*4882a593Smuzhiyunfsgldiv_ovfl_dis: 10827*4882a593Smuzhiyun btst &neg_bit,FPSR_CC(%a6) # is result negative 10828*4882a593Smuzhiyun sne %d1 # set sign param accordingly 10829*4882a593Smuzhiyun mov.l L_SCR3(%a6),%d0 # pass prec:rnd 10830*4882a593Smuzhiyun andi.b &0x30,%d0 # kill precision 10831*4882a593Smuzhiyun bsr.l ovf_res # calculate default result 10832*4882a593Smuzhiyun or.b %d0,FPSR_CC(%a6) # set INF if applicable 10833*4882a593Smuzhiyun fmovm.x (%a0),&0x80 # return default result in fp0 10834*4882a593Smuzhiyun rts 10835*4882a593Smuzhiyun 10836*4882a593Smuzhiyunfsgldiv_ovfl_ena: 10837*4882a593Smuzhiyun fmovm.x &0x80,FP_SCR0(%a6) # move result to stack 10838*4882a593Smuzhiyun 10839*4882a593Smuzhiyun mov.l %d2,-(%sp) # save d2 10840*4882a593Smuzhiyun mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp} 10841*4882a593Smuzhiyun mov.l %d1,%d2 # make a copy 10842*4882a593Smuzhiyun andi.l &0x7fff,%d1 # strip sign 10843*4882a593Smuzhiyun andi.w &0x8000,%d2 # keep old sign 10844*4882a593Smuzhiyun sub.l %d0,%d1 # add scale factor 10845*4882a593Smuzhiyun subi.l &0x6000,%d1 # subtract new bias 10846*4882a593Smuzhiyun andi.w &0x7fff,%d1 # clear ms 
bit 10847*4882a593Smuzhiyun or.w %d2,%d1 # concat old sign,new exp 10848*4882a593Smuzhiyun mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent 10849*4882a593Smuzhiyun mov.l (%sp)+,%d2 # restore d2 10850*4882a593Smuzhiyun fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1 10851*4882a593Smuzhiyun bra.b fsgldiv_ovfl_dis 10852*4882a593Smuzhiyun 10853*4882a593Smuzhiyunfsgldiv_unfl: 10854*4882a593Smuzhiyun bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit 10855*4882a593Smuzhiyun 10856*4882a593Smuzhiyun fmovm.x FP_SCR1(%a6),&0x80 # load dst op 10857*4882a593Smuzhiyun 10858*4882a593Smuzhiyun fmov.l &rz_mode*0x10,%fpcr # set FPCR 10859*4882a593Smuzhiyun fmov.l &0x0,%fpsr # clear FPSR 10860*4882a593Smuzhiyun 10861*4882a593Smuzhiyun fsgldiv.x FP_SCR0(%a6),%fp0 # execute sgl divide 10862*4882a593Smuzhiyun 10863*4882a593Smuzhiyun fmov.l %fpsr,%d1 # save status 10864*4882a593Smuzhiyun fmov.l &0x0,%fpcr # clear FPCR 10865*4882a593Smuzhiyun 10866*4882a593Smuzhiyun or.l %d1,USER_FPSR(%a6) # save INEX2,N 10867*4882a593Smuzhiyun 10868*4882a593Smuzhiyun mov.b FPCR_ENABLE(%a6),%d1 10869*4882a593Smuzhiyun andi.b &0x0b,%d1 # is UNFL or INEX enabled? 10870*4882a593Smuzhiyun bne.b fsgldiv_unfl_ena # yes 10871*4882a593Smuzhiyun 10872*4882a593Smuzhiyunfsgldiv_unfl_dis: 10873*4882a593Smuzhiyun fmovm.x &0x80,FP_SCR0(%a6) # store out result 10874*4882a593Smuzhiyun 10875*4882a593Smuzhiyun lea FP_SCR0(%a6),%a0 # pass: result addr 10876*4882a593Smuzhiyun mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode 10877*4882a593Smuzhiyun bsr.l unf_res4 # calculate default result 10878*4882a593Smuzhiyun or.b %d0,FPSR_CC(%a6) # 'Z' bit may have been set 10879*4882a593Smuzhiyun fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0 10880*4882a593Smuzhiyun rts 10881*4882a593Smuzhiyun 10882*4882a593Smuzhiyun# 10883*4882a593Smuzhiyun# UNFL is enabled. 
10884*4882a593Smuzhiyun# 10885*4882a593Smuzhiyunfsgldiv_unfl_ena: 10886*4882a593Smuzhiyun fmovm.x FP_SCR1(%a6),&0x40 # load dst op 10887*4882a593Smuzhiyun 10888*4882a593Smuzhiyun fmov.l L_SCR3(%a6),%fpcr # set FPCR 10889*4882a593Smuzhiyun fmov.l &0x0,%fpsr # clear FPSR 10890*4882a593Smuzhiyun 10891*4882a593Smuzhiyun fsgldiv.x FP_SCR0(%a6),%fp1 # execute sgl divide 10892*4882a593Smuzhiyun 10893*4882a593Smuzhiyun fmov.l &0x0,%fpcr # clear FPCR 10894*4882a593Smuzhiyun 10895*4882a593Smuzhiyun fmovm.x &0x40,FP_SCR0(%a6) # save result to stack 10896*4882a593Smuzhiyun mov.l %d2,-(%sp) # save d2 10897*4882a593Smuzhiyun mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp} 10898*4882a593Smuzhiyun mov.l %d1,%d2 # make a copy 10899*4882a593Smuzhiyun andi.l &0x7fff,%d1 # strip sign 10900*4882a593Smuzhiyun andi.w &0x8000,%d2 # keep old sign 10901*4882a593Smuzhiyun sub.l %d0,%d1 # add scale factor 10902*4882a593Smuzhiyun addi.l &0x6000,%d1 # add bias 10903*4882a593Smuzhiyun andi.w &0x7fff,%d1 # clear top bit 10904*4882a593Smuzhiyun or.w %d2,%d1 # concat old sign, new exp 10905*4882a593Smuzhiyun mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent 10906*4882a593Smuzhiyun mov.l (%sp)+,%d2 # restore d2 10907*4882a593Smuzhiyun fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1 10908*4882a593Smuzhiyun bra.b fsgldiv_unfl_dis 10909*4882a593Smuzhiyun 10910*4882a593Smuzhiyun# 10911*4882a593Smuzhiyun# the divide operation MAY underflow: 10912*4882a593Smuzhiyun# 10913*4882a593Smuzhiyunfsgldiv_may_unfl: 10914*4882a593Smuzhiyun fmovm.x FP_SCR1(%a6),&0x80 # load dst op 10915*4882a593Smuzhiyun 10916*4882a593Smuzhiyun fmov.l L_SCR3(%a6),%fpcr # set FPCR 10917*4882a593Smuzhiyun fmov.l &0x0,%fpsr # clear FPSR 10918*4882a593Smuzhiyun 10919*4882a593Smuzhiyun fsgldiv.x FP_SCR0(%a6),%fp0 # execute sgl divide 10920*4882a593Smuzhiyun 10921*4882a593Smuzhiyun fmov.l %fpsr,%d1 # save status 10922*4882a593Smuzhiyun fmov.l &0x0,%fpcr # clear FPCR 10923*4882a593Smuzhiyun 10924*4882a593Smuzhiyun or.l %d1,USER_FPSR(%a6) # save 
INEX2,N 10925*4882a593Smuzhiyun 10926*4882a593Smuzhiyun fabs.x %fp0,%fp1 # make a copy of result 10927*4882a593Smuzhiyun fcmp.b %fp1,&0x1 # is |result| > 1.b? 10928*4882a593Smuzhiyun fbgt.w fsgldiv_normal_exit # no; no underflow occurred 10929*4882a593Smuzhiyun fblt.w fsgldiv_unfl # yes; underflow occurred 10930*4882a593Smuzhiyun 10931*4882a593Smuzhiyun# 10932*4882a593Smuzhiyun# we still don't know if underflow occurred. result is ~ equal to 1. but, 10933*4882a593Smuzhiyun# we don't know if the result was an underflow that rounded up to a 1 10934*4882a593Smuzhiyun# or a normalized number that rounded down to a 1. so, redo the entire 10935*4882a593Smuzhiyun# operation using RZ as the rounding mode to see what the pre-rounded 10936*4882a593Smuzhiyun# result is. this case should be relatively rare. 10937*4882a593Smuzhiyun# 10938*4882a593Smuzhiyun fmovm.x FP_SCR1(%a6),&0x40 # load dst op into %fp1 10939*4882a593Smuzhiyun 10940*4882a593Smuzhiyun clr.l %d1 # clear scratch register 10941*4882a593Smuzhiyun ori.b &rz_mode*0x10,%d1 # force RZ rnd mode 10942*4882a593Smuzhiyun 10943*4882a593Smuzhiyun fmov.l %d1,%fpcr # set FPCR 10944*4882a593Smuzhiyun fmov.l &0x0,%fpsr # clear FPSR 10945*4882a593Smuzhiyun 10946*4882a593Smuzhiyun fsgldiv.x FP_SCR0(%a6),%fp1 # execute sgl divide 10947*4882a593Smuzhiyun 10948*4882a593Smuzhiyun fmov.l &0x0,%fpcr # clear FPCR 10949*4882a593Smuzhiyun fabs.x %fp1 # make absolute value 10950*4882a593Smuzhiyun fcmp.b %fp1,&0x1 # is |result| < 1.b? 10951*4882a593Smuzhiyun fbge.w fsgldiv_normal_exit # no; no underflow occurred 10952*4882a593Smuzhiyun bra.w fsgldiv_unfl # yes; underflow occurred 10953*4882a593Smuzhiyun 10954*4882a593Smuzhiyun############################################################################ 10955*4882a593Smuzhiyun 10956*4882a593Smuzhiyun# 10957*4882a593Smuzhiyun# Divide: inputs are not both normalized; what are they? 
# ----------------------------------------------------------------------
# NOTE(review): fsgldiv special-case dispatch. %d1 = (DTAG<<3)|STAG selects
# a row/column in tbl_fsgldiv_op (8 entries per dst-tag row); the stubs at
# the end branch to the shared fdiv_* / res_* handlers. Code tokens are
# byte-identical; only one header comment was corrected ("fsadd()" was
# described as emulating "the fadd instruction").
# ----------------------------------------------------------------------
10958*4882a593Smuzhiyun# 10959*4882a593Smuzhiyunfsgldiv_not_norm: 10960*4882a593Smuzhiyun mov.w (tbl_fsgldiv_op.b,%pc,%d1.w*2),%d1 10961*4882a593Smuzhiyun jmp (tbl_fsgldiv_op.b,%pc,%d1.w*1) 10962*4882a593Smuzhiyun 10963*4882a593Smuzhiyun swbeg &48 10964*4882a593Smuzhiyuntbl_fsgldiv_op: 10965*4882a593Smuzhiyun short fsgldiv_norm - tbl_fsgldiv_op # NORM / NORM 10966*4882a593Smuzhiyun short fsgldiv_inf_load - tbl_fsgldiv_op # NORM / ZERO 10967*4882a593Smuzhiyun short fsgldiv_zero_load - tbl_fsgldiv_op # NORM / INF 10968*4882a593Smuzhiyun short fsgldiv_res_qnan - tbl_fsgldiv_op # NORM / QNAN 10969*4882a593Smuzhiyun short fsgldiv_norm - tbl_fsgldiv_op # NORM / DENORM 10970*4882a593Smuzhiyun short fsgldiv_res_snan - tbl_fsgldiv_op # NORM / SNAN 10971*4882a593Smuzhiyun short tbl_fsgldiv_op - tbl_fsgldiv_op # 10972*4882a593Smuzhiyun short tbl_fsgldiv_op - tbl_fsgldiv_op # 10973*4882a593Smuzhiyun 10974*4882a593Smuzhiyun short fsgldiv_zero_load - tbl_fsgldiv_op # ZERO / NORM 10975*4882a593Smuzhiyun short fsgldiv_res_operr - tbl_fsgldiv_op # ZERO / ZERO 10976*4882a593Smuzhiyun short fsgldiv_zero_load - tbl_fsgldiv_op # ZERO / INF 10977*4882a593Smuzhiyun short fsgldiv_res_qnan - tbl_fsgldiv_op # ZERO / QNAN 10978*4882a593Smuzhiyun short fsgldiv_zero_load - tbl_fsgldiv_op # ZERO / DENORM 10979*4882a593Smuzhiyun short fsgldiv_res_snan - tbl_fsgldiv_op # ZERO / SNAN 10980*4882a593Smuzhiyun short tbl_fsgldiv_op - tbl_fsgldiv_op # 10981*4882a593Smuzhiyun short tbl_fsgldiv_op - tbl_fsgldiv_op # 10982*4882a593Smuzhiyun 10983*4882a593Smuzhiyun short fsgldiv_inf_dst - tbl_fsgldiv_op # INF / NORM 10984*4882a593Smuzhiyun short fsgldiv_inf_dst - tbl_fsgldiv_op # INF / ZERO 10985*4882a593Smuzhiyun short fsgldiv_res_operr - tbl_fsgldiv_op # INF / INF 10986*4882a593Smuzhiyun short fsgldiv_res_qnan - tbl_fsgldiv_op # INF / QNAN 10987*4882a593Smuzhiyun short fsgldiv_inf_dst - tbl_fsgldiv_op # INF / DENORM 10988*4882a593Smuzhiyun short fsgldiv_res_snan - tbl_fsgldiv_op # INF / SNAN
10989*4882a593Smuzhiyun short tbl_fsgldiv_op - tbl_fsgldiv_op # 10990*4882a593Smuzhiyun short tbl_fsgldiv_op - tbl_fsgldiv_op # 10991*4882a593Smuzhiyun 10992*4882a593Smuzhiyun short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / NORM 10993*4882a593Smuzhiyun short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / ZERO 10994*4882a593Smuzhiyun short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / INF 10995*4882a593Smuzhiyun short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / QNAN 10996*4882a593Smuzhiyun short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / DENORM 10997*4882a593Smuzhiyun short fsgldiv_res_snan - tbl_fsgldiv_op # QNAN / SNAN 10998*4882a593Smuzhiyun short tbl_fsgldiv_op - tbl_fsgldiv_op # 10999*4882a593Smuzhiyun short tbl_fsgldiv_op - tbl_fsgldiv_op # 11000*4882a593Smuzhiyun 11001*4882a593Smuzhiyun short fsgldiv_norm - tbl_fsgldiv_op # DENORM / NORM 11002*4882a593Smuzhiyun short fsgldiv_inf_load - tbl_fsgldiv_op # DENORM / ZERO 11003*4882a593Smuzhiyun short fsgldiv_zero_load - tbl_fsgldiv_op # DENORM / INF 11004*4882a593Smuzhiyun short fsgldiv_res_qnan - tbl_fsgldiv_op # DENORM / QNAN 11005*4882a593Smuzhiyun short fsgldiv_norm - tbl_fsgldiv_op # DENORM / DENORM 11006*4882a593Smuzhiyun short fsgldiv_res_snan - tbl_fsgldiv_op # DENORM / SNAN 11007*4882a593Smuzhiyun short tbl_fsgldiv_op - tbl_fsgldiv_op # 11008*4882a593Smuzhiyun short tbl_fsgldiv_op - tbl_fsgldiv_op # 11009*4882a593Smuzhiyun 11010*4882a593Smuzhiyun short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / NORM 11011*4882a593Smuzhiyun short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / ZERO 11012*4882a593Smuzhiyun short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / INF 11013*4882a593Smuzhiyun short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / QNAN 11014*4882a593Smuzhiyun short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / DENORM 11015*4882a593Smuzhiyun short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / SNAN 11016*4882a593Smuzhiyun short tbl_fsgldiv_op - tbl_fsgldiv_op # 11017*4882a593Smuzhiyun short tbl_fsgldiv_op - tbl_fsgldiv_op #
11018*4882a593Smuzhiyun 11019*4882a593Smuzhiyunfsgldiv_res_qnan: 11020*4882a593Smuzhiyun bra.l res_qnan 11021*4882a593Smuzhiyunfsgldiv_res_snan: 11022*4882a593Smuzhiyun bra.l res_snan 11023*4882a593Smuzhiyunfsgldiv_res_operr: 11024*4882a593Smuzhiyun bra.l res_operr 11025*4882a593Smuzhiyunfsgldiv_inf_load: 11026*4882a593Smuzhiyun bra.l fdiv_inf_load 11027*4882a593Smuzhiyunfsgldiv_zero_load: 11028*4882a593Smuzhiyun bra.l fdiv_zero_load 11029*4882a593Smuzhiyunfsgldiv_inf_dst: 11030*4882a593Smuzhiyun bra.l fdiv_inf_dst 11031*4882a593Smuzhiyun 11032*4882a593Smuzhiyun######################################################################### 11033*4882a593Smuzhiyun# XDEF **************************************************************** # 11034*4882a593Smuzhiyun# fadd(): emulates the fadd instruction # 11035*4882a593Smuzhiyun# fsadd(): emulates the fsadd instruction # 11036*4882a593Smuzhiyun# fdadd(): emulates the fdadd instruction # 11037*4882a593Smuzhiyun# # 11038*4882a593Smuzhiyun# XREF **************************************************************** # 11039*4882a593Smuzhiyun# addsub_scaler2() - scale the operands so they won't take exc # 11040*4882a593Smuzhiyun# ovf_res() - return default overflow result # 11041*4882a593Smuzhiyun# unf_res() - return default underflow result # 11042*4882a593Smuzhiyun# res_qnan() - set QNAN result # 11043*4882a593Smuzhiyun# res_snan() - set SNAN result # 11044*4882a593Smuzhiyun# res_operr() - set OPERR result # 11045*4882a593Smuzhiyun# scale_to_zero_src() - set src operand exponent equal to zero # 11046*4882a593Smuzhiyun# scale_to_zero_dst() - set dst operand exponent equal to zero # 11047*4882a593Smuzhiyun# # 11048*4882a593Smuzhiyun# INPUT *************************************************************** # 11049*4882a593Smuzhiyun# a0 = pointer to extended precision source operand # 11050*4882a593Smuzhiyun# a1 = pointer to extended precision destination operand # 11051*4882a593Smuzhiyun# # 11052*4882a593Smuzhiyun# OUTPUT
# ----------------------------------------------------------------------
# NOTE(review): M68060 FPSP fadd/fsadd/fdadd emulation. fsadd/fdadd force
# sgl/dbl rounding precision into %d0 and fall into fadd. Code tokens are
# byte-identical to the original; only one copy-paste comment was fixed
# ("execute multiply" on a fadd.x instruction in fadd_unfl_ena_cont).
# ----------------------------------------------------------------------
************************************************************** # 11053*4882a593Smuzhiyun# fp0 = result # 11054*4882a593Smuzhiyun# fp1 = EXOP (if exception occurred) # 11055*4882a593Smuzhiyun# # 11056*4882a593Smuzhiyun# ALGORITHM *********************************************************** # 11057*4882a593Smuzhiyun# Handle NANs, infinities, and zeroes as special cases. Divide # 11058*4882a593Smuzhiyun# norms into extended, single, and double precision. # 11059*4882a593Smuzhiyun# Do addition after scaling exponents such that exception won't # 11060*4882a593Smuzhiyun# occur. Then, check result exponent to see if exception would have # 11061*4882a593Smuzhiyun# occurred. If so, return default result and maybe EXOP. Else, insert # 11062*4882a593Smuzhiyun# the correct result exponent and return. Set FPSR bits as appropriate. # 11063*4882a593Smuzhiyun# # 11064*4882a593Smuzhiyun######################################################################### 11065*4882a593Smuzhiyun 11066*4882a593Smuzhiyun global fsadd 11067*4882a593Smuzhiyunfsadd: 11068*4882a593Smuzhiyun andi.b &0x30,%d0 # clear rnd prec 11069*4882a593Smuzhiyun ori.b &s_mode*0x10,%d0 # insert sgl prec 11070*4882a593Smuzhiyun bra.b fadd 11071*4882a593Smuzhiyun 11072*4882a593Smuzhiyun global fdadd 11073*4882a593Smuzhiyunfdadd: 11074*4882a593Smuzhiyun andi.b &0x30,%d0 # clear rnd prec 11075*4882a593Smuzhiyun ori.b &d_mode*0x10,%d0 # insert dbl prec 11076*4882a593Smuzhiyun 11077*4882a593Smuzhiyun global fadd 11078*4882a593Smuzhiyunfadd: 11079*4882a593Smuzhiyun mov.l %d0,L_SCR3(%a6) # store rnd info 11080*4882a593Smuzhiyun 11081*4882a593Smuzhiyun clr.w %d1 11082*4882a593Smuzhiyun mov.b DTAG(%a6),%d1 11083*4882a593Smuzhiyun lsl.b &0x3,%d1 11084*4882a593Smuzhiyun or.b STAG(%a6),%d1 # combine src tags 11085*4882a593Smuzhiyun 11086*4882a593Smuzhiyun bne.w fadd_not_norm # optimize on non-norm input 11087*4882a593Smuzhiyun 11088*4882a593Smuzhiyun# 11089*4882a593Smuzhiyun# ADD: norms and denorms 11090*4882a593Smuzhiyun#
11091*4882a593Smuzhiyunfadd_norm: 11092*4882a593Smuzhiyun bsr.l addsub_scaler2 # scale exponents 11093*4882a593Smuzhiyun 11094*4882a593Smuzhiyunfadd_zero_entry: 11095*4882a593Smuzhiyun fmovm.x FP_SCR1(%a6),&0x80 # load dst op 11096*4882a593Smuzhiyun 11097*4882a593Smuzhiyun fmov.l &0x0,%fpsr # clear FPSR 11098*4882a593Smuzhiyun fmov.l L_SCR3(%a6),%fpcr # set FPCR 11099*4882a593Smuzhiyun 11100*4882a593Smuzhiyun fadd.x FP_SCR0(%a6),%fp0 # execute add 11101*4882a593Smuzhiyun 11102*4882a593Smuzhiyun fmov.l &0x0,%fpcr # clear FPCR 11103*4882a593Smuzhiyun fmov.l %fpsr,%d1 # fetch INEX2,N,Z 11104*4882a593Smuzhiyun 11105*4882a593Smuzhiyun or.l %d1,USER_FPSR(%a6) # save exc and ccode bits 11106*4882a593Smuzhiyun 11107*4882a593Smuzhiyun fbeq.w fadd_zero_exit # if result is zero, end now 11108*4882a593Smuzhiyun 11109*4882a593Smuzhiyun mov.l %d2,-(%sp) # save d2 11110*4882a593Smuzhiyun 11111*4882a593Smuzhiyun fmovm.x &0x01,-(%sp) # save result to stack 11112*4882a593Smuzhiyun 11113*4882a593Smuzhiyun mov.w 2+L_SCR3(%a6),%d1 11114*4882a593Smuzhiyun lsr.b &0x6,%d1 11115*4882a593Smuzhiyun 11116*4882a593Smuzhiyun mov.w (%sp),%d2 # fetch new sign, exp 11117*4882a593Smuzhiyun andi.l &0x7fff,%d2 # strip sign 11118*4882a593Smuzhiyun sub.l %d0,%d2 # add scale factor 11119*4882a593Smuzhiyun 11120*4882a593Smuzhiyun cmp.l %d2,(tbl_fadd_ovfl.b,%pc,%d1.w*4) # is it an overflow? 11121*4882a593Smuzhiyun bge.b fadd_ovfl # yes 11122*4882a593Smuzhiyun 11123*4882a593Smuzhiyun cmp.l %d2,(tbl_fadd_unfl.b,%pc,%d1.w*4) # is it an underflow?
11124*4882a593Smuzhiyun blt.w fadd_unfl # yes 11125*4882a593Smuzhiyun beq.w fadd_may_unfl # maybe; go find out 11126*4882a593Smuzhiyun 11127*4882a593Smuzhiyunfadd_normal: 11128*4882a593Smuzhiyun mov.w (%sp),%d1 11129*4882a593Smuzhiyun andi.w &0x8000,%d1 # keep sign 11130*4882a593Smuzhiyun or.w %d2,%d1 # concat sign,new exp 11131*4882a593Smuzhiyun mov.w %d1,(%sp) # insert new exponent 11132*4882a593Smuzhiyun 11133*4882a593Smuzhiyun fmovm.x (%sp)+,&0x80 # return result in fp0 11134*4882a593Smuzhiyun 11135*4882a593Smuzhiyun mov.l (%sp)+,%d2 # restore d2 11136*4882a593Smuzhiyun rts 11137*4882a593Smuzhiyun 11138*4882a593Smuzhiyunfadd_zero_exit: 11139*4882a593Smuzhiyun# fmov.s &0x00000000,%fp0 # return zero in fp0 11140*4882a593Smuzhiyun rts 11141*4882a593Smuzhiyun 11142*4882a593Smuzhiyuntbl_fadd_ovfl: 11143*4882a593Smuzhiyun long 0x7fff # ext ovfl 11144*4882a593Smuzhiyun long 0x407f # sgl ovfl 11145*4882a593Smuzhiyun long 0x43ff # dbl ovfl 11146*4882a593Smuzhiyun 11147*4882a593Smuzhiyuntbl_fadd_unfl: 11148*4882a593Smuzhiyun long 0x0000 # ext unfl 11149*4882a593Smuzhiyun long 0x3f81 # sgl unfl 11150*4882a593Smuzhiyun long 0x3c01 # dbl unfl 11151*4882a593Smuzhiyun 11152*4882a593Smuzhiyunfadd_ovfl: 11153*4882a593Smuzhiyun or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex 11154*4882a593Smuzhiyun 11155*4882a593Smuzhiyun mov.b FPCR_ENABLE(%a6),%d1 11156*4882a593Smuzhiyun andi.b &0x13,%d1 # is OVFL or INEX enabled? 11157*4882a593Smuzhiyun bne.b fadd_ovfl_ena # yes 11158*4882a593Smuzhiyun 11159*4882a593Smuzhiyun add.l &0xc,%sp 11160*4882a593Smuzhiyunfadd_ovfl_dis: 11161*4882a593Smuzhiyun btst &neg_bit,FPSR_CC(%a6) # is result negative?
11162*4882a593Smuzhiyun sne %d1 # set sign param accordingly 11163*4882a593Smuzhiyun mov.l L_SCR3(%a6),%d0 # pass prec:rnd 11164*4882a593Smuzhiyun bsr.l ovf_res # calculate default result 11165*4882a593Smuzhiyun or.b %d0,FPSR_CC(%a6) # set INF,N if applicable 11166*4882a593Smuzhiyun fmovm.x (%a0),&0x80 # return default result in fp0 11167*4882a593Smuzhiyun mov.l (%sp)+,%d2 # restore d2 11168*4882a593Smuzhiyun rts 11169*4882a593Smuzhiyun 11170*4882a593Smuzhiyunfadd_ovfl_ena: 11171*4882a593Smuzhiyun mov.b L_SCR3(%a6),%d1 11172*4882a593Smuzhiyun andi.b &0xc0,%d1 # is precision extended? 11173*4882a593Smuzhiyun bne.b fadd_ovfl_ena_sd # no; prec = sgl or dbl 11174*4882a593Smuzhiyun 11175*4882a593Smuzhiyunfadd_ovfl_ena_cont: 11176*4882a593Smuzhiyun mov.w (%sp),%d1 11177*4882a593Smuzhiyun andi.w &0x8000,%d1 # keep sign 11178*4882a593Smuzhiyun subi.l &0x6000,%d2 # add extra bias 11179*4882a593Smuzhiyun andi.w &0x7fff,%d2 11180*4882a593Smuzhiyun or.w %d2,%d1 # concat sign,new exp 11181*4882a593Smuzhiyun mov.w %d1,(%sp) # insert new exponent 11182*4882a593Smuzhiyun 11183*4882a593Smuzhiyun fmovm.x (%sp)+,&0x40 # return EXOP in fp1 11184*4882a593Smuzhiyun bra.b fadd_ovfl_dis 11185*4882a593Smuzhiyun 11186*4882a593Smuzhiyunfadd_ovfl_ena_sd: 11187*4882a593Smuzhiyun fmovm.x FP_SCR1(%a6),&0x80 # load dst op 11188*4882a593Smuzhiyun 11189*4882a593Smuzhiyun mov.l L_SCR3(%a6),%d1 11190*4882a593Smuzhiyun andi.b &0x30,%d1 # keep rnd mode 11191*4882a593Smuzhiyun fmov.l %d1,%fpcr # set FPCR 11192*4882a593Smuzhiyun 11193*4882a593Smuzhiyun fadd.x FP_SCR0(%a6),%fp0 # execute add 11194*4882a593Smuzhiyun 11195*4882a593Smuzhiyun fmov.l &0x0,%fpcr # clear FPCR 11196*4882a593Smuzhiyun 11197*4882a593Smuzhiyun add.l &0xc,%sp 11198*4882a593Smuzhiyun fmovm.x &0x01,-(%sp) 11199*4882a593Smuzhiyun bra.b fadd_ovfl_ena_cont 11200*4882a593Smuzhiyun 11201*4882a593Smuzhiyunfadd_unfl: 11202*4882a593Smuzhiyun bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit 11203*4882a593Smuzhiyun 11204*4882a593Smuzhiyun
add.l &0xc,%sp 11205*4882a593Smuzhiyun 11206*4882a593Smuzhiyun fmovm.x FP_SCR1(%a6),&0x80 # load dst op 11207*4882a593Smuzhiyun 11208*4882a593Smuzhiyun fmov.l &rz_mode*0x10,%fpcr # set FPCR 11209*4882a593Smuzhiyun fmov.l &0x0,%fpsr # clear FPSR 11210*4882a593Smuzhiyun 11211*4882a593Smuzhiyun fadd.x FP_SCR0(%a6),%fp0 # execute add 11212*4882a593Smuzhiyun 11213*4882a593Smuzhiyun fmov.l &0x0,%fpcr # clear FPCR 11214*4882a593Smuzhiyun fmov.l %fpsr,%d1 # save status 11215*4882a593Smuzhiyun 11216*4882a593Smuzhiyun or.l %d1,USER_FPSR(%a6) # save INEX,N 11217*4882a593Smuzhiyun 11218*4882a593Smuzhiyun mov.b FPCR_ENABLE(%a6),%d1 11219*4882a593Smuzhiyun andi.b &0x0b,%d1 # is UNFL or INEX enabled? 11220*4882a593Smuzhiyun bne.b fadd_unfl_ena # yes 11221*4882a593Smuzhiyun 11222*4882a593Smuzhiyunfadd_unfl_dis: 11223*4882a593Smuzhiyun fmovm.x &0x80,FP_SCR0(%a6) # store out result 11224*4882a593Smuzhiyun 11225*4882a593Smuzhiyun lea FP_SCR0(%a6),%a0 # pass: result addr 11226*4882a593Smuzhiyun mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode 11227*4882a593Smuzhiyun bsr.l unf_res # calculate default result 11228*4882a593Smuzhiyun or.b %d0,FPSR_CC(%a6) # 'Z' bit may have been set 11229*4882a593Smuzhiyun fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0 11230*4882a593Smuzhiyun mov.l (%sp)+,%d2 # restore d2 11231*4882a593Smuzhiyun rts 11232*4882a593Smuzhiyun 11233*4882a593Smuzhiyunfadd_unfl_ena: 11234*4882a593Smuzhiyun fmovm.x FP_SCR1(%a6),&0x40 # load dst op 11235*4882a593Smuzhiyun 11236*4882a593Smuzhiyun mov.l L_SCR3(%a6),%d1 11237*4882a593Smuzhiyun andi.b &0xc0,%d1 # is precision extended?
11238*4882a593Smuzhiyun bne.b fadd_unfl_ena_sd # no; sgl or dbl 11239*4882a593Smuzhiyun 11240*4882a593Smuzhiyun fmov.l L_SCR3(%a6),%fpcr # set FPCR 11241*4882a593Smuzhiyun 11242*4882a593Smuzhiyunfadd_unfl_ena_cont: 11243*4882a593Smuzhiyun fmov.l &0x0,%fpsr # clear FPSR 11244*4882a593Smuzhiyun 11245*4882a593Smuzhiyun fadd.x FP_SCR0(%a6),%fp1 # execute add 11246*4882a593Smuzhiyun 11247*4882a593Smuzhiyun fmov.l &0x0,%fpcr # clear FPCR 11248*4882a593Smuzhiyun 11249*4882a593Smuzhiyun fmovm.x &0x40,FP_SCR0(%a6) # save result to stack 11250*4882a593Smuzhiyun mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp} 11251*4882a593Smuzhiyun mov.l %d1,%d2 # make a copy 11252*4882a593Smuzhiyun andi.l &0x7fff,%d1 # strip sign 11253*4882a593Smuzhiyun andi.w &0x8000,%d2 # keep old sign 11254*4882a593Smuzhiyun sub.l %d0,%d1 # add scale factor 11255*4882a593Smuzhiyun addi.l &0x6000,%d1 # add new bias 11256*4882a593Smuzhiyun andi.w &0x7fff,%d1 # clear top bit 11257*4882a593Smuzhiyun or.w %d2,%d1 # concat sign,new exp 11258*4882a593Smuzhiyun mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent 11259*4882a593Smuzhiyun fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1 11260*4882a593Smuzhiyun bra.w fadd_unfl_dis 11261*4882a593Smuzhiyun 11262*4882a593Smuzhiyunfadd_unfl_ena_sd: 11263*4882a593Smuzhiyun mov.l L_SCR3(%a6),%d1 11264*4882a593Smuzhiyun andi.b &0x30,%d1 # use only rnd mode 11265*4882a593Smuzhiyun fmov.l %d1,%fpcr # set FPCR 11266*4882a593Smuzhiyun 11267*4882a593Smuzhiyun bra.b fadd_unfl_ena_cont 11268*4882a593Smuzhiyun 11269*4882a593Smuzhiyun# 11270*4882a593Smuzhiyun# result is equal to the smallest normalized number in the selected precision 11271*4882a593Smuzhiyun# if the precision is extended, this result could not have come from an 11272*4882a593Smuzhiyun# underflow that rounded up.
11273*4882a593Smuzhiyun# 11274*4882a593Smuzhiyunfadd_may_unfl: 11275*4882a593Smuzhiyun mov.l L_SCR3(%a6),%d1 11276*4882a593Smuzhiyun andi.b &0xc0,%d1 11277*4882a593Smuzhiyun beq.w fadd_normal # yes; no underflow occurred 11278*4882a593Smuzhiyun 11279*4882a593Smuzhiyun mov.l 0x4(%sp),%d1 # extract hi(man) 11280*4882a593Smuzhiyun cmpi.l %d1,&0x80000000 # is hi(man) = 0x80000000? 11281*4882a593Smuzhiyun bne.w fadd_normal # no; no underflow occurred 11282*4882a593Smuzhiyun 11283*4882a593Smuzhiyun tst.l 0x8(%sp) # is lo(man) = 0x0? 11284*4882a593Smuzhiyun bne.w fadd_normal # no; no underflow occurred 11285*4882a593Smuzhiyun 11286*4882a593Smuzhiyun btst &inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set? 11287*4882a593Smuzhiyun beq.w fadd_normal # no; no underflow occurred 11288*4882a593Smuzhiyun 11289*4882a593Smuzhiyun# 11290*4882a593Smuzhiyun# ok, so now the result has a exponent equal to the smallest normalized 11291*4882a593Smuzhiyun# exponent for the selected precision. also, the mantissa is equal to 11292*4882a593Smuzhiyun# 0x8000000000000000 and this mantissa is the result of rounding non-zero 11293*4882a593Smuzhiyun# g,r,s. 11294*4882a593Smuzhiyun# now, we must determine whether the pre-rounded result was an underflow 11295*4882a593Smuzhiyun# rounded "up" or a normalized number rounded "down". 11296*4882a593Smuzhiyun# so, we do this be re-executing the add using RZ as the rounding mode and 11297*4882a593Smuzhiyun# seeing if the new result is smaller or equal to the current result.
11298*4882a593Smuzhiyun# 11299*4882a593Smuzhiyun fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1 11300*4882a593Smuzhiyun 11301*4882a593Smuzhiyun mov.l L_SCR3(%a6),%d1 11302*4882a593Smuzhiyun andi.b &0xc0,%d1 # keep rnd prec 11303*4882a593Smuzhiyun ori.b &rz_mode*0x10,%d1 # insert rnd mode 11304*4882a593Smuzhiyun fmov.l %d1,%fpcr # set FPCR 11305*4882a593Smuzhiyun fmov.l &0x0,%fpsr # clear FPSR 11306*4882a593Smuzhiyun 11307*4882a593Smuzhiyun fadd.x FP_SCR0(%a6),%fp1 # execute add 11308*4882a593Smuzhiyun 11309*4882a593Smuzhiyun fmov.l &0x0,%fpcr # clear FPCR 11310*4882a593Smuzhiyun 11311*4882a593Smuzhiyun fabs.x %fp0 # compare absolute values 11312*4882a593Smuzhiyun fabs.x %fp1 11313*4882a593Smuzhiyun fcmp.x %fp0,%fp1 # is first result > second? 11314*4882a593Smuzhiyun 11315*4882a593Smuzhiyun fbgt.w fadd_unfl # yes; it's an underflow 11316*4882a593Smuzhiyun bra.w fadd_normal # no; it's not an underflow 11317*4882a593Smuzhiyun 11318*4882a593Smuzhiyun########################################################################## 11319*4882a593Smuzhiyun 11320*4882a593Smuzhiyun# 11321*4882a593Smuzhiyun# Add: inputs are not both normalized; what are they?
#
# Add: at least one input is not a NORM; dispatch on the operand-tag pair.
# %d1 holds the combined operand tags (presumably (dst tag << 3) | src tag,
# as built by the fadd entry point — outside this view; TODO confirm).
# Each tag is one of: NORM, ZERO, INF, QNAN, DENORM, SNAN.
#
fadd_not_norm:
	mov.w		(tbl_fadd_op.b,%pc,%d1.w*2),%d1	# fetch 16-bit table offset
	jmp		(tbl_fadd_op.b,%pc,%d1.w*1)	# jump to the handler

	swbeg		&48			# 48-entry jump table (6 dst tags x 8 slots)
tbl_fadd_op:
	short		fadd_norm	- tbl_fadd_op	# NORM + NORM
	short		fadd_zero_src	- tbl_fadd_op	# NORM + ZERO
	short		fadd_inf_src	- tbl_fadd_op	# NORM + INF
	short		fadd_res_qnan	- tbl_fadd_op	# NORM + QNAN
	short		fadd_norm	- tbl_fadd_op	# NORM + DENORM
	short		fadd_res_snan	- tbl_fadd_op	# NORM + SNAN
	short		tbl_fadd_op	- tbl_fadd_op	# (unused slot)
	short		tbl_fadd_op	- tbl_fadd_op	# (unused slot)

	short		fadd_zero_dst	- tbl_fadd_op	# ZERO + NORM
	short		fadd_zero_2	- tbl_fadd_op	# ZERO + ZERO
	short		fadd_inf_src	- tbl_fadd_op	# ZERO + INF
	short		fadd_res_qnan	- tbl_fadd_op	# ZERO + QNAN
	short		fadd_zero_dst	- tbl_fadd_op	# ZERO + DENORM
	short		fadd_res_snan	- tbl_fadd_op	# ZERO + SNAN
	short		tbl_fadd_op	- tbl_fadd_op	# (unused slot)
	short		tbl_fadd_op	- tbl_fadd_op	# (unused slot)

	short		fadd_inf_dst	- tbl_fadd_op	# INF + NORM
	short		fadd_inf_dst	- tbl_fadd_op	# INF + ZERO
	short		fadd_inf_2	- tbl_fadd_op	# INF + INF
	short		fadd_res_qnan	- tbl_fadd_op	# INF + QNAN
	short		fadd_inf_dst	- tbl_fadd_op	# INF + DENORM
	short		fadd_res_snan	- tbl_fadd_op	# INF + SNAN
	short		tbl_fadd_op	- tbl_fadd_op	# (unused slot)
	short		tbl_fadd_op	- tbl_fadd_op	# (unused slot)

	short		fadd_res_qnan	- tbl_fadd_op	# QNAN + NORM
	short		fadd_res_qnan	- tbl_fadd_op	# QNAN + ZERO
	short		fadd_res_qnan	- tbl_fadd_op	# QNAN + INF
	short		fadd_res_qnan	- tbl_fadd_op	# QNAN + QNAN
	short		fadd_res_qnan	- tbl_fadd_op	# QNAN + DENORM
	short		fadd_res_snan	- tbl_fadd_op	# QNAN + SNAN (SNAN dominates)
	short		tbl_fadd_op	- tbl_fadd_op	# (unused slot)
	short		tbl_fadd_op	- tbl_fadd_op	# (unused slot)

	short		fadd_norm	- tbl_fadd_op	# DENORM + NORM
	short		fadd_zero_src	- tbl_fadd_op	# DENORM + ZERO
	short		fadd_inf_src	- tbl_fadd_op	# DENORM + INF
	short		fadd_res_qnan	- tbl_fadd_op	# DENORM + QNAN
	short		fadd_norm	- tbl_fadd_op	# DENORM + DENORM
	short		fadd_res_snan	- tbl_fadd_op	# DENORM + SNAN
	short		tbl_fadd_op	- tbl_fadd_op	# (unused slot)
	short		tbl_fadd_op	- tbl_fadd_op	# (unused slot)

	short		fadd_res_snan	- tbl_fadd_op	# SNAN + NORM
	short		fadd_res_snan	- tbl_fadd_op	# SNAN + ZERO
	short		fadd_res_snan	- tbl_fadd_op	# SNAN + INF
	short		fadd_res_snan	- tbl_fadd_op	# SNAN + QNAN
	short		fadd_res_snan	- tbl_fadd_op	# SNAN + DENORM
	short		fadd_res_snan	- tbl_fadd_op	# SNAN + SNAN
	short		tbl_fadd_op	- tbl_fadd_op	# (unused slot)
	short		tbl_fadd_op	- tbl_fadd_op	# (unused slot)

# trampoline stubs so table entries fit in a 16-bit displacement
fadd_res_qnan:
	bra.l		res_qnan		# deliver default QNAN result
fadd_res_snan:
	bra.l		res_snan		# deliver default SNAN result

#
# both operands are ZEROes
#
11391*4882a593Smuzhiyunfadd_zero_2: 11392*4882a593Smuzhiyun mov.b SRC_EX(%a0),%d0 # are the signs opposite 11393*4882a593Smuzhiyun mov.b DST_EX(%a1),%d1 11394*4882a593Smuzhiyun eor.b %d0,%d1 11395*4882a593Smuzhiyun bmi.w fadd_zero_2_chk_rm # weed out (-ZERO)+(+ZERO) 11396*4882a593Smuzhiyun 11397*4882a593Smuzhiyun# the signs are the same. so determine whether they are positive or negative 11398*4882a593Smuzhiyun# and return the appropriately signed zero. 11399*4882a593Smuzhiyun tst.b %d0 # are ZEROes positive or negative? 11400*4882a593Smuzhiyun bmi.b fadd_zero_rm # negative 11401*4882a593Smuzhiyun fmov.s &0x00000000,%fp0 # return +ZERO 11402*4882a593Smuzhiyun mov.b &z_bmask,FPSR_CC(%a6) # set Z 11403*4882a593Smuzhiyun rts 11404*4882a593Smuzhiyun 11405*4882a593Smuzhiyun# 11406*4882a593Smuzhiyun# the ZEROes have opposite signs: 11407*4882a593Smuzhiyun# - Therefore, we return +ZERO if the rounding modes are RN,RZ, or RP. 11408*4882a593Smuzhiyun# - -ZERO is returned in the case of RM. 11409*4882a593Smuzhiyun# 11410*4882a593Smuzhiyunfadd_zero_2_chk_rm: 11411*4882a593Smuzhiyun mov.b 3+L_SCR3(%a6),%d1 11412*4882a593Smuzhiyun andi.b &0x30,%d1 # extract rnd mode 11413*4882a593Smuzhiyun cmpi.b %d1,&rm_mode*0x10 # is rnd mode == RM? 11414*4882a593Smuzhiyun beq.b fadd_zero_rm # yes 11415*4882a593Smuzhiyun fmov.s &0x00000000,%fp0 # return +ZERO 11416*4882a593Smuzhiyun mov.b &z_bmask,FPSR_CC(%a6) # set Z 11417*4882a593Smuzhiyun rts 11418*4882a593Smuzhiyun 11419*4882a593Smuzhiyunfadd_zero_rm: 11420*4882a593Smuzhiyun fmov.s &0x80000000,%fp0 # return -ZERO 11421*4882a593Smuzhiyun mov.b &neg_bmask+z_bmask,FPSR_CC(%a6) # set NEG/Z 11422*4882a593Smuzhiyun rts 11423*4882a593Smuzhiyun 11424*4882a593Smuzhiyun# 11425*4882a593Smuzhiyun# one operand is a ZERO and the other is a DENORM or NORM. scale 11426*4882a593Smuzhiyun# the DENORM or NORM and jump to the regular fadd routine. 
#
# fadd_zero_dst: dst is ZERO, src is DENORM/NORM.
# Scale the non-zero operand into FP_SCR0, zero FP_SCR1, re-enter fadd.
fadd_zero_dst:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l		scale_to_zero_src	# scale the operand
	clr.w		FP_SCR1_EX(%a6)		# FP_SCR1 := +0.0
	clr.l		FP_SCR1_HI(%a6)
	clr.l		FP_SCR1_LO(%a6)
	bra.w		fadd_zero_entry		# go execute fadd

# fadd_zero_src: src is ZERO, dst is DENORM/NORM (mirror of the above).
fadd_zero_src:
	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
	bsr.l		scale_to_zero_dst	# scale the operand
	clr.w		FP_SCR0_EX(%a6)		# FP_SCR0 := +0.0
	clr.l		FP_SCR0_HI(%a6)
	clr.l		FP_SCR0_LO(%a6)
	bra.w		fadd_zero_entry		# go execute fadd

#
# both operands are INFs. an OPERR will result if the INFs have
# different signs. else, an INF of the same sign is returned
#
fadd_inf_2:
	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
	mov.b		DST_EX(%a1),%d1
	eor.b		%d1,%d0
	bmi.l		res_operr		# weed out (-INF)+(+INF)

# ok, so it's not an OPERR. but, we do have to remember to return the
# src INF since that's where the 881/882 gets the j-bit from...
# (falls through to fadd_inf_src)

#
# operands are INF and one of {ZERO, INF, DENORM, NORM}
#
fadd_inf_src:
	fmovm.x		SRC(%a0),&0x80		# return src INF in fp0
	tst.b		SRC_EX(%a0)		# is INF positive?
	bpl.b		fadd_inf_done		# yes; we're done
	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
	rts

#
# operands are INF and one of {ZERO, INF, DENORM, NORM}
#
fadd_inf_dst:
	fmovm.x		DST(%a1),&0x80		# return dst INF in fp0
	tst.b		DST_EX(%a1)		# is INF positive?
	bpl.b		fadd_inf_done		# yes; we're done
	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
	rts

fadd_inf_done:
	mov.b		&inf_bmask,FPSR_CC(%a6)	# set INF
	rts

#########################################################################
# XDEF ****************************************************************	#
#	fsub(): emulates the fsub instruction				#
#	fssub(): emulates the fssub instruction				#
#	fdsub(): emulates the fdsub instruction				#
#									#
# XREF ****************************************************************	#
#	addsub_scaler2() - scale the operands so they won't take exc	#
#	ovf_res() - return default overflow result			#
#	unf_res() - return default underflow result			#
#	res_qnan() - set QNAN result					#
#	res_snan() - set SNAN result					#
#	res_operr() - set OPERR result					#
#	scale_to_zero_src() - set src operand exponent equal to zero	#
#	scale_to_zero_dst() - set dst operand exponent equal to zero	#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision source operand		#
#	a1 = pointer to extended precision destination operand		#
#									#
# OUTPUT **************************************************************	#
#	fp0 = result							#
#	fp1 = EXOP (if exception occurred)				#
#									#
# ALGORITHM ***********************************************************	#
#	Handle NANs, infinities, and zeroes as special cases. Divide	#
#	norms into extended, single, and double precision.		#
#	Do subtraction after scaling exponents such that an exception	#
#	won't occur. Then, check the result exponent to see if an	#
#	exception would have occurred. If so, return the default result	#
#	and maybe EXOP. Else, insert the correct result exponent and	#
#	return. Set FPSR bits as appropriate.				#
#									#
#########################################################################

	global		fssub
fssub:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&s_mode*0x10,%d0	# insert sgl prec
	bra.b		fsub

	global		fdsub
fdsub:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&d_mode*0x10,%d0	# insert dbl prec
						# falls through to fsub

	global		fsub
fsub:
	mov.l		%d0,L_SCR3(%a6)		# store rnd info

	clr.w		%d1
	mov.b		DTAG(%a6),%d1
	lsl.b		&0x3,%d1
	or.b		STAG(%a6),%d1		# combine src tags: (dst<<3)|src

	bne.w		fsub_not_norm		# optimize on non-norm input

#
# SUB: norms and denorms
#
fsub_norm:
	bsr.l		addsub_scaler2		# scale exponents

fsub_zero_entry:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op into fp0

	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fsub.x		FP_SCR0(%a6),%fp0	# execute subtract

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# fetch INEX2, N, Z

	or.l		%d1,USER_FPSR(%a6)	# save exc and ccode bits

	fbeq.w		fsub_zero_exit		# if result zero, end now

	mov.l		%d2,-(%sp)		# save d2

	fmovm.x		&0x01,-(%sp)		# save result to stack

	mov.w		2+L_SCR3(%a6),%d1
	lsr.b		&0x6,%d1		# %d1 = rnd prec index (0/1/2)

	mov.w		(%sp),%d2		# fetch new exponent
	andi.l		&0x7fff,%d2		# strip sign
	sub.l		%d0,%d2			# add scale factor

	cmp.l		%d2,(tbl_fsub_ovfl.b,%pc,%d1.w*4) # is it an overflow?
	bge.b		fsub_ovfl		# yes

	cmp.l		%d2,(tbl_fsub_unfl.b,%pc,%d1.w*4) # is it an underflow?
	blt.w		fsub_unfl		# yes
	beq.w		fsub_may_unfl		# maybe; go find out

fsub_normal:
	mov.w		(%sp),%d1
	andi.w		&0x8000,%d1		# keep sign
	or.w		%d2,%d1			# insert new exponent
	mov.w		%d1,(%sp)		# insert new exponent

	fmovm.x		(%sp)+,&0x80		# return result in fp0

	mov.l		(%sp)+,%d2		# restore d2
	rts

fsub_zero_exit:
#	fmov.s		&0x00000000,%fp0	# return zero in fp0
	rts

# exponent thresholds, indexed by rounding precision (ext/sgl/dbl)
tbl_fsub_ovfl:
	long		0x7fff			# ext ovfl
	long		0x407f			# sgl ovfl
	long		0x43ff			# dbl ovfl

tbl_fsub_unfl:
	long		0x0000			# ext unfl
	long		0x3f81			# sgl unfl
	long		0x3c01			# dbl unfl

fsub_ovfl:
	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fsub_ovfl_ena		# yes

	add.l		&0xc,%sp		# drop the 12-byte result copy
fsub_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	mov.l		(%sp)+,%d2		# restore d2
	rts

# overflow with OVFL/INEX enabled: must also produce the EXOP in fp1
fsub_ovfl_ena:
	mov.b		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# is precision extended?
	bne.b		fsub_ovfl_ena_sd	# no

fsub_ovfl_ena_cont:
	mov.w		(%sp),%d1		# fetch {sgn,exp}
	andi.w		&0x8000,%d1		# keep sign
	subi.l		&0x6000,%d2		# subtract new bias
	andi.w		&0x7fff,%d2		# clear top bit
	or.w		%d2,%d1			# concat sign,exp
	mov.w		%d1,(%sp)		# insert new exponent

	fmovm.x		(%sp)+,&0x40		# return EXOP in fp1
	bra.b		fsub_ovfl_dis

# sgl/dbl precision: re-do the subtract rounded to extended for the EXOP
fsub_ovfl_ena_sd:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0x30,%d1		# clear rnd prec
	fmov.l		%d1,%fpcr		# set FPCR

	fsub.x		FP_SCR0(%a6),%fp0	# execute subtract

	fmov.l		&0x0,%fpcr		# clear FPCR

	add.l		&0xc,%sp		# drop the old result copy
	fmovm.x		&0x01,-(%sp)		# push the re-computed result
	bra.b		fsub_ovfl_ena_cont

fsub_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	add.l		&0xc,%sp		# drop the 12-byte result copy

	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR: round-to-zero
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsub.x		FP_SCR0(%a6),%fp0	# execute subtract

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# save status

	or.l		%d1,USER_FPSR(%a6)

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fsub_unfl_ena		# yes

fsub_unfl_dis:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result

	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# 'Z' may have been set
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	mov.l		(%sp)+,%d2		# restore d2
	rts

# underflow with UNFL/INEX enabled: must also produce the EXOP in fp1
fsub_unfl_ena:
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# is precision extended?
	bne.b		fsub_unfl_ena_sd	# no

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

fsub_unfl_ena_cont:
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsub.x		FP_SCR0(%a6),%fp1	# execute subtract

	fmov.l		&0x0,%fpcr		# clear FPCR

	fmovm.x		&0x40,FP_SCR0(%a6)	# store result to stack
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# add scale factor
	addi.l		&0x6000,%d1		# add new bias
	andi.w		&0x7fff,%d1		# clear top bit
	or.w		%d2,%d1			# concat sgn,exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.w		fsub_unfl_dis

fsub_unfl_ena_sd:
	mov.l		L_SCR3(%a6),%d1
	andi.b		&0x30,%d1		# clear rnd prec
	fmov.l		%d1,%fpcr		# set FPCR

	bra.b		fsub_unfl_ena_cont

#
# result is equal to the smallest normalized number in the selected precision.
# if the precision is extended, this result could not have come from an
# underflow that rounded up.
#
fsub_may_unfl:
	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# fetch rnd prec
	beq.w		fsub_normal		# extended; no underflow occurred

	mov.l		0x4(%sp),%d1
	cmpi.l		%d1,&0x80000000		# is hi(man) = 0x80000000?
	bne.w		fsub_normal		# no; no underflow occurred

	tst.l		0x8(%sp)		# is lo(man) = 0x0?
	bne.w		fsub_normal		# no; no underflow occurred

	btst		&inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
	beq.w		fsub_normal		# no; no underflow occurred

#
# ok, so now the result has an exponent equal to the smallest normalized
# exponent for the selected precision. also, the mantissa is equal to
# 0x8000000000000000 and this mantissa is the result of rounding non-zero
# g,r,s.
# now, we must determine whether the pre-rounded result was an underflow
# rounded "up" or a normalized number rounded "down".
# so, we do this by re-executing the subtract using RZ as the rounding mode
# and seeing if the new result is smaller or equal to the current result.
#
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# keep rnd prec
	ori.b		&rz_mode*0x10,%d1	# insert rnd mode (RZ)
	fmov.l		%d1,%fpcr		# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsub.x		FP_SCR0(%a6),%fp1	# execute subtract

	fmov.l		&0x0,%fpcr		# clear FPCR

	fabs.x		%fp0			# compare absolute values
	fabs.x		%fp1
	fcmp.x		%fp0,%fp1		# is first result > second?

	fbgt.w		fsub_unfl		# yes; it's an underflow
	bra.w		fsub_normal		# no; it's not an underflow

##########################################################################

#
# Sub: inputs are not both normalized; what are they?
#
# Dispatch on the combined operand tags in %d1 = (dst tag << 3) | src tag,
# built by the fsub entry point.
#
fsub_not_norm:
	mov.w		(tbl_fsub_op.b,%pc,%d1.w*2),%d1	# fetch 16-bit table offset
	jmp		(tbl_fsub_op.b,%pc,%d1.w*1)	# jump to the handler

	swbeg		&48			# 48-entry jump table (6 dst tags x 8 slots)
tbl_fsub_op:
	short		fsub_norm	- tbl_fsub_op	# NORM - NORM
	short		fsub_zero_src	- tbl_fsub_op	# NORM - ZERO
	short		fsub_inf_src	- tbl_fsub_op	# NORM - INF
	short		fsub_res_qnan	- tbl_fsub_op	# NORM - QNAN
	short		fsub_norm	- tbl_fsub_op	# NORM - DENORM
	short		fsub_res_snan	- tbl_fsub_op	# NORM - SNAN
	short		tbl_fsub_op	- tbl_fsub_op	# (unused slot)
	short		tbl_fsub_op	- tbl_fsub_op	# (unused slot)

	short		fsub_zero_dst	- tbl_fsub_op	# ZERO - NORM
	short		fsub_zero_2	- tbl_fsub_op	# ZERO - ZERO
	short		fsub_inf_src	- tbl_fsub_op	# ZERO - INF
	short		fsub_res_qnan	- tbl_fsub_op	# ZERO - QNAN
	short		fsub_zero_dst	- tbl_fsub_op	# ZERO - DENORM
	short		fsub_res_snan	- tbl_fsub_op	# ZERO - SNAN
	short		tbl_fsub_op	- tbl_fsub_op	# (unused slot)
	short		tbl_fsub_op	- tbl_fsub_op	# (unused slot)

	short		fsub_inf_dst	- tbl_fsub_op	# INF - NORM
	short		fsub_inf_dst	- tbl_fsub_op	# INF - ZERO
	short		fsub_inf_2	- tbl_fsub_op	# INF - INF
	short		fsub_res_qnan	- tbl_fsub_op	# INF - QNAN
	short		fsub_inf_dst	- tbl_fsub_op	# INF - DENORM
	short		fsub_res_snan	- tbl_fsub_op	# INF - SNAN
	short		tbl_fsub_op	- tbl_fsub_op	# (unused slot)
	short		tbl_fsub_op	- tbl_fsub_op	# (unused slot)

	short		fsub_res_qnan	- tbl_fsub_op	# QNAN - NORM
	short		fsub_res_qnan	- tbl_fsub_op	# QNAN - ZERO
	short		fsub_res_qnan	- tbl_fsub_op	# QNAN - INF
	short		fsub_res_qnan	- tbl_fsub_op	# QNAN - QNAN
	short		fsub_res_qnan	- tbl_fsub_op	# QNAN - DENORM
	short		fsub_res_snan	- tbl_fsub_op	# QNAN - SNAN (SNAN dominates)
	short		tbl_fsub_op	- tbl_fsub_op	# (unused slot)
	short		tbl_fsub_op	- tbl_fsub_op	# (unused slot)

	short		fsub_norm	- tbl_fsub_op	# DENORM - NORM
	short		fsub_zero_src	- tbl_fsub_op	# DENORM - ZERO
	short		fsub_inf_src	- tbl_fsub_op	# DENORM - INF
	short		fsub_res_qnan	- tbl_fsub_op	# DENORM - QNAN
	short		fsub_norm	- tbl_fsub_op	# DENORM - DENORM
	short		fsub_res_snan	- tbl_fsub_op	# DENORM - SNAN
	short		tbl_fsub_op	- tbl_fsub_op	# (unused slot)
	short		tbl_fsub_op	- tbl_fsub_op	# (unused slot)

	short		fsub_res_snan	- tbl_fsub_op	# SNAN - NORM
	short		fsub_res_snan	- tbl_fsub_op	# SNAN - ZERO
	short		fsub_res_snan	- tbl_fsub_op	# SNAN - INF
	short		fsub_res_snan	- tbl_fsub_op	# SNAN - QNAN
	short		fsub_res_snan	- tbl_fsub_op	# SNAN - DENORM
	short		fsub_res_snan	- tbl_fsub_op	# SNAN - SNAN
	short		tbl_fsub_op	- tbl_fsub_op	# (unused slot)
	short		tbl_fsub_op	- tbl_fsub_op	# (unused slot)

# trampoline stubs so table entries fit in a 16-bit displacement
fsub_res_qnan:
	bra.l		res_qnan		# deliver default QNAN result
fsub_res_snan:
	bra.l		res_snan		# deliver default SNAN result

#
# both operands are ZEROes
#
11844*4882a593Smuzhiyunfsub_zero_2: 11845*4882a593Smuzhiyun mov.b SRC_EX(%a0),%d0 11846*4882a593Smuzhiyun mov.b DST_EX(%a1),%d1 11847*4882a593Smuzhiyun eor.b %d1,%d0 11848*4882a593Smuzhiyun bpl.b fsub_zero_2_chk_rm 11849*4882a593Smuzhiyun 11850*4882a593Smuzhiyun# the signs are opposite, so, return a ZERO w/ the sign of the dst ZERO 11851*4882a593Smuzhiyun tst.b %d0 # is dst negative? 11852*4882a593Smuzhiyun bmi.b fsub_zero_2_rm # yes 11853*4882a593Smuzhiyun fmov.s &0x00000000,%fp0 # no; return +ZERO 11854*4882a593Smuzhiyun mov.b &z_bmask,FPSR_CC(%a6) # set Z 11855*4882a593Smuzhiyun rts 11856*4882a593Smuzhiyun 11857*4882a593Smuzhiyun# 11858*4882a593Smuzhiyun# the ZEROes have the same signs: 11859*4882a593Smuzhiyun# - Therefore, we return +ZERO if the rounding mode is RN,RZ, or RP 11860*4882a593Smuzhiyun# - -ZERO is returned in the case of RM. 11861*4882a593Smuzhiyun# 11862*4882a593Smuzhiyunfsub_zero_2_chk_rm: 11863*4882a593Smuzhiyun mov.b 3+L_SCR3(%a6),%d1 11864*4882a593Smuzhiyun andi.b &0x30,%d1 # extract rnd mode 11865*4882a593Smuzhiyun cmpi.b %d1,&rm_mode*0x10 # is rnd mode = RM? 11866*4882a593Smuzhiyun beq.b fsub_zero_2_rm # yes 11867*4882a593Smuzhiyun fmov.s &0x00000000,%fp0 # no; return +ZERO 11868*4882a593Smuzhiyun mov.b &z_bmask,FPSR_CC(%a6) # set Z 11869*4882a593Smuzhiyun rts 11870*4882a593Smuzhiyun 11871*4882a593Smuzhiyunfsub_zero_2_rm: 11872*4882a593Smuzhiyun fmov.s &0x80000000,%fp0 # return -ZERO 11873*4882a593Smuzhiyun mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/NEG 11874*4882a593Smuzhiyun rts 11875*4882a593Smuzhiyun 11876*4882a593Smuzhiyun# 11877*4882a593Smuzhiyun# one operand is a ZERO and the other is a DENORM or a NORM. 11878*4882a593Smuzhiyun# scale the DENORM or NORM and jump to the regular fsub routine. 
11879*4882a593Smuzhiyun# 11880*4882a593Smuzhiyunfsub_zero_dst: 11881*4882a593Smuzhiyun mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) 11882*4882a593Smuzhiyun mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 11883*4882a593Smuzhiyun mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 11884*4882a593Smuzhiyun bsr.l scale_to_zero_src # scale the operand 11885*4882a593Smuzhiyun clr.w FP_SCR1_EX(%a6) 11886*4882a593Smuzhiyun clr.l FP_SCR1_HI(%a6) 11887*4882a593Smuzhiyun clr.l FP_SCR1_LO(%a6) 11888*4882a593Smuzhiyun bra.w fsub_zero_entry # go execute fsub 11889*4882a593Smuzhiyun 11890*4882a593Smuzhiyunfsub_zero_src: 11891*4882a593Smuzhiyun mov.w DST_EX(%a1),FP_SCR1_EX(%a6) 11892*4882a593Smuzhiyun mov.l DST_HI(%a1),FP_SCR1_HI(%a6) 11893*4882a593Smuzhiyun mov.l DST_LO(%a1),FP_SCR1_LO(%a6) 11894*4882a593Smuzhiyun bsr.l scale_to_zero_dst # scale the operand 11895*4882a593Smuzhiyun clr.w FP_SCR0_EX(%a6) 11896*4882a593Smuzhiyun clr.l FP_SCR0_HI(%a6) 11897*4882a593Smuzhiyun clr.l FP_SCR0_LO(%a6) 11898*4882a593Smuzhiyun bra.w fsub_zero_entry # go execute fsub 11899*4882a593Smuzhiyun 11900*4882a593Smuzhiyun# 11901*4882a593Smuzhiyun# both operands are INFs. an OPERR will result if the INFs have the 11902*4882a593Smuzhiyun# same signs. else, 11903*4882a593Smuzhiyun# 11904*4882a593Smuzhiyunfsub_inf_2: 11905*4882a593Smuzhiyun mov.b SRC_EX(%a0),%d0 # exclusive or the signs 11906*4882a593Smuzhiyun mov.b DST_EX(%a1),%d1 11907*4882a593Smuzhiyun eor.b %d1,%d0 11908*4882a593Smuzhiyun bpl.l res_operr # weed out (-INF)+(+INF) 11909*4882a593Smuzhiyun 11910*4882a593Smuzhiyun# ok, so it's not an OPERR. but we do have to remember to return 11911*4882a593Smuzhiyun# the src INF since that's where the 881/882 gets the j-bit. 

# fsub_inf_src: src is INF; result is the negated src INF (dst - src).
fsub_inf_src:
	fmovm.x		SRC(%a0),&0x80		# return src INF in fp0
	fneg.x		%fp0			# invert sign (subtraction)
	fbge.w		fsub_inf_done		# sign is now positive
	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
	rts

# fsub_inf_dst: dst is INF; result is the dst INF unchanged.
fsub_inf_dst:
	fmovm.x		DST(%a1),&0x80		# return dst INF in fp0
	tst.b		DST_EX(%a1)		# is INF negative?
	bpl.b		fsub_inf_done		# no
	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
	rts

fsub_inf_done:
	mov.b		&inf_bmask,FPSR_CC(%a6)	# set INF
	rts

#########################################################################
# XDEF ****************************************************************	#
#	fsqrt(): emulates the fsqrt instruction				#
#	fssqrt(): emulates the fssqrt instruction			#
#	fdsqrt(): emulates the fdsqrt instruction			#
#									#
# XREF ****************************************************************	#
#	scale_sqrt() - scale the source operand				#
#	unf_res() - return default underflow result			#
#	ovf_res() - return default overflow result			#
#	res_qnan_1op() - return QNAN result				#
#	res_snan_1op() - return SNAN result				#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision source operand		#
#	d0 = rnd prec,mode						#
#									#
# OUTPUT **************************************************************	#
#	fp0 = result							#
#	fp1 = EXOP (if exception occurred)				#
#									#
# ALGORITHM ***********************************************************	#
#	Handle NANs, infinities, and zeroes as special cases. Divide	#
#	norms/denorms into ext/sgl/dbl precision.			#
#	For norms/denorms, scale the exponents such that a sqrt		#
#	instruction won't cause an exception. Use the regular fsqrt to	#
#	compute a result. Check if the regular operands would have taken #
#	an exception. If so, return the default overflow/underflow	#
#	result and return the EXOP if exceptions are enabled. Else,	#
#	scale the result operand to the proper exponent.		#
#									#
#########################################################################

	global		fssqrt
fssqrt:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&s_mode*0x10,%d0	# insert sgl precision
	bra.b		fsqrt

	global		fdsqrt
fdsqrt:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&d_mode*0x10,%d0	# insert dbl precision
						# falls through to fsqrt

	global		fsqrt
fsqrt:
	mov.l		%d0,L_SCR3(%a6)		# store rnd info
	clr.w		%d1
	mov.b		STAG(%a6),%d1
	bne.w		fsqrt_not_norm		# optimize on non-norm input

#
# SQUARE ROOT: norms and denorms ONLY!
denorms ONLY! 11984*4882a593Smuzhiyun# 11985*4882a593Smuzhiyunfsqrt_norm: 11986*4882a593Smuzhiyun tst.b SRC_EX(%a0) # is operand negative? 11987*4882a593Smuzhiyun bmi.l res_operr # yes 11988*4882a593Smuzhiyun 11989*4882a593Smuzhiyun andi.b &0xc0,%d0 # is precision extended? 11990*4882a593Smuzhiyun bne.b fsqrt_not_ext # no; go handle sgl or dbl 11991*4882a593Smuzhiyun 11992*4882a593Smuzhiyun fmov.l L_SCR3(%a6),%fpcr # set FPCR 11993*4882a593Smuzhiyun fmov.l &0x0,%fpsr # clear FPSR 11994*4882a593Smuzhiyun 11995*4882a593Smuzhiyun fsqrt.x (%a0),%fp0 # execute square root 11996*4882a593Smuzhiyun 11997*4882a593Smuzhiyun fmov.l %fpsr,%d1 11998*4882a593Smuzhiyun or.l %d1,USER_FPSR(%a6) # set N,INEX 11999*4882a593Smuzhiyun 12000*4882a593Smuzhiyun rts 12001*4882a593Smuzhiyun 12002*4882a593Smuzhiyunfsqrt_denorm: 12003*4882a593Smuzhiyun tst.b SRC_EX(%a0) # is operand negative? 12004*4882a593Smuzhiyun bmi.l res_operr # yes 12005*4882a593Smuzhiyun 12006*4882a593Smuzhiyun andi.b &0xc0,%d0 # is precision extended? 
12007*4882a593Smuzhiyun bne.b fsqrt_not_ext # no; go handle sgl or dbl 12008*4882a593Smuzhiyun 12009*4882a593Smuzhiyun mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) 12010*4882a593Smuzhiyun mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 12011*4882a593Smuzhiyun mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 12012*4882a593Smuzhiyun 12013*4882a593Smuzhiyun bsr.l scale_sqrt # calculate scale factor 12014*4882a593Smuzhiyun 12015*4882a593Smuzhiyun bra.w fsqrt_sd_normal 12016*4882a593Smuzhiyun 12017*4882a593Smuzhiyun# 12018*4882a593Smuzhiyun# operand is either single or double 12019*4882a593Smuzhiyun# 12020*4882a593Smuzhiyunfsqrt_not_ext: 12021*4882a593Smuzhiyun cmpi.b %d0,&s_mode*0x10 # separate sgl/dbl prec 12022*4882a593Smuzhiyun bne.w fsqrt_dbl 12023*4882a593Smuzhiyun 12024*4882a593Smuzhiyun# 12025*4882a593Smuzhiyun# operand is to be rounded to single precision 12026*4882a593Smuzhiyun# 12027*4882a593Smuzhiyunfsqrt_sgl: 12028*4882a593Smuzhiyun mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) 12029*4882a593Smuzhiyun mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 12030*4882a593Smuzhiyun mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 12031*4882a593Smuzhiyun 12032*4882a593Smuzhiyun bsr.l scale_sqrt # calculate scale factor 12033*4882a593Smuzhiyun 12034*4882a593Smuzhiyun cmpi.l %d0,&0x3fff-0x3f81 # will move in underflow? 12035*4882a593Smuzhiyun beq.w fsqrt_sd_may_unfl 12036*4882a593Smuzhiyun bgt.w fsqrt_sd_unfl # yes; go handle underflow 12037*4882a593Smuzhiyun cmpi.l %d0,&0x3fff-0x407f # will move in overflow? 
12038*4882a593Smuzhiyun beq.w fsqrt_sd_may_ovfl # maybe; go check 12039*4882a593Smuzhiyun blt.w fsqrt_sd_ovfl # yes; go handle overflow 12040*4882a593Smuzhiyun 12041*4882a593Smuzhiyun# 12042*4882a593Smuzhiyun# operand will NOT overflow or underflow when moved in to the fp reg file 12043*4882a593Smuzhiyun# 12044*4882a593Smuzhiyunfsqrt_sd_normal: 12045*4882a593Smuzhiyun fmov.l &0x0,%fpsr # clear FPSR 12046*4882a593Smuzhiyun fmov.l L_SCR3(%a6),%fpcr # set FPCR 12047*4882a593Smuzhiyun 12048*4882a593Smuzhiyun fsqrt.x FP_SCR0(%a6),%fp0 # perform absolute 12049*4882a593Smuzhiyun 12050*4882a593Smuzhiyun fmov.l %fpsr,%d1 # save FPSR 12051*4882a593Smuzhiyun fmov.l &0x0,%fpcr # clear FPCR 12052*4882a593Smuzhiyun 12053*4882a593Smuzhiyun or.l %d1,USER_FPSR(%a6) # save INEX2,N 12054*4882a593Smuzhiyun 12055*4882a593Smuzhiyunfsqrt_sd_normal_exit: 12056*4882a593Smuzhiyun mov.l %d2,-(%sp) # save d2 12057*4882a593Smuzhiyun fmovm.x &0x80,FP_SCR0(%a6) # store out result 12058*4882a593Smuzhiyun mov.w FP_SCR0_EX(%a6),%d1 # load sgn,exp 12059*4882a593Smuzhiyun mov.l %d1,%d2 # make a copy 12060*4882a593Smuzhiyun andi.l &0x7fff,%d1 # strip sign 12061*4882a593Smuzhiyun sub.l %d0,%d1 # add scale factor 12062*4882a593Smuzhiyun andi.w &0x8000,%d2 # keep old sign 12063*4882a593Smuzhiyun or.w %d1,%d2 # concat old sign,new exp 12064*4882a593Smuzhiyun mov.w %d2,FP_SCR0_EX(%a6) # insert new exponent 12065*4882a593Smuzhiyun mov.l (%sp)+,%d2 # restore d2 12066*4882a593Smuzhiyun fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0 12067*4882a593Smuzhiyun rts 12068*4882a593Smuzhiyun 12069*4882a593Smuzhiyun# 12070*4882a593Smuzhiyun# operand is to be rounded to double precision 12071*4882a593Smuzhiyun# 12072*4882a593Smuzhiyunfsqrt_dbl: 12073*4882a593Smuzhiyun mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) 12074*4882a593Smuzhiyun mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 12075*4882a593Smuzhiyun mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 12076*4882a593Smuzhiyun 12077*4882a593Smuzhiyun bsr.l scale_sqrt # calculate scale factor 
12078*4882a593Smuzhiyun 12079*4882a593Smuzhiyun cmpi.l %d0,&0x3fff-0x3c01 # will move in underflow? 12080*4882a593Smuzhiyun beq.w fsqrt_sd_may_unfl 12081*4882a593Smuzhiyun bgt.b fsqrt_sd_unfl # yes; go handle underflow 12082*4882a593Smuzhiyun cmpi.l %d0,&0x3fff-0x43ff # will move in overflow? 12083*4882a593Smuzhiyun beq.w fsqrt_sd_may_ovfl # maybe; go check 12084*4882a593Smuzhiyun blt.w fsqrt_sd_ovfl # yes; go handle overflow 12085*4882a593Smuzhiyun bra.w fsqrt_sd_normal # no; ho handle normalized op 12086*4882a593Smuzhiyun 12087*4882a593Smuzhiyun# we're on the line here and the distinguising characteristic is whether 12088*4882a593Smuzhiyun# the exponent is 3fff or 3ffe. if it's 3ffe, then it's a safe number 12089*4882a593Smuzhiyun# elsewise fall through to underflow. 12090*4882a593Smuzhiyunfsqrt_sd_may_unfl: 12091*4882a593Smuzhiyun btst &0x0,1+FP_SCR0_EX(%a6) # is exponent 0x3fff? 12092*4882a593Smuzhiyun bne.w fsqrt_sd_normal # yes, so no underflow 12093*4882a593Smuzhiyun 12094*4882a593Smuzhiyun# 12095*4882a593Smuzhiyun# operand WILL underflow when moved in to the fp register file 12096*4882a593Smuzhiyun# 12097*4882a593Smuzhiyunfsqrt_sd_unfl: 12098*4882a593Smuzhiyun bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit 12099*4882a593Smuzhiyun 12100*4882a593Smuzhiyun fmov.l &rz_mode*0x10,%fpcr # set FPCR 12101*4882a593Smuzhiyun fmov.l &0x0,%fpsr # clear FPSR 12102*4882a593Smuzhiyun 12103*4882a593Smuzhiyun fsqrt.x FP_SCR0(%a6),%fp0 # execute square root 12104*4882a593Smuzhiyun 12105*4882a593Smuzhiyun fmov.l %fpsr,%d1 # save status 12106*4882a593Smuzhiyun fmov.l &0x0,%fpcr # clear FPCR 12107*4882a593Smuzhiyun 12108*4882a593Smuzhiyun or.l %d1,USER_FPSR(%a6) # save INEX2,N 12109*4882a593Smuzhiyun 12110*4882a593Smuzhiyun# if underflow or inexact is enabled, go calculate EXOP first. 12111*4882a593Smuzhiyun mov.b FPCR_ENABLE(%a6),%d1 12112*4882a593Smuzhiyun andi.b &0x0b,%d1 # is UNFL or INEX enabled? 
12113*4882a593Smuzhiyun bne.b fsqrt_sd_unfl_ena # yes 12114*4882a593Smuzhiyun 12115*4882a593Smuzhiyunfsqrt_sd_unfl_dis: 12116*4882a593Smuzhiyun fmovm.x &0x80,FP_SCR0(%a6) # store out result 12117*4882a593Smuzhiyun 12118*4882a593Smuzhiyun lea FP_SCR0(%a6),%a0 # pass: result addr 12119*4882a593Smuzhiyun mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode 12120*4882a593Smuzhiyun bsr.l unf_res # calculate default result 12121*4882a593Smuzhiyun or.b %d0,FPSR_CC(%a6) # set possible 'Z' ccode 12122*4882a593Smuzhiyun fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0 12123*4882a593Smuzhiyun rts 12124*4882a593Smuzhiyun 12125*4882a593Smuzhiyun# 12126*4882a593Smuzhiyun# operand will underflow AND underflow is enabled. 12127*4882a593Smuzhiyun# Therefore, we must return the result rounded to extended precision. 12128*4882a593Smuzhiyun# 12129*4882a593Smuzhiyunfsqrt_sd_unfl_ena: 12130*4882a593Smuzhiyun mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6) 12131*4882a593Smuzhiyun mov.l FP_SCR0_LO(%a6),FP_SCR1_LO(%a6) 12132*4882a593Smuzhiyun mov.w FP_SCR0_EX(%a6),%d1 # load current exponent 12133*4882a593Smuzhiyun 12134*4882a593Smuzhiyun mov.l %d2,-(%sp) # save d2 12135*4882a593Smuzhiyun mov.l %d1,%d2 # make a copy 12136*4882a593Smuzhiyun andi.l &0x7fff,%d1 # strip sign 12137*4882a593Smuzhiyun andi.w &0x8000,%d2 # keep old sign 12138*4882a593Smuzhiyun sub.l %d0,%d1 # subtract scale factor 12139*4882a593Smuzhiyun addi.l &0x6000,%d1 # add new bias 12140*4882a593Smuzhiyun andi.w &0x7fff,%d1 12141*4882a593Smuzhiyun or.w %d2,%d1 # concat new sign,new exp 12142*4882a593Smuzhiyun mov.w %d1,FP_SCR1_EX(%a6) # insert new exp 12143*4882a593Smuzhiyun fmovm.x FP_SCR1(%a6),&0x40 # return EXOP in fp1 12144*4882a593Smuzhiyun mov.l (%sp)+,%d2 # restore d2 12145*4882a593Smuzhiyun bra.b fsqrt_sd_unfl_dis 12146*4882a593Smuzhiyun 12147*4882a593Smuzhiyun# 12148*4882a593Smuzhiyun# operand WILL overflow. 
12149*4882a593Smuzhiyun# 12150*4882a593Smuzhiyunfsqrt_sd_ovfl: 12151*4882a593Smuzhiyun fmov.l &0x0,%fpsr # clear FPSR 12152*4882a593Smuzhiyun fmov.l L_SCR3(%a6),%fpcr # set FPCR 12153*4882a593Smuzhiyun 12154*4882a593Smuzhiyun fsqrt.x FP_SCR0(%a6),%fp0 # perform square root 12155*4882a593Smuzhiyun 12156*4882a593Smuzhiyun fmov.l &0x0,%fpcr # clear FPCR 12157*4882a593Smuzhiyun fmov.l %fpsr,%d1 # save FPSR 12158*4882a593Smuzhiyun 12159*4882a593Smuzhiyun or.l %d1,USER_FPSR(%a6) # save INEX2,N 12160*4882a593Smuzhiyun 12161*4882a593Smuzhiyunfsqrt_sd_ovfl_tst: 12162*4882a593Smuzhiyun or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex 12163*4882a593Smuzhiyun 12164*4882a593Smuzhiyun mov.b FPCR_ENABLE(%a6),%d1 12165*4882a593Smuzhiyun andi.b &0x13,%d1 # is OVFL or INEX enabled? 12166*4882a593Smuzhiyun bne.b fsqrt_sd_ovfl_ena # yes 12167*4882a593Smuzhiyun 12168*4882a593Smuzhiyun# 12169*4882a593Smuzhiyun# OVFL is not enabled; therefore, we must create the default result by 12170*4882a593Smuzhiyun# calling ovf_res(). 12171*4882a593Smuzhiyun# 12172*4882a593Smuzhiyunfsqrt_sd_ovfl_dis: 12173*4882a593Smuzhiyun btst &neg_bit,FPSR_CC(%a6) # is result negative? 12174*4882a593Smuzhiyun sne %d1 # set sign param accordingly 12175*4882a593Smuzhiyun mov.l L_SCR3(%a6),%d0 # pass: prec,mode 12176*4882a593Smuzhiyun bsr.l ovf_res # calculate default result 12177*4882a593Smuzhiyun or.b %d0,FPSR_CC(%a6) # set INF,N if applicable 12178*4882a593Smuzhiyun fmovm.x (%a0),&0x80 # return default result in fp0 12179*4882a593Smuzhiyun rts 12180*4882a593Smuzhiyun 12181*4882a593Smuzhiyun# 12182*4882a593Smuzhiyun# OVFL is enabled. 12183*4882a593Smuzhiyun# the INEX2 bit has already been updated by the round to the correct precision. 12184*4882a593Smuzhiyun# now, round to extended(and don't alter the FPSR). 
12185*4882a593Smuzhiyun# 12186*4882a593Smuzhiyunfsqrt_sd_ovfl_ena: 12187*4882a593Smuzhiyun mov.l %d2,-(%sp) # save d2 12188*4882a593Smuzhiyun mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp} 12189*4882a593Smuzhiyun mov.l %d1,%d2 # make a copy 12190*4882a593Smuzhiyun andi.l &0x7fff,%d1 # strip sign 12191*4882a593Smuzhiyun andi.w &0x8000,%d2 # keep old sign 12192*4882a593Smuzhiyun sub.l %d0,%d1 # add scale factor 12193*4882a593Smuzhiyun subi.l &0x6000,%d1 # subtract bias 12194*4882a593Smuzhiyun andi.w &0x7fff,%d1 12195*4882a593Smuzhiyun or.w %d2,%d1 # concat sign,exp 12196*4882a593Smuzhiyun mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent 12197*4882a593Smuzhiyun fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1 12198*4882a593Smuzhiyun mov.l (%sp)+,%d2 # restore d2 12199*4882a593Smuzhiyun bra.b fsqrt_sd_ovfl_dis 12200*4882a593Smuzhiyun 12201*4882a593Smuzhiyun# 12202*4882a593Smuzhiyun# the move in MAY underflow. so... 12203*4882a593Smuzhiyun# 12204*4882a593Smuzhiyunfsqrt_sd_may_ovfl: 12205*4882a593Smuzhiyun btst &0x0,1+FP_SCR0_EX(%a6) # is exponent 0x3fff? 12206*4882a593Smuzhiyun bne.w fsqrt_sd_ovfl # yes, so overflow 12207*4882a593Smuzhiyun 12208*4882a593Smuzhiyun fmov.l &0x0,%fpsr # clear FPSR 12209*4882a593Smuzhiyun fmov.l L_SCR3(%a6),%fpcr # set FPCR 12210*4882a593Smuzhiyun 12211*4882a593Smuzhiyun fsqrt.x FP_SCR0(%a6),%fp0 # perform absolute 12212*4882a593Smuzhiyun 12213*4882a593Smuzhiyun fmov.l %fpsr,%d1 # save status 12214*4882a593Smuzhiyun fmov.l &0x0,%fpcr # clear FPCR 12215*4882a593Smuzhiyun 12216*4882a593Smuzhiyun or.l %d1,USER_FPSR(%a6) # save INEX2,N 12217*4882a593Smuzhiyun 12218*4882a593Smuzhiyun fmov.x %fp0,%fp1 # make a copy of result 12219*4882a593Smuzhiyun fcmp.b %fp1,&0x1 # is |result| >= 1.b? 
12220*4882a593Smuzhiyun fbge.w fsqrt_sd_ovfl_tst # yes; overflow has occurred 12221*4882a593Smuzhiyun 12222*4882a593Smuzhiyun# no, it didn't overflow; we have correct result 12223*4882a593Smuzhiyun bra.w fsqrt_sd_normal_exit 12224*4882a593Smuzhiyun 12225*4882a593Smuzhiyun########################################################################## 12226*4882a593Smuzhiyun 12227*4882a593Smuzhiyun# 12228*4882a593Smuzhiyun# input is not normalized; what is it? 12229*4882a593Smuzhiyun# 12230*4882a593Smuzhiyunfsqrt_not_norm: 12231*4882a593Smuzhiyun cmpi.b %d1,&DENORM # weed out DENORM 12232*4882a593Smuzhiyun beq.w fsqrt_denorm 12233*4882a593Smuzhiyun cmpi.b %d1,&ZERO # weed out ZERO 12234*4882a593Smuzhiyun beq.b fsqrt_zero 12235*4882a593Smuzhiyun cmpi.b %d1,&INF # weed out INF 12236*4882a593Smuzhiyun beq.b fsqrt_inf 12237*4882a593Smuzhiyun cmpi.b %d1,&SNAN # weed out SNAN 12238*4882a593Smuzhiyun beq.l res_snan_1op 12239*4882a593Smuzhiyun bra.l res_qnan_1op 12240*4882a593Smuzhiyun 12241*4882a593Smuzhiyun# 12242*4882a593Smuzhiyun# fsqrt(+0) = +0 12243*4882a593Smuzhiyun# fsqrt(-0) = -0 12244*4882a593Smuzhiyun# fsqrt(+INF) = +INF 12245*4882a593Smuzhiyun# fsqrt(-INF) = OPERR 12246*4882a593Smuzhiyun# 12247*4882a593Smuzhiyunfsqrt_zero: 12248*4882a593Smuzhiyun tst.b SRC_EX(%a0) # is ZERO positive or negative? 12249*4882a593Smuzhiyun bmi.b fsqrt_zero_m # negative 12250*4882a593Smuzhiyunfsqrt_zero_p: 12251*4882a593Smuzhiyun fmov.s &0x00000000,%fp0 # return +ZERO 12252*4882a593Smuzhiyun mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit 12253*4882a593Smuzhiyun rts 12254*4882a593Smuzhiyunfsqrt_zero_m: 12255*4882a593Smuzhiyun fmov.s &0x80000000,%fp0 # return -ZERO 12256*4882a593Smuzhiyun mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits 12257*4882a593Smuzhiyun rts 12258*4882a593Smuzhiyun 12259*4882a593Smuzhiyunfsqrt_inf: 12260*4882a593Smuzhiyun tst.b SRC_EX(%a0) # is INF positive or negative? 
12261*4882a593Smuzhiyun bmi.l res_operr # negative 12262*4882a593Smuzhiyunfsqrt_inf_p: 12263*4882a593Smuzhiyun fmovm.x SRC(%a0),&0x80 # return +INF in fp0 12264*4882a593Smuzhiyun mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit 12265*4882a593Smuzhiyun rts 12266*4882a593Smuzhiyun 12267*4882a593Smuzhiyun######################################################################### 12268*4882a593Smuzhiyun# XDEF **************************************************************** # 12269*4882a593Smuzhiyun# fetch_dreg(): fetch register according to index in d1 # 12270*4882a593Smuzhiyun# # 12271*4882a593Smuzhiyun# XREF **************************************************************** # 12272*4882a593Smuzhiyun# None # 12273*4882a593Smuzhiyun# # 12274*4882a593Smuzhiyun# INPUT *************************************************************** # 12275*4882a593Smuzhiyun# d1 = index of register to fetch from # 12276*4882a593Smuzhiyun# # 12277*4882a593Smuzhiyun# OUTPUT ************************************************************** # 12278*4882a593Smuzhiyun# d0 = value of register fetched # 12279*4882a593Smuzhiyun# # 12280*4882a593Smuzhiyun# ALGORITHM *********************************************************** # 12281*4882a593Smuzhiyun# According to the index value in d1 which can range from zero # 12282*4882a593Smuzhiyun# to fifteen, load the corresponding register file value (where # 12283*4882a593Smuzhiyun# address register indexes start at 8). D0/D1/A0/A1/A6/A7 are on the # 12284*4882a593Smuzhiyun# stack. The rest should still be in their original places. # 12285*4882a593Smuzhiyun# # 12286*4882a593Smuzhiyun######################################################################### 12287*4882a593Smuzhiyun 12288*4882a593Smuzhiyun# this routine leaves d1 intact for subsequent store_dreg calls. 
12289*4882a593Smuzhiyun global fetch_dreg 12290*4882a593Smuzhiyunfetch_dreg: 12291*4882a593Smuzhiyun mov.w (tbl_fdreg.b,%pc,%d1.w*2),%d0 12292*4882a593Smuzhiyun jmp (tbl_fdreg.b,%pc,%d0.w*1) 12293*4882a593Smuzhiyun 12294*4882a593Smuzhiyuntbl_fdreg: 12295*4882a593Smuzhiyun short fdreg0 - tbl_fdreg 12296*4882a593Smuzhiyun short fdreg1 - tbl_fdreg 12297*4882a593Smuzhiyun short fdreg2 - tbl_fdreg 12298*4882a593Smuzhiyun short fdreg3 - tbl_fdreg 12299*4882a593Smuzhiyun short fdreg4 - tbl_fdreg 12300*4882a593Smuzhiyun short fdreg5 - tbl_fdreg 12301*4882a593Smuzhiyun short fdreg6 - tbl_fdreg 12302*4882a593Smuzhiyun short fdreg7 - tbl_fdreg 12303*4882a593Smuzhiyun short fdreg8 - tbl_fdreg 12304*4882a593Smuzhiyun short fdreg9 - tbl_fdreg 12305*4882a593Smuzhiyun short fdrega - tbl_fdreg 12306*4882a593Smuzhiyun short fdregb - tbl_fdreg 12307*4882a593Smuzhiyun short fdregc - tbl_fdreg 12308*4882a593Smuzhiyun short fdregd - tbl_fdreg 12309*4882a593Smuzhiyun short fdrege - tbl_fdreg 12310*4882a593Smuzhiyun short fdregf - tbl_fdreg 12311*4882a593Smuzhiyun 12312*4882a593Smuzhiyunfdreg0: 12313*4882a593Smuzhiyun mov.l EXC_DREGS+0x0(%a6),%d0 12314*4882a593Smuzhiyun rts 12315*4882a593Smuzhiyunfdreg1: 12316*4882a593Smuzhiyun mov.l EXC_DREGS+0x4(%a6),%d0 12317*4882a593Smuzhiyun rts 12318*4882a593Smuzhiyunfdreg2: 12319*4882a593Smuzhiyun mov.l %d2,%d0 12320*4882a593Smuzhiyun rts 12321*4882a593Smuzhiyunfdreg3: 12322*4882a593Smuzhiyun mov.l %d3,%d0 12323*4882a593Smuzhiyun rts 12324*4882a593Smuzhiyunfdreg4: 12325*4882a593Smuzhiyun mov.l %d4,%d0 12326*4882a593Smuzhiyun rts 12327*4882a593Smuzhiyunfdreg5: 12328*4882a593Smuzhiyun mov.l %d5,%d0 12329*4882a593Smuzhiyun rts 12330*4882a593Smuzhiyunfdreg6: 12331*4882a593Smuzhiyun mov.l %d6,%d0 12332*4882a593Smuzhiyun rts 12333*4882a593Smuzhiyunfdreg7: 12334*4882a593Smuzhiyun mov.l %d7,%d0 12335*4882a593Smuzhiyun rts 12336*4882a593Smuzhiyunfdreg8: 12337*4882a593Smuzhiyun mov.l EXC_DREGS+0x8(%a6),%d0 12338*4882a593Smuzhiyun rts 
12339*4882a593Smuzhiyunfdreg9: 12340*4882a593Smuzhiyun mov.l EXC_DREGS+0xc(%a6),%d0 12341*4882a593Smuzhiyun rts 12342*4882a593Smuzhiyunfdrega: 12343*4882a593Smuzhiyun mov.l %a2,%d0 12344*4882a593Smuzhiyun rts 12345*4882a593Smuzhiyunfdregb: 12346*4882a593Smuzhiyun mov.l %a3,%d0 12347*4882a593Smuzhiyun rts 12348*4882a593Smuzhiyunfdregc: 12349*4882a593Smuzhiyun mov.l %a4,%d0 12350*4882a593Smuzhiyun rts 12351*4882a593Smuzhiyunfdregd: 12352*4882a593Smuzhiyun mov.l %a5,%d0 12353*4882a593Smuzhiyun rts 12354*4882a593Smuzhiyunfdrege: 12355*4882a593Smuzhiyun mov.l (%a6),%d0 12356*4882a593Smuzhiyun rts 12357*4882a593Smuzhiyunfdregf: 12358*4882a593Smuzhiyun mov.l EXC_A7(%a6),%d0 12359*4882a593Smuzhiyun rts 12360*4882a593Smuzhiyun 12361*4882a593Smuzhiyun######################################################################### 12362*4882a593Smuzhiyun# XDEF **************************************************************** # 12363*4882a593Smuzhiyun# store_dreg_l(): store longword to data register specified by d1 # 12364*4882a593Smuzhiyun# # 12365*4882a593Smuzhiyun# XREF **************************************************************** # 12366*4882a593Smuzhiyun# None # 12367*4882a593Smuzhiyun# # 12368*4882a593Smuzhiyun# INPUT *************************************************************** # 12369*4882a593Smuzhiyun# d0 = longowrd value to store # 12370*4882a593Smuzhiyun# d1 = index of register to fetch from # 12371*4882a593Smuzhiyun# # 12372*4882a593Smuzhiyun# OUTPUT ************************************************************** # 12373*4882a593Smuzhiyun# (data register is updated) # 12374*4882a593Smuzhiyun# # 12375*4882a593Smuzhiyun# ALGORITHM *********************************************************** # 12376*4882a593Smuzhiyun# According to the index value in d1, store the longword value # 12377*4882a593Smuzhiyun# in d0 to the corresponding data register. D0/D1 are on the stack # 12378*4882a593Smuzhiyun# while the rest are in their initial places. 
# 12379*4882a593Smuzhiyun# # 12380*4882a593Smuzhiyun######################################################################### 12381*4882a593Smuzhiyun 12382*4882a593Smuzhiyun global store_dreg_l 12383*4882a593Smuzhiyunstore_dreg_l: 12384*4882a593Smuzhiyun mov.w (tbl_sdregl.b,%pc,%d1.w*2),%d1 12385*4882a593Smuzhiyun jmp (tbl_sdregl.b,%pc,%d1.w*1) 12386*4882a593Smuzhiyun 12387*4882a593Smuzhiyuntbl_sdregl: 12388*4882a593Smuzhiyun short sdregl0 - tbl_sdregl 12389*4882a593Smuzhiyun short sdregl1 - tbl_sdregl 12390*4882a593Smuzhiyun short sdregl2 - tbl_sdregl 12391*4882a593Smuzhiyun short sdregl3 - tbl_sdregl 12392*4882a593Smuzhiyun short sdregl4 - tbl_sdregl 12393*4882a593Smuzhiyun short sdregl5 - tbl_sdregl 12394*4882a593Smuzhiyun short sdregl6 - tbl_sdregl 12395*4882a593Smuzhiyun short sdregl7 - tbl_sdregl 12396*4882a593Smuzhiyun 12397*4882a593Smuzhiyunsdregl0: 12398*4882a593Smuzhiyun mov.l %d0,EXC_DREGS+0x0(%a6) 12399*4882a593Smuzhiyun rts 12400*4882a593Smuzhiyunsdregl1: 12401*4882a593Smuzhiyun mov.l %d0,EXC_DREGS+0x4(%a6) 12402*4882a593Smuzhiyun rts 12403*4882a593Smuzhiyunsdregl2: 12404*4882a593Smuzhiyun mov.l %d0,%d2 12405*4882a593Smuzhiyun rts 12406*4882a593Smuzhiyunsdregl3: 12407*4882a593Smuzhiyun mov.l %d0,%d3 12408*4882a593Smuzhiyun rts 12409*4882a593Smuzhiyunsdregl4: 12410*4882a593Smuzhiyun mov.l %d0,%d4 12411*4882a593Smuzhiyun rts 12412*4882a593Smuzhiyunsdregl5: 12413*4882a593Smuzhiyun mov.l %d0,%d5 12414*4882a593Smuzhiyun rts 12415*4882a593Smuzhiyunsdregl6: 12416*4882a593Smuzhiyun mov.l %d0,%d6 12417*4882a593Smuzhiyun rts 12418*4882a593Smuzhiyunsdregl7: 12419*4882a593Smuzhiyun mov.l %d0,%d7 12420*4882a593Smuzhiyun rts 12421*4882a593Smuzhiyun 12422*4882a593Smuzhiyun######################################################################### 12423*4882a593Smuzhiyun# XDEF **************************************************************** # 12424*4882a593Smuzhiyun# store_dreg_w(): store word to data register specified by d1 # 12425*4882a593Smuzhiyun# # 
12426*4882a593Smuzhiyun# XREF **************************************************************** # 12427*4882a593Smuzhiyun# None # 12428*4882a593Smuzhiyun# # 12429*4882a593Smuzhiyun# INPUT *************************************************************** # 12430*4882a593Smuzhiyun# d0 = word value to store # 12431*4882a593Smuzhiyun# d1 = index of register to fetch from # 12432*4882a593Smuzhiyun# # 12433*4882a593Smuzhiyun# OUTPUT ************************************************************** # 12434*4882a593Smuzhiyun# (data register is updated) # 12435*4882a593Smuzhiyun# # 12436*4882a593Smuzhiyun# ALGORITHM *********************************************************** # 12437*4882a593Smuzhiyun# According to the index value in d1, store the word value # 12438*4882a593Smuzhiyun# in d0 to the corresponding data register. D0/D1 are on the stack # 12439*4882a593Smuzhiyun# while the rest are in their initial places. # 12440*4882a593Smuzhiyun# # 12441*4882a593Smuzhiyun######################################################################### 12442*4882a593Smuzhiyun 12443*4882a593Smuzhiyun global store_dreg_w 12444*4882a593Smuzhiyunstore_dreg_w: 12445*4882a593Smuzhiyun mov.w (tbl_sdregw.b,%pc,%d1.w*2),%d1 12446*4882a593Smuzhiyun jmp (tbl_sdregw.b,%pc,%d1.w*1) 12447*4882a593Smuzhiyun 12448*4882a593Smuzhiyuntbl_sdregw: 12449*4882a593Smuzhiyun short sdregw0 - tbl_sdregw 12450*4882a593Smuzhiyun short sdregw1 - tbl_sdregw 12451*4882a593Smuzhiyun short sdregw2 - tbl_sdregw 12452*4882a593Smuzhiyun short sdregw3 - tbl_sdregw 12453*4882a593Smuzhiyun short sdregw4 - tbl_sdregw 12454*4882a593Smuzhiyun short sdregw5 - tbl_sdregw 12455*4882a593Smuzhiyun short sdregw6 - tbl_sdregw 12456*4882a593Smuzhiyun short sdregw7 - tbl_sdregw 12457*4882a593Smuzhiyun 12458*4882a593Smuzhiyunsdregw0: 12459*4882a593Smuzhiyun mov.w %d0,2+EXC_DREGS+0x0(%a6) 12460*4882a593Smuzhiyun rts 12461*4882a593Smuzhiyunsdregw1: 12462*4882a593Smuzhiyun mov.w %d0,2+EXC_DREGS+0x4(%a6) 12463*4882a593Smuzhiyun rts 
12464*4882a593Smuzhiyunsdregw2: 12465*4882a593Smuzhiyun mov.w %d0,%d2 12466*4882a593Smuzhiyun rts 12467*4882a593Smuzhiyunsdregw3: 12468*4882a593Smuzhiyun mov.w %d0,%d3 12469*4882a593Smuzhiyun rts 12470*4882a593Smuzhiyunsdregw4: 12471*4882a593Smuzhiyun mov.w %d0,%d4 12472*4882a593Smuzhiyun rts 12473*4882a593Smuzhiyunsdregw5: 12474*4882a593Smuzhiyun mov.w %d0,%d5 12475*4882a593Smuzhiyun rts 12476*4882a593Smuzhiyunsdregw6: 12477*4882a593Smuzhiyun mov.w %d0,%d6 12478*4882a593Smuzhiyun rts 12479*4882a593Smuzhiyunsdregw7: 12480*4882a593Smuzhiyun mov.w %d0,%d7 12481*4882a593Smuzhiyun rts 12482*4882a593Smuzhiyun 12483*4882a593Smuzhiyun######################################################################### 12484*4882a593Smuzhiyun# XDEF **************************************************************** # 12485*4882a593Smuzhiyun# store_dreg_b(): store byte to data register specified by d1 # 12486*4882a593Smuzhiyun# # 12487*4882a593Smuzhiyun# XREF **************************************************************** # 12488*4882a593Smuzhiyun# None # 12489*4882a593Smuzhiyun# # 12490*4882a593Smuzhiyun# INPUT *************************************************************** # 12491*4882a593Smuzhiyun# d0 = byte value to store # 12492*4882a593Smuzhiyun# d1 = index of register to fetch from # 12493*4882a593Smuzhiyun# # 12494*4882a593Smuzhiyun# OUTPUT ************************************************************** # 12495*4882a593Smuzhiyun# (data register is updated) # 12496*4882a593Smuzhiyun# # 12497*4882a593Smuzhiyun# ALGORITHM *********************************************************** # 12498*4882a593Smuzhiyun# According to the index value in d1, store the byte value # 12499*4882a593Smuzhiyun# in d0 to the corresponding data register. D0/D1 are on the stack # 12500*4882a593Smuzhiyun# while the rest are in their initial places. 
# 12501*4882a593Smuzhiyun# # 12502*4882a593Smuzhiyun######################################################################### 12503*4882a593Smuzhiyun 12504*4882a593Smuzhiyun global store_dreg_b 12505*4882a593Smuzhiyunstore_dreg_b: 12506*4882a593Smuzhiyun mov.w (tbl_sdregb.b,%pc,%d1.w*2),%d1 12507*4882a593Smuzhiyun jmp (tbl_sdregb.b,%pc,%d1.w*1) 12508*4882a593Smuzhiyun 12509*4882a593Smuzhiyuntbl_sdregb: 12510*4882a593Smuzhiyun short sdregb0 - tbl_sdregb 12511*4882a593Smuzhiyun short sdregb1 - tbl_sdregb 12512*4882a593Smuzhiyun short sdregb2 - tbl_sdregb 12513*4882a593Smuzhiyun short sdregb3 - tbl_sdregb 12514*4882a593Smuzhiyun short sdregb4 - tbl_sdregb 12515*4882a593Smuzhiyun short sdregb5 - tbl_sdregb 12516*4882a593Smuzhiyun short sdregb6 - tbl_sdregb 12517*4882a593Smuzhiyun short sdregb7 - tbl_sdregb 12518*4882a593Smuzhiyun 12519*4882a593Smuzhiyunsdregb0: 12520*4882a593Smuzhiyun mov.b %d0,3+EXC_DREGS+0x0(%a6) 12521*4882a593Smuzhiyun rts 12522*4882a593Smuzhiyunsdregb1: 12523*4882a593Smuzhiyun mov.b %d0,3+EXC_DREGS+0x4(%a6) 12524*4882a593Smuzhiyun rts 12525*4882a593Smuzhiyunsdregb2: 12526*4882a593Smuzhiyun mov.b %d0,%d2 12527*4882a593Smuzhiyun rts 12528*4882a593Smuzhiyunsdregb3: 12529*4882a593Smuzhiyun mov.b %d0,%d3 12530*4882a593Smuzhiyun rts 12531*4882a593Smuzhiyunsdregb4: 12532*4882a593Smuzhiyun mov.b %d0,%d4 12533*4882a593Smuzhiyun rts 12534*4882a593Smuzhiyunsdregb5: 12535*4882a593Smuzhiyun mov.b %d0,%d5 12536*4882a593Smuzhiyun rts 12537*4882a593Smuzhiyunsdregb6: 12538*4882a593Smuzhiyun mov.b %d0,%d6 12539*4882a593Smuzhiyun rts 12540*4882a593Smuzhiyunsdregb7: 12541*4882a593Smuzhiyun mov.b %d0,%d7 12542*4882a593Smuzhiyun rts 12543*4882a593Smuzhiyun 12544*4882a593Smuzhiyun######################################################################### 12545*4882a593Smuzhiyun# XDEF **************************************************************** # 12546*4882a593Smuzhiyun# inc_areg(): increment an address register by the value in d0 # 12547*4882a593Smuzhiyun# # 
12548*4882a593Smuzhiyun# XREF **************************************************************** # 12549*4882a593Smuzhiyun# None # 12550*4882a593Smuzhiyun# # 12551*4882a593Smuzhiyun# INPUT *************************************************************** # 12552*4882a593Smuzhiyun# d0 = amount to increment by # 12553*4882a593Smuzhiyun# d1 = index of address register to increment # 12554*4882a593Smuzhiyun# # 12555*4882a593Smuzhiyun# OUTPUT ************************************************************** # 12556*4882a593Smuzhiyun# (address register is updated) # 12557*4882a593Smuzhiyun# # 12558*4882a593Smuzhiyun# ALGORITHM *********************************************************** # 12559*4882a593Smuzhiyun# Typically used for an instruction w/ a post-increment <ea>, # 12560*4882a593Smuzhiyun# this routine adds the increment value in d0 to the address register # 12561*4882a593Smuzhiyun# specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside # 12562*4882a593Smuzhiyun# in their original places. # 12563*4882a593Smuzhiyun# For a7, if the increment amount is one, then we have to # 12564*4882a593Smuzhiyun# increment by two. For any a7 update, set the mia7_flag so that if # 12565*4882a593Smuzhiyun# an access error exception occurs later in emulation, this address # 12566*4882a593Smuzhiyun# register update can be undone. 
# 12567*4882a593Smuzhiyun# # 12568*4882a593Smuzhiyun######################################################################### 12569*4882a593Smuzhiyun 12570*4882a593Smuzhiyun global inc_areg 12571*4882a593Smuzhiyuninc_areg: 12572*4882a593Smuzhiyun mov.w (tbl_iareg.b,%pc,%d1.w*2),%d1 12573*4882a593Smuzhiyun jmp (tbl_iareg.b,%pc,%d1.w*1) 12574*4882a593Smuzhiyun 12575*4882a593Smuzhiyuntbl_iareg: 12576*4882a593Smuzhiyun short iareg0 - tbl_iareg 12577*4882a593Smuzhiyun short iareg1 - tbl_iareg 12578*4882a593Smuzhiyun short iareg2 - tbl_iareg 12579*4882a593Smuzhiyun short iareg3 - tbl_iareg 12580*4882a593Smuzhiyun short iareg4 - tbl_iareg 12581*4882a593Smuzhiyun short iareg5 - tbl_iareg 12582*4882a593Smuzhiyun short iareg6 - tbl_iareg 12583*4882a593Smuzhiyun short iareg7 - tbl_iareg 12584*4882a593Smuzhiyun 12585*4882a593Smuzhiyuniareg0: add.l %d0,EXC_DREGS+0x8(%a6) 12586*4882a593Smuzhiyun rts 12587*4882a593Smuzhiyuniareg1: add.l %d0,EXC_DREGS+0xc(%a6) 12588*4882a593Smuzhiyun rts 12589*4882a593Smuzhiyuniareg2: add.l %d0,%a2 12590*4882a593Smuzhiyun rts 12591*4882a593Smuzhiyuniareg3: add.l %d0,%a3 12592*4882a593Smuzhiyun rts 12593*4882a593Smuzhiyuniareg4: add.l %d0,%a4 12594*4882a593Smuzhiyun rts 12595*4882a593Smuzhiyuniareg5: add.l %d0,%a5 12596*4882a593Smuzhiyun rts 12597*4882a593Smuzhiyuniareg6: add.l %d0,(%a6) 12598*4882a593Smuzhiyun rts 12599*4882a593Smuzhiyuniareg7: mov.b &mia7_flg,SPCOND_FLG(%a6) 12600*4882a593Smuzhiyun cmpi.b %d0,&0x1 12601*4882a593Smuzhiyun beq.b iareg7b 12602*4882a593Smuzhiyun add.l %d0,EXC_A7(%a6) 12603*4882a593Smuzhiyun rts 12604*4882a593Smuzhiyuniareg7b: 12605*4882a593Smuzhiyun addq.l &0x2,EXC_A7(%a6) 12606*4882a593Smuzhiyun rts 12607*4882a593Smuzhiyun 12608*4882a593Smuzhiyun######################################################################### 12609*4882a593Smuzhiyun# XDEF **************************************************************** # 12610*4882a593Smuzhiyun# dec_areg(): decrement an address register by the value in d0 # 
12611*4882a593Smuzhiyun# # 12612*4882a593Smuzhiyun# XREF **************************************************************** # 12613*4882a593Smuzhiyun# None # 12614*4882a593Smuzhiyun# # 12615*4882a593Smuzhiyun# INPUT *************************************************************** # 12616*4882a593Smuzhiyun# d0 = amount to decrement by # 12617*4882a593Smuzhiyun# d1 = index of address register to decrement # 12618*4882a593Smuzhiyun# # 12619*4882a593Smuzhiyun# OUTPUT ************************************************************** # 12620*4882a593Smuzhiyun# (address register is updated) # 12621*4882a593Smuzhiyun# # 12622*4882a593Smuzhiyun# ALGORITHM *********************************************************** # 12623*4882a593Smuzhiyun# Typically used for an instruction w/ a pre-decrement <ea>, # 12624*4882a593Smuzhiyun# this routine adds the decrement value in d0 to the address register # 12625*4882a593Smuzhiyun# specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside # 12626*4882a593Smuzhiyun# in their original places. # 12627*4882a593Smuzhiyun# For a7, if the decrement amount is one, then we have to # 12628*4882a593Smuzhiyun# decrement by two. For any a7 update, set the mda7_flag so that if # 12629*4882a593Smuzhiyun# an access error exception occurs later in emulation, this address # 12630*4882a593Smuzhiyun# register update can be undone. 
# 12631*4882a593Smuzhiyun# # 12632*4882a593Smuzhiyun######################################################################### 12633*4882a593Smuzhiyun 12634*4882a593Smuzhiyun global dec_areg 12635*4882a593Smuzhiyundec_areg: 12636*4882a593Smuzhiyun mov.w (tbl_dareg.b,%pc,%d1.w*2),%d1 12637*4882a593Smuzhiyun jmp (tbl_dareg.b,%pc,%d1.w*1) 12638*4882a593Smuzhiyun 12639*4882a593Smuzhiyuntbl_dareg: 12640*4882a593Smuzhiyun short dareg0 - tbl_dareg 12641*4882a593Smuzhiyun short dareg1 - tbl_dareg 12642*4882a593Smuzhiyun short dareg2 - tbl_dareg 12643*4882a593Smuzhiyun short dareg3 - tbl_dareg 12644*4882a593Smuzhiyun short dareg4 - tbl_dareg 12645*4882a593Smuzhiyun short dareg5 - tbl_dareg 12646*4882a593Smuzhiyun short dareg6 - tbl_dareg 12647*4882a593Smuzhiyun short dareg7 - tbl_dareg 12648*4882a593Smuzhiyun 12649*4882a593Smuzhiyundareg0: sub.l %d0,EXC_DREGS+0x8(%a6) 12650*4882a593Smuzhiyun rts 12651*4882a593Smuzhiyundareg1: sub.l %d0,EXC_DREGS+0xc(%a6) 12652*4882a593Smuzhiyun rts 12653*4882a593Smuzhiyundareg2: sub.l %d0,%a2 12654*4882a593Smuzhiyun rts 12655*4882a593Smuzhiyundareg3: sub.l %d0,%a3 12656*4882a593Smuzhiyun rts 12657*4882a593Smuzhiyundareg4: sub.l %d0,%a4 12658*4882a593Smuzhiyun rts 12659*4882a593Smuzhiyundareg5: sub.l %d0,%a5 12660*4882a593Smuzhiyun rts 12661*4882a593Smuzhiyundareg6: sub.l %d0,(%a6) 12662*4882a593Smuzhiyun rts 12663*4882a593Smuzhiyundareg7: mov.b &mda7_flg,SPCOND_FLG(%a6) 12664*4882a593Smuzhiyun cmpi.b %d0,&0x1 12665*4882a593Smuzhiyun beq.b dareg7b 12666*4882a593Smuzhiyun sub.l %d0,EXC_A7(%a6) 12667*4882a593Smuzhiyun rts 12668*4882a593Smuzhiyundareg7b: 12669*4882a593Smuzhiyun subq.l &0x2,EXC_A7(%a6) 12670*4882a593Smuzhiyun rts 12671*4882a593Smuzhiyun 12672*4882a593Smuzhiyun############################################################################## 12673*4882a593Smuzhiyun 12674*4882a593Smuzhiyun######################################################################### 12675*4882a593Smuzhiyun# XDEF 
# XDEF ****************************************************************	#
#	load_fpn1(): load FP register value into FP_SRC(a6).		#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	d0 = index of FP register to load				#
#									#
# OUTPUT **************************************************************	#
#	FP_SRC(a6) = value loaded from FP register file			#
#	a0 = pointer to FP_SRC(a6)					#
#									#
# ALGORITHM ***********************************************************	#
#	Using the index in d0, load FP_SRC(a6) with a number from the	#
# FP register file. The fp0/fp1 images reside in the stack frame at	#
# EXC_FP0/EXC_FP1 and are copied longword by longword; fp2-fp7 are	#
# still live and are stored directly with fmovm.x.			#
#									#
#########################################################################

	global		load_fpn1
load_fpn1:
# dispatch through a table of 16-bit offsets indexed by the FP register
# number held in d0
	mov.w		(tbl_load_fpn1.b,%pc,%d0.w*2), %d0
	jmp		(tbl_load_fpn1.b,%pc,%d0.w*1)

tbl_load_fpn1:
	short		load_fpn1_0 - tbl_load_fpn1
	short		load_fpn1_1 - tbl_load_fpn1
	short		load_fpn1_2 - tbl_load_fpn1
	short		load_fpn1_3 - tbl_load_fpn1
	short		load_fpn1_4 - tbl_load_fpn1
	short		load_fpn1_5 - tbl_load_fpn1
	short		load_fpn1_6 - tbl_load_fpn1
	short		load_fpn1_7 - tbl_load_fpn1

load_fpn1_0:
# copy the 12-byte (extended precision) fp0 image out of the frame
	mov.l		0+EXC_FP0(%a6), 0+FP_SRC(%a6)
	mov.l		4+EXC_FP0(%a6), 4+FP_SRC(%a6)
	mov.l		8+EXC_FP0(%a6), 8+FP_SRC(%a6)
	lea		FP_SRC(%a6), %a0	# return ptr to the value
	rts
load_fpn1_1:
	mov.l		0+EXC_FP1(%a6), 0+FP_SRC(%a6)
	mov.l		4+EXC_FP1(%a6), 4+FP_SRC(%a6)
	mov.l		8+EXC_FP1(%a6), 8+FP_SRC(%a6)
	lea		FP_SRC(%a6), %a0
	rts
load_fpn1_2:
	fmovm.x		&0x20, FP_SRC(%a6)	# store fp2
	lea		FP_SRC(%a6), %a0
	rts
load_fpn1_3:
	fmovm.x		&0x10, FP_SRC(%a6)	# store fp3
	lea		FP_SRC(%a6), %a0
	rts
load_fpn1_4:
	fmovm.x		&0x08, FP_SRC(%a6)	# store fp4
	lea		FP_SRC(%a6), %a0
	rts
load_fpn1_5:
	fmovm.x		&0x04, FP_SRC(%a6)	# store fp5
	lea		FP_SRC(%a6), %a0
	rts
load_fpn1_6:
	fmovm.x		&0x02, FP_SRC(%a6)	# store fp6
	lea		FP_SRC(%a6), %a0
	rts
load_fpn1_7:
	fmovm.x		&0x01, FP_SRC(%a6)	# store fp7
	lea		FP_SRC(%a6), %a0
	rts

#############################################################################

#########################################################################
# XDEF ****************************************************************	#
#	load_fpn2(): load FP register value into FP_DST(a6).		#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	d0 = index of FP register to load				#
#									#
# OUTPUT **************************************************************	#
#	FP_DST(a6) = value loaded from FP register file			#
#	a0 = pointer to FP_DST(a6)					#
#									#
# ALGORITHM ***********************************************************	#
#	Using the index in d0, load FP_DST(a6) with a number from the	#
# FP register file. The fp0/fp1 images reside in the stack frame at	#
# EXC_FP0/EXC_FP1 and are copied longword by longword; fp2-fp7 are	#
# still live and are stored directly with fmovm.x.			#
#									#
#########################################################################

	global		load_fpn2
load_fpn2:
# dispatch through a table of 16-bit offsets indexed by the FP register
# number held in d0
	mov.w		(tbl_load_fpn2.b,%pc,%d0.w*2), %d0
	jmp		(tbl_load_fpn2.b,%pc,%d0.w*1)

tbl_load_fpn2:
	short		load_fpn2_0 - tbl_load_fpn2
	short		load_fpn2_1 - tbl_load_fpn2
	short		load_fpn2_2 - tbl_load_fpn2
	short		load_fpn2_3 - tbl_load_fpn2
	short		load_fpn2_4 - tbl_load_fpn2
	short		load_fpn2_5 - tbl_load_fpn2
	short		load_fpn2_6 - tbl_load_fpn2
	short		load_fpn2_7 - tbl_load_fpn2

load_fpn2_0:
# copy the 12-byte (extended precision) fp0 image out of the frame
	mov.l		0+EXC_FP0(%a6), 0+FP_DST(%a6)
	mov.l		4+EXC_FP0(%a6), 4+FP_DST(%a6)
	mov.l		8+EXC_FP0(%a6), 8+FP_DST(%a6)
	lea		FP_DST(%a6), %a0	# return ptr to the value
	rts
load_fpn2_1:
	mov.l		0+EXC_FP1(%a6), 0+FP_DST(%a6)
	mov.l		4+EXC_FP1(%a6), 4+FP_DST(%a6)
	mov.l		8+EXC_FP1(%a6), 8+FP_DST(%a6)
	lea		FP_DST(%a6), %a0
	rts
load_fpn2_2:
	fmovm.x		&0x20, FP_DST(%a6)	# store fp2
	lea		FP_DST(%a6), %a0
	rts
load_fpn2_3:
	fmovm.x		&0x10, FP_DST(%a6)	# store fp3
	lea		FP_DST(%a6), %a0
	rts
load_fpn2_4:
	fmovm.x		&0x08, FP_DST(%a6)	# store fp4
	lea		FP_DST(%a6), %a0
	rts
load_fpn2_5:
	fmovm.x		&0x04, FP_DST(%a6)	# store fp5
	lea		FP_DST(%a6), %a0
	rts
load_fpn2_6:
	fmovm.x		&0x02, FP_DST(%a6)	# store fp6
	lea		FP_DST(%a6), %a0
	rts
load_fpn2_7:
	fmovm.x		&0x01, FP_DST(%a6)	# store fp7
	lea		FP_DST(%a6), %a0
	rts

#############################################################################

#########################################################################
# XDEF ****************************************************************	#
#	store_fpreg(): store an fp value to the fpreg designated d0.	#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	fp0 = extended precision value to store				#
#	d0 = index of floating-point register				#
#									#
# OUTPUT **************************************************************	#
#	None								#
#									#
# ALGORITHM ***********************************************************	#
#	Store the value in fp0 to the FP register designated by the	#
# value in d0. The FP number can be DENORM or SNAN so we have to be	#
# careful that we don't take an exception here. fp0/fp1 are written	#
# to their frame images; for fp2-fp7, fp0 is bounced through the	#
# stack with fmovm.x, which moves the value without converting it.	#
#									#
#########################################################################

	global		store_fpreg
store_fpreg:
# dispatch through a table of 16-bit offsets indexed by the FP register
# number held in d0
	mov.w		(tbl_store_fpreg.b,%pc,%d0.w*2), %d0
	jmp		(tbl_store_fpreg.b,%pc,%d0.w*1)

tbl_store_fpreg:
	short		store_fpreg_0 - tbl_store_fpreg
	short		store_fpreg_1 - tbl_store_fpreg
	short		store_fpreg_2 - tbl_store_fpreg
	short		store_fpreg_3 - tbl_store_fpreg
	short		store_fpreg_4 - tbl_store_fpreg
	short		store_fpreg_5 - tbl_store_fpreg
	short		store_fpreg_6 - tbl_store_fpreg
	short		store_fpreg_7 - tbl_store_fpreg

store_fpreg_0:
	fmovm.x		&0x80, EXC_FP0(%a6)	# write fp0 to its frame image
	rts
store_fpreg_1:
	fmovm.x		&0x80, EXC_FP1(%a6)	# write fp0 to fp1's frame image
	rts
store_fpreg_2:
	fmovm.x		&0x01, -(%sp)		# push fp0 to the stack
	fmovm.x		(%sp)+, &0x20		# pop it into fp2
	rts
store_fpreg_3:
	fmovm.x		&0x01, -(%sp)		# push fp0 to the stack
	fmovm.x		(%sp)+, &0x10		# pop it into fp3
	rts
store_fpreg_4:
	fmovm.x		&0x01, -(%sp)		# push fp0 to the stack
	fmovm.x		(%sp)+, &0x08		# pop it into fp4
	rts
store_fpreg_5:
	fmovm.x		&0x01, -(%sp)		# push fp0 to the stack
	fmovm.x		(%sp)+, &0x04		# pop it into fp5
	rts
store_fpreg_6:
	fmovm.x		&0x01, -(%sp)		# push fp0 to the stack
	fmovm.x		(%sp)+, &0x02		# pop it into fp6
	rts
store_fpreg_7:
	fmovm.x		&0x01, -(%sp)		# push fp0 to the stack
	fmovm.x		(%sp)+, &0x01		# pop it into fp7
	rts

#########################################################################
# XDEF ****************************************************************	#
#	get_packed(): fetch a packed operand from memory and then	#
#	convert it to a floating-point binary number.			#
#									#
# XREF ****************************************************************	#
#	_dcalc_ea() - calculate the correct <ea>			#
#	_dmem_read() - fetch the packed operand from memory		#
#	facc_in_x() - the fetch failed so jump to special exit code	#
#	decbin() - convert packed to binary extended precision		#
#									#
# INPUT ***************************************************************	#
#	None								#
#									#
# OUTPUT **************************************************************	#
#	If no failure on _dmem_read():					#
#	FP_SRC(a6) = packed operand now as a binary FP number		#
#									#
# ALGORITHM ***********************************************************	#
#	Get the correct <ea> which is the value on the exception stack	#
# frame w/ maybe a correction factor if the <ea> is -(an) or (an)+.	#
# Then, fetch the operand from memory. If the fetch fails, exit	#
# through facc_in_x().							#
#	If the packed operand is a ZERO,NAN, or INF, convert it to	#
# its binary representation here. Else, call decbin() which will	#
# convert the packed value to an extended precision binary value.	#
#									#
#########################################################################

# the stacked <ea> for packed is correct except for -(An).
# the base reg must be updated for both -(An) and (An)+.
	global		get_packed
get_packed:
	mov.l		&0xc,%d0		# packed is 12 bytes
	bsr.l		_dcalc_ea		# fetch <ea>; correct An

	lea		FP_SRC(%a6),%a1		# pass: ptr to super dst
	mov.l		&0xc,%d0		# pass: 12 bytes
	bsr.l		_dmem_read		# read packed operand

	tst.l		%d1			# did dfetch fail?
	bne.l		facc_in_x		# yes

# The packed operand is an INF or a NAN if the exponent field is all ones.
	bfextu		FP_SRC(%a6){&1:&15},%d0	# get exp
	cmpi.w		%d0,&0x7fff		# INF or NAN?
	bne.b		gp_try_zero		# no
	rts					# operand is an INF or NAN

# The packed operand is a zero if the mantissa is all zero, else it's
# a normal packed op.
gp_try_zero:
	mov.b		3+FP_SRC(%a6),%d0	# get byte 4 (integer digit)
	andi.b		&0x0f,%d0		# clear all but last nybble
	bne.b		gp_not_spec		# not a zero
	tst.l		FP_SRC_HI(%a6)		# is lw 2 zero?
	bne.b		gp_not_spec		# not a zero
	tst.l		FP_SRC_LO(%a6)		# is lw 3 zero?
	bne.b		gp_not_spec		# not a zero
	rts					# operand is a ZERO
gp_not_spec:
	lea		FP_SRC(%a6),%a0		# pass: ptr to packed op
	bsr.l		decbin			# convert to extended
	fmovm.x		&0x80,FP_SRC(%a6)	# make this the srcop
	rts

#########################################################################
# decbin(): Converts normalized packed bcd value pointed to by register	#
#	a0 to extended-precision value in fp0.				#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to normalized packed bcd value			#
#									#
# OUTPUT **************************************************************	#
#	fp0 = exact fp representation of the packed bcd value.		#
#									#
# ALGORITHM ***********************************************************	#
#	Expected is a normal bcd (i.e. non-exceptional; all inf, zero,	#
#	and NaN operands are dispatched without entering this routine)	#
#	value in 68881/882 format at location (a0).			#
#									#
#	A1. Convert the bcd exponent to binary by successive adds and	#
#	muls. Set the sign according to SE. Subtract 16 to compensate	#
#	for the mantissa which is to be interpreted as 17 integer	#
#	digits, rather than 1 integer and 16 fraction digits.		#
#	Note: this operation can never overflow.			#
#									#
#	A2.
Convert the bcd mantissa to binary by successive # 12975*4882a593Smuzhiyun# adds and muls in FP0. Set the sign according to SM. # 12976*4882a593Smuzhiyun# The mantissa digits will be converted with the decimal point # 12977*4882a593Smuzhiyun# assumed following the least-significant digit. # 12978*4882a593Smuzhiyun# Note: this operation can never overflow. # 12979*4882a593Smuzhiyun# # 12980*4882a593Smuzhiyun# A3. Count the number of leading/trailing zeros in the # 12981*4882a593Smuzhiyun# bcd string. If SE is positive, count the leading zeros; # 12982*4882a593Smuzhiyun# if negative, count the trailing zeros. Set the adjusted # 12983*4882a593Smuzhiyun# exponent equal to the exponent from A1 and the zero count # 12984*4882a593Smuzhiyun# added if SM = 1 and subtracted if SM = 0. Scale the # 12985*4882a593Smuzhiyun# mantissa the equivalent of forcing in the bcd value: # 12986*4882a593Smuzhiyun# # 12987*4882a593Smuzhiyun# SM = 0 a non-zero digit in the integer position # 12988*4882a593Smuzhiyun# SM = 1 a non-zero digit in Mant0, lsd of the fraction # 12989*4882a593Smuzhiyun# # 12990*4882a593Smuzhiyun# this will insure that any value, regardless of its # 12991*4882a593Smuzhiyun# representation (ex. 0.1E2, 1E1, 10E0, 100E-1), is converted # 12992*4882a593Smuzhiyun# consistently. # 12993*4882a593Smuzhiyun# # 12994*4882a593Smuzhiyun# A4. Calculate the factor 10^exp in FP1 using a table of # 12995*4882a593Smuzhiyun# 10^(2^n) values. To reduce the error in forming factors # 12996*4882a593Smuzhiyun# greater than 10^27, a directed rounding scheme is used with # 12997*4882a593Smuzhiyun# tables rounded to RN, RM, and RP, according to the table # 12998*4882a593Smuzhiyun# in the comments of the pwrten section. # 12999*4882a593Smuzhiyun# # 13000*4882a593Smuzhiyun# A5. Form the final binary number by scaling the mantissa by # 13001*4882a593Smuzhiyun# the exponent factor. 
This is done by multiplying the # 13002*4882a593Smuzhiyun# mantissa in FP0 by the factor in FP1 if the adjusted # 13003*4882a593Smuzhiyun# exponent sign is positive, and dividing FP0 by FP1 if # 13004*4882a593Smuzhiyun# it is negative. # 13005*4882a593Smuzhiyun# # 13006*4882a593Smuzhiyun# Clean up and return. Check if the final mul or div was inexact. # 13007*4882a593Smuzhiyun# If so, set INEX1 in USER_FPSR. # 13008*4882a593Smuzhiyun# # 13009*4882a593Smuzhiyun######################################################################### 13010*4882a593Smuzhiyun 13011*4882a593Smuzhiyun# 13012*4882a593Smuzhiyun# PTENRN, PTENRM, and PTENRP are arrays of powers of 10 rounded 13013*4882a593Smuzhiyun# to nearest, minus, and plus, respectively. The tables include 13014*4882a593Smuzhiyun# 10**{1,2,4,8,16,32,64,128,256,512,1024,2048,4096}. No rounding 13015*4882a593Smuzhiyun# is required until the power is greater than 27, however, all 13016*4882a593Smuzhiyun# tables include the first 5 for ease of indexing. 
13017*4882a593Smuzhiyun# 13018*4882a593SmuzhiyunRTABLE: 13019*4882a593Smuzhiyun byte 0,0,0,0 13020*4882a593Smuzhiyun byte 2,3,2,3 13021*4882a593Smuzhiyun byte 2,3,3,2 13022*4882a593Smuzhiyun byte 3,2,2,3 13023*4882a593Smuzhiyun 13024*4882a593Smuzhiyun set FNIBS,7 13025*4882a593Smuzhiyun set FSTRT,0 13026*4882a593Smuzhiyun 13027*4882a593Smuzhiyun set ESTRT,4 13028*4882a593Smuzhiyun set EDIGITS,2 13029*4882a593Smuzhiyun 13030*4882a593Smuzhiyun global decbin 13031*4882a593Smuzhiyundecbin: 13032*4882a593Smuzhiyun mov.l 0x0(%a0),FP_SCR0_EX(%a6) # make a copy of input 13033*4882a593Smuzhiyun mov.l 0x4(%a0),FP_SCR0_HI(%a6) # so we don't alter it 13034*4882a593Smuzhiyun mov.l 0x8(%a0),FP_SCR0_LO(%a6) 13035*4882a593Smuzhiyun 13036*4882a593Smuzhiyun lea FP_SCR0(%a6),%a0 13037*4882a593Smuzhiyun 13038*4882a593Smuzhiyun movm.l &0x3c00,-(%sp) # save d2-d5 13039*4882a593Smuzhiyun fmovm.x &0x1,-(%sp) # save fp1 13040*4882a593Smuzhiyun# 13041*4882a593Smuzhiyun# Calculate exponent: 13042*4882a593Smuzhiyun# 1. Copy bcd value in memory for use as a working copy. 13043*4882a593Smuzhiyun# 2. Calculate absolute value of exponent in d1 by mul and add. 13044*4882a593Smuzhiyun# 3. Correct for exponent sign. 13045*4882a593Smuzhiyun# 4. Subtract 16 to compensate for interpreting the mant as all integer digits. 13046*4882a593Smuzhiyun# (i.e., all digits assumed left of the decimal point.) 
13047*4882a593Smuzhiyun# 13048*4882a593Smuzhiyun# Register usage: 13049*4882a593Smuzhiyun# 13050*4882a593Smuzhiyun# calc_e: 13051*4882a593Smuzhiyun# (*) d0: temp digit storage 13052*4882a593Smuzhiyun# (*) d1: accumulator for binary exponent 13053*4882a593Smuzhiyun# (*) d2: digit count 13054*4882a593Smuzhiyun# (*) d3: offset pointer 13055*4882a593Smuzhiyun# ( ) d4: first word of bcd 13056*4882a593Smuzhiyun# ( ) a0: pointer to working bcd value 13057*4882a593Smuzhiyun# ( ) a6: pointer to original bcd value 13058*4882a593Smuzhiyun# (*) FP_SCR1: working copy of original bcd value 13059*4882a593Smuzhiyun# (*) L_SCR1: copy of original exponent word 13060*4882a593Smuzhiyun# 13061*4882a593Smuzhiyuncalc_e: 13062*4882a593Smuzhiyun mov.l &EDIGITS,%d2 # # of nibbles (digits) in fraction part 13063*4882a593Smuzhiyun mov.l &ESTRT,%d3 # counter to pick up digits 13064*4882a593Smuzhiyun mov.l (%a0),%d4 # get first word of bcd 13065*4882a593Smuzhiyun clr.l %d1 # zero d1 for accumulator 13066*4882a593Smuzhiyune_gd: 13067*4882a593Smuzhiyun mulu.l &0xa,%d1 # mul partial product by one digit place 13068*4882a593Smuzhiyun bfextu %d4{%d3:&4},%d0 # get the digit and zero extend into d0 13069*4882a593Smuzhiyun add.l %d0,%d1 # d1 = d1 + d0 13070*4882a593Smuzhiyun addq.b &4,%d3 # advance d3 to the next digit 13071*4882a593Smuzhiyun dbf.w %d2,e_gd # if we have used all 3 digits, exit loop 13072*4882a593Smuzhiyun btst &30,%d4 # get SE 13073*4882a593Smuzhiyun beq.b e_pos # don't negate if pos 13074*4882a593Smuzhiyun neg.l %d1 # negate before subtracting 13075*4882a593Smuzhiyune_pos: 13076*4882a593Smuzhiyun sub.l &16,%d1 # sub to compensate for shift of mant 13077*4882a593Smuzhiyun bge.b e_save # if still pos, do not neg 13078*4882a593Smuzhiyun neg.l %d1 # now negative, make pos and set SE 13079*4882a593Smuzhiyun or.l &0x40000000,%d4 # set SE in d4, 13080*4882a593Smuzhiyun or.l &0x40000000,(%a0) # and in working bcd 13081*4882a593Smuzhiyune_save: 13082*4882a593Smuzhiyun mov.l %d1,-(%sp) # save 
exp on stack 13083*4882a593Smuzhiyun# 13084*4882a593Smuzhiyun# 13085*4882a593Smuzhiyun# Calculate mantissa: 13086*4882a593Smuzhiyun# 1. Calculate absolute value of mantissa in fp0 by mul and add. 13087*4882a593Smuzhiyun# 2. Correct for mantissa sign. 13088*4882a593Smuzhiyun# (i.e., all digits assumed left of the decimal point.) 13089*4882a593Smuzhiyun# 13090*4882a593Smuzhiyun# Register usage: 13091*4882a593Smuzhiyun# 13092*4882a593Smuzhiyun# calc_m: 13093*4882a593Smuzhiyun# (*) d0: temp digit storage 13094*4882a593Smuzhiyun# (*) d1: lword counter 13095*4882a593Smuzhiyun# (*) d2: digit count 13096*4882a593Smuzhiyun# (*) d3: offset pointer 13097*4882a593Smuzhiyun# ( ) d4: words 2 and 3 of bcd 13098*4882a593Smuzhiyun# ( ) a0: pointer to working bcd value 13099*4882a593Smuzhiyun# ( ) a6: pointer to original bcd value 13100*4882a593Smuzhiyun# (*) fp0: mantissa accumulator 13101*4882a593Smuzhiyun# ( ) FP_SCR1: working copy of original bcd value 13102*4882a593Smuzhiyun# ( ) L_SCR1: copy of original exponent word 13103*4882a593Smuzhiyun# 13104*4882a593Smuzhiyuncalc_m: 13105*4882a593Smuzhiyun mov.l &1,%d1 # word counter, init to 1 13106*4882a593Smuzhiyun fmov.s &0x00000000,%fp0 # accumulator 13107*4882a593Smuzhiyun# 13108*4882a593Smuzhiyun# 13109*4882a593Smuzhiyun# Since the packed number has a long word between the first & second parts, 13110*4882a593Smuzhiyun# get the integer digit then skip down & get the rest of the 13111*4882a593Smuzhiyun# mantissa. We will unroll the loop once. 13112*4882a593Smuzhiyun# 13113*4882a593Smuzhiyun bfextu (%a0){&28:&4},%d0 # integer part is ls digit in long word 13114*4882a593Smuzhiyun fadd.b %d0,%fp0 # add digit to sum in fp0 13115*4882a593Smuzhiyun# 13116*4882a593Smuzhiyun# 13117*4882a593Smuzhiyun# Get the rest of the mantissa. 
13118*4882a593Smuzhiyun# 13119*4882a593Smuzhiyunloadlw: 13120*4882a593Smuzhiyun mov.l (%a0,%d1.L*4),%d4 # load mantissa lonqword into d4 13121*4882a593Smuzhiyun mov.l &FSTRT,%d3 # counter to pick up digits 13122*4882a593Smuzhiyun mov.l &FNIBS,%d2 # reset number of digits per a0 ptr 13123*4882a593Smuzhiyunmd2b: 13124*4882a593Smuzhiyun fmul.s &0x41200000,%fp0 # fp0 = fp0 * 10 13125*4882a593Smuzhiyun bfextu %d4{%d3:&4},%d0 # get the digit and zero extend 13126*4882a593Smuzhiyun fadd.b %d0,%fp0 # fp0 = fp0 + digit 13127*4882a593Smuzhiyun# 13128*4882a593Smuzhiyun# 13129*4882a593Smuzhiyun# If all the digits (8) in that long word have been converted (d2=0), 13130*4882a593Smuzhiyun# then inc d1 (=2) to point to the next long word and reset d3 to 0 13131*4882a593Smuzhiyun# to initialize the digit offset, and set d2 to 7 for the digit count; 13132*4882a593Smuzhiyun# else continue with this long word. 13133*4882a593Smuzhiyun# 13134*4882a593Smuzhiyun addq.b &4,%d3 # advance d3 to the next digit 13135*4882a593Smuzhiyun dbf.w %d2,md2b # check for last digit in this lw 13136*4882a593Smuzhiyunnextlw: 13137*4882a593Smuzhiyun addq.l &1,%d1 # inc lw pointer in mantissa 13138*4882a593Smuzhiyun cmp.l %d1,&2 # test for last lw 13139*4882a593Smuzhiyun ble.b loadlw # if not, get last one 13140*4882a593Smuzhiyun# 13141*4882a593Smuzhiyun# Check the sign of the mant and make the value in fp0 the same sign. 13142*4882a593Smuzhiyun# 13143*4882a593Smuzhiyunm_sign: 13144*4882a593Smuzhiyun btst &31,(%a0) # test sign of the mantissa 13145*4882a593Smuzhiyun beq.b ap_st_z # if clear, go to append/strip zeros 13146*4882a593Smuzhiyun fneg.x %fp0 # if set, negate fp0 13147*4882a593Smuzhiyun# 13148*4882a593Smuzhiyun# Append/strip zeros: 13149*4882a593Smuzhiyun# 13150*4882a593Smuzhiyun# For adjusted exponents which have an absolute value greater than 27*, 13151*4882a593Smuzhiyun# this routine calculates the amount needed to normalize the mantissa 13152*4882a593Smuzhiyun# for the adjusted exponent. 
# That number is subtracted from the exp if the exp was positive, and
# added if it was negative. The purpose of this is to reduce the value
# of the exponent and the possibility of error in calculation of
# pwrten.
#
# 1. Branch on the sign of the adjusted exponent.
# 2p.(positive exp)
#  2. Check M16 and the digits in lwords 2 and 3 in descending order.
#  3. Add one for each zero encountered until a non-zero digit.
#  4. Subtract the count from the exp.
#  5. Check if the exp has crossed zero in #3 above; make the exp abs
#     and set SE.
#  6. Multiply the mantissa by 10**count.
# 2n.(negative exp)
#  2. Check the digits in lwords 3 and 2 in descending order.
#  3. Add one for each zero encountered until a non-zero digit.
#  4. Add the count to the exp.
#  5. Check if the exp has crossed zero in #3 above; clear SE.
#  6. Divide the mantissa by 10**count.
#
# *Why 27? If the adjusted exponent is within -28 < expA < 28, then
# any adjustment due to append/strip zeros will drive the resultant
# exponent towards zero. Since all pwrten constants with a power
# of 27 or less are exact, there is no need to use this routine to
# attempt to lessen the resultant exponent.
13177*4882a593Smuzhiyun# 13178*4882a593Smuzhiyun# Register usage: 13179*4882a593Smuzhiyun# 13180*4882a593Smuzhiyun# ap_st_z: 13181*4882a593Smuzhiyun# (*) d0: temp digit storage 13182*4882a593Smuzhiyun# (*) d1: zero count 13183*4882a593Smuzhiyun# (*) d2: digit count 13184*4882a593Smuzhiyun# (*) d3: offset pointer 13185*4882a593Smuzhiyun# ( ) d4: first word of bcd 13186*4882a593Smuzhiyun# (*) d5: lword counter 13187*4882a593Smuzhiyun# ( ) a0: pointer to working bcd value 13188*4882a593Smuzhiyun# ( ) FP_SCR1: working copy of original bcd value 13189*4882a593Smuzhiyun# ( ) L_SCR1: copy of original exponent word 13190*4882a593Smuzhiyun# 13191*4882a593Smuzhiyun# 13192*4882a593Smuzhiyun# First check the absolute value of the exponent to see if this 13193*4882a593Smuzhiyun# routine is necessary. If so, then check the sign of the exponent 13194*4882a593Smuzhiyun# and do append (+) or strip (-) zeros accordingly. 13195*4882a593Smuzhiyun# This section handles a positive adjusted exponent. 13196*4882a593Smuzhiyun# 13197*4882a593Smuzhiyunap_st_z: 13198*4882a593Smuzhiyun mov.l (%sp),%d1 # load expA for range test 13199*4882a593Smuzhiyun cmp.l %d1,&27 # test is with 27 13200*4882a593Smuzhiyun ble.w pwrten # if abs(expA) <28, skip ap/st zeros 13201*4882a593Smuzhiyun btst &30,(%a0) # check sign of exp 13202*4882a593Smuzhiyun bne.b ap_st_n # if neg, go to neg side 13203*4882a593Smuzhiyun clr.l %d1 # zero count reg 13204*4882a593Smuzhiyun mov.l (%a0),%d4 # load lword 1 to d4 13205*4882a593Smuzhiyun bfextu %d4{&28:&4},%d0 # get M16 in d0 13206*4882a593Smuzhiyun bne.b ap_p_fx # if M16 is non-zero, go fix exp 13207*4882a593Smuzhiyun addq.l &1,%d1 # inc zero count 13208*4882a593Smuzhiyun mov.l &1,%d5 # init lword counter 13209*4882a593Smuzhiyun mov.l (%a0,%d5.L*4),%d4 # get lword 2 to d4 13210*4882a593Smuzhiyun bne.b ap_p_cl # if lw 2 is zero, skip it 13211*4882a593Smuzhiyun addq.l &8,%d1 # and inc count by 8 13212*4882a593Smuzhiyun addq.l &1,%d5 # inc lword counter 
13213*4882a593Smuzhiyun mov.l (%a0,%d5.L*4),%d4 # get lword 3 to d4 13214*4882a593Smuzhiyunap_p_cl: 13215*4882a593Smuzhiyun clr.l %d3 # init offset reg 13216*4882a593Smuzhiyun mov.l &7,%d2 # init digit counter 13217*4882a593Smuzhiyunap_p_gd: 13218*4882a593Smuzhiyun bfextu %d4{%d3:&4},%d0 # get digit 13219*4882a593Smuzhiyun bne.b ap_p_fx # if non-zero, go to fix exp 13220*4882a593Smuzhiyun addq.l &4,%d3 # point to next digit 13221*4882a593Smuzhiyun addq.l &1,%d1 # inc digit counter 13222*4882a593Smuzhiyun dbf.w %d2,ap_p_gd # get next digit 13223*4882a593Smuzhiyunap_p_fx: 13224*4882a593Smuzhiyun mov.l %d1,%d0 # copy counter to d2 13225*4882a593Smuzhiyun mov.l (%sp),%d1 # get adjusted exp from memory 13226*4882a593Smuzhiyun sub.l %d0,%d1 # subtract count from exp 13227*4882a593Smuzhiyun bge.b ap_p_fm # if still pos, go to pwrten 13228*4882a593Smuzhiyun neg.l %d1 # now its neg; get abs 13229*4882a593Smuzhiyun mov.l (%a0),%d4 # load lword 1 to d4 13230*4882a593Smuzhiyun or.l &0x40000000,%d4 # and set SE in d4 13231*4882a593Smuzhiyun or.l &0x40000000,(%a0) # and in memory 13232*4882a593Smuzhiyun# 13233*4882a593Smuzhiyun# Calculate the mantissa multiplier to compensate for the striping of 13234*4882a593Smuzhiyun# zeros from the mantissa. 
13235*4882a593Smuzhiyun# 13236*4882a593Smuzhiyunap_p_fm: 13237*4882a593Smuzhiyun lea.l PTENRN(%pc),%a1 # get address of power-of-ten table 13238*4882a593Smuzhiyun clr.l %d3 # init table index 13239*4882a593Smuzhiyun fmov.s &0x3f800000,%fp1 # init fp1 to 1 13240*4882a593Smuzhiyun mov.l &3,%d2 # init d2 to count bits in counter 13241*4882a593Smuzhiyunap_p_el: 13242*4882a593Smuzhiyun asr.l &1,%d0 # shift lsb into carry 13243*4882a593Smuzhiyun bcc.b ap_p_en # if 1, mul fp1 by pwrten factor 13244*4882a593Smuzhiyun fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no) 13245*4882a593Smuzhiyunap_p_en: 13246*4882a593Smuzhiyun add.l &12,%d3 # inc d3 to next rtable entry 13247*4882a593Smuzhiyun tst.l %d0 # check if d0 is zero 13248*4882a593Smuzhiyun bne.b ap_p_el # if not, get next bit 13249*4882a593Smuzhiyun fmul.x %fp1,%fp0 # mul mantissa by 10**(no_bits_shifted) 13250*4882a593Smuzhiyun bra.b pwrten # go calc pwrten 13251*4882a593Smuzhiyun# 13252*4882a593Smuzhiyun# This section handles a negative adjusted exponent. 
13253*4882a593Smuzhiyun# 13254*4882a593Smuzhiyunap_st_n: 13255*4882a593Smuzhiyun clr.l %d1 # clr counter 13256*4882a593Smuzhiyun mov.l &2,%d5 # set up d5 to point to lword 3 13257*4882a593Smuzhiyun mov.l (%a0,%d5.L*4),%d4 # get lword 3 13258*4882a593Smuzhiyun bne.b ap_n_cl # if not zero, check digits 13259*4882a593Smuzhiyun sub.l &1,%d5 # dec d5 to point to lword 2 13260*4882a593Smuzhiyun addq.l &8,%d1 # inc counter by 8 13261*4882a593Smuzhiyun mov.l (%a0,%d5.L*4),%d4 # get lword 2 13262*4882a593Smuzhiyunap_n_cl: 13263*4882a593Smuzhiyun mov.l &28,%d3 # point to last digit 13264*4882a593Smuzhiyun mov.l &7,%d2 # init digit counter 13265*4882a593Smuzhiyunap_n_gd: 13266*4882a593Smuzhiyun bfextu %d4{%d3:&4},%d0 # get digit 13267*4882a593Smuzhiyun bne.b ap_n_fx # if non-zero, go to exp fix 13268*4882a593Smuzhiyun subq.l &4,%d3 # point to previous digit 13269*4882a593Smuzhiyun addq.l &1,%d1 # inc digit counter 13270*4882a593Smuzhiyun dbf.w %d2,ap_n_gd # get next digit 13271*4882a593Smuzhiyunap_n_fx: 13272*4882a593Smuzhiyun mov.l %d1,%d0 # copy counter to d0 13273*4882a593Smuzhiyun mov.l (%sp),%d1 # get adjusted exp from memory 13274*4882a593Smuzhiyun sub.l %d0,%d1 # subtract count from exp 13275*4882a593Smuzhiyun bgt.b ap_n_fm # if still pos, go fix mantissa 13276*4882a593Smuzhiyun neg.l %d1 # take abs of exp and clr SE 13277*4882a593Smuzhiyun mov.l (%a0),%d4 # load lword 1 to d4 13278*4882a593Smuzhiyun and.l &0xbfffffff,%d4 # and clr SE in d4 13279*4882a593Smuzhiyun and.l &0xbfffffff,(%a0) # and in memory 13280*4882a593Smuzhiyun# 13281*4882a593Smuzhiyun# Calculate the mantissa multiplier to compensate for the appending of 13282*4882a593Smuzhiyun# zeros to the mantissa. 
13283*4882a593Smuzhiyun# 13284*4882a593Smuzhiyunap_n_fm: 13285*4882a593Smuzhiyun lea.l PTENRN(%pc),%a1 # get address of power-of-ten table 13286*4882a593Smuzhiyun clr.l %d3 # init table index 13287*4882a593Smuzhiyun fmov.s &0x3f800000,%fp1 # init fp1 to 1 13288*4882a593Smuzhiyun mov.l &3,%d2 # init d2 to count bits in counter 13289*4882a593Smuzhiyunap_n_el: 13290*4882a593Smuzhiyun asr.l &1,%d0 # shift lsb into carry 13291*4882a593Smuzhiyun bcc.b ap_n_en # if 1, mul fp1 by pwrten factor 13292*4882a593Smuzhiyun fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no) 13293*4882a593Smuzhiyunap_n_en: 13294*4882a593Smuzhiyun add.l &12,%d3 # inc d3 to next rtable entry 13295*4882a593Smuzhiyun tst.l %d0 # check if d0 is zero 13296*4882a593Smuzhiyun bne.b ap_n_el # if not, get next bit 13297*4882a593Smuzhiyun fdiv.x %fp1,%fp0 # div mantissa by 10**(no_bits_shifted) 13298*4882a593Smuzhiyun# 13299*4882a593Smuzhiyun# 13300*4882a593Smuzhiyun# Calculate power-of-ten factor from adjusted and shifted exponent. 13301*4882a593Smuzhiyun# 13302*4882a593Smuzhiyun# Register usage: 13303*4882a593Smuzhiyun# 13304*4882a593Smuzhiyun# pwrten: 13305*4882a593Smuzhiyun# (*) d0: temp 13306*4882a593Smuzhiyun# ( ) d1: exponent 13307*4882a593Smuzhiyun# (*) d2: {FPCR[6:5],SM,SE} as index in RTABLE; temp 13308*4882a593Smuzhiyun# (*) d3: FPCR work copy 13309*4882a593Smuzhiyun# ( ) d4: first word of bcd 13310*4882a593Smuzhiyun# (*) a1: RTABLE pointer 13311*4882a593Smuzhiyun# calc_p: 13312*4882a593Smuzhiyun# (*) d0: temp 13313*4882a593Smuzhiyun# ( ) d1: exponent 13314*4882a593Smuzhiyun# (*) d3: PWRTxx table index 13315*4882a593Smuzhiyun# ( ) a0: pointer to working copy of bcd 13316*4882a593Smuzhiyun# (*) a1: PWRTxx pointer 13317*4882a593Smuzhiyun# (*) fp1: power-of-ten accumulator 13318*4882a593Smuzhiyun# 13319*4882a593Smuzhiyun# Pwrten calculates the exponent factor in the selected rounding mode 13320*4882a593Smuzhiyun# according to the following table: 13321*4882a593Smuzhiyun# 13322*4882a593Smuzhiyun# Sign of 
Mant Sign of Exp Rounding Mode PWRTEN Rounding Mode 13323*4882a593Smuzhiyun# 13324*4882a593Smuzhiyun# ANY ANY RN RN 13325*4882a593Smuzhiyun# 13326*4882a593Smuzhiyun# + + RP RP 13327*4882a593Smuzhiyun# - + RP RM 13328*4882a593Smuzhiyun# + - RP RM 13329*4882a593Smuzhiyun# - - RP RP 13330*4882a593Smuzhiyun# 13331*4882a593Smuzhiyun# + + RM RM 13332*4882a593Smuzhiyun# - + RM RP 13333*4882a593Smuzhiyun# + - RM RP 13334*4882a593Smuzhiyun# - - RM RM 13335*4882a593Smuzhiyun# 13336*4882a593Smuzhiyun# + + RZ RM 13337*4882a593Smuzhiyun# - + RZ RM 13338*4882a593Smuzhiyun# + - RZ RP 13339*4882a593Smuzhiyun# - - RZ RP 13340*4882a593Smuzhiyun# 13341*4882a593Smuzhiyun# 13342*4882a593Smuzhiyunpwrten: 13343*4882a593Smuzhiyun mov.l USER_FPCR(%a6),%d3 # get user's FPCR 13344*4882a593Smuzhiyun bfextu %d3{&26:&2},%d2 # isolate rounding mode bits 13345*4882a593Smuzhiyun mov.l (%a0),%d4 # reload 1st bcd word to d4 13346*4882a593Smuzhiyun asl.l &2,%d2 # format d2 to be 13347*4882a593Smuzhiyun bfextu %d4{&0:&2},%d0 # {FPCR[6],FPCR[5],SM,SE} 13348*4882a593Smuzhiyun add.l %d0,%d2 # in d2 as index into RTABLE 13349*4882a593Smuzhiyun lea.l RTABLE(%pc),%a1 # load rtable base 13350*4882a593Smuzhiyun mov.b (%a1,%d2),%d0 # load new rounding bits from table 13351*4882a593Smuzhiyun clr.l %d3 # clear d3 to force no exc and extended 13352*4882a593Smuzhiyun bfins %d0,%d3{&26:&2} # stuff new rounding bits in FPCR 13353*4882a593Smuzhiyun fmov.l %d3,%fpcr # write new FPCR 13354*4882a593Smuzhiyun asr.l &1,%d0 # write correct PTENxx table 13355*4882a593Smuzhiyun bcc.b not_rp # to a1 13356*4882a593Smuzhiyun lea.l PTENRP(%pc),%a1 # it is RP 13357*4882a593Smuzhiyun bra.b calc_p # go to init section 13358*4882a593Smuzhiyunnot_rp: 13359*4882a593Smuzhiyun asr.l &1,%d0 # keep checking 13360*4882a593Smuzhiyun bcc.b not_rm 13361*4882a593Smuzhiyun lea.l PTENRM(%pc),%a1 # it is RM 13362*4882a593Smuzhiyun bra.b calc_p # go to init section 13363*4882a593Smuzhiyunnot_rm: 13364*4882a593Smuzhiyun lea.l PTENRN(%pc),%a1 # it 
is RN 13365*4882a593Smuzhiyuncalc_p: 13366*4882a593Smuzhiyun mov.l %d1,%d0 # copy exp to d0;use d0 13367*4882a593Smuzhiyun bpl.b no_neg # if exp is negative, 13368*4882a593Smuzhiyun neg.l %d0 # invert it 13369*4882a593Smuzhiyun or.l &0x40000000,(%a0) # and set SE bit 13370*4882a593Smuzhiyunno_neg: 13371*4882a593Smuzhiyun clr.l %d3 # table index 13372*4882a593Smuzhiyun fmov.s &0x3f800000,%fp1 # init fp1 to 1 13373*4882a593Smuzhiyune_loop: 13374*4882a593Smuzhiyun asr.l &1,%d0 # shift next bit into carry 13375*4882a593Smuzhiyun bcc.b e_next # if zero, skip the mul 13376*4882a593Smuzhiyun fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no) 13377*4882a593Smuzhiyune_next: 13378*4882a593Smuzhiyun add.l &12,%d3 # inc d3 to next rtable entry 13379*4882a593Smuzhiyun tst.l %d0 # check if d0 is zero 13380*4882a593Smuzhiyun bne.b e_loop # not zero, continue shifting 13381*4882a593Smuzhiyun# 13382*4882a593Smuzhiyun# 13383*4882a593Smuzhiyun# Check the sign of the adjusted exp and make the value in fp0 the 13384*4882a593Smuzhiyun# same sign. If the exp was pos then multiply fp1*fp0; 13385*4882a593Smuzhiyun# else divide fp0/fp1. 13386*4882a593Smuzhiyun# 13387*4882a593Smuzhiyun# Register Usage: 13388*4882a593Smuzhiyun# norm: 13389*4882a593Smuzhiyun# ( ) a0: pointer to working bcd value 13390*4882a593Smuzhiyun# (*) fp0: mantissa accumulator 13391*4882a593Smuzhiyun# ( ) fp1: scaling factor - 10**(abs(exp)) 13392*4882a593Smuzhiyun# 13393*4882a593Smuzhiyunpnorm: 13394*4882a593Smuzhiyun btst &30,(%a0) # test the sign of the exponent 13395*4882a593Smuzhiyun beq.b mul # if clear, go to multiply 13396*4882a593Smuzhiyundiv: 13397*4882a593Smuzhiyun fdiv.x %fp1,%fp0 # exp is negative, so divide mant by exp 13398*4882a593Smuzhiyun bra.b end_dec 13399*4882a593Smuzhiyunmul: 13400*4882a593Smuzhiyun fmul.x %fp1,%fp0 # exp is positive, so multiply by exp 13401*4882a593Smuzhiyun# 13402*4882a593Smuzhiyun# 13403*4882a593Smuzhiyun# Clean up and return with result in fp0. 
13404*4882a593Smuzhiyun# 13405*4882a593Smuzhiyun# If the final mul/div in decbin incurred an inex exception, 13406*4882a593Smuzhiyun# it will be inex2, but will be reported as inex1 by get_op. 13407*4882a593Smuzhiyun# 13408*4882a593Smuzhiyunend_dec: 13409*4882a593Smuzhiyun fmov.l %fpsr,%d0 # get status register 13410*4882a593Smuzhiyun bclr &inex2_bit+8,%d0 # test for inex2 and clear it 13411*4882a593Smuzhiyun beq.b no_exc # skip this if no exc 13412*4882a593Smuzhiyun ori.w &inx1a_mask,2+USER_FPSR(%a6) # set INEX1/AINEX 13413*4882a593Smuzhiyunno_exc: 13414*4882a593Smuzhiyun add.l &0x4,%sp # clear 1 lw param 13415*4882a593Smuzhiyun fmovm.x (%sp)+,&0x40 # restore fp1 13416*4882a593Smuzhiyun movm.l (%sp)+,&0x3c # restore d2-d5 13417*4882a593Smuzhiyun fmov.l &0x0,%fpcr 13418*4882a593Smuzhiyun fmov.l &0x0,%fpsr 13419*4882a593Smuzhiyun rts 13420*4882a593Smuzhiyun 13421*4882a593Smuzhiyun######################################################################### 13422*4882a593Smuzhiyun# bindec(): Converts an input in extended precision format to bcd format# 13423*4882a593Smuzhiyun# # 13424*4882a593Smuzhiyun# INPUT *************************************************************** # 13425*4882a593Smuzhiyun# a0 = pointer to the input extended precision value in memory. # 13426*4882a593Smuzhiyun# the input may be either normalized, unnormalized, or # 13427*4882a593Smuzhiyun# denormalized. # 13428*4882a593Smuzhiyun# d0 = contains the k-factor sign-extended to 32-bits. # 13429*4882a593Smuzhiyun# # 13430*4882a593Smuzhiyun# OUTPUT ************************************************************** # 13431*4882a593Smuzhiyun# FP_SCR0(a6) = bcd format result on the stack. # 13432*4882a593Smuzhiyun# # 13433*4882a593Smuzhiyun# ALGORITHM *********************************************************** # 13434*4882a593Smuzhiyun# # 13435*4882a593Smuzhiyun# A1. Set RM and size ext; Set SIGMA = sign of input. # 13436*4882a593Smuzhiyun# The k-factor is saved for use in d7. 
Clear the							#
#	     BINDEC_FLG for separating normalized/denormalized	#
#	     input.  If input is unnormalized or denormalized,	#
#	     normalize it.					#
#									#
#	A2.  Set X = abs(input).				#
#									#
#	A3.  Compute ILOG.					#
#	     ILOG is the log base 10 of the input value.  It is	#
#	     approximated by adding e + 0.f when the original	#
#	     value is viewed as 2^^e * 1.f in extended		#
#	     precision.  This value is stored in d6.		#
#									#
#	A4.  Clr INEX bit.					#
#	     The operation in A3 above may have set INEX2.	#
#									#
#	A5.  Set ICTR = 0;					#
#	     ICTR is a flag used in A13.  It must be set before	#
#	     the loop entry A6.					#
#									#
#	A6.  Calculate LEN.					#
#	     LEN is the number of digits to be displayed.  The	#
#	     k-factor can dictate either the total number of	#
#	     digits, if it is a positive number, or the number	#
#	     of digits after the decimal point which are to be	#
#	     included as significant.  See the 68882 manual for	#
#	     examples.  If LEN is computed to be greater than	#
#	     17, set OPERR in USER_FPSR.  LEN is stored in d4.	#
#									#
#	A7.  Calculate SCALE.					#
#	     SCALE is equal to 10^ISCALE, where ISCALE is the	#
#	     number of decimal places needed to insure LEN	#
#	     integer digits in the output before conversion to	#
#	     bcd.  LAMBDA is the sign of ISCALE, used in A9.	#
#	     Fp1 contains 10^^(abs(ISCALE)) using a rounding	#
#	     mode which is a function of the original rounding	#
#	     mode and the signs of ISCALE and X.  A table is	#
#	     given in the code.					#
#									#
#	A8.  Clr INEX; Force RZ.				#
#	     The operation in A3 above may have set INEX2.	#
#	     RZ mode is forced for the scaling operation to	#
#	     insure only one rounding error.  The grs bits are	#
#	     collected in the INEX flag for use in A10.		#
#									#
#	A9.  Scale X -> Y.					#
#	     The mantissa is scaled to the desired number of	#
#	     significant digits.  The excess digits are		#
#	     collected in INEX2.				#
#									#
#	A10. Or in INEX.					#
#	     If INEX is set, round error occurred.  This is	#
#	     compensated for by 'or-ing' in the INEX2 flag to	#
#	     the lsb of Y.					#
#									#
#	A11. Restore original FPCR; set size ext.		#
#	     Perform FINT operation in the user's rounding	#
#	     mode.  Keep the size to extended.			#
#									#
#	A12. Calculate YINT = FINT(Y) according to user's	#
#	     rounding mode.  The FPSP routine sintd0 is used.	#
#	     The output is in fp0.				#
#									#
#	A13. Check for LEN digits.				#
#	     If the int operation results in more than LEN	#
#	     digits, or less than LEN-1 digits, adjust ILOG	#
#	     and repeat from A6.  This test occurs only on the	#
#	     first pass.  If the result is exactly 10^LEN,	#
#	     decrement ILOG and divide the mantissa by 10.	#
#									#
#	A14. Convert the mantissa to bcd.			#
#	     The binstr routine is used to convert the LEN	#
#	     digit mantissa to bcd in memory.  The input to	#
#	     binstr is to be a fraction; i.e. (mantissa)/10^LEN	#
#	     and adjusted such that the decimal point is to the	#
#	     left of bit 63.  The bcd digits are stored in the	#
#	     correct position in the final string area in	#
#	     memory.						#
#									#
#	A15. Convert the exponent to bcd.			#
#	     As in A14 above, the exp is converted to bcd and	#
#	     the digits are stored in the final string.  Test	#
#	     the length of the final exponent string.  If the	#
#	     length is 4, set operr.				#
#									#
#	A16. Write sign bits to final string.
#									#
#########################################################################

# BINDEC_FLG: set nonzero when the original input was a denormalized
# number (reuses the EXC_TEMP scratch slot in the exception frame).
	set	BINDEC_FLG, EXC_TEMP	# DENORM flag

# Constants in extended precision
# PLOG2 / PLOG2UP1: log10(2) ~= 0.30103 in extended precision; PLOG2UP1
# is one ulp larger (used on the negative path of the ILOG estimate in A3).
PLOG2:
	long	0x3FFD0000,0x9A209A84,0xFBCFF798,0x00000000
PLOG2UP1:
	long	0x3FFD0000,0x9A209A84,0xFBCFF799,0x00000000

# Constants in single precision (each padded to a 16-byte slot)
FONE:
	long	0x3F800000,0x00000000,0x00000000,0x00000000	# 1.0
FTWO:
	long	0x40000000,0x00000000,0x00000000,0x00000000	# 2.0
FTEN:
	long	0x41200000,0x00000000,0x00000000,0x00000000	# 10.0
F4933:
	long	0x459A2800,0x00000000,0x00000000,0x00000000	# 4933.0

# Rounding-mode remap table for the SCALE calculation (see table in A7).
# Indexed by the 4-bit value {FPCR[6:5], LAMBDA, sign(X)};
# entries are FPCR rounding-mode codes: 0 = RN, 2 = RM, 3 = RP.
RBDTBL:
	byte	0,0,0,0
	byte	3,3,2,2
	byte	3,2,2,3
	byte	2,3,3,2

# Implementation Notes:
#
# The registers are used as follows:
#
#	d0: scratch; LEN input to binstr
#	d1: scratch
#	d2: upper 32-bits of mantissa for binstr
#	d3: scratch;lower 32-bits of mantissa for binstr
#	d4: LEN
#	d5: LAMBDA/ICTR
#	d6: ILOG
#	d7: k-factor
#	a0: ptr for original operand/final result
#	a1: scratch pointer
#	a2: pointer to FP_X; abs(original value) in ext
#	fp0: scratch
13563*4882a593Smuzhiyun# fp1: scratch 13564*4882a593Smuzhiyun# fp2: scratch 13565*4882a593Smuzhiyun# F_SCR1: 13566*4882a593Smuzhiyun# F_SCR2: 13567*4882a593Smuzhiyun# L_SCR1: 13568*4882a593Smuzhiyun# L_SCR2: 13569*4882a593Smuzhiyun 13570*4882a593Smuzhiyun global bindec 13571*4882a593Smuzhiyunbindec: 13572*4882a593Smuzhiyun movm.l &0x3f20,-(%sp) # {%d2-%d7/%a2} 13573*4882a593Smuzhiyun fmovm.x &0x7,-(%sp) # {%fp0-%fp2} 13574*4882a593Smuzhiyun 13575*4882a593Smuzhiyun# A1. Set RM and size ext. Set SIGMA = sign input; 13576*4882a593Smuzhiyun# The k-factor is saved for use in d7. Clear BINDEC_FLG for 13577*4882a593Smuzhiyun# separating normalized/denormalized input. If the input 13578*4882a593Smuzhiyun# is a denormalized number, set the BINDEC_FLG memory word 13579*4882a593Smuzhiyun# to signal denorm. If the input is unnormalized, normalize 13580*4882a593Smuzhiyun# the input and test for denormalized result. 13581*4882a593Smuzhiyun# 13582*4882a593Smuzhiyun fmov.l &rm_mode*0x10,%fpcr # set RM and ext 13583*4882a593Smuzhiyun mov.l (%a0),L_SCR2(%a6) # save exponent for sign check 13584*4882a593Smuzhiyun mov.l %d0,%d7 # move k-factor to d7 13585*4882a593Smuzhiyun 13586*4882a593Smuzhiyun clr.b BINDEC_FLG(%a6) # clr norm/denorm flag 13587*4882a593Smuzhiyun cmpi.b STAG(%a6),&DENORM # is input a DENORM? 
13588*4882a593Smuzhiyun bne.w A2_str # no; input is a NORM 13589*4882a593Smuzhiyun 13590*4882a593Smuzhiyun# 13591*4882a593Smuzhiyun# Normalize the denorm 13592*4882a593Smuzhiyun# 13593*4882a593Smuzhiyunun_de_norm: 13594*4882a593Smuzhiyun mov.w (%a0),%d0 13595*4882a593Smuzhiyun and.w &0x7fff,%d0 # strip sign of normalized exp 13596*4882a593Smuzhiyun mov.l 4(%a0),%d1 13597*4882a593Smuzhiyun mov.l 8(%a0),%d2 13598*4882a593Smuzhiyunnorm_loop: 13599*4882a593Smuzhiyun sub.w &1,%d0 13600*4882a593Smuzhiyun lsl.l &1,%d2 13601*4882a593Smuzhiyun roxl.l &1,%d1 13602*4882a593Smuzhiyun tst.l %d1 13603*4882a593Smuzhiyun bge.b norm_loop 13604*4882a593Smuzhiyun# 13605*4882a593Smuzhiyun# Test if the normalized input is denormalized 13606*4882a593Smuzhiyun# 13607*4882a593Smuzhiyun tst.w %d0 13608*4882a593Smuzhiyun bgt.b pos_exp # if greater than zero, it is a norm 13609*4882a593Smuzhiyun st BINDEC_FLG(%a6) # set flag for denorm 13610*4882a593Smuzhiyunpos_exp: 13611*4882a593Smuzhiyun and.w &0x7fff,%d0 # strip sign of normalized exp 13612*4882a593Smuzhiyun mov.w %d0,(%a0) 13613*4882a593Smuzhiyun mov.l %d1,4(%a0) 13614*4882a593Smuzhiyun mov.l %d2,8(%a0) 13615*4882a593Smuzhiyun 13616*4882a593Smuzhiyun# A2. Set X = abs(input). 13617*4882a593Smuzhiyun# 13618*4882a593SmuzhiyunA2_str: 13619*4882a593Smuzhiyun mov.l (%a0),FP_SCR1(%a6) # move input to work space 13620*4882a593Smuzhiyun mov.l 4(%a0),FP_SCR1+4(%a6) # move input to work space 13621*4882a593Smuzhiyun mov.l 8(%a0),FP_SCR1+8(%a6) # move input to work space 13622*4882a593Smuzhiyun and.l &0x7fffffff,FP_SCR1(%a6) # create abs(X) 13623*4882a593Smuzhiyun 13624*4882a593Smuzhiyun# A3. Compute ILOG. 13625*4882a593Smuzhiyun# ILOG is the log base 10 of the input value. It is approx- 13626*4882a593Smuzhiyun# imated by adding e + 0.f when the original value is viewed 13627*4882a593Smuzhiyun# as 2^^e * 1.f in extended precision. This value is stored 13628*4882a593Smuzhiyun# in d6. 
13629*4882a593Smuzhiyun# 13630*4882a593Smuzhiyun# Register usage: 13631*4882a593Smuzhiyun# Input/Output 13632*4882a593Smuzhiyun# d0: k-factor/exponent 13633*4882a593Smuzhiyun# d2: x/x 13634*4882a593Smuzhiyun# d3: x/x 13635*4882a593Smuzhiyun# d4: x/x 13636*4882a593Smuzhiyun# d5: x/x 13637*4882a593Smuzhiyun# d6: x/ILOG 13638*4882a593Smuzhiyun# d7: k-factor/Unchanged 13639*4882a593Smuzhiyun# a0: ptr for original operand/final result 13640*4882a593Smuzhiyun# a1: x/x 13641*4882a593Smuzhiyun# a2: x/x 13642*4882a593Smuzhiyun# fp0: x/float(ILOG) 13643*4882a593Smuzhiyun# fp1: x/x 13644*4882a593Smuzhiyun# fp2: x/x 13645*4882a593Smuzhiyun# F_SCR1:x/x 13646*4882a593Smuzhiyun# F_SCR2:Abs(X)/Abs(X) with $3fff exponent 13647*4882a593Smuzhiyun# L_SCR1:x/x 13648*4882a593Smuzhiyun# L_SCR2:first word of X packed/Unchanged 13649*4882a593Smuzhiyun 13650*4882a593Smuzhiyun tst.b BINDEC_FLG(%a6) # check for denorm 13651*4882a593Smuzhiyun beq.b A3_cont # if clr, continue with norm 13652*4882a593Smuzhiyun mov.l &-4933,%d6 # force ILOG = -4933 13653*4882a593Smuzhiyun bra.b A4_str 13654*4882a593SmuzhiyunA3_cont: 13655*4882a593Smuzhiyun mov.w FP_SCR1(%a6),%d0 # move exp to d0 13656*4882a593Smuzhiyun mov.w &0x3fff,FP_SCR1(%a6) # replace exponent with 0x3fff 13657*4882a593Smuzhiyun fmov.x FP_SCR1(%a6),%fp0 # now fp0 has 1.f 13658*4882a593Smuzhiyun sub.w &0x3fff,%d0 # strip off bias 13659*4882a593Smuzhiyun fadd.w %d0,%fp0 # add in exp 13660*4882a593Smuzhiyun fsub.s FONE(%pc),%fp0 # subtract off 1.0 13661*4882a593Smuzhiyun fbge.w pos_res # if pos, branch 13662*4882a593Smuzhiyun fmul.x PLOG2UP1(%pc),%fp0 # if neg, mul by LOG2UP1 13663*4882a593Smuzhiyun fmov.l %fp0,%d6 # put ILOG in d6 as a lword 13664*4882a593Smuzhiyun bra.b A4_str # go move out ILOG 13665*4882a593Smuzhiyunpos_res: 13666*4882a593Smuzhiyun fmul.x PLOG2(%pc),%fp0 # if pos, mul by LOG2 13667*4882a593Smuzhiyun fmov.l %fp0,%d6 # put ILOG in d6 as a lword 13668*4882a593Smuzhiyun 13669*4882a593Smuzhiyun 13670*4882a593Smuzhiyun# A4. 
Clr INEX bit. 13671*4882a593Smuzhiyun# The operation in A3 above may have set INEX2. 13672*4882a593Smuzhiyun 13673*4882a593SmuzhiyunA4_str: 13674*4882a593Smuzhiyun fmov.l &0,%fpsr # zero all of fpsr - nothing needed 13675*4882a593Smuzhiyun 13676*4882a593Smuzhiyun 13677*4882a593Smuzhiyun# A5. Set ICTR = 0; 13678*4882a593Smuzhiyun# ICTR is a flag used in A13. It must be set before the 13679*4882a593Smuzhiyun# loop entry A6. The lower word of d5 is used for ICTR. 13680*4882a593Smuzhiyun 13681*4882a593Smuzhiyun clr.w %d5 # clear ICTR 13682*4882a593Smuzhiyun 13683*4882a593Smuzhiyun# A6. Calculate LEN. 13684*4882a593Smuzhiyun# LEN is the number of digits to be displayed. The k-factor 13685*4882a593Smuzhiyun# can dictate either the total number of digits, if it is 13686*4882a593Smuzhiyun# a positive number, or the number of digits after the 13687*4882a593Smuzhiyun# original decimal point which are to be included as 13688*4882a593Smuzhiyun# significant. See the 68882 manual for examples. 13689*4882a593Smuzhiyun# If LEN is computed to be greater than 17, set OPERR in 13690*4882a593Smuzhiyun# USER_FPSR. LEN is stored in d4. 
13691*4882a593Smuzhiyun# 13692*4882a593Smuzhiyun# Register usage: 13693*4882a593Smuzhiyun# Input/Output 13694*4882a593Smuzhiyun# d0: exponent/Unchanged 13695*4882a593Smuzhiyun# d2: x/x/scratch 13696*4882a593Smuzhiyun# d3: x/x 13697*4882a593Smuzhiyun# d4: exc picture/LEN 13698*4882a593Smuzhiyun# d5: ICTR/Unchanged 13699*4882a593Smuzhiyun# d6: ILOG/Unchanged 13700*4882a593Smuzhiyun# d7: k-factor/Unchanged 13701*4882a593Smuzhiyun# a0: ptr for original operand/final result 13702*4882a593Smuzhiyun# a1: x/x 13703*4882a593Smuzhiyun# a2: x/x 13704*4882a593Smuzhiyun# fp0: float(ILOG)/Unchanged 13705*4882a593Smuzhiyun# fp1: x/x 13706*4882a593Smuzhiyun# fp2: x/x 13707*4882a593Smuzhiyun# F_SCR1:x/x 13708*4882a593Smuzhiyun# F_SCR2:Abs(X) with $3fff exponent/Unchanged 13709*4882a593Smuzhiyun# L_SCR1:x/x 13710*4882a593Smuzhiyun# L_SCR2:first word of X packed/Unchanged 13711*4882a593Smuzhiyun 13712*4882a593SmuzhiyunA6_str: 13713*4882a593Smuzhiyun tst.l %d7 # branch on sign of k 13714*4882a593Smuzhiyun ble.b k_neg # if k <= 0, LEN = ILOG + 1 - k 13715*4882a593Smuzhiyun mov.l %d7,%d4 # if k > 0, LEN = k 13716*4882a593Smuzhiyun bra.b len_ck # skip to LEN check 13717*4882a593Smuzhiyunk_neg: 13718*4882a593Smuzhiyun mov.l %d6,%d4 # first load ILOG to d4 13719*4882a593Smuzhiyun sub.l %d7,%d4 # subtract off k 13720*4882a593Smuzhiyun addq.l &1,%d4 # add in the 1 13721*4882a593Smuzhiyunlen_ck: 13722*4882a593Smuzhiyun tst.l %d4 # LEN check: branch on sign of LEN 13723*4882a593Smuzhiyun ble.b LEN_ng # if neg, set LEN = 1 13724*4882a593Smuzhiyun cmp.l %d4,&17 # test if LEN > 17 13725*4882a593Smuzhiyun ble.b A7_str # if not, forget it 13726*4882a593Smuzhiyun mov.l &17,%d4 # set max LEN = 17 13727*4882a593Smuzhiyun tst.l %d7 # if negative, never set OPERR 13728*4882a593Smuzhiyun ble.b A7_str # if positive, continue 13729*4882a593Smuzhiyun or.l &opaop_mask,USER_FPSR(%a6) # set OPERR & AIOP in USER_FPSR 13730*4882a593Smuzhiyun bra.b A7_str # finished here 13731*4882a593SmuzhiyunLEN_ng: 
13732*4882a593Smuzhiyun mov.l &1,%d4 # min LEN is 1 13733*4882a593Smuzhiyun 13734*4882a593Smuzhiyun 13735*4882a593Smuzhiyun# A7. Calculate SCALE. 13736*4882a593Smuzhiyun# SCALE is equal to 10^ISCALE, where ISCALE is the number 13737*4882a593Smuzhiyun# of decimal places needed to insure LEN integer digits 13738*4882a593Smuzhiyun# in the output before conversion to bcd. LAMBDA is the sign 13739*4882a593Smuzhiyun# of ISCALE, used in A9. Fp1 contains 10^^(abs(ISCALE)) using 13740*4882a593Smuzhiyun# the rounding mode as given in the following table (see 13741*4882a593Smuzhiyun# Coonen, p. 7.23 as ref.; however, the SCALE variable is 13742*4882a593Smuzhiyun# of opposite sign in bindec.sa from Coonen). 13743*4882a593Smuzhiyun# 13744*4882a593Smuzhiyun# Initial USE 13745*4882a593Smuzhiyun# FPCR[6:5] LAMBDA SIGN(X) FPCR[6:5] 13746*4882a593Smuzhiyun# ---------------------------------------------- 13747*4882a593Smuzhiyun# RN 00 0 0 00/0 RN 13748*4882a593Smuzhiyun# RN 00 0 1 00/0 RN 13749*4882a593Smuzhiyun# RN 00 1 0 00/0 RN 13750*4882a593Smuzhiyun# RN 00 1 1 00/0 RN 13751*4882a593Smuzhiyun# RZ 01 0 0 11/3 RP 13752*4882a593Smuzhiyun# RZ 01 0 1 11/3 RP 13753*4882a593Smuzhiyun# RZ 01 1 0 10/2 RM 13754*4882a593Smuzhiyun# RZ 01 1 1 10/2 RM 13755*4882a593Smuzhiyun# RM 10 0 0 11/3 RP 13756*4882a593Smuzhiyun# RM 10 0 1 10/2 RM 13757*4882a593Smuzhiyun# RM 10 1 0 10/2 RM 13758*4882a593Smuzhiyun# RM 10 1 1 11/3 RP 13759*4882a593Smuzhiyun# RP 11 0 0 10/2 RM 13760*4882a593Smuzhiyun# RP 11 0 1 11/3 RP 13761*4882a593Smuzhiyun# RP 11 1 0 11/3 RP 13762*4882a593Smuzhiyun# RP 11 1 1 10/2 RM 13763*4882a593Smuzhiyun# 13764*4882a593Smuzhiyun# Register usage: 13765*4882a593Smuzhiyun# Input/Output 13766*4882a593Smuzhiyun# d0: exponent/scratch - final is 0 13767*4882a593Smuzhiyun# d2: x/0 or 24 for A9 13768*4882a593Smuzhiyun# d3: x/scratch - offset ptr into PTENRM array 13769*4882a593Smuzhiyun# d4: LEN/Unchanged 13770*4882a593Smuzhiyun# d5: 0/ICTR:LAMBDA 13771*4882a593Smuzhiyun# d6: ILOG/ILOG or k if 
((k<=0)&(ILOG<k)) 13772*4882a593Smuzhiyun# d7: k-factor/Unchanged 13773*4882a593Smuzhiyun# a0: ptr for original operand/final result 13774*4882a593Smuzhiyun# a1: x/ptr to PTENRM array 13775*4882a593Smuzhiyun# a2: x/x 13776*4882a593Smuzhiyun# fp0: float(ILOG)/Unchanged 13777*4882a593Smuzhiyun# fp1: x/10^ISCALE 13778*4882a593Smuzhiyun# fp2: x/x 13779*4882a593Smuzhiyun# F_SCR1:x/x 13780*4882a593Smuzhiyun# F_SCR2:Abs(X) with $3fff exponent/Unchanged 13781*4882a593Smuzhiyun# L_SCR1:x/x 13782*4882a593Smuzhiyun# L_SCR2:first word of X packed/Unchanged 13783*4882a593Smuzhiyun 13784*4882a593SmuzhiyunA7_str: 13785*4882a593Smuzhiyun tst.l %d7 # test sign of k 13786*4882a593Smuzhiyun bgt.b k_pos # if pos and > 0, skip this 13787*4882a593Smuzhiyun cmp.l %d7,%d6 # test k - ILOG 13788*4882a593Smuzhiyun blt.b k_pos # if ILOG >= k, skip this 13789*4882a593Smuzhiyun mov.l %d7,%d6 # if ((k<0) & (ILOG < k)) ILOG = k 13790*4882a593Smuzhiyunk_pos: 13791*4882a593Smuzhiyun mov.l %d6,%d0 # calc ILOG + 1 - LEN in d0 13792*4882a593Smuzhiyun addq.l &1,%d0 # add the 1 13793*4882a593Smuzhiyun sub.l %d4,%d0 # sub off LEN 13794*4882a593Smuzhiyun swap %d5 # use upper word of d5 for LAMBDA 13795*4882a593Smuzhiyun clr.w %d5 # set it zero initially 13796*4882a593Smuzhiyun clr.w %d2 # set up d2 for very small case 13797*4882a593Smuzhiyun tst.l %d0 # test sign of ISCALE 13798*4882a593Smuzhiyun bge.b iscale # if pos, skip next inst 13799*4882a593Smuzhiyun addq.w &1,%d5 # if neg, set LAMBDA true 13800*4882a593Smuzhiyun cmp.l %d0,&0xffffecd4 # test iscale <= -4908 13801*4882a593Smuzhiyun bgt.b no_inf # if false, skip rest 13802*4882a593Smuzhiyun add.l &24,%d0 # add in 24 to iscale 13803*4882a593Smuzhiyun mov.l &24,%d2 # put 24 in d2 for A9 13804*4882a593Smuzhiyunno_inf: 13805*4882a593Smuzhiyun neg.l %d0 # and take abs of ISCALE 13806*4882a593Smuzhiyuniscale: 13807*4882a593Smuzhiyun fmov.s FONE(%pc),%fp1 # init fp1 to 1 13808*4882a593Smuzhiyun bfextu USER_FPCR(%a6){&26:&2},%d1 # get initial rmode bits 
13809*4882a593Smuzhiyun lsl.w &1,%d1 # put them in bits 2:1 13810*4882a593Smuzhiyun add.w %d5,%d1 # add in LAMBDA 13811*4882a593Smuzhiyun lsl.w &1,%d1 # put them in bits 3:1 13812*4882a593Smuzhiyun tst.l L_SCR2(%a6) # test sign of original x 13813*4882a593Smuzhiyun bge.b x_pos # if pos, don't set bit 0 13814*4882a593Smuzhiyun addq.l &1,%d1 # if neg, set bit 0 13815*4882a593Smuzhiyunx_pos: 13816*4882a593Smuzhiyun lea.l RBDTBL(%pc),%a2 # load rbdtbl base 13817*4882a593Smuzhiyun mov.b (%a2,%d1),%d3 # load d3 with new rmode 13818*4882a593Smuzhiyun lsl.l &4,%d3 # put bits in proper position 13819*4882a593Smuzhiyun fmov.l %d3,%fpcr # load bits into fpu 13820*4882a593Smuzhiyun lsr.l &4,%d3 # put bits in proper position 13821*4882a593Smuzhiyun tst.b %d3 # decode new rmode for pten table 13822*4882a593Smuzhiyun bne.b not_rn # if zero, it is RN 13823*4882a593Smuzhiyun lea.l PTENRN(%pc),%a1 # load a1 with RN table base 13824*4882a593Smuzhiyun bra.b rmode # exit decode 13825*4882a593Smuzhiyunnot_rn: 13826*4882a593Smuzhiyun lsr.b &1,%d3 # get lsb in carry 13827*4882a593Smuzhiyun bcc.b not_rp2 # if carry clear, it is RM 13828*4882a593Smuzhiyun lea.l PTENRP(%pc),%a1 # load a1 with RP table base 13829*4882a593Smuzhiyun bra.b rmode # exit decode 13830*4882a593Smuzhiyunnot_rp2: 13831*4882a593Smuzhiyun lea.l PTENRM(%pc),%a1 # load a1 with RM table base 13832*4882a593Smuzhiyunrmode: 13833*4882a593Smuzhiyun clr.l %d3 # clr table index 13834*4882a593Smuzhiyune_loop2: 13835*4882a593Smuzhiyun lsr.l &1,%d0 # shift next bit into carry 13836*4882a593Smuzhiyun bcc.b e_next2 # if zero, skip the mul 13837*4882a593Smuzhiyun fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no) 13838*4882a593Smuzhiyune_next2: 13839*4882a593Smuzhiyun add.l &12,%d3 # inc d3 to next pwrten table entry 13840*4882a593Smuzhiyun tst.l %d0 # test if ISCALE is zero 13841*4882a593Smuzhiyun bne.b e_loop2 # if not, loop 13842*4882a593Smuzhiyun 13843*4882a593Smuzhiyun# A8. Clr INEX; Force RZ. 
13844*4882a593Smuzhiyun# The operation in A3 above may have set INEX2. 13845*4882a593Smuzhiyun# RZ mode is forced for the scaling operation to insure 13846*4882a593Smuzhiyun# only one rounding error. The grs bits are collected in 13847*4882a593Smuzhiyun# the INEX flag for use in A10. 13848*4882a593Smuzhiyun# 13849*4882a593Smuzhiyun# Register usage: 13850*4882a593Smuzhiyun# Input/Output 13851*4882a593Smuzhiyun 13852*4882a593Smuzhiyun fmov.l &0,%fpsr # clr INEX 13853*4882a593Smuzhiyun fmov.l &rz_mode*0x10,%fpcr # set RZ rounding mode 13854*4882a593Smuzhiyun 13855*4882a593Smuzhiyun# A9. Scale X -> Y. 13856*4882a593Smuzhiyun# The mantissa is scaled to the desired number of significant 13857*4882a593Smuzhiyun# digits. The excess digits are collected in INEX2. If mul, 13858*4882a593Smuzhiyun# Check d2 for excess 10 exponential value. If not zero, 13859*4882a593Smuzhiyun# the iscale value would have caused the pwrten calculation 13860*4882a593Smuzhiyun# to overflow. Only a negative iscale can cause this, so 13861*4882a593Smuzhiyun# multiply by 10^(d2), which is now only allowed to be 24, 13862*4882a593Smuzhiyun# with a multiply by 10^8 and 10^16, which is exact since 13863*4882a593Smuzhiyun# 10^24 is exact. If the input was denormalized, we must 13864*4882a593Smuzhiyun# create a busy stack frame with the mul command and the 13865*4882a593Smuzhiyun# two operands, and allow the fpu to complete the multiply. 
13866*4882a593Smuzhiyun# 13867*4882a593Smuzhiyun# Register usage: 13868*4882a593Smuzhiyun# Input/Output 13869*4882a593Smuzhiyun# d0: FPCR with RZ mode/Unchanged 13870*4882a593Smuzhiyun# d2: 0 or 24/unchanged 13871*4882a593Smuzhiyun# d3: x/x 13872*4882a593Smuzhiyun# d4: LEN/Unchanged 13873*4882a593Smuzhiyun# d5: ICTR:LAMBDA 13874*4882a593Smuzhiyun# d6: ILOG/Unchanged 13875*4882a593Smuzhiyun# d7: k-factor/Unchanged 13876*4882a593Smuzhiyun# a0: ptr for original operand/final result 13877*4882a593Smuzhiyun# a1: ptr to PTENRM array/Unchanged 13878*4882a593Smuzhiyun# a2: x/x 13879*4882a593Smuzhiyun# fp0: float(ILOG)/X adjusted for SCALE (Y) 13880*4882a593Smuzhiyun# fp1: 10^ISCALE/Unchanged 13881*4882a593Smuzhiyun# fp2: x/x 13882*4882a593Smuzhiyun# F_SCR1:x/x 13883*4882a593Smuzhiyun# F_SCR2:Abs(X) with $3fff exponent/Unchanged 13884*4882a593Smuzhiyun# L_SCR1:x/x 13885*4882a593Smuzhiyun# L_SCR2:first word of X packed/Unchanged 13886*4882a593Smuzhiyun 13887*4882a593SmuzhiyunA9_str: 13888*4882a593Smuzhiyun fmov.x (%a0),%fp0 # load X from memory 13889*4882a593Smuzhiyun fabs.x %fp0 # use abs(X) 13890*4882a593Smuzhiyun tst.w %d5 # LAMBDA is in lower word of d5 13891*4882a593Smuzhiyun bne.b sc_mul # if neg (LAMBDA = 1), scale by mul 13892*4882a593Smuzhiyun fdiv.x %fp1,%fp0 # calculate X / SCALE -> Y to fp0 13893*4882a593Smuzhiyun bra.w A10_st # branch to A10 13894*4882a593Smuzhiyun 13895*4882a593Smuzhiyunsc_mul: 13896*4882a593Smuzhiyun tst.b BINDEC_FLG(%a6) # check for denorm 13897*4882a593Smuzhiyun beq.w A9_norm # if norm, continue with mul 13898*4882a593Smuzhiyun 13899*4882a593Smuzhiyun# for DENORM, we must calculate: 13900*4882a593Smuzhiyun# fp0 = input_op * 10^ISCALE * 10^24 13901*4882a593Smuzhiyun# since the input operand is a DENORM, we can't multiply it directly. 13902*4882a593Smuzhiyun# so, we do the multiplication of the exponents and mantissas separately. 
13903*4882a593Smuzhiyun# in this way, we avoid underflow on intermediate stages of the 
13904*4882a593Smuzhiyun# multiplication and guarantee a result without exception. 
13905*4882a593Smuzhiyun fmovm.x &0x2,-(%sp) # save 10^ISCALE to stack 
13906*4882a593Smuzhiyun 
13907*4882a593Smuzhiyun mov.w (%sp),%d3 # grab exponent 
13908*4882a593Smuzhiyun andi.w &0x7fff,%d3 # clear sign 
13909*4882a593Smuzhiyun ori.w &0x8000,(%a0) # make DENORM exp negative 
13910*4882a593Smuzhiyun add.w (%a0),%d3 # add DENORM exp to 10^ISCALE exp 
13911*4882a593Smuzhiyun subi.w &0x3fff,%d3 # subtract BIAS 
13912*4882a593Smuzhiyun add.w 36(%a1),%d3 # add in exponent of 10^8 table entry 
13913*4882a593Smuzhiyun subi.w &0x3fff,%d3 # subtract BIAS 
13914*4882a593Smuzhiyun add.w 48(%a1),%d3 # add in exponent of 10^16 table entry 
13915*4882a593Smuzhiyun subi.w &0x3fff,%d3 # subtract BIAS 
13916*4882a593Smuzhiyun 
13917*4882a593Smuzhiyun bmi.w sc_mul_err # if result is DENORM, punt!!! 
13918*4882a593Smuzhiyun 
13919*4882a593Smuzhiyun andi.w &0x8000,(%sp) # keep sign 
13920*4882a593Smuzhiyun or.w %d3,(%sp) # insert new exponent 
13921*4882a593Smuzhiyun andi.w &0x7fff,(%a0) # clear sign bit on DENORM again 
13922*4882a593Smuzhiyun mov.l 0x8(%a0),-(%sp) # put input op mantissa on stk 
13923*4882a593Smuzhiyun mov.l 0x4(%a0),-(%sp) # (upper longword of mantissa) 
13924*4882a593Smuzhiyun mov.l &0x3fff0000,-(%sp) # force exp to zero 
13925*4882a593Smuzhiyun fmovm.x (%sp)+,&0x80 # load normalized DENORM into fp0 
13926*4882a593Smuzhiyun fmul.x (%sp)+,%fp0 # multiply fp0 by 10^ISCALE (combined exp above) 
13927*4882a593Smuzhiyun 
13928*4882a593Smuzhiyun# fmul.x 36(%a1),%fp0 # multiply fp0 by 10^8 
13929*4882a593Smuzhiyun# fmul.x 48(%a1),%fp0 # multiply fp0 by 10^16 
13930*4882a593Smuzhiyun mov.l 36+8(%a1),-(%sp) # get 10^8 mantissa 
13931*4882a593Smuzhiyun mov.l 36+4(%a1),-(%sp) # (upper longword of 10^8 mantissa) 
13932*4882a593Smuzhiyun mov.l &0x3fff0000,-(%sp) # force exp to zero 
13933*4882a593Smuzhiyun mov.l 48+8(%a1),-(%sp) # get 10^16 mantissa 
13934*4882a593Smuzhiyun mov.l 48+4(%a1),-(%sp) # (upper longword of 10^16 mantissa) 
13935*4882a593Smuzhiyun mov.l &0x3fff0000,-(%sp)# force exp to zero 
13936*4882a593Smuzhiyun fmul.x (%sp)+,%fp0 # multiply fp0 by 
10^8 13937*4882a593Smuzhiyun fmul.x (%sp)+,%fp0 # multiply fp0 by 10^16 13938*4882a593Smuzhiyun bra.b A10_st 13939*4882a593Smuzhiyun 13940*4882a593Smuzhiyunsc_mul_err: 13941*4882a593Smuzhiyun bra.b sc_mul_err 13942*4882a593Smuzhiyun 13943*4882a593SmuzhiyunA9_norm: 13944*4882a593Smuzhiyun tst.w %d2 # test for small exp case 13945*4882a593Smuzhiyun beq.b A9_con # if zero, continue as normal 13946*4882a593Smuzhiyun fmul.x 36(%a1),%fp0 # multiply fp0 by 10^8 13947*4882a593Smuzhiyun fmul.x 48(%a1),%fp0 # multiply fp0 by 10^16 13948*4882a593SmuzhiyunA9_con: 13949*4882a593Smuzhiyun fmul.x %fp1,%fp0 # calculate X * SCALE -> Y to fp0 13950*4882a593Smuzhiyun 13951*4882a593Smuzhiyun# A10. Or in INEX. 13952*4882a593Smuzhiyun# If INEX is set, round error occurred. This is compensated 13953*4882a593Smuzhiyun# for by 'or-ing' in the INEX2 flag to the lsb of Y. 13954*4882a593Smuzhiyun# 13955*4882a593Smuzhiyun# Register usage: 13956*4882a593Smuzhiyun# Input/Output 13957*4882a593Smuzhiyun# d0: FPCR with RZ mode/FPSR with INEX2 isolated 13958*4882a593Smuzhiyun# d2: x/x 13959*4882a593Smuzhiyun# d3: x/x 13960*4882a593Smuzhiyun# d4: LEN/Unchanged 13961*4882a593Smuzhiyun# d5: ICTR:LAMBDA 13962*4882a593Smuzhiyun# d6: ILOG/Unchanged 13963*4882a593Smuzhiyun# d7: k-factor/Unchanged 13964*4882a593Smuzhiyun# a0: ptr for original operand/final result 13965*4882a593Smuzhiyun# a1: ptr to PTENxx array/Unchanged 13966*4882a593Smuzhiyun# a2: x/ptr to FP_SCR1(a6) 13967*4882a593Smuzhiyun# fp0: Y/Y with lsb adjusted 13968*4882a593Smuzhiyun# fp1: 10^ISCALE/Unchanged 13969*4882a593Smuzhiyun# fp2: x/x 13970*4882a593Smuzhiyun 13971*4882a593SmuzhiyunA10_st: 13972*4882a593Smuzhiyun fmov.l %fpsr,%d0 # get FPSR 13973*4882a593Smuzhiyun fmov.x %fp0,FP_SCR1(%a6) # move Y to memory 13974*4882a593Smuzhiyun lea.l FP_SCR1(%a6),%a2 # load a2 with ptr to FP_SCR1 13975*4882a593Smuzhiyun btst &9,%d0 # check if INEX2 set 13976*4882a593Smuzhiyun beq.b A11_st # if clear, skip rest 13977*4882a593Smuzhiyun or.l &1,8(%a2) # or 
in 1 to lsb of mantissa 13978*4882a593Smuzhiyun fmov.x FP_SCR1(%a6),%fp0 # write adjusted Y back to fpu 13979*4882a593Smuzhiyun 13980*4882a593Smuzhiyun 13981*4882a593Smuzhiyun# A11. Restore original FPCR; set size ext. 13982*4882a593Smuzhiyun# Perform FINT operation in the user's rounding mode. Keep 13983*4882a593Smuzhiyun# the size to extended. The sintdo entry point in the sint 13984*4882a593Smuzhiyun# routine expects the FPCR value to be in USER_FPCR for 13985*4882a593Smuzhiyun# mode and precision. The original FPCR is saved in L_SCR1. 13986*4882a593Smuzhiyun 13987*4882a593SmuzhiyunA11_st: 13988*4882a593Smuzhiyun mov.l USER_FPCR(%a6),L_SCR1(%a6) # save it for later 13989*4882a593Smuzhiyun and.l &0x00000030,USER_FPCR(%a6) # set size to ext, 13990*4882a593Smuzhiyun# ;block exceptions 13991*4882a593Smuzhiyun 13992*4882a593Smuzhiyun 13993*4882a593Smuzhiyun# A12. Calculate YINT = FINT(Y) according to user's rounding mode. 13994*4882a593Smuzhiyun# The FPSP routine sintd0 is used. The output is in fp0. 
13995*4882a593Smuzhiyun# 13996*4882a593Smuzhiyun# Register usage: 13997*4882a593Smuzhiyun# Input/Output 13998*4882a593Smuzhiyun# d0: FPSR with AINEX cleared/FPCR with size set to ext 13999*4882a593Smuzhiyun# d2: x/x/scratch 14000*4882a593Smuzhiyun# d3: x/x 14001*4882a593Smuzhiyun# d4: LEN/Unchanged 14002*4882a593Smuzhiyun# d5: ICTR:LAMBDA/Unchanged 14003*4882a593Smuzhiyun# d6: ILOG/Unchanged 14004*4882a593Smuzhiyun# d7: k-factor/Unchanged 14005*4882a593Smuzhiyun# a0: ptr for original operand/src ptr for sintdo 14006*4882a593Smuzhiyun# a1: ptr to PTENxx array/Unchanged 14007*4882a593Smuzhiyun# a2: ptr to FP_SCR1(a6)/Unchanged 14008*4882a593Smuzhiyun# a6: temp pointer to FP_SCR1(a6) - orig value saved and restored 14009*4882a593Smuzhiyun# fp0: Y/YINT 14010*4882a593Smuzhiyun# fp1: 10^ISCALE/Unchanged 14011*4882a593Smuzhiyun# fp2: x/x 14012*4882a593Smuzhiyun# F_SCR1:x/x 14013*4882a593Smuzhiyun# F_SCR2:Y adjusted for inex/Y with original exponent 14014*4882a593Smuzhiyun# L_SCR1:x/original USER_FPCR 14015*4882a593Smuzhiyun# L_SCR2:first word of X packed/Unchanged 14016*4882a593Smuzhiyun 14017*4882a593SmuzhiyunA12_st: 14018*4882a593Smuzhiyun movm.l &0xc0c0,-(%sp) # save regs used by sintd0 {%d0-%d1/%a0-%a1} 14019*4882a593Smuzhiyun mov.l L_SCR1(%a6),-(%sp) 14020*4882a593Smuzhiyun mov.l L_SCR2(%a6),-(%sp) 14021*4882a593Smuzhiyun 14022*4882a593Smuzhiyun lea.l FP_SCR1(%a6),%a0 # a0 is ptr to FP_SCR1(a6) 14023*4882a593Smuzhiyun fmov.x %fp0,(%a0) # move Y to memory at FP_SCR1(a6) 14024*4882a593Smuzhiyun tst.l L_SCR2(%a6) # test sign of original operand 14025*4882a593Smuzhiyun bge.b do_fint12 # if pos, use Y 14026*4882a593Smuzhiyun or.l &0x80000000,(%a0) # if neg, use -Y 14027*4882a593Smuzhiyundo_fint12: 14028*4882a593Smuzhiyun mov.l USER_FPSR(%a6),-(%sp) 14029*4882a593Smuzhiyun# bsr sintdo # sint routine returns int in fp0 14030*4882a593Smuzhiyun 14031*4882a593Smuzhiyun fmov.l USER_FPCR(%a6),%fpcr 14032*4882a593Smuzhiyun fmov.l &0x0,%fpsr # clear the AEXC bits!!! 
14033*4882a593Smuzhiyun## mov.l USER_FPCR(%a6),%d0 # ext prec/keep rnd mode 14034*4882a593Smuzhiyun## andi.l &0x00000030,%d0 14035*4882a593Smuzhiyun## fmov.l %d0,%fpcr 14036*4882a593Smuzhiyun fint.x FP_SCR1(%a6),%fp0 # do fint() 14037*4882a593Smuzhiyun fmov.l %fpsr,%d0 14038*4882a593Smuzhiyun or.w %d0,FPSR_EXCEPT(%a6) 14039*4882a593Smuzhiyun## fmov.l &0x0,%fpcr 14040*4882a593Smuzhiyun## fmov.l %fpsr,%d0 # don't keep ccodes 14041*4882a593Smuzhiyun## or.w %d0,FPSR_EXCEPT(%a6) 14042*4882a593Smuzhiyun 14043*4882a593Smuzhiyun mov.b (%sp),USER_FPSR(%a6) 14044*4882a593Smuzhiyun add.l &4,%sp 14045*4882a593Smuzhiyun 14046*4882a593Smuzhiyun mov.l (%sp)+,L_SCR2(%a6) 14047*4882a593Smuzhiyun mov.l (%sp)+,L_SCR1(%a6) 14048*4882a593Smuzhiyun movm.l (%sp)+,&0x303 # restore regs used by sint {%d0-%d1/%a0-%a1} 14049*4882a593Smuzhiyun 14050*4882a593Smuzhiyun mov.l L_SCR2(%a6),FP_SCR1(%a6) # restore original exponent 14051*4882a593Smuzhiyun mov.l L_SCR1(%a6),USER_FPCR(%a6) # restore user's FPCR 14052*4882a593Smuzhiyun 14053*4882a593Smuzhiyun# A13. Check for LEN digits. 14054*4882a593Smuzhiyun# If the int operation results in more than LEN digits, 14055*4882a593Smuzhiyun# or less than LEN -1 digits, adjust ILOG and repeat from 14056*4882a593Smuzhiyun# A6. This test occurs only on the first pass. If the 14057*4882a593Smuzhiyun# result is exactly 10^LEN, decrement ILOG and divide 14058*4882a593Smuzhiyun# the mantissa by 10. The calculation of 10^LEN cannot 14059*4882a593Smuzhiyun# be inexact, since all powers of ten up to 10^27 are exact 14060*4882a593Smuzhiyun# in extended precision, so the use of a previous power-of-ten 14061*4882a593Smuzhiyun# table will introduce no error. 
14062*4882a593Smuzhiyun# 14063*4882a593Smuzhiyun# 14064*4882a593Smuzhiyun# Register usage: 14065*4882a593Smuzhiyun# Input/Output 14066*4882a593Smuzhiyun# d0: FPCR with size set to ext/scratch final = 0 14067*4882a593Smuzhiyun# d2: x/x 14068*4882a593Smuzhiyun# d3: x/scratch final = x 14069*4882a593Smuzhiyun# d4: LEN/LEN adjusted 14070*4882a593Smuzhiyun# d5: ICTR:LAMBDA/LAMBDA:ICTR 14071*4882a593Smuzhiyun# d6: ILOG/ILOG adjusted 14072*4882a593Smuzhiyun# d7: k-factor/Unchanged 14073*4882a593Smuzhiyun# a0: pointer into memory for packed bcd string formation 14074*4882a593Smuzhiyun# a1: ptr to PTENxx array/Unchanged 14075*4882a593Smuzhiyun# a2: ptr to FP_SCR1(a6)/Unchanged 14076*4882a593Smuzhiyun# fp0: int portion of Y/abs(YINT) adjusted 14077*4882a593Smuzhiyun# fp1: 10^ISCALE/Unchanged 14078*4882a593Smuzhiyun# fp2: x/10^LEN 14079*4882a593Smuzhiyun# F_SCR1:x/x 14080*4882a593Smuzhiyun# F_SCR2:Y with original exponent/Unchanged 14081*4882a593Smuzhiyun# L_SCR1:original USER_FPCR/Unchanged 14082*4882a593Smuzhiyun# L_SCR2:first word of X packed/Unchanged 14083*4882a593Smuzhiyun 14084*4882a593SmuzhiyunA13_st: 14085*4882a593Smuzhiyun swap %d5 # put ICTR in lower word of d5 14086*4882a593Smuzhiyun tst.w %d5 # check if ICTR = 0 14087*4882a593Smuzhiyun bne not_zr # if non-zero, go to second test 14088*4882a593Smuzhiyun# 14089*4882a593Smuzhiyun# Compute 10^(LEN-1) 14090*4882a593Smuzhiyun# 14091*4882a593Smuzhiyun fmov.s FONE(%pc),%fp2 # init fp2 to 1.0 14092*4882a593Smuzhiyun mov.l %d4,%d0 # put LEN in d0 14093*4882a593Smuzhiyun subq.l &1,%d0 # d0 = LEN -1 14094*4882a593Smuzhiyun clr.l %d3 # clr table index 14095*4882a593Smuzhiyunl_loop: 14096*4882a593Smuzhiyun lsr.l &1,%d0 # shift next bit into carry 14097*4882a593Smuzhiyun bcc.b l_next # if zero, skip the mul 14098*4882a593Smuzhiyun fmul.x (%a1,%d3),%fp2 # mul by 10**(d3_bit_no) 14099*4882a593Smuzhiyunl_next: 14100*4882a593Smuzhiyun add.l &12,%d3 # inc d3 to next pwrten table entry 14101*4882a593Smuzhiyun tst.l %d0 # test if LEN 
is zero 14102*4882a593Smuzhiyun bne.b l_loop # if not, loop 14103*4882a593Smuzhiyun# 14104*4882a593Smuzhiyun# 10^LEN-1 is computed for this test and A14. If the input was 14105*4882a593Smuzhiyun# denormalized, check only the case in which YINT > 10^LEN. 14106*4882a593Smuzhiyun# 14107*4882a593Smuzhiyun tst.b BINDEC_FLG(%a6) # check if input was norm 14108*4882a593Smuzhiyun beq.b A13_con # if norm, continue with checking 14109*4882a593Smuzhiyun fabs.x %fp0 # take abs of YINT 14110*4882a593Smuzhiyun bra test_2 14111*4882a593Smuzhiyun# 14112*4882a593Smuzhiyun# Compare abs(YINT) to 10^(LEN-1) and 10^LEN 14113*4882a593Smuzhiyun# 14114*4882a593SmuzhiyunA13_con: 14115*4882a593Smuzhiyun fabs.x %fp0 # take abs of YINT 14116*4882a593Smuzhiyun fcmp.x %fp0,%fp2 # compare abs(YINT) with 10^(LEN-1) 14117*4882a593Smuzhiyun fbge.w test_2 # if greater, do next test 14118*4882a593Smuzhiyun subq.l &1,%d6 # subtract 1 from ILOG 14119*4882a593Smuzhiyun mov.w &1,%d5 # set ICTR 14120*4882a593Smuzhiyun fmov.l &rm_mode*0x10,%fpcr # set rmode to RM 14121*4882a593Smuzhiyun fmul.s FTEN(%pc),%fp2 # compute 10^LEN 14122*4882a593Smuzhiyun bra.w A6_str # return to A6 and recompute YINT 14123*4882a593Smuzhiyuntest_2: 14124*4882a593Smuzhiyun fmul.s FTEN(%pc),%fp2 # compute 10^LEN 14125*4882a593Smuzhiyun fcmp.x %fp0,%fp2 # compare abs(YINT) with 10^LEN 14126*4882a593Smuzhiyun fblt.w A14_st # if less, all is ok, go to A14 14127*4882a593Smuzhiyun fbgt.w fix_ex # if greater, fix and redo 14128*4882a593Smuzhiyun fdiv.s FTEN(%pc),%fp0 # if equal, divide by 10 14129*4882a593Smuzhiyun addq.l &1,%d6 # and inc ILOG 14130*4882a593Smuzhiyun bra.b A14_st # and continue elsewhere 14131*4882a593Smuzhiyunfix_ex: 14132*4882a593Smuzhiyun addq.l &1,%d6 # increment ILOG by 1 14133*4882a593Smuzhiyun mov.w &1,%d5 # set ICTR 14134*4882a593Smuzhiyun fmov.l &rm_mode*0x10,%fpcr # set rmode to RM 14135*4882a593Smuzhiyun bra.w A6_str # return to A6 and recompute YINT 14136*4882a593Smuzhiyun# 14137*4882a593Smuzhiyun# Since ICTR 
<> 0, we have already been through one adjustment, 
14138*4882a593Smuzhiyun# and shouldn't have another; this is to check if abs(YINT) = 10^LEN 
14139*4882a593Smuzhiyun# 10^LEN is again computed using whatever table is in a1 since the 
14140*4882a593Smuzhiyun# value calculated cannot be inexact. 
14141*4882a593Smuzhiyun# 
14142*4882a593Smuzhiyunnot_zr: 
14143*4882a593Smuzhiyun fmov.s FONE(%pc),%fp2 # init fp2 to 1.0 
14144*4882a593Smuzhiyun mov.l %d4,%d0 # put LEN in d0 
14145*4882a593Smuzhiyun clr.l %d3 # clr table index 
14146*4882a593Smuzhiyunz_loop: 
14147*4882a593Smuzhiyun lsr.l &1,%d0 # shift next bit into carry 
14148*4882a593Smuzhiyun bcc.b z_next # if zero, skip the mul 
14149*4882a593Smuzhiyun fmul.x (%a1,%d3),%fp2 # mul by 10**(d3_bit_no) 
14150*4882a593Smuzhiyunz_next: 
14151*4882a593Smuzhiyun add.l &12,%d3 # inc d3 to next pwrten table entry 
14152*4882a593Smuzhiyun tst.l %d0 # test if LEN is zero 
14153*4882a593Smuzhiyun bne.b z_loop # if not, loop 
14154*4882a593Smuzhiyun fabs.x %fp0 # get abs(YINT) 
14155*4882a593Smuzhiyun fcmp.x %fp0,%fp2 # check if abs(YINT) = 10^LEN 
14156*4882a593Smuzhiyun fbneq.w A14_st # if not, skip this 
14157*4882a593Smuzhiyun fdiv.s FTEN(%pc),%fp0 # divide abs(YINT) by 10 
14158*4882a593Smuzhiyun addq.l &1,%d6 # and inc ILOG by 1 
14159*4882a593Smuzhiyun addq.l &1,%d4 # and inc LEN 
14160*4882a593Smuzhiyun fmul.s FTEN(%pc),%fp2 # since LEN was inc'd, then get 10^LEN 
14161*4882a593Smuzhiyun 
14162*4882a593Smuzhiyun# A14. Convert the mantissa to bcd. 
14163*4882a593Smuzhiyun# The binstr routine is used to convert the LEN digit 
14164*4882a593Smuzhiyun# mantissa to bcd in memory. The input to binstr is 
14165*4882a593Smuzhiyun# to be a fraction; i.e. (mantissa)/10^LEN and adjusted 
14166*4882a593Smuzhiyun# such that the decimal point is to the left of bit 63. 
14167*4882a593Smuzhiyun# The bcd digits are stored in the correct position in 
14168*4882a593Smuzhiyun# the final string area in memory. 
14169*4882a593Smuzhiyun# 14170*4882a593Smuzhiyun# 14171*4882a593Smuzhiyun# Register usage: 14172*4882a593Smuzhiyun# Input/Output 14173*4882a593Smuzhiyun# d0: x/LEN call to binstr - final is 0 14174*4882a593Smuzhiyun# d1: x/0 14175*4882a593Smuzhiyun# d2: x/ms 32-bits of mant of abs(YINT) 14176*4882a593Smuzhiyun# d3: x/ls 32-bits of mant of abs(YINT) 14177*4882a593Smuzhiyun# d4: LEN/Unchanged 14178*4882a593Smuzhiyun# d5: ICTR:LAMBDA/LAMBDA:ICTR 14179*4882a593Smuzhiyun# d6: ILOG 14180*4882a593Smuzhiyun# d7: k-factor/Unchanged 14181*4882a593Smuzhiyun# a0: pointer into memory for packed bcd string formation 14182*4882a593Smuzhiyun# /ptr to first mantissa byte in result string 14183*4882a593Smuzhiyun# a1: ptr to PTENxx array/Unchanged 14184*4882a593Smuzhiyun# a2: ptr to FP_SCR1(a6)/Unchanged 14185*4882a593Smuzhiyun# fp0: int portion of Y/abs(YINT) adjusted 14186*4882a593Smuzhiyun# fp1: 10^ISCALE/Unchanged 14187*4882a593Smuzhiyun# fp2: 10^LEN/Unchanged 14188*4882a593Smuzhiyun# F_SCR1:x/Work area for final result 14189*4882a593Smuzhiyun# F_SCR2:Y with original exponent/Unchanged 14190*4882a593Smuzhiyun# L_SCR1:original USER_FPCR/Unchanged 14191*4882a593Smuzhiyun# L_SCR2:first word of X packed/Unchanged 14192*4882a593Smuzhiyun 14193*4882a593SmuzhiyunA14_st: 14194*4882a593Smuzhiyun fmov.l &rz_mode*0x10,%fpcr # force rz for conversion 14195*4882a593Smuzhiyun fdiv.x %fp2,%fp0 # divide abs(YINT) by 10^LEN 14196*4882a593Smuzhiyun lea.l FP_SCR0(%a6),%a0 14197*4882a593Smuzhiyun fmov.x %fp0,(%a0) # move abs(YINT)/10^LEN to memory 14198*4882a593Smuzhiyun mov.l 4(%a0),%d2 # move 2nd word of FP_RES to d2 14199*4882a593Smuzhiyun mov.l 8(%a0),%d3 # move 3rd word of FP_RES to d3 14200*4882a593Smuzhiyun clr.l 4(%a0) # zero word 2 of FP_RES 14201*4882a593Smuzhiyun clr.l 8(%a0) # zero word 3 of FP_RES 14202*4882a593Smuzhiyun mov.l (%a0),%d0 # move exponent to d0 14203*4882a593Smuzhiyun swap %d0 # put exponent in lower word 14204*4882a593Smuzhiyun beq.b no_sft # if zero, don't shift 
14205*4882a593Smuzhiyun sub.l &0x3ffd,%d0 # sub bias less 2 to make fract 14206*4882a593Smuzhiyun tst.l %d0 # check if > 1 14207*4882a593Smuzhiyun bgt.b no_sft # if so, don't shift 14208*4882a593Smuzhiyun neg.l %d0 # make exp positive 14209*4882a593Smuzhiyunm_loop: 14210*4882a593Smuzhiyun lsr.l &1,%d2 # shift d2:d3 right, add 0s 14211*4882a593Smuzhiyun roxr.l &1,%d3 # the number of places 14212*4882a593Smuzhiyun dbf.w %d0,m_loop # given in d0 14213*4882a593Smuzhiyunno_sft: 14214*4882a593Smuzhiyun tst.l %d2 # check for mantissa of zero 14215*4882a593Smuzhiyun bne.b no_zr # if not, go on 14216*4882a593Smuzhiyun tst.l %d3 # continue zero check 14217*4882a593Smuzhiyun beq.b zer_m # if zero, go directly to binstr 14218*4882a593Smuzhiyunno_zr: 14219*4882a593Smuzhiyun clr.l %d1 # put zero in d1 for addx 14220*4882a593Smuzhiyun add.l &0x00000080,%d3 # inc at bit 7 14221*4882a593Smuzhiyun addx.l %d1,%d2 # continue inc 14222*4882a593Smuzhiyun and.l &0xffffff80,%d3 # strip off lsb not used by 882 14223*4882a593Smuzhiyunzer_m: 14224*4882a593Smuzhiyun mov.l %d4,%d0 # put LEN in d0 for binstr call 14225*4882a593Smuzhiyun addq.l &3,%a0 # a0 points to M16 byte in result 14226*4882a593Smuzhiyun bsr binstr # call binstr to convert mant 14227*4882a593Smuzhiyun 14228*4882a593Smuzhiyun 14229*4882a593Smuzhiyun# A15. Convert the exponent to bcd. 14230*4882a593Smuzhiyun# As in A14 above, the exp is converted to bcd and the 14231*4882a593Smuzhiyun# digits are stored in the final string. 14232*4882a593Smuzhiyun# 14233*4882a593Smuzhiyun# Digits are stored in L_SCR1(a6) on return from BINDEC as: 14234*4882a593Smuzhiyun# 14235*4882a593Smuzhiyun# 32 16 15 0 14236*4882a593Smuzhiyun# ----------------------------------------- 14237*4882a593Smuzhiyun# | 0 | e3 | e2 | e1 | e4 | X | X | X | 14238*4882a593Smuzhiyun# ----------------------------------------- 14239*4882a593Smuzhiyun# 14240*4882a593Smuzhiyun# And are moved into their proper places in FP_SCR0. 
If digit e4 14241*4882a593Smuzhiyun# is non-zero, OPERR is signaled. In all cases, all 4 digits are 14242*4882a593Smuzhiyun# written as specified in the 881/882 manual for packed decimal. 14243*4882a593Smuzhiyun# 14244*4882a593Smuzhiyun# Register usage: 14245*4882a593Smuzhiyun# Input/Output 14246*4882a593Smuzhiyun# d0: x/LEN call to binstr - final is 0 14247*4882a593Smuzhiyun# d1: x/scratch (0);shift count for final exponent packing 14248*4882a593Smuzhiyun# d2: x/ms 32-bits of exp fraction/scratch 14249*4882a593Smuzhiyun# d3: x/ls 32-bits of exp fraction 14250*4882a593Smuzhiyun# d4: LEN/Unchanged 14251*4882a593Smuzhiyun# d5: ICTR:LAMBDA/LAMBDA:ICTR 14252*4882a593Smuzhiyun# d6: ILOG 14253*4882a593Smuzhiyun# d7: k-factor/Unchanged 14254*4882a593Smuzhiyun# a0: ptr to result string/ptr to L_SCR1(a6) 14255*4882a593Smuzhiyun# a1: ptr to PTENxx array/Unchanged 14256*4882a593Smuzhiyun# a2: ptr to FP_SCR1(a6)/Unchanged 14257*4882a593Smuzhiyun# fp0: abs(YINT) adjusted/float(ILOG) 14258*4882a593Smuzhiyun# fp1: 10^ISCALE/Unchanged 14259*4882a593Smuzhiyun# fp2: 10^LEN/Unchanged 14260*4882a593Smuzhiyun# F_SCR1:Work area for final result/BCD result 14261*4882a593Smuzhiyun# F_SCR2:Y with original exponent/ILOG/10^4 14262*4882a593Smuzhiyun# L_SCR1:original USER_FPCR/Exponent digits on return from binstr 14263*4882a593Smuzhiyun# L_SCR2:first word of X packed/Unchanged 14264*4882a593Smuzhiyun 14265*4882a593SmuzhiyunA15_st: 14266*4882a593Smuzhiyun tst.b BINDEC_FLG(%a6) # check for denorm 14267*4882a593Smuzhiyun beq.b not_denorm 14268*4882a593Smuzhiyun ftest.x %fp0 # test for zero 14269*4882a593Smuzhiyun fbeq.w den_zero # if zero, use k-factor or 4933 14270*4882a593Smuzhiyun fmov.l %d6,%fp0 # float ILOG 14271*4882a593Smuzhiyun fabs.x %fp0 # get abs of ILOG 14272*4882a593Smuzhiyun bra.b convrt 14273*4882a593Smuzhiyunden_zero: 14274*4882a593Smuzhiyun tst.l %d7 # check sign of the k-factor 14275*4882a593Smuzhiyun blt.b use_ilog # if negative, use ILOG 14276*4882a593Smuzhiyun fmov.s 
F4933(%pc),%fp0 # force exponent to 4933 
14277*4882a593Smuzhiyun bra.b convrt # do it 
14278*4882a593Smuzhiyunuse_ilog: 
14279*4882a593Smuzhiyun fmov.l %d6,%fp0 # float ILOG 
14280*4882a593Smuzhiyun fabs.x %fp0 # get abs of ILOG 
14281*4882a593Smuzhiyun bra.b convrt 
14282*4882a593Smuzhiyunnot_denorm: 
14283*4882a593Smuzhiyun ftest.x %fp0 # test for zero 
14284*4882a593Smuzhiyun fbneq.w not_zero # if not zero, use ILOG; zero falls through to force exp 
14285*4882a593Smuzhiyun fmov.s FONE(%pc),%fp0 # force exponent to 1 
14286*4882a593Smuzhiyun bra.b convrt # do it 
14287*4882a593Smuzhiyunnot_zero: 
14288*4882a593Smuzhiyun fmov.l %d6,%fp0 # float ILOG 
14289*4882a593Smuzhiyun fabs.x %fp0 # get abs of ILOG 
14290*4882a593Smuzhiyunconvrt: 
14291*4882a593Smuzhiyun fdiv.x 24(%a1),%fp0 # compute ILOG/10^4 
14292*4882a593Smuzhiyun fmov.x %fp0,FP_SCR1(%a6) # store fp0 in memory 
14293*4882a593Smuzhiyun mov.l 4(%a2),%d2 # move word 2 to d2 
14294*4882a593Smuzhiyun mov.l 8(%a2),%d3 # move word 3 to d3 
14295*4882a593Smuzhiyun mov.w (%a2),%d0 # move exp to d0 
14296*4882a593Smuzhiyun beq.b x_loop_fin # if zero, skip the shift 
14297*4882a593Smuzhiyun sub.w &0x3ffd,%d0 # subtract off bias 
14298*4882a593Smuzhiyun neg.w %d0 # make exp positive 
14299*4882a593Smuzhiyunx_loop: 
14300*4882a593Smuzhiyun lsr.l &1,%d2 # shift d2:d3 right 
14301*4882a593Smuzhiyun roxr.l &1,%d3 # the number of places 
14302*4882a593Smuzhiyun dbf.w %d0,x_loop # given in d0 
14303*4882a593Smuzhiyunx_loop_fin: 
14304*4882a593Smuzhiyun clr.l %d1 # put zero in d1 for addx 
14305*4882a593Smuzhiyun add.l &0x00000080,%d3 # inc at bit 7 (0x80), as in A14 rounding 
14306*4882a593Smuzhiyun addx.l %d1,%d2 # continue inc 
14307*4882a593Smuzhiyun and.l &0xffffff80,%d3 # strip off lsb not used by 882 
14308*4882a593Smuzhiyun mov.l &4,%d0 # put 4 in d0 for binstr call 
14309*4882a593Smuzhiyun lea.l L_SCR1(%a6),%a0 # a0 is ptr to L_SCR1 for exp digits 
14310*4882a593Smuzhiyun bsr binstr # call binstr to convert exp 
14311*4882a593Smuzhiyun mov.l L_SCR1(%a6),%d0 # load L_SCR1 lword to d0 
14312*4882a593Smuzhiyun mov.l &12,%d1 # use d1 for shift count 14313*4882a593Smuzhiyun lsr.l %d1,%d0 # shift d0 right by 12 14314*4882a593Smuzhiyun bfins %d0,FP_SCR0(%a6){&4:&12} # put e3:e2:e1 in FP_SCR0 14315*4882a593Smuzhiyun lsr.l %d1,%d0 # shift d0 right by 12 14316*4882a593Smuzhiyun bfins %d0,FP_SCR0(%a6){&16:&4} # put e4 in FP_SCR0 14317*4882a593Smuzhiyun tst.b %d0 # check if e4 is zero 14318*4882a593Smuzhiyun beq.b A16_st # if zero, skip rest 14319*4882a593Smuzhiyun or.l &opaop_mask,USER_FPSR(%a6) # set OPERR & AIOP in USER_FPSR 14320*4882a593Smuzhiyun 14321*4882a593Smuzhiyun 14322*4882a593Smuzhiyun# A16. Write sign bits to final string. 14323*4882a593Smuzhiyun# Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG). 14324*4882a593Smuzhiyun# 14325*4882a593Smuzhiyun# Register usage: 14326*4882a593Smuzhiyun# Input/Output 14327*4882a593Smuzhiyun# d0: x/scratch - final is x 14328*4882a593Smuzhiyun# d2: x/x 14329*4882a593Smuzhiyun# d3: x/x 14330*4882a593Smuzhiyun# d4: LEN/Unchanged 14331*4882a593Smuzhiyun# d5: ICTR:LAMBDA/LAMBDA:ICTR 14332*4882a593Smuzhiyun# d6: ILOG/ILOG adjusted 14333*4882a593Smuzhiyun# d7: k-factor/Unchanged 14334*4882a593Smuzhiyun# a0: ptr to L_SCR1(a6)/Unchanged 14335*4882a593Smuzhiyun# a1: ptr to PTENxx array/Unchanged 14336*4882a593Smuzhiyun# a2: ptr to FP_SCR1(a6)/Unchanged 14337*4882a593Smuzhiyun# fp0: float(ILOG)/Unchanged 14338*4882a593Smuzhiyun# fp1: 10^ISCALE/Unchanged 14339*4882a593Smuzhiyun# fp2: 10^LEN/Unchanged 14340*4882a593Smuzhiyun# F_SCR1:BCD result with correct signs 14341*4882a593Smuzhiyun# F_SCR2:ILOG/10^4 14342*4882a593Smuzhiyun# L_SCR1:Exponent digits on return from binstr 14343*4882a593Smuzhiyun# L_SCR2:first word of X packed/Unchanged 14344*4882a593Smuzhiyun 14345*4882a593SmuzhiyunA16_st: 14346*4882a593Smuzhiyun clr.l %d0 # clr d0 for collection of signs 14347*4882a593Smuzhiyun and.b &0x0f,FP_SCR0(%a6) # clear first nibble of FP_SCR0 14348*4882a593Smuzhiyun tst.l L_SCR2(%a6) # check sign of original mantissa 
14349*4882a593Smuzhiyun bge.b mant_p # if pos, don't set SM 
14350*4882a593Smuzhiyun mov.l &2,%d0 # move 2 in to d0 for SM 
14351*4882a593Smuzhiyunmant_p: 
14352*4882a593Smuzhiyun tst.l %d6 # check sign of ILOG 
14353*4882a593Smuzhiyun bge.b wr_sgn # if pos, don't set SE 
14354*4882a593Smuzhiyun addq.l &1,%d0 # set bit 0 in d0 for SE 
14355*4882a593Smuzhiyunwr_sgn: 
14356*4882a593Smuzhiyun bfins %d0,FP_SCR0(%a6){&0:&2} # insert SM and SE into FP_SCR0 
14357*4882a593Smuzhiyun 
14358*4882a593Smuzhiyun# Clean up and restore all registers used. 
14359*4882a593Smuzhiyun 
14360*4882a593Smuzhiyun fmov.l &0,%fpsr # clear possible inex2/ainex bits 
14361*4882a593Smuzhiyun fmovm.x (%sp)+,&0xe0 # {%fp0-%fp2} 
14362*4882a593Smuzhiyun movm.l (%sp)+,&0x4fc # {%d2-%d7/%a2} 
14363*4882a593Smuzhiyun rts 
14364*4882a593Smuzhiyun 
# Power-of-ten tables: 10^(2^n), n = 0..12, one extended-precision value
# (3 longs = 12 bytes) per entry; code above indexes by adding 12 per step.
# Three variants with last digits rounded per mode: RN (nearest), RP (up), RM (down).
14365*4882a593Smuzhiyun global PTENRN 
14366*4882a593SmuzhiyunPTENRN: 
14367*4882a593Smuzhiyun long 0x40020000,0xA0000000,0x00000000 # 10 ^ 1 
14368*4882a593Smuzhiyun long 0x40050000,0xC8000000,0x00000000 # 10 ^ 2 
14369*4882a593Smuzhiyun long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 4 
14370*4882a593Smuzhiyun long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8 
14371*4882a593Smuzhiyun long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16 
14372*4882a593Smuzhiyun long 0x40690000,0x9DC5ADA8,0x2B70B59E # 10 ^ 32 
14373*4882a593Smuzhiyun long 0x40D30000,0xC2781F49,0xFFCFA6D5 # 10 ^ 64 
14374*4882a593Smuzhiyun long 0x41A80000,0x93BA47C9,0x80E98CE0 # 10 ^ 128 
14375*4882a593Smuzhiyun long 0x43510000,0xAA7EEBFB,0x9DF9DE8E # 10 ^ 256 
14376*4882a593Smuzhiyun long 0x46A30000,0xE319A0AE,0xA60E91C7 # 10 ^ 512 
14377*4882a593Smuzhiyun long 0x4D480000,0xC9767586,0x81750C17 # 10 ^ 1024 
14378*4882a593Smuzhiyun long 0x5A920000,0x9E8B3B5D,0xC53D5DE5 # 10 ^ 2048 
14379*4882a593Smuzhiyun long 0x75250000,0xC4605202,0x8A20979B # 10 ^ 4096 
14380*4882a593Smuzhiyun 
# Round-to-plus-infinity variant (inexact entries rounded up in the last digit).
14381*4882a593Smuzhiyun global PTENRP 
14382*4882a593SmuzhiyunPTENRP: 
14383*4882a593Smuzhiyun long 0x40020000,0xA0000000,0x00000000 # 10 ^ 1 
14384*4882a593Smuzhiyun long 0x40050000,0xC8000000,0x00000000 # 10 ^ 2 
14385*4882a593Smuzhiyun long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 4 
14386*4882a593Smuzhiyun long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8 
14387*4882a593Smuzhiyun long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16 
14388*4882a593Smuzhiyun long 0x40690000,0x9DC5ADA8,0x2B70B59E # 10 ^ 32 
14389*4882a593Smuzhiyun long 0x40D30000,0xC2781F49,0xFFCFA6D6 # 10 ^ 64 
14390*4882a593Smuzhiyun long 0x41A80000,0x93BA47C9,0x80E98CE0 # 10 ^ 128 
14391*4882a593Smuzhiyun long 0x43510000,0xAA7EEBFB,0x9DF9DE8E # 10 ^ 256 
14392*4882a593Smuzhiyun long 0x46A30000,0xE319A0AE,0xA60E91C7 # 10 ^ 512 
14393*4882a593Smuzhiyun long 0x4D480000,0xC9767586,0x81750C18 # 10 ^ 1024 
14394*4882a593Smuzhiyun long 0x5A920000,0x9E8B3B5D,0xC53D5DE5 # 10 ^ 2048 
14395*4882a593Smuzhiyun long 0x75250000,0xC4605202,0x8A20979B # 10 ^ 4096 
14396*4882a593Smuzhiyun 
# Round-to-minus-infinity variant (inexact entries rounded down in the last digit).
14397*4882a593Smuzhiyun global PTENRM 
14398*4882a593SmuzhiyunPTENRM: 
14399*4882a593Smuzhiyun long 0x40020000,0xA0000000,0x00000000 # 10 ^ 1 
14400*4882a593Smuzhiyun long 0x40050000,0xC8000000,0x00000000 # 10 ^ 2 
14401*4882a593Smuzhiyun long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 4 
14402*4882a593Smuzhiyun long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8 
14403*4882a593Smuzhiyun long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16 
14404*4882a593Smuzhiyun long 0x40690000,0x9DC5ADA8,0x2B70B59D # 10 ^ 32 
14405*4882a593Smuzhiyun long 0x40D30000,0xC2781F49,0xFFCFA6D5 # 10 ^ 64 
14406*4882a593Smuzhiyun long 0x41A80000,0x93BA47C9,0x80E98CDF # 10 ^ 128 
14407*4882a593Smuzhiyun long 0x43510000,0xAA7EEBFB,0x9DF9DE8D # 10 ^ 256 
14408*4882a593Smuzhiyun long 0x46A30000,0xE319A0AE,0xA60E91C6 # 10 ^ 512 
14409*4882a593Smuzhiyun long 0x4D480000,0xC9767586,0x81750C17 # 10 ^ 1024 
14410*4882a593Smuzhiyun long 0x5A920000,0x9E8B3B5D,0xC53D5DE4 # 10 ^ 2048 
14411*4882a593Smuzhiyun long 0x75250000,0xC4605202,0x8A20979A # 10 ^ 4096 
14412*4882a593Smuzhiyun 
#########################################################################
# binstr(): Converts a 64-bit binary integer to bcd.			#
#									#
# INPUT ***************************************************************	#
#	d2:d3 = 64-bit binary integer					#
#	d0    = desired length (LEN)					#
#	a0    = pointer to start in memory for bcd characters		#
#	        (This pointer must point to byte 4 of the first		#
#	        lword of the packed decimal memory string.)		#
#									#
# OUTPUT **************************************************************	#
#	a0 = pointer to LEN bcd digits representing the 64-bit integer.	#
#									#
# ALGORITHM ***********************************************************	#
#	The 64-bit binary is assumed to have a decimal point before	#
#	bit 63.  The fraction is multiplied by 10 using a mul by 2	#
#	shift and a mul by 8 shift.  The bits shifted out of the	#
#	msb form a decimal digit.  This process is iterated until	#
#	LEN digits are formed.						#
#									#
#	A1. Init d7 to 1.  D7 is the byte digit counter, and if 1, the	#
#	    digit formed will be assumed the least significant.  This is #
#	    to force the first byte formed to have a 0 in the upper 4	#
#	    bits.							#
#									#
#	A2. Beginning of the loop:					#
#	    Copy the fraction in d2:d3 to d4:d5.			#
#									#
#	A3. Multiply the fraction in d2:d3 by 8 using bit-field		#
#	    extracts and shifts.  The three msbs from d2 will go into	#
#	    d1.								#
#									#
#	A4. Multiply the fraction in d4:d5 by 2 using shifts.  The msb	#
#	    will be collected by the carry.				#
#									#
#	A5. Add using the carry the 64-bit quantities in d2:d3 and	#
#	    d4:d5 into d2:d3.  D1 will contain the bcd digit formed.	#
#									#
#	A6. Test d7.  If zero, the digit formed is the ms digit.  If	#
#	    non-zero, it is the ls digit.  Put the digit in its place	#
#	    in the lower word of d7.  If it is the ls digit, write the	#
#	    byte from d7 to memory.					#
#	    (The original comment said "d0" here, but the code below	#
#	    clearly builds the digit byte in d7; d0 is the dbf loop	#
#	    counter.)							#
#									#
#	A7. Decrement d0 (LEN counter) and repeat the loop until	#
#	    expired.							#
#									#
#########################################################################

# Implementation Notes:
#
# The registers are used as follows:
#
#	d0: LEN counter
#	d1: temp used to form the digit
#	d2: upper 32-bits of fraction for mul by 8
#	d3: lower 32-bits of fraction for mul by 8
#	d4: upper 32-bits of fraction for mul by 2
#	d5: lower 32-bits of fraction for mul by 2
#	d6: temp for bit-field extracts
#	d7: byte digit formation word;digit count {0,1}
#	a0: pointer into memory for packed bcd string formation
#

	global		binstr
binstr:
	movm.l		&0xff00,-(%sp)		# save d0-d7

#
# A1: Init d7
#
	mov.l		&1,%d7			# init d7 for second digit
	subq.l		&1,%d0			# for dbf d0 would have LEN+1 passes
#
# A2. Copy d2:d3 to d4:d5. Start loop.
#
loop:
	mov.l		%d2,%d4			# copy the fraction before muls
	mov.l		%d3,%d5			# to d4:d5
#
# A3. Multiply d2:d3 by 8; extract msbs into d1.
#
	bfextu		%d2{&0:&3},%d1		# copy 3 msbs of d2 into d1
	asl.l		&3,%d2			# shift d2 left by 3 places
	bfextu		%d3{&0:&3},%d6		# copy 3 msbs of d3 into d6
	asl.l		&3,%d3			# shift d3 left by 3 places
	or.l		%d6,%d2			# or in msbs from d3 into d2
#
# A4. Multiply d4:d5 by 2; add carry out to d1.
#
	asl.l		&1,%d5			# mul d5 by 2
	roxl.l		&1,%d4			# mul d4 by 2
	swap		%d6			# put 0 in d6 lower word
	addx.w		%d6,%d1			# add in extend from mul by 2
#
# A5. Add mul by 8 to mul by 2. D1 contains the digit formed.
#
	add.l		%d5,%d3			# add lower 32 bits
	nop					# ERRATA FIX #13 (Rev. 1.2 6/6/90)
	addx.l		%d4,%d2			# add with extend upper 32 bits
	nop					# ERRATA FIX #13 (Rev. 1.2 6/6/90)
	addx.w		%d6,%d1			# add in extend from add to d1
	swap		%d6			# with d6 = 0; put 0 in upper word
#
# A6. Test d7 and branch.
#
	tst.w		%d7			# if zero, store digit & to loop
	beq.b		first_d			# if non-zero, form byte & write
sec_d:
	swap		%d7			# bring first digit to word d7b
	asl.w		&4,%d7			# first digit in upper 4 bits d7b
	add.w		%d1,%d7			# add in ls digit to d7b
	mov.b		%d7,(%a0)+		# store d7b byte in memory
	swap		%d7			# put LEN counter in word d7a
	clr.w		%d7			# set d7a to signal no digits done
	dbf.w		%d0,loop		# do loop some more!
	bra.b		end_bstr		# finished, so exit
first_d:
	swap		%d7			# put digit word in d7b
	mov.w		%d1,%d7			# put new digit in d7b
	swap		%d7			# put LEN counter in word d7a
	addq.w		&1,%d7			# set d7a to signal first digit done
	dbf.w		%d0,loop		# do loop some more!
	swap		%d7			# put last digit in string
	lsl.w		&4,%d7			# move it to upper 4 bits
	mov.b		%d7,(%a0)+		# store it in memory string
#
# Clean up and return.  The result is the packed BCD string written
# through a0 (the original comment said "result in fp0", but this
# routine never touches the FP registers; it only advances a0).
#
end_bstr:
	movm.l		(%sp)+,&0xff		# restore d0-d7
	rts

#########################################################################
# XDEF ****************************************************************	#
#	facc_in_b(): dmem_read_byte failed				#
#	facc_in_w(): dmem_read_word failed				#
#	facc_in_l(): dmem_read_long failed				#
#	facc_in_d(): dmem_read of dbl prec failed			#
#	facc_in_x(): dmem_read of ext prec failed			#
#									#
#	facc_out_b(): dmem_write_byte failed				#
#	facc_out_w(): dmem_write_word failed				#
#	facc_out_l(): dmem_write_long failed				#
#	facc_out_d(): dmem_write of dbl prec failed			#
#	facc_out_x(): dmem_write of ext prec failed			#
#									#
# XREF ****************************************************************	#
#	_real_access() - exit through access error handler		#
#									#
# INPUT ***************************************************************	#
#	None								#
#									#
# OUTPUT **************************************************************	#
#	None								#
#									#
# ALGORITHM ***********************************************************	#
#	Flow jumps here when an FP data fetch call gets an error	#
# result. This means the operating system wants an access error frame	#
# made out of the current exception stack frame.
#	So, we first call restore() which makes sure that any updated	#
# -(an)+ register gets returned to its pre-exception value and then	#
# we change the stack to an access error stack frame.			#
#									#
#########################################################################

# Each stub below loads d0 with the access size in bytes (used by
# restore() to back out a postinc/predec <ea> update), calls restore(),
# stashes the fault status longword (FSLW) encoding for that size and
# direction in EXC_VOFF, and falls through to facc_finish.
# NOTE(review): the FSLW bit values below are carried over unchanged
# from the original release -- verify against the M68060 User's Manual
# access-error frame description before altering any of them.

facc_in_b:
	movq.l		&0x1,%d0		# one byte
	bsr.w		restore			# fix An

	mov.w		&0x0121,EXC_VOFF(%a6)	# set FSLW
	bra.w		facc_finish

facc_in_w:
	movq.l		&0x2,%d0		# two bytes
	bsr.w		restore			# fix An

	mov.w		&0x0141,EXC_VOFF(%a6)	# set FSLW
	bra.b		facc_finish

facc_in_l:
	movq.l		&0x4,%d0		# four bytes
	bsr.w		restore			# fix An

	mov.w		&0x0101,EXC_VOFF(%a6)	# set FSLW
	bra.b		facc_finish

facc_in_d:
	movq.l		&0x8,%d0		# eight bytes
	bsr.w		restore			# fix An

	mov.w		&0x0161,EXC_VOFF(%a6)	# set FSLW
	bra.b		facc_finish

facc_in_x:
	movq.l		&0xc,%d0		# twelve bytes
	bsr.w		restore			# fix An

	mov.w		&0x0161,EXC_VOFF(%a6)	# set FSLW
	bra.b		facc_finish

################################################################

facc_out_b:
	movq.l		&0x1,%d0		# one byte
	bsr.w		restore			# restore An

	mov.w		&0x00a1,EXC_VOFF(%a6)	# set FSLW
	bra.b		facc_finish

facc_out_w:
	movq.l		&0x2,%d0		# two bytes
	bsr.w		restore			# restore An

	mov.w		&0x00c1,EXC_VOFF(%a6)	# set FSLW
	bra.b		facc_finish

facc_out_l:
	movq.l		&0x4,%d0		# four bytes
	bsr.w		restore			# restore An

	mov.w		&0x0081,EXC_VOFF(%a6)	# set FSLW
	bra.b		facc_finish

facc_out_d:
	movq.l		&0x8,%d0		# eight bytes
	bsr.w		restore			# restore An

	mov.w		&0x00e1,EXC_VOFF(%a6)	# set FSLW
	bra.b		facc_finish

facc_out_x:
	movq.l		&0xc,%d0		# twelve bytes
						# (was "mov.l &0xc,%d0"; changed
						# to movq.l for consistency with
						# every sibling stub -- moveq
						# sign-extends the 8-bit
						# immediate, so d0 gets the
						# identical value 0xc)
	bsr.w		restore			# restore An

	mov.w		&0x00e1,EXC_VOFF(%a6)	# set FSLW

# here's where we actually create the access error frame from the
# current exception stack frame: restore the user's FP/integer state,
# pop the emulator frame, then rewrite the four stacked longwords into
# SR/PC-hi, PC-lo, EA, FSLW with the 0x4008 vector offset.
facc_finish:
	mov.l		USER_FPIAR(%a6),EXC_PC(%a6) # store current PC

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6

	mov.l		(%sp),-(%sp)		# store SR, hi(PC)
	mov.l		0x8(%sp),0x4(%sp)	# store lo(PC)
	mov.l		0xc(%sp),0x8(%sp)	# store EA
	mov.l		&0x00000001,0xc(%sp)	# store FSLW (low word = 1)
	mov.w		0x6(%sp),0xc(%sp)	# fix FSLW (size/direction word
						# stashed earlier in EXC_VOFF)
	mov.w		&0x4008,0x6(%sp)	# store voff (access error frame)

	btst		&0x5,(%sp)		# supervisor or user mode?
	beq.b		facc_out2		# user
	bset		&0x2,0xd(%sp)		# set supervisor TM bit

facc_out2:
	bra.l		_real_access

##################################################################

# if the effective addressing mode was predecrement or postincrement,
# the emulation has already changed its value to the correct post-
# instruction value. but since we're exiting to the access error
# handler, then AN must be returned to its pre-instruction value.
# we do that here.
# In:  d0 = access size in bytes (the adjustment to back out)
# Out: the affected An (stacked copy, live register, or USP) is
#      rewound by d0; d1 is clobbered.
restore:
	mov.b		EXC_OPWORD+0x1(%a6),%d1
	andi.b		&0x38,%d1		# extract opmode
	cmpi.b		%d1,&0x18		# postinc?
	beq.w		rest_inc
	cmpi.b		%d1,&0x20		# predec?
	beq.w		rest_dec
	rts

rest_inc:
	mov.b		EXC_OPWORD+0x1(%a6),%d1
	andi.w		&0x0007,%d1		# fetch An

	mov.w		(tbl_rest_inc.b,%pc,%d1.w*2),%d1
	jmp		(tbl_rest_inc.b,%pc,%d1.w*1)

tbl_rest_inc:
	short		ri_a0 - tbl_rest_inc
	short		ri_a1 - tbl_rest_inc
	short		ri_a2 - tbl_rest_inc
	short		ri_a3 - tbl_rest_inc
	short		ri_a4 - tbl_rest_inc
	short		ri_a5 - tbl_rest_inc
	short		ri_a6 - tbl_rest_inc
	short		ri_a7 - tbl_rest_inc

ri_a0:
	sub.l		%d0,EXC_DREGS+0x8(%a6)	# fix stacked a0
	rts
ri_a1:
	sub.l		%d0,EXC_DREGS+0xc(%a6)	# fix stacked a1
	rts
ri_a2:
	sub.l		%d0,%a2			# fix a2
	rts
ri_a3:
	sub.l		%d0,%a3			# fix a3
	rts
ri_a4:
	sub.l		%d0,%a4			# fix a4
	rts
ri_a5:
	sub.l		%d0,%a5			# fix a5
	rts
ri_a6:
	sub.l		%d0,(%a6)		# fix stacked a6
	rts
# if it's a fmove out instruction, we don't have to fix a7
# because we hadn't changed it yet. if it's an opclass two
# instruction (data moved in) and the exception was in supervisor
# mode, then a7 also wasn't updated. if it was user mode, then
# restore the correct a7 which is in the USP currently.
ri_a7:
	cmpi.b		EXC_VOFF(%a6),&0x30	# move in or out?
	bne.b		ri_a7_done		# out

	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
	bne.b		ri_a7_done		# supervisor
	movc		%usp,%a0		# restore USP
	sub.l		%d0,%a0
	movc		%a0,%usp
ri_a7_done:
	rts

# need to invert adjustment value if the <ea> was predec
rest_dec:
	neg.l		%d0
	bra.b		rest_inc