|
|	round.sa 3.4 7/29/91
|
|	handle rounding and normalization tasks
|
|
|
|		Copyright (C) Motorola, Inc. 1990
|			All Rights Reserved
|
|       For details on the license for this file, please see the
|       file, README, in this same directory.

|ROUND	idnt    2,1 | Motorola 040 Floating Point Software Package

	|section	8

#include "fpsp.h"

|
|	round --- round result according to precision/mode
|
|	a0 points to the input operand in the internal extended format
|	d1(high word) contains rounding precision:
|		ext = $0000xxxx
|		sgl = $0001xxxx
|		dbl = $0002xxxx
|	d1(low word) contains rounding mode (used as a jump table index):
|		RN  = $xxxx0000
|		RZ  = $xxxx0001
|		RM  = $xxxx0002
|		RP  = $xxxx0003
|	d0{31:29} contains the g,r,s bits (extended)
|
|	On return the value pointed to by a0 is correctly rounded,
|	a0 is preserved and the g-r-s bits in d0 are cleared.
|	The result is not typed - the tag field is invalid.  The
|	result is still in the internal extended format.
|
|	The INEX bit of USER_FPSR will be set if the rounded result was
|	inexact (i.e. if any of the g-r-s bits were set).
|

	.global	round
round:
| If g=r=s=0 then result is exact and round is done, else set
| the inex flag in status reg and continue.
|
	bsrs	ext_grs			|this subroutine looks at the
|					;rounding precision and sets
|					;the appropriate g-r-s bits.
	tstl	%d0			|if grs are zero, go force
	bne	rnd_cont		|lower bits to zero for size

	swap	%d1			|set up d1.w for round prec.
	bra	truncate

rnd_cont:
|
| Use rounding mode as an index into a jump table for these modes.
|
	orl	#inx2a_mask,USER_FPSR(%a6) |set inex2/ainex
	lea	mode_tab,%a1
	movel	(%a1,%d1.w*4),%a1
	jmp	(%a1)
|
| Jump table indexed by rounding mode in d1.w.  All following assumes
| grs != 0.
|
mode_tab:
	.long	rnd_near
	.long	rnd_zero
	.long	rnd_mnus
	.long	rnd_plus
|
|	ROUND PLUS INFINITY
|
|	If sign of fp number = 0 (positive), then add 1 to l.
|
rnd_plus:
	swap	%d1			|set up d1 for round prec.
	tstb	LOCAL_SGN(%a0)		|check for sign
	bmi	truncate		|if negative then truncate
	movel	#0xffffffff,%d0		|force g,r,s to be all f's
	lea	add_to_l,%a1
	movel	(%a1,%d1.w*4),%a1
	jmp	(%a1)
|
|	ROUND MINUS INFINITY
|
|	If sign of fp number = 1 (negative), then add 1 to l.
|
rnd_mnus:
	swap	%d1			|set up d1 for round prec.
	tstb	LOCAL_SGN(%a0)		|check for sign
	bpl	truncate		|if positive then truncate
	movel	#0xffffffff,%d0		|force g,r,s to be all f's
	lea	add_to_l,%a1
	movel	(%a1,%d1.w*4),%a1
	jmp	(%a1)
|
|	ROUND ZERO
|
|	Always truncate.
rnd_zero:
	swap	%d1			|set up d1 for round prec.
	bra	truncate
|
|
|	ROUND NEAREST
|
|	If (g=1), then add 1 to l and if (r=s=0), then clear l
|	Note that this will round to even in case of a tie.
|
rnd_near:
	swap	%d1			|set up d1 for round prec.
	asll	#1,%d0			|shift g-bit to c-bit; d0 now holds
|					;only r,s for the tie test below
	bcc	truncate		|if (g=0) then truncate, else add 1
	lea	add_to_l,%a1
	movel	(%a1,%d1.w*4),%a1
	jmp	(%a1)

|
|	ext_grs --- extract guard, round and sticky bits
|
| Input:	d1 =		PREC:ROUND
| Output:	d0{31:29}=	guard, round, sticky
|
| The ext_grs extract the guard/round/sticky bits according to the
| selected rounding precision. It is called by the round subroutine
| only.  All registers except d0 are kept intact. d0 becomes an
| updated guard,round,sticky in d0{31:29}
|
| Notes: the ext_grs uses the round PREC, and therefore has to swap d1
|	 prior to usage, and needs to restore d1 to original.
|
| For extended precision d0 is returned unchanged.
|
ext_grs:
	swap	%d1			|have d1.w point to round precision
	cmpiw	#0,%d1
	bnes	sgl_or_dbl
	bras	end_ext_grs

sgl_or_dbl:
	moveml	%d2/%d3,-(%a7)		|make some temp registers
	cmpiw	#1,%d1
	bnes	grs_dbl
grs_sgl:
	bfextu	LOCAL_HI(%a0){#24:#2},%d3	|sgl prec. g-r are 2 bits right
	movel	#30,%d2			|of the sgl prec. limits
	lsll	%d2,%d3			|shift g-r bits to MSB of d3
	movel	LOCAL_HI(%a0),%d2	|get word 2 for s-bit test
	andil	#0x0000003f,%d2		|s bit is the or of all other
	bnes	st_stky			|bits to the right of g-r
	tstl	LOCAL_LO(%a0)		|test lower mantissa
	bnes	st_stky			|if any are set, set sticky
	tstl	%d0			|test original g,r,s
	bnes	st_stky			|if any are set, set sticky
	bras	end_sd			|if words 3 and 4 are clr, exit
grs_dbl:
	bfextu	LOCAL_LO(%a0){#21:#2},%d3	|dbl-prec. g-r are 2 bits right
	movel	#30,%d2			|of the dbl prec. limits
	lsll	%d2,%d3			|shift g-r bits to the MSB of d3
	movel	LOCAL_LO(%a0),%d2	|get lower mantissa for s-bit test
	andil	#0x000001ff,%d2		|s bit is the or-ing of all
	bnes	st_stky			|other bits to the right of g-r
	tstl	%d0			|test word original g,r,s
	bnes	st_stky			|if any are set, set sticky
	bras	end_sd			|if clear, exit
st_stky:
	bset	#rnd_stky_bit,%d3
end_sd:
	movel	%d3,%d0			|return grs to d0
	moveml	(%a7)+,%d2/%d3		|restore scratch registers
end_ext_grs:
	swap	%d1			|restore d1 to original
	rts

|*******************  Local Equates
	.set	ad_1_sgl,0x00000100	| constant to add 1 to l-bit in sgl prec
	.set	ad_1_dbl,0x00000800	| constant to add 1 to l-bit in dbl prec


|Jump table for adding 1 to the l-bit indexed by rnd prec

add_to_l:
	.long	add_ext
	.long	add_sgl
	.long	add_dbl
	.long	add_dbl
|
|	ADD SINGLE
|
| Adds 1 to the sgl l-bit of the mantissa at (a0).  If the add carries
| out of the mantissa, the carry is rotated back in and the exponent
| is incremented.  If d0 = 0 (r=s=0, i.e. a tie) the l-bit is cleared
| so the result is even.  Bits below the sgl limit are then cleared.
|
add_sgl:
	addl	#ad_1_sgl,LOCAL_HI(%a0)
	bccs	scc_clr			|no mantissa overflow
	roxrw	LOCAL_HI(%a0)		|shift v-bit back in
	roxrw	LOCAL_HI+2(%a0)		|shift v-bit back in
	addw	#0x1,LOCAL_EX(%a0)	|and incr exponent
scc_clr:
	tstl	%d0			|test for rs = 0
	bnes	sgl_done
	andiw	#0xfe00,LOCAL_HI+2(%a0)	|clear the l-bit
sgl_done:
	andil	#0xffffff00,LOCAL_HI(%a0) |truncate bits beyond sgl limit
	clrl	LOCAL_LO(%a0)		|clear ls mantissa
	rts

|
|	ADD EXTENDED
|
| Adds 1 to the ext l-bit (lsb of LOCAL_LO), propagating the carry into
| LOCAL_HI and, on mantissa overflow, into the exponent.  If d0 = 0
| (r=s=0) the l-bit is cleared so the result is even.
|
add_ext:
	addql	#1,LOCAL_LO(%a0)	|add 1 to l-bit
	bccs	xcc_clr			|test for carry out
	addql	#1,LOCAL_HI(%a0)	|propagate carry
	bccs	xcc_clr
	roxrw	LOCAL_HI(%a0)		|mant is 0 so restore v-bit
	roxrw	LOCAL_HI+2(%a0)		|mant is 0 so restore v-bit
	roxrw	LOCAL_LO(%a0)
	roxrw	LOCAL_LO+2(%a0)
	addw	#0x1,LOCAL_EX(%a0)	|and inc exp
xcc_clr:
	tstl	%d0			|test rs = 0
	bnes	add_ext_done
	andib	#0xfe,LOCAL_LO+3(%a0)	|clear the l bit
add_ext_done:
	rts
|
|	ADD DOUBLE
|
| Adds 1 to the dbl l-bit of the mantissa at (a0), propagating the
| carry into LOCAL_HI and, on mantissa overflow, into the exponent.
| If d0 = 0 (r=s=0) the l-bit is cleared so the result is even.  Bits
| below the dbl limit are then cleared.
|
add_dbl:
	addl	#ad_1_dbl,LOCAL_LO(%a0)
	bccs	dcc_clr
	addql	#1,LOCAL_HI(%a0)	|propagate carry
	bccs	dcc_clr
	roxrw	LOCAL_HI(%a0)		|mant is 0 so restore v-bit
	roxrw	LOCAL_HI+2(%a0)		|mant is 0 so restore v-bit
	roxrw	LOCAL_LO(%a0)
	roxrw	LOCAL_LO+2(%a0)
	addw	#0x1,LOCAL_EX(%a0)	|incr exponent
dcc_clr:
	tstl	%d0			|test for rs = 0
	bnes	dbl_done
	andiw	#0xf000,LOCAL_LO+2(%a0)	|clear the l-bit

dbl_done:
	andil	#0xfffff800,LOCAL_LO(%a0) |truncate bits beyond dbl limit
	rts

error:
	rts
|
| Truncate all other bits
|
| Jump table indexed by rounding precision in d1.w (callers swap d1
| before branching to truncate).
|
trunct:
	.long	end_rnd
	.long	sgl_done
	.long	dbl_done
	.long	dbl_done

truncate:
	lea	trunct,%a1
	movel	(%a1,%d1.w*4),%a1
	jmp	(%a1)

end_rnd:
	rts

|
|	NORMALIZE
|
| These routines (nrm_zero & nrm_set) normalize the unnorm.  This
| is done by shifting the mantissa left while decrementing the
| exponent.
|
| NRM_SET shifts and decrements until there is a 1 set in the integer
| bit of the mantissa (msb in d1).
|
| NRM_ZERO shifts and decrements until there is a 1 set in the integer
| bit of the mantissa (msb in d1) unless this would mean the exponent
| would go less than 0.  In that case the number becomes a denorm - the
| exponent (d0) is set to 0 and the mantissa (d1 & d2) is not
| normalized.
|
| Note that both routines have been optimized (for the worst case) and
| therefore do not have the easy to follow decrement/shift loop.
|
|	NRM_ZERO
|
|	Distance to first 1 bit in mantissa = X
|	Distance to 0 from exponent = Y
|	If X <= Y
|	Then
|	  nrm_set
|	Else
|	  shift mantissa by Y
|	  set exponent = 0
|
|input:
|	FP_SCR1 = exponent, ms mantissa part, ls mantissa part
|output:
|	L_SCR1{4} = fpte15 or ete15 bit
|
| As coded, the operand is addressed through a0 (LOCAL_EX, LOCAL_HI,
| LOCAL_LO).  d0 and d1 are used as scratch and are not preserved.
|
	.global	nrm_zero
nrm_zero:
	movew	LOCAL_EX(%a0),%d0
	cmpw	#64,%d0			|see if exp > 64
	bmis	d0_less
	bsr	nrm_set			|exp > 64 so exp won't exceed 0
	rts
d0_less:
	moveml	%d2/%d3/%d5/%d6,-(%a7)
	movel	LOCAL_HI(%a0),%d1
	movel	LOCAL_LO(%a0),%d2

	bfffo	%d1{#0:#32},%d3		|get the distance to the first 1
|					;in ms mant
	beqs	ms_clr			|branch if no bits were set
	cmpw	%d3,%d0			|if X>Y
	bmis	greater			|then exp will go past 0 (neg) if
|					;it is just shifted
	bsr	nrm_set			|else exp won't go past 0
	moveml	(%a7)+,%d2/%d3/%d5/%d6
	rts
greater:
|
| Shift the 64-bit mantissa d1:d2 left by Y (= d0.w) and set exp = 0.
| A register shift count of 32 or more yields zero, so the two-register
| merge below only works for Y < 32.  Y >= 32 can only occur when the
| ms mant is all zeros (entry from ms_clr) and is handled separately;
| otherwise the remaining mantissa bits would be lost.
|
	cmpw	#32,%d0			|is the shift count 32 or more?
	bges	nz_shift32		|yes, ls mant moves into ms mant
	movel	%d2,%d6			|save ls mant in d6
	lsll	%d0,%d2			|shift ls mant by count
	lsll	%d0,%d1			|shift ms mant by count
	movel	#32,%d5
	subl	%d0,%d5			|make op a denorm by shifting bits
	lsrl	%d5,%d6			|by the number in the exp, then
|					;set exp = 0.
	orl	%d6,%d1			|shift the ls mant bits into the ms mant
	bras	nz_store
nz_shift32:
	movel	%d2,%d1			|shift by 32: ls mant becomes ms mant
	subw	#32,%d0			|remaining shift count (0..31)
	lsll	%d0,%d1			|finish the shift within the ms mant
	clrl	%d2			|ls mant is all zeros after the shift
nz_store:
	movel	#0,%d0			|same as if decremented exp to 0
|					;while shifting
	movew	%d0,LOCAL_EX(%a0)
	movel	%d1,LOCAL_HI(%a0)
	movel	%d2,LOCAL_LO(%a0)
	moveml	(%a7)+,%d2/%d3/%d5/%d6
	rts
ms_clr:
	bfffo	%d2{#0:#32},%d3		|check if any bits set in ls mant
	beqs	all_clr			|branch if none set
	addw	#32,%d3			|X = 32 + distance within ls mant
	cmpw	%d3,%d0			|if X>Y
	bmis	greater			|then branch
	bsr	nrm_set			|else exp won't go past 0
	moveml	(%a7)+,%d2/%d3/%d5/%d6
	rts
all_clr:
	movew	#0,LOCAL_EX(%a0)	|no mantissa bits set. Set exp = 0.
	moveml	(%a7)+,%d2/%d3/%d5/%d6
	rts
|
|	NRM_SET
|
| Normalizes the operand at (a0): shifts the mantissa left until the
| first 1 is in the msb of LOCAL_HI and subtracts the shift count from
| LOCAL_EX.  d0 and d1 are used as scratch; d6 and d7 are preserved.
|
	.global	nrm_set
nrm_set:
	movel	%d7,-(%a7)
	bfffo	LOCAL_HI(%a0){#0:#32},%d7 |find first 1 in ms mant to d7)
	beqs	lower			|branch if ms mant is all 0's

	movel	%d6,-(%a7)

	subw	%d7,LOCAL_EX(%a0)	|sub exponent by count
	movel	LOCAL_HI(%a0),%d0	|d0 has ms mant
	movel	LOCAL_LO(%a0),%d1	|d1 has ls mant

	lsll	%d7,%d0			|shift first 1 to j bit position
	movel	%d1,%d6			|copy ls mant into d6
	lsll	%d7,%d6			|shift ls mant by count
	movel	%d6,LOCAL_LO(%a0)	|store ls mant into memory
	moveql	#32,%d6
	subl	%d7,%d6			|continue shift
	lsrl	%d6,%d1			|shift off all bits but those that will
|					;be shifted into ms mant
	orl	%d1,%d0			|shift the ls mant bits into the ms mant
	movel	%d0,LOCAL_HI(%a0)	|store ms mant into memory
	moveml	(%a7)+,%d7/%d6		|restore registers
	rts

|
| We get here if ms mant was = 0, and we assume ls mant has bits
| set (otherwise this would have been tagged a zero not a denorm).
|
lower:
	movew	LOCAL_EX(%a0),%d0	|d0 has exponent
	movel	LOCAL_LO(%a0),%d1	|d1 has ls mant
	subw	#32,%d0			|account for ms mant being all zeros
	bfffo	%d1{#0:#32},%d7		|find first 1 in ls mant to d7)
	subw	%d7,%d0			|subtract shift count from exp
	lsll	%d7,%d1			|shift first 1 to integer bit in ms mant
	movew	%d0,LOCAL_EX(%a0)	|store exp
	movel	%d1,LOCAL_HI(%a0)	|store ms mant
	clrl	LOCAL_LO(%a0)		|clear ls mant
	movel	(%a7)+,%d7
	rts
|
|	denorm --- denormalize an intermediate result
|
|	Used by underflow.
|
| Input:
|	a0	 points to the operand to be denormalized
|		 (in the internal extended format)
|
|	d0:	 rounding precision
| Output:
|	a0	 points to the denormalized result
|		 (in the internal extended format)
|
|	d0	is guard,round,sticky
|
| d0 comes into this routine with the rounding precision. It
| is then loaded with the denormalized exponent threshold for the
| rounding precision.
|

	.global	denorm
denorm:
	btstb	#6,LOCAL_EX(%a0)	|check for exponents between $7fff-$4000
	beqs	no_sgn_ext
	bsetb	#7,LOCAL_EX(%a0)	|sign extend if it is so
no_sgn_ext:

	cmpib	#0,%d0			|if 0 then extended precision
	bnes	not_ext			|else branch

	clrl	%d1			|load d1 with ext threshold
	clrl	%d0			|clear the sticky flag
	bsr	dnrm_lp			|denormalize the number
	tstb	%d1			|check for inex
	beq	no_inex			|if clr, no inex
	bras	dnrm_inex		|if set, set inex

not_ext:
	cmpil	#1,%d0			|if 1 then single precision
	beqs	load_sgl		|else must be 2, double prec

load_dbl:
	movew	#dbl_thresh,%d1		|put copy of threshold in d1
	movel	%d1,%d0			|copy d1 into d0
	subw	LOCAL_EX(%a0),%d0	|diff = threshold - exp
	cmpw	#67,%d0			|if diff >= 67 (mant + grs bits)
	bpls	chk_stky		|then branch (all bits would be
|					; shifted off in denorm routine)
	clrl	%d0			|else clear the sticky flag
	bsr	dnrm_lp			|denormalize the number
	tstb	%d1			|check flag
	beqs	no_inex			|if clr, no inex
	bras	dnrm_inex		|if set, set inex

load_sgl:
	movew	#sgl_thresh,%d1		|put copy of threshold in d1
	movel	%d1,%d0			|copy d1 into d0
	subw	LOCAL_EX(%a0),%d0	|diff = threshold - exp
	cmpw	#67,%d0			|if diff >= 67 (mant + grs bits)
	bpls	chk_stky		|then branch (all bits would be
|					; shifted off in denorm routine)
	clrl	%d0			|else clear the sticky flag
	bsr	dnrm_lp			|denormalize the number
	tstb	%d1			|check flag
	beqs	no_inex			|if clr, no inex
	bras	dnrm_inex		|if set, set inex

|
| The whole mantissa would be shifted out.  Return a zero mantissa
| with the threshold exponent; any bit that was set becomes sticky.
|
| NOTE(review): when the mantissa is already all zeros, d0 is left
| holding the exponent difference rather than a cleared g,r,s value -
| confirm callers never reach here with a zero mantissa.
|
chk_stky:
	tstl	LOCAL_HI(%a0)		|check for any bits set
	bnes	set_stky
	tstl	LOCAL_LO(%a0)		|check for any bits set
	bnes	set_stky
	bras	clr_mant
set_stky:
	orl	#inx2a_mask,USER_FPSR(%a6) |set inex2/ainex
	movel	#0x20000000,%d0		|set sticky bit in return value
clr_mant:
	movew	%d1,LOCAL_EX(%a0)	|load exp with threshold
	movel	#0,LOCAL_HI(%a0)	|set d1 = 0 (ms mantissa)
	movel	#0,LOCAL_LO(%a0)	|set d2 = 0 (ls mantissa)
	rts
dnrm_inex:
	orl	#inx2a_mask,USER_FPSR(%a6) |set inex2/ainex
no_inex:
	rts

|
|	dnrm_lp --- denormalize exponent/mantissa to specified threshold
|
| Input:
|	a0		points to the operand to be denormalized
|	d0{31:29}	initial guard,round,sticky
|	d1{15:0}	denormalization threshold
| Output:
|	a0		points to the denormalized operand
|	d0{31:29}	final guard,round,sticky
|	d1.b		inexact flag:  all ones means inexact result
|
| The LOCAL_LO and LOCAL_GRS parts of the value are
| copied to FP_SCR2
| so that bfext can be used to extract the new low part of the mantissa.
| Dnrm_lp can be called with a0 pointing to ETEMP or WBTEMP and there
| is no LOCAL_GRS scratch word following it on the fsave frame.
|
	.global	dnrm_lp
dnrm_lp:
	movel	%d2,-(%sp)		|save d2 for temp use
	btstb	#E3,E_BYTE(%a6)		|test for type E3 exception
	beqs	not_E3			|not type E3 exception
	bfextu	WBTEMP_GRS(%a6){#6:#3},%d2	|extract guard,round, sticky bit
	movel	#29,%d0
	lsll	%d0,%d2			|shift g,r,s to their positions
	movel	%d2,%d0
not_E3:
	movel	(%sp)+,%d2		|restore d2
	movel	LOCAL_LO(%a0),FP_SCR2+LOCAL_LO(%a6)
	movel	%d0,FP_SCR2+LOCAL_GRS(%a6)
	movel	%d1,%d0			|copy the denorm threshold
	subw	LOCAL_EX(%a0),%d1	|d1 = threshold - uns exponent
	bles	no_lp			|d1 <= 0
	cmpw	#32,%d1
	blts	case_1			|0 < d1 < 32
	cmpw	#64,%d1
	blts	case_2			|32 <= d1 < 64
	bra	case_3			|d1 >= 64
|
| No normalization necessary
|
no_lp:
	clrb	%d1			|set no inex2 reported
	movel	FP_SCR2+LOCAL_GRS(%a6),%d0	|restore original g,r,s
	rts
|
| case (0<d1<32)
|
| The mantissa is shifted right by d1 bits using bit-field extracts
| that straddle adjacent longwords; the bits shifted out of LOCAL_LO
| become the new g,r,s.
|
case_1:
	movel	%d2,-(%sp)
	movew	%d0,LOCAL_EX(%a0)	|exponent = denorm threshold
	movel	#32,%d0
	subw	%d1,%d0			|d0 = 32 - d1
	bfextu	LOCAL_EX(%a0){%d0:#32},%d2
	bfextu	%d2{%d1:%d0},%d2	|d2 = new LOCAL_HI
	bfextu	LOCAL_HI(%a0){%d0:#32},%d1	|d1 = new LOCAL_LO
	bfextu	FP_SCR2+LOCAL_LO(%a6){%d0:#32},%d0	|d0 = new G,R,S
	movel	%d2,LOCAL_HI(%a0)	|store new LOCAL_HI
	movel	%d1,LOCAL_LO(%a0)	|store new LOCAL_LO
	clrb	%d1
	bftst	%d0{#2:#30}		|any bits below g,r?
	beqs	c1nstky
	bsetl	#rnd_stky_bit,%d0
	st	%d1
c1nstky:
	movel	FP_SCR2+LOCAL_GRS(%a6),%d2	|restore original g,r,s
	andil	#0xe0000000,%d2		|clear all but G,R,S
	tstl	%d2			|test if original G,R,S are clear
	beqs	grs_clear
	orl	#0x20000000,%d0		|set sticky bit in d0
grs_clear:
	andil	#0xe0000000,%d0		|clear all but G,R,S
	movel	(%sp)+,%d2
	rts
|
| case (32<=d1<64)
|
| The shift moves LOCAL_HI entirely into LOCAL_LO/g,r,s; the new
| LOCAL_HI is zero.
|
case_2:
	movel	%d2,-(%sp)
	movew	%d0,LOCAL_EX(%a0)	|unsigned exponent = threshold
	subw	#32,%d1			|d1 now between 0 and 32
	movel	#32,%d0
	subw	%d1,%d0			|d0 = 32 - d1
	bfextu	LOCAL_EX(%a0){%d0:#32},%d2
	bfextu	%d2{%d1:%d0},%d2	|d2 = new LOCAL_LO
	bfextu	LOCAL_HI(%a0){%d0:#32},%d1	|d1 = new G,R,S
	bftst	%d1{#2:#30}
	bnes	c2_sstky		|bra if sticky bit to be set
	bftst	FP_SCR2+LOCAL_LO(%a6){%d0:#32}
	bnes	c2_sstky		|bra if sticky bit to be set
	movel	%d1,%d0
	clrb	%d1
	bras	end_c2
c2_sstky:
	movel	%d1,%d0
	bsetl	#rnd_stky_bit,%d0
	st	%d1
end_c2:
	clrl	LOCAL_HI(%a0)		|store LOCAL_HI = 0
	movel	%d2,LOCAL_LO(%a0)	|store LOCAL_LO
	movel	FP_SCR2+LOCAL_GRS(%a6),%d2	|restore original g,r,s
	andil	#0xe0000000,%d2		|clear all but G,R,S
	tstl	%d2			|test if original G,R,S are clear
	beqs	clear_grs
	orl	#0x20000000,%d0		|set sticky bit in d0
clear_grs:
	andil	#0xe0000000,%d0		|get rid of all but G,R,S
	movel	(%sp)+,%d2
	rts
|
| d1 >= 64 Force the exponent to be the denorm threshold with the
| correct sign.
|
case_3:
	movew	%d0,LOCAL_EX(%a0)
	tstw	LOCAL_SGN(%a0)
	bges	c3con
c3neg:
	orl	#0x80000000,LOCAL_EX(%a0)
c3con:
	cmpw	#64,%d1
	beqs	sixty_four
	cmpw	#65,%d1
	beqs	sixty_five
|
| Shift value is out of range.  Set d1 for inex2 flag and
| return a zero with the given threshold.
|
	clrl	LOCAL_HI(%a0)
	clrl	LOCAL_LO(%a0)
	movel	#0x20000000,%d0
	st	%d1
	rts

sixty_four:
	movel	LOCAL_HI(%a0),%d0
	bfextu	%d0{#2:#30},%d1		|d1 = bits below g,r (sticky source)
	andil	#0xc0000000,%d0		|keep top two bits as g,r
	bras	c3com

sixty_five:
	movel	LOCAL_HI(%a0),%d0
	bfextu	%d0{#1:#31},%d1		|d1 = bits below r (sticky source)
	andil	#0x80000000,%d0
	lsrl	#1,%d0			|shift high bit into R bit

c3com:
	tstl	%d1
	bnes	c3ssticky
	tstl	LOCAL_LO(%a0)
	bnes	c3ssticky
	tstb	FP_SCR2+LOCAL_GRS(%a6)
	bnes	c3ssticky
	clrb	%d1
	bras	c3end

c3ssticky:
	bsetl	#rnd_stky_bit,%d0
	st	%d1
c3end:
	clrl	LOCAL_HI(%a0)
	clrl	LOCAL_LO(%a0)
	rts

	|end