|
|	scale.sa 3.3 7/30/91
|
|	sscale scales the destination operand by the source operand,
|	i.e. it adjusts the destination exponent by the integer part
|	of the source.  If the absolute value of the source operand
|	is (>= 2^14) an overflow or underflow is returned.
|
|	The entry point sscale is called from do_func to emulate
|	the fscale unimplemented instruction.
|
|	Input: Double-extended destination operand in FPTEMP,
|		double-extended source operand in ETEMP.
|
|	Output: The function returns scale(X,Y) to fp0.
|
|	Modifies: fp0.  The code in this file also writes d0, d1, a0,
|		FPCR, FPSR, and the frame fields L_SCR1, L_SCR2,
|		FPTEMP, ETEMP, STORE_FLG, FP_SCR1 and USER_FPSR.
|
|	Algorithm:
|		1. Source exponent inside SRC_BNDS: convert the source
|		   to an integer (round to zero) and add it to the
|		   destination exponent, handling overflow, denormalized
|		   results and denormalized destinations.
|		2. Source exponent above the upper bound: overflow for a
|		   positive source, underflow for a negative source.
|		3. Source exponent below the lower bound: return the
|		   destination.
|
|		Copyright (C) Motorola, Inc. 1990
|			All Rights Reserved
|
|       For details on the license for this file, please see the
|       file, README, in this same directory.

|SCALE    idnt    2,1 | Motorola 040 Floating Point Software Package

	|section	8

#include "fpsp.h"

	|xref	t_ovfl2
	|xref	t_unfl
	|xref	round
	|xref	t_resdnrm

| Inclusive bounds, as biased exponents, checked with cmp2w in sscale.
SRC_BNDS: .short	0x3fff,0x400c

|
| This entry point is used by the unimplemented instruction exception
| handler.
|
|
|
|	FSCALE
|
| Entry: a6 addresses the frame holding FPTEMP (destination) and
| ETEMP (source).  d1 is loaded with the destination exponent (sign
| stripped), L_SCR1 records the destination sign, and the source
| exponent in d0 selects one of three paths:
|	inside SRC_BNDS		-> src_in
|	above 0x400c		-> src_out
|	otherwise		-> src_small
|
	.global	sscale
sscale:
	fmovel		#0,%fpcr		|no user-enabled exceptions in here
	clrl		%d1			|upper word of d1 starts out zero
	movew		FPTEMP(%a6),%d1		|d1 = dest sign/exponent word
	smi		L_SCR1(%a6)		|L_SCR1 nonzero if dest is negative
	andil		#0x7fff,%d1		|d1 = dest exponent without sign
	movew		ETEMP(%a6),%d0		|d0 = src sign/exponent word
	andiw		#0x7fff,%d0		|d0 = src exponent without sign
	cmp2w		SRC_BNDS,%d0		|carry is clear when within bounds
	bccs		src_in
	cmpiw		#0x400c,%d0		|out of bounds: too large?
	bge		src_out
|
| The source input is below 1, so the destination is the result.
| Check the destination tag for a denormalized number; if one is
| flagged, set unfl and finish through t_resdnrm.
|
src_small:
	moveb		DTAG(%a6),%d0
	andib		#0xe0,%d0		|keep the upper three tag bits
	tstb		%d0
	beqs		no_denorm		|nothing flagged: plain return
	st		STORE_FLG(%a6)		|dest already contains result
	orl		#unfl_mask,USER_FPSR(%a6) |set UNFL
den_done:
	leal		FPTEMP(%a6),%a0		|a0 = address of the operand
	bra		t_resdnrm
no_denorm:
	fmovel		USER_FPCR(%a6),%fpcr	|reload FPCR from USER_FPCR
	fmovex		FPTEMP(%a6),%fp0	|simply return dest
	rts


|
| Source is within 2^14 range.  To perform the int operation,
| move it to d0.
|
src_in:
	fmovex		ETEMP(%a6),%fp0		|fp0 = src
	fmovel		#rz_mode,%fpcr		|force rz for src conversion
	fmovel		%fp0,%d0		|d0 = src converted to integer
	fmovel		#0,%fpsr		|clr status from the conversion
	tstw		ETEMP(%a6)		|check src sign
	blt		src_neg
|
| Source is positive.  Add the src to the dest exponent.
| A zero dest exponent (denormalized dest) is handled at dst_dnrm.
| A zero sum is treated as a denormalized result, and a sum of
| 0x7fff or more is an overflow.
|
src_pos:
	tstw		%d1			|dest exponent zero?
	beq		dst_dnrm
	addl		%d0,%d1			|add src to dest exp
	beqs		denorm			|if zero, result is denorm
	cmpil		#0x7fff,%d1		|test for overflow
	bges		ovfl
	tstb		L_SCR1(%a6)		|dest was negative?
	beqs		spos_pos
	orw		#0x8000,%d1		|put the sign back
spos_pos:
	movew		%d1,FPTEMP(%a6)		|result in FPTEMP
	fmovel		USER_FPCR(%a6),%fpcr	|reload FPCR from USER_FPCR
	fmovex		FPTEMP(%a6),%fp0	|write result to fp0
	rts
|
| Overflow: hand the original destination operand to t_ovfl2 in ETEMP.
| NOTE(review): d1 is not stored on this path, so the sign merge below
| only matters if t_ovfl2 reads d1 -- confirm against t_ovfl2.
|
ovfl:
	tstb		L_SCR1(%a6)
	beqs		sovl_pos
	orw		#0x8000,%d1
sovl_pos:
	movew		FPTEMP(%a6),ETEMP(%a6)	|result in ETEMP
	movel		FPTEMP_HI(%a6),ETEMP_HI(%a6)
	movel		FPTEMP_LO(%a6),ETEMP_LO(%a6)
	bra		t_ovfl2

|
| The exponent sum is zero.  d1 becomes the sign/exponent word of the
| result.  If the j bit (msb of FPTEMP_HI) is set the result is
| returned as is; otherwise it is denormalized and goes to t_resdnrm
| in ETEMP with unfl set.
|
denorm:
	tstb		L_SCR1(%a6)
	beqs		den_pos
	orw		#0x8000,%d1		|put the sign back
den_pos:
	tstl		FPTEMP_HI(%a6)		|check j bit
	blts		nden_exit		|if set, not denorm
	movew		%d1,ETEMP(%a6)		|input expected in ETEMP
	movel		FPTEMP_HI(%a6),ETEMP_HI(%a6)
	movel		FPTEMP_LO(%a6),ETEMP_LO(%a6)
| The status word is updated with the mask constant, as at every other
| "set unfl" site in this file.  This line used to OR in unfl_bit, which
| by its name is a bit number rather than a mask (presumably defined in
| fpsp.h -- confirm).
	orl		#unfl_mask,USER_FPSR(%a6) |set unfl
	leal		ETEMP(%a6),%a0		|a0 = address of the operand
	bra		t_resdnrm
nden_exit:
	movew		%d1,FPTEMP(%a6)		|result in FPTEMP
	fmovel		USER_FPCR(%a6),%fpcr	|reload FPCR from USER_FPCR
	fmovex		FPTEMP(%a6),%fp0	|write result to fp0
	rts

|
| Source is negative.  Add the src to the dest exponent.
| (The result exponent will be reduced).  The result can be
| denormalized.
|
src_neg:
	addl		%d0,%d1			|add src to dest
	beqs		denorm			|if zero, result is denorm
	blts		fix_dnrm		|if negative, result is
|						;needing denormalization
	tstb		L_SCR1(%a6)		|dest was negative?
	beqs		sneg_pos
	orw		#0x8000,%d1		|put the sign back
sneg_pos:
	movew		%d1,FPTEMP(%a6)		|result in FPTEMP
	fmovel		USER_FPCR(%a6),%fpcr	|reload FPCR from USER_FPCR
	fmovex		FPTEMP(%a6),%fp0	|write result to fp0
	rts


|
| The result exponent is below denorm value.  Test for catastrophic
| underflow and force zero if true.  If not, try to shift the
| mantissa right until a zero exponent exists.
|
| On entry d1 holds the (negative) result exponent.  d2 is saved on
| the stack and restored at tst_con.
|
fix_dnrm:
	cmpiw		#0xffc0,%d1		|lower bound for normalization
	blt		fix_unfl		|if lower, catastrophic unfl
	movew		%d1,%d0			|d0 = exponent, counted up to 0
	movel		%d2,-(%a7)		|free d2 for norm
	movel		FPTEMP_HI(%a6),%d1	|d1:d2 = mantissa
	movel		FPTEMP_LO(%a6),%d2
	clrl		L_SCR2(%a6)		|no bits lost yet
fix_loop:
	addw		#1,%d0			|drive d0 to 0
	lsrl		#1,%d1			|while shifting the
	roxrl		#1,%d2			|mantissa to the right
	bccs		no_carry		|carry = bit shifted out of d2
	st		L_SCR2(%a6)		|use L_SCR2 to capture inex
no_carry:
	tstw		%d0			|keep shifting while the
	blts		fix_loop		|exponent is still negative
	tstb		L_SCR2(%a6)		|any one bits shifted out?
	beqs		tst_zero
	orl		#unfl_inx_mask,USER_FPSR(%a6)
|						;set unfl, aunfl, ainex
|
| Store the shifted mantissa with a zero exponent and the saved sign,
| then test for zero.  If zero, simply use fmove to return +/- zero
| to the fpu (unless the rounding mode check below applies).
|
tst_zero:
	clrw		FPTEMP_EX(%a6)		|exponent = 0, sign clear
	tstb		L_SCR1(%a6)		|test for sign
	beqs		tst_con
	orw		#0x8000,FPTEMP_EX(%a6)	|set sign bit
tst_con:
	movel		%d1,FPTEMP_HI(%a6)
	movel		%d2,FPTEMP_LO(%a6)
	movel		(%a7)+,%d2		|restore d2
	tstl		%d1
	bnes		not_zero
	tstl		FPTEMP_LO(%a6)
	bnes		not_zero
|
| Result is zero.  Check for rounding mode to set lsb.  If the
| mode is rp, and the zero is positive, return smallest denorm.
| If the mode is rm, and the zero is negative, return smallest
| negative denorm.
|
	btstb		#5,FPCR_MODE(%a6)	|test if rm or rp
	beqs		no_dir
	btstb		#4,FPCR_MODE(%a6)	|check which one
	beqs		zer_rm
zer_rp:
	tstb		L_SCR1(%a6)		|check sign
	bnes		no_dir			|if set, neg op, no inc
	movel		#1,FPTEMP_LO(%a6)	|set lsb
	bras		sm_dnrm
zer_rm:
	tstb		L_SCR1(%a6)		|check sign
	beqs		no_dir			|if clr, pos op, no inc
	movel		#1,FPTEMP_LO(%a6)	|set lsb
	orl		#neg_mask,USER_FPSR(%a6) |set N
	bras		sm_dnrm
no_dir:
	fmovel		USER_FPCR(%a6),%fpcr	|reload FPCR from USER_FPCR
	fmovex		FPTEMP(%a6),%fp0	|use fmove to set condition codes
	rts

|
| The rounding mode changed the zero to a smallest denorm.  Call
| t_resdnrm with exceptional operand in ETEMP.  (Also reached from
| not_zero and fix_unfl.)
|
sm_dnrm:
	movel		FPTEMP_EX(%a6),ETEMP_EX(%a6)
	movel		FPTEMP_HI(%a6),ETEMP_HI(%a6)
	movel		FPTEMP_LO(%a6),ETEMP_LO(%a6)
	leal		ETEMP(%a6),%a0		|a0 = address of the operand
	bra		t_resdnrm

|
| Result is still denormalized.
|
| Set unfl, set N for a negative result, and finish through sm_dnrm.
|
not_zero:
	orl		#unfl_mask,USER_FPSR(%a6) |set unfl
	tstb		L_SCR1(%a6)		|check for sign
	beqs		fix_exit
	orl		#neg_mask,USER_FPSR(%a6) |set N
fix_exit:
	bras		sm_dnrm


|
| The result has underflowed to zero.  Set unfl, aunfl, and ainex.
| Return zero, except that rp with a positive operand or rm with a
| negative operand returns the smallest denorm of that sign.
|
fix_unfl:
	orl		#unfl_inx_mask,USER_FPSR(%a6)
	btstb		#5,FPCR_MODE(%a6)	|test if rm or rp
	beqs		no_dir2
	btstb		#4,FPCR_MODE(%a6)	|check which one
	beqs		zer_rm2
zer_rp2:
	tstb		L_SCR1(%a6)		|check sign
	bnes		no_dir2			|if set, neg op, no inc
	clrl		FPTEMP_EX(%a6)		|positive, zero exponent
	clrl		FPTEMP_HI(%a6)
	movel		#1,FPTEMP_LO(%a6)	|set lsb
	bras		sm_dnrm			|return smallest denorm
zer_rm2:
	tstb		L_SCR1(%a6)		|check sign
	beqs		no_dir2			|if clr, pos op, no inc
	movew		#0x8000,FPTEMP_EX(%a6)	|negative, zero exponent
	clrl		FPTEMP_HI(%a6)
	movel		#1,FPTEMP_LO(%a6)	|set lsb
	orl		#neg_mask,USER_FPSR(%a6) |set N
	bra		sm_dnrm			|return smallest denorm

|
| Return a zero carrying the sign of the destination.
|
no_dir2:
	tstb		L_SCR1(%a6)		|sign flag clear?
	bges		pos_zero
neg_zero:
	clrl		FP_SCR1(%a6)		|clear the exceptional operand
	clrl		FP_SCR1+4(%a6)		|for gen_except.
	clrl		FP_SCR1+8(%a6)
	fmoves		#0x80000000,%fp0	|fp0 = -0
	rts
pos_zero:
	clrl		FP_SCR1(%a6)		|clear the exceptional operand
	clrl		FP_SCR1+4(%a6)		|for gen_except.
	clrl		FP_SCR1+8(%a6)
	fmoves		#0x00000000,%fp0	|fp0 = +0
	rts

|
| The destination is a denormalized number.  It must be handled
| by first shifting the bits in the mantissa until it is normalized,
| then adding the remainder of the source to the exponent.
| d2/d3 are saved here and restored on both exits (dst_norm and
| dst_fin).
|
dst_dnrm:
	moveml		%d2/%d3,-(%a7)
	movew		FPTEMP_EX(%a6),%d1	|d1 = sign/exponent word
	movel		FPTEMP_HI(%a6),%d2	|d2:d3 = mantissa
	movel		FPTEMP_LO(%a6),%d3
dst_loop:
	tstl		%d2			|test for normalized result
	blts		dst_norm		|exit loop if so
	tstl		%d0			|otherwise, test shift count
	beqs		dst_fin			|if zero, shifting is done
	subil		#1,%d0			|dec src
	lsll		#1,%d3			|shift the mantissa left
	roxll		#1,%d2			|by one bit
	bras		dst_loop
|
| Destination became normalized.  Simply add the remaining
| portion of the src to the exponent.
|
| Entered from dst_loop with d2/d3 still saved on the stack.
|
dst_norm:
	addw		%d0,%d1			|dst is normalized; add src
	tstb		L_SCR1(%a6)		|dest was negative?
	beqs		dnrm_pos
	orl		#0x8000,%d1		|put the sign back
dnrm_pos:
	movemw		%d1,FPTEMP_EX(%a6)	|store sign/exponent
	moveml		%d2,FPTEMP_HI(%a6)	|store mantissa
	moveml		%d3,FPTEMP_LO(%a6)
	fmovel		USER_FPCR(%a6),%fpcr	|reload FPCR from USER_FPCR
	fmovex		FPTEMP(%a6),%fp0	|write result to fp0
	moveml		(%a7)+,%d2/%d3		|restore d2/d3
	rts

|
| Destination remained denormalized.  Call t_resdnrm with
| exceptional operand in ETEMP.
|
dst_fin:
	tstb		L_SCR1(%a6)		|check for sign
	beqs		dst_exit
	orl		#neg_mask,USER_FPSR(%a6) |set N
	orl		#0x8000,%d1		|put the sign back
dst_exit:
	movemw		%d1,ETEMP_EX(%a6)	|store sign/exponent
	moveml		%d2,ETEMP_HI(%a6)	|store mantissa
	moveml		%d3,ETEMP_LO(%a6)
	orl		#unfl_mask,USER_FPSR(%a6) |set unfl
	moveml		(%a7)+,%d2/%d3		|restore d2/d3
	leal		ETEMP(%a6),%a0		|a0 = address of the operand
	bra		t_resdnrm

|
| Source is outside of 2^14 range.  Test the sign and branch
| to the appropriate exception handler.
|
| The destination (sign restored into d1) is copied to ETEMP.  The
| sign of the source, still in the ETEMP exponent word at the time of
| the test, selects t_ovfl2 (positive) or t_unfl (negative).
|
src_out:
	tstb		L_SCR1(%a6)		|dest was negative?
	beqs		scro_pos
	orl		#0x8000,%d1		|put the sign back
scro_pos:
	movel		FPTEMP_HI(%a6),ETEMP_HI(%a6)
	movel		FPTEMP_LO(%a6),ETEMP_LO(%a6)
	tstw		ETEMP(%a6)		|src sign
	blts		res_neg
res_pos:
	movew		%d1,ETEMP(%a6)		|result in ETEMP
	bra		t_ovfl2
res_neg:
	movew		%d1,ETEMP(%a6)		|result in ETEMP
	leal		ETEMP(%a6),%a0		|a0 = address of the operand
	bra		t_unfl
	|end