|
|	stanh.sa 3.1 12/10/90
|
|	The entry point sTanh computes the hyperbolic tangent of
|	an input argument; sTanhd does the same except for denormalized
|	input.
|
|	Input: Double-extended number X in location pointed to
|		by address register a0.
|
|	Output: The value tanh(X) returned in floating-point register Fp0.
|
|	Accuracy and Monotonicity: The returned result is within 3 ulps in
|		64 significant bits, i.e. within 0.5001 ulp to 53 bits if the
|		result is subsequently rounded to double precision. The
|		result is provably monotonic in double precision.
|
|	Speed: The program stanh takes approximately 270 cycles.
|
|	Algorithm:
|
|	TANH
|	1. If |X| >= (5/2) log2 or |X| <= 2**(-40), go to 3.
|
|	2. (2**(-40) < |X| < (5/2) log2) Calculate tanh(X) by
|		sgn := sign(X), y := 2|X|, z := expm1(y), and
|		tanh(X) = sgn*( z/(2+z) ).
|		Exit.
|
|	3. (|X| <= 2**(-40) or |X| >= (5/2) log2). If |X| < 1,
|		go to 7.
|
|	4. (|X| >= (5/2) log2) If |X| >= 50 log2, go to 6.
|
|	5. ((5/2) log2 <= |X| < 50 log2) Calculate tanh(X) by
|		sgn := sign(X), y := 2|X|, z := exp(y),
|		tanh(X) = sgn - [ sgn*2/(1+z) ].
|		Exit.
|
|	6. (|X| >= 50 log2) Tanh(X) = +-1 (round to nearest).
|		Thus, we calculate Tanh(X) by
|		sgn := sign(X), Tiny := 2**(-126),
|		tanh(X) := sgn - sgn*Tiny.
|		Exit.
|
|	7. (|X| < 2**(-40)). Tanh(X) = X.	Exit.
|

|		Copyright (C) Motorola, Inc. 1990
|			All Rights Reserved
|
|       For details on the license for this file, please see the
|       file, README, in this same directory.

|STANH	idnt	2,1 | Motorola 040 Floating Point Software Package

	|section	8

#include "fpsp.h"

|--Scratch locations, all addressed off a6.  FP_SCR5, FP_SCR6 and L_SCR3
|--are expected to come from fpsp.h.
	.set	X,FP_SCR5		| working copy of the argument
	.set	XDCARE,X+2		| word at X+2; cleared again in TANHSM
	.set	XFRAC,X+4		| not referenced by the code below

	.set	SGN,L_SCR3		| sign of X, isolated in bit 31

	.set	V,FP_SCR6		| divisor sgn*(Z+2) of the usual case

|--Lower and upper bound for the cmp2l in stanh.  Both are in the same
|--compact form as d0 there: first longword of X with its low word
|--replaced by the word at offset 4.
BOUNDS1:	.long 0x3FD78000,0x3FFFDDCE  | ... 2^(-40), (5/2)LOG2

	|xref	t_frcinx
	|xref	t_extdnrm
	|xref	setox
	|xref	setoxm1

	.global	stanhd
stanhd:
|--TANH(X) = X FOR DENORMALIZED X

	bra		t_extdnrm

	.global	stanh
stanh:
|--Entry: a0 points to the extended-precision argument X.  Every path
|--below writes d1 to FPCR just before its last floating-point
|--instruction and leaves through t_frcinx with the result in fp0.
	fmovex		(%a0),%fp0	| ...LOAD INPUT

	fmovex		%fp0,X(%a6)
|--Build the compact form of X in d0: upper word from the first word of
|--X (sign and exponent), lower word from the word at 4(a0).
	movel		(%a0),%d0
	movew		4(%a0),%d0
	movel		%d0,X(%a6)		| first longword of X now holds the compact form
	andl		#0x7FFFFFFF,%d0		| drop the sign bit: compact |X|
	cmp2l		BOUNDS1(%pc),%d0	| ...2**(-40) < |X| < (5/2)LOG2 ?
	bcss		TANHBORS		| carry set: d0 lies outside the bounds

|--THIS IS THE USUAL CASE
|--Y = 2|X|, Z = EXPM1(Y), TANH(X) = SIGN(X) * Z / (Z+2).

	movel		X(%a6),%d0
	movel		%d0,SGN(%a6)
	andl		#0x7FFF0000,%d0		| keep the exponent only; also zeroes the low word
	addl		#0x00010000,%d0	| ...EXPONENT OF 2|X|
	movel		%d0,X(%a6)
	andl		#0x80000000,SGN(%a6)	| SGN keeps only the sign bit of X
	fmovex		X(%a6),%fp0		| ...FP0 IS Y = 2|X|

|--Call setoxm1 with Y stored at (a0).  d1 is saved around the call and
|--zeroed for it.
|--NOTE(review): presumably d1 carries rounding information into
|--setoxm1 and zero selects its default - confirm against setoxm1.
	movel		%d1,-(%a7)
	clrl		%d1
	fmovemx	%fp0-%fp0,(%a0)
	bsr		setoxm1		| ...FP0 IS Z = EXPM1(Y)
	movel		(%a7)+,%d1

	fmovex		%fp0,%fp1
	fadds		#0x40000000,%fp1	| ...Z+2
	movel		SGN(%a6),%d0
	fmovex		%fp1,V(%a6)
	eorl		%d0,V(%a6)		| fold sign of X into V: V = sgn*(Z+2)

	fmovel		%d1,%FPCR		|restore users exceptions
	fdivx		V(%a6),%fp0		| Z / (sgn*(Z+2))
	bra		t_frcinx

TANHBORS:
|--d0 still holds compact |X|, known to be outside BOUNDS1.
	cmpl		#0x3FFF8000,%d0		| compact form of 1.0
	blt		TANHSM			| |X| < 1, so it is the tiny case

	cmpl		#0x40048AA1,%d0		| compact form of 50 LOG2
	bgt		TANHHUGE

|-- (5/2) LOG2 < |X| < 50 LOG2,
|--TANH(X) = 1 - (2/[EXP(2X)+1]). LET Y = 2|X|, SGN = SIGN(X),
|--TANH(X) = SGN -	SGN*2/[EXP(Y)+1].

	movel		X(%a6),%d0
	movel		%d0,SGN(%a6)
	andl		#0x7FFF0000,%d0
	addl		#0x00010000,%d0	| ...EXPO OF 2|X|
	movel		%d0,X(%a6)		| ...Y = 2|X|
	andl		#0x80000000,SGN(%a6)
|--NOTE(review): d0 is loaded from SGN again right after the setox call
|--and is not used in between here, so this load looks redundant unless
|--setox reads d0 - confirm before removing.
	movel		SGN(%a6),%d0
	fmovex		X(%a6),%fp0		| ...Y = 2|X|

|--Same calling sequence as for setoxm1 in the usual case.
	movel		%d1,-(%a7)
	clrl		%d1
	fmovemx	%fp0-%fp0,(%a0)
	bsr		setox		| ...FP0 IS EXP(Y)
	movel		(%a7)+,%d1
	movel		SGN(%a6),%d0
	fadds		#0x3F800000,%fp0	| ...EXP(Y)+1

|--d0 is the bare sign bit; flipping bits 31 and 30 yields the single
|--precision pattern of -SIGN(X)*2.
	eorl		#0xC0000000,%d0	| ...-SIGN(X)*2
	fmoves		%d0,%fp1		| ...-SIGN(X)*2 IN SGL FMT
	fdivx		%fp0,%fp1		| ...-SIGN(X)2 / [EXP(Y)+1 ]

	movel		SGN(%a6),%d0
	orl		#0x3F800000,%d0	| ...SGN
	fmoves		%d0,%fp0		| ...SGN IN SGL FMT

	fmovel		%d1,%FPCR		|restore users exceptions
	faddx		%fp1,%fp0

	bra		t_frcinx

TANHSM:
|--Tiny |X|: return X itself.  The word at X+2 was overwritten with the
|--word from 4(a0) when the compact form was stored, so zero it before
|--X is loaded back as an extended-precision value.
	movew		#0x0000,XDCARE(%a6)

	fmovel		%d1,%FPCR		|restore users exceptions
	fmovex		X(%a6),%fp0		|last inst - possible exception set

	bra		t_frcinx

TANHHUGE:
|---RETURN SGN(X) - SGN(X)EPS
	movel		X(%a6),%d0
	andl		#0x80000000,%d0		| sign bit of X
	orl		#0x3F800000,%d0		| single precision +-1.0
	fmoves		%d0,%fp0
	andl		#0x80000000,%d0		| back to the bare sign bit
|--Flip the sign and set the single precision exponent field to 1,
|--giving magnitude 2**(-126) (the Tiny of step 6 in the header).
	eorl		#0x80800000,%d0	| ...-SIGN(X)*EPS

	fmovel		%d1,%FPCR		|restore users exceptions
	fadds		%d0,%fp0

	bra		t_frcinx

	|end