|
|	satan.sa 3.3 12/19/90
|
|	The entry point satan computes the arctangent of an
|	input value. satand does the same except the input value is a
|	denormalized number.
|
|	Input: Double-extended value in memory location pointed to by address
|		register a0.
|
|	Output:	Arctan(X) returned in floating-point register Fp0.
|
|	Accuracy and Monotonicity: The returned result is within 2 ulps in
|		64 significant bits, i.e. within 0.5001 ulp to 53 bits if the
|		result is subsequently rounded to double precision. The
|		result is provably monotonic in double precision.
|
|	Speed: The program satan takes approximately 160 cycles for input
|		argument X such that 1/16 < |X| < 16. For the other arguments,
|		the program will run no worse than 10% slower.
|
|	Algorithm:
|	Step 1. If |X| >= 16 or |X| < 1/16, go to Step 5.
|
|	Step 2. Let X = sgn * 2**k * 1.xxxxxxxx...x. Note that k = -4, -3,..., or 3.
|		Define F = sgn * 2**k * 1.xxxx1, i.e. the first 5 significant bits
|		of X with a bit-1 attached at the 6-th bit position. Define u
|		to be u = (X-F) / (1 + X*F).
|
|	Step 3. Approximate arctan(u) by a polynomial poly.
|
|	Step 4. Return arctan(F) + poly, where arctan(F) is fetched from a table
|		of values calculated beforehand. Exit.
|
|	Step 5. If |X| >= 16, go to Step 7.
|
|	Step 6. Approximate arctan(X) by an odd polynomial in X. Exit.
|
|	Step 7. Define X' = -1/X. Approximate arctan(X') by an odd polynomial in X'.
|		Arctan(X) = sign(X)*Pi/2 + arctan(X'). Exit.
|

|		Copyright (C) Motorola, Inc. 1990
|			All Rights Reserved
|
|       For details on the license for this file, please see the
|       file, README, in this same directory.

|satan	idnt	2,1 | Motorola 040 Floating Point Software Package

	|section	8

#include "fpsp.h"

|--RANGE BOUNDS IN "COMPACT FORM" (SIGN/EXPONENT WORD IN THE UPPER 16 BITS,
|--TOP 16 MANTISSA BITS IN THE LOWER 16 BITS): 0x3FFB8000 IS 1/16 AND
|--0x4002FFFF IS THE LARGEST COMPACT VALUE BELOW 16.
|--NOTE: BOUNDS1 AND ONE ARE NOT REFERENCED BY NAME IN THE CODE VISIBLE IN
|--THIS FILE; satan USES THE SAME BIT PATTERNS AS IMMEDIATE OPERANDS.
BOUNDS1:	.long 0x3FFB8000,0x4002FFFF

|--SINGLE-PRECISION 1.0 (SAME PATTERN AS THE fadds IMMEDIATE IN ATANMAIN),
|--FOLLOWED BY ONE ZERO LONG.
ONE:	.long 0x3F800000

	.long 0x00000000

|--ALL ATANAx/ATANBx/ATANCx COEFFICIENTS BELOW ARE DOUBLE-PRECISION VALUES
|--(TWO LONGS EACH); THEY ARE FETCHED WITH fmoved/faddd/fmuld.

|--A1..A3: COEFFICIENTS OF THE REARRANGED POLYNOMIAL
|--U + A1*U*V*(A2 + V*(A3 + V)), V = U*U, USED BY ATANMAIN FOR ATAN(U).
ATANA3:	.long 0xBFF6687E,0x314987D8
ATANA2:	.long 0x4002AC69,0x34A26DB3

ATANA1:	.long 0xBFC2476F,0x4E1DA28E
|--B1..B6: COEFFICIENTS OF X + X*Y*(B1+Y*(B2+...+Y*B6)), Y = X*X,
|--USED BY ATANSM FOR |X| < 1/16.
ATANB6:	.long 0x3FB34444,0x7F876989

ATANB5:	.long 0xBFB744EE,0x7FAF45DB
ATANB4:	.long 0x3FBC71C6,0x46940220

ATANB3:	.long 0xBFC24924,0x921872F9
ATANB2:	.long 0x3FC99999,0x99998FA9

ATANB1:	.long 0xBFD55555,0x55555555
|--C1..C5: COEFFICIENTS OF X' + X'*Y*(C1+Y*(C2+...+Y*C5)), X' = -1/X,
|--Y = X'*X', USED BY ATANBIG FOR |X| >= 16.
ATANC5:	.long 0xBFB70BF3,0x98539E6A

ATANC4:	.long 0x3FBC7187,0x962D1D7D
ATANC3:	.long 0xBFC24924,0x827107B8

ATANC2:	.long 0x3FC99999,0x9996263E
ATANC1:	.long 0xBFD55555,0x55555536

|--EXTENDED-PRECISION CONSTANTS (ACCESSED WITH fmovex/faddx/fsubx). EACH IS
|--THREE LONGS OF DATA (SIGN/EXPONENT WORD + 16-BIT PAD, THEN THE 64-BIT
|--MANTISSA) FOLLOWED BY ONE ZERO LONG.
|--PPIBY2/NPIBY2 ARE +PI/2 AND -PI/2. PTINY/NTINY HAVE BIASED EXPONENT 1 AND
|--ONLY THE LEADING MANTISSA BIT SET; ATANHUGE SUBTRACTS THEM FROM +-PI/2.
PPIBY2:	.long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
NPIBY2:	.long 0xBFFF0000,0xC90FDAA2,0x2168C235,0x00000000
PTINY:	.long 0x00010000,0x80000000,0x00000000,0x00000000
NTINY:	.long 0x80010000,0x80000000,0x00000000,0x00000000

|--ATANTBL: 128 ENTRIES OF ATAN(|F|) IN EXTENDED PRECISION, 16 BYTES PER
|--ENTRY (THREE DATA LONGS PLUS ONE ZERO LONG). ATANMAIN COMPUTES THE BYTE
|--OFFSET AS 16 * (16*(K+4) + J), WHERE K = -4..3 IS THE EXPONENT OF X AND
|--J IS THE 4 MANTISSA BITS FOLLOWING THE LEADING 1, SO THE TABLE IS
|--8 GROUPS (ONE PER K) OF 16 ENTRIES, |F| = 2^K * 1.BBBB1 (BINARY).
ATANTBL:
|--K = -4
	.long	0x3FFB0000,0x83D152C5,0x060B7A51,0x00000000
	.long	0x3FFB0000,0x8BC85445,0x65498B8B,0x00000000
	.long	0x3FFB0000,0x93BE4060,0x17626B0D,0x00000000
	.long	0x3FFB0000,0x9BB3078D,0x35AEC202,0x00000000
	.long	0x3FFB0000,0xA3A69A52,0x5DDCE7DE,0x00000000
	.long	0x3FFB0000,0xAB98E943,0x62765619,0x00000000
	.long	0x3FFB0000,0xB389E502,0xF9C59862,0x00000000
	.long	0x3FFB0000,0xBB797E43,0x6B09E6FB,0x00000000
	.long	0x3FFB0000,0xC367A5C7,0x39E5F446,0x00000000
	.long	0x3FFB0000,0xCB544C61,0xCFF7D5C6,0x00000000
	.long	0x3FFB0000,0xD33F62F8,0x2488533E,0x00000000
	.long	0x3FFB0000,0xDB28DA81,0x62404C77,0x00000000
	.long	0x3FFB0000,0xE310A407,0x8AD34F18,0x00000000
	.long	0x3FFB0000,0xEAF6B0A8,0x188EE1EB,0x00000000
	.long	0x3FFB0000,0xF2DAF194,0x9DBE79D5,0x00000000
	.long	0x3FFB0000,0xFABD5813,0x61D47E3E,0x00000000
|--K = -3
	.long	0x3FFC0000,0x8346AC21,0x0959ECC4,0x00000000
	.long	0x3FFC0000,0x8B232A08,0x304282D8,0x00000000
	.long	0x3FFC0000,0x92FB70B8,0xD29AE2F9,0x00000000
	.long	0x3FFC0000,0x9ACF476F,0x5CCD1CB4,0x00000000
	.long	0x3FFC0000,0xA29E7630,0x4954F23F,0x00000000
	.long	0x3FFC0000,0xAA68C5D0,0x8AB85230,0x00000000
	.long	0x3FFC0000,0xB22DFFFD,0x9D539F83,0x00000000
	.long	0x3FFC0000,0xB9EDEF45,0x3E900EA5,0x00000000
	.long	0x3FFC0000,0xC1A85F1C,0xC75E3EA5,0x00000000
	.long	0x3FFC0000,0xC95D1BE8,0x28138DE6,0x00000000
	.long	0x3FFC0000,0xD10BF300,0x840D2DE4,0x00000000
	.long	0x3FFC0000,0xD8B4B2BA,0x6BC05E7A,0x00000000
	.long	0x3FFC0000,0xE0572A6B,0xB42335F6,0x00000000
	.long	0x3FFC0000,0xE7F32A70,0xEA9CAA8F,0x00000000
	.long	0x3FFC0000,0xEF888432,0x64ECEFAA,0x00000000
	.long	0x3FFC0000,0xF7170A28,0xECC06666,0x00000000
|--K = -2
	.long	0x3FFD0000,0x812FD288,0x332DAD32,0x00000000
	.long	0x3FFD0000,0x88A8D1B1,0x218E4D64,0x00000000
	.long	0x3FFD0000,0x9012AB3F,0x23E4AEE8,0x00000000
	.long	0x3FFD0000,0x976CC3D4,0x11E7F1B9,0x00000000
	.long	0x3FFD0000,0x9EB68949,0x3889A227,0x00000000
	.long	0x3FFD0000,0xA5EF72C3,0x4487361B,0x00000000
	.long	0x3FFD0000,0xAD1700BA,0xF07A7227,0x00000000
	.long	0x3FFD0000,0xB42CBCFA,0xFD37EFB7,0x00000000
	.long	0x3FFD0000,0xBB303A94,0x0BA80F89,0x00000000
	.long	0x3FFD0000,0xC22115C6,0xFCAEBBAF,0x00000000
	.long	0x3FFD0000,0xC8FEF3E6,0x86331221,0x00000000
	.long	0x3FFD0000,0xCFC98330,0xB4000C70,0x00000000
	.long	0x3FFD0000,0xD6807AA1,0x102C5BF9,0x00000000
	.long	0x3FFD0000,0xDD2399BC,0x31252AA3,0x00000000
	.long	0x3FFD0000,0xE3B2A855,0x6B8FC517,0x00000000
	.long	0x3FFD0000,0xEA2D764F,0x64315989,0x00000000
|--K = -1
	.long	0x3FFD0000,0xF3BF5BF8,0xBAD1A21D,0x00000000
	.long	0x3FFE0000,0x801CE39E,0x0D205C9A,0x00000000
	.long	0x3FFE0000,0x8630A2DA,0xDA1ED066,0x00000000
	.long	0x3FFE0000,0x8C1AD445,0xF3E09B8C,0x00000000
	.long	0x3FFE0000,0x91DB8F16,0x64F350E2,0x00000000
	.long	0x3FFE0000,0x97731420,0x365E538C,0x00000000
	.long	0x3FFE0000,0x9CE1C8E6,0xA0B8CDBA,0x00000000
	.long	0x3FFE0000,0xA22832DB,0xCADAAE09,0x00000000
	.long	0x3FFE0000,0xA746F2DD,0xB7602294,0x00000000
	.long	0x3FFE0000,0xAC3EC0FB,0x997DD6A2,0x00000000
	.long	0x3FFE0000,0xB110688A,0xEBDC6F6A,0x00000000
	.long	0x3FFE0000,0xB5BCC490,0x59ECC4B0,0x00000000
	.long	0x3FFE0000,0xBA44BC7D,0xD470782F,0x00000000
	.long	0x3FFE0000,0xBEA94144,0xFD049AAC,0x00000000
	.long	0x3FFE0000,0xC2EB4ABB,0x661628B6,0x00000000
	.long	0x3FFE0000,0xC70BD54C,0xE602EE14,0x00000000
|--K = 0
	.long	0x3FFE0000,0xCD000549,0xADEC7159,0x00000000
	.long	0x3FFE0000,0xD48457D2,0xD8EA4EA3,0x00000000
	.long	0x3FFE0000,0xDB948DA7,0x12DECE3B,0x00000000
	.long	0x3FFE0000,0xE23855F9,0x69E8096A,0x00000000
	.long	0x3FFE0000,0xE8771129,0xC4353259,0x00000000
	.long	0x3FFE0000,0xEE57C16E,0x0D379C0D,0x00000000
	.long	0x3FFE0000,0xF3E10211,0xA87C3779,0x00000000
	.long	0x3FFE0000,0xF919039D,0x758B8D41,0x00000000
	.long	0x3FFE0000,0xFE058B8F,0x64935FB3,0x00000000
	.long	0x3FFF0000,0x8155FB49,0x7B685D04,0x00000000
	.long	0x3FFF0000,0x83889E35,0x49D108E1,0x00000000
	.long	0x3FFF0000,0x859CFA76,0x511D724B,0x00000000
	.long	0x3FFF0000,0x87952ECF,0xFF8131E7,0x00000000
	.long	0x3FFF0000,0x89732FD1,0x9557641B,0x00000000
	.long	0x3FFF0000,0x8B38CAD1,0x01932A35,0x00000000
	.long	0x3FFF0000,0x8CE7A8D8,0x301EE6B5,0x00000000
|--K = 1
	.long	0x3FFF0000,0x8F46A39E,0x2EAE5281,0x00000000
	.long	0x3FFF0000,0x922DA7D7,0x91888487,0x00000000
	.long	0x3FFF0000,0x94D19FCB,0xDEDF5241,0x00000000
	.long	0x3FFF0000,0x973AB944,0x19D2A08B,0x00000000
	.long	0x3FFF0000,0x996FF00E,0x08E10B96,0x00000000
	.long	0x3FFF0000,0x9B773F95,0x12321DA7,0x00000000
	.long	0x3FFF0000,0x9D55CC32,0x0F935624,0x00000000
	.long	0x3FFF0000,0x9F100575,0x006CC571,0x00000000
	.long	0x3FFF0000,0xA0A9C290,0xD97CC06C,0x00000000
	.long	0x3FFF0000,0xA22659EB,0xEBC0630A,0x00000000
	.long	0x3FFF0000,0xA388B4AF,0xF6EF0EC9,0x00000000
	.long	0x3FFF0000,0xA4D35F10,0x61D292C4,0x00000000
	.long	0x3FFF0000,0xA60895DC,0xFBE3187E,0x00000000
	.long	0x3FFF0000,0xA72A51DC,0x7367BEAC,0x00000000
	.long	0x3FFF0000,0xA83A5153,0x0956168F,0x00000000
	.long	0x3FFF0000,0xA93A2007,0x7539546E,0x00000000
|--K = 2
	.long	0x3FFF0000,0xAA9E7245,0x023B2605,0x00000000
	.long	0x3FFF0000,0xAC4C84BA,0x6FE4D58F,0x00000000
	.long	0x3FFF0000,0xADCE4A4A,0x606B9712,0x00000000
	.long	0x3FFF0000,0xAF2A2DCD,0x8D263C9C,0x00000000
	.long	0x3FFF0000,0xB0656F81,0xF22265C7,0x00000000
	.long	0x3FFF0000,0xB1846515,0x0F71496A,0x00000000
	.long	0x3FFF0000,0xB28AAA15,0x6F9ADA35,0x00000000
	.long	0x3FFF0000,0xB37B44FF,0x3766B895,0x00000000
	.long	0x3FFF0000,0xB458C3DC,0xE9630433,0x00000000
	.long	0x3FFF0000,0xB525529D,0x562246BD,0x00000000
	.long	0x3FFF0000,0xB5E2CCA9,0x5F9D88CC,0x00000000
	.long	0x3FFF0000,0xB692CADA,0x7ACA1ADA,0x00000000
	.long	0x3FFF0000,0xB736AEA7,0xA6925838,0x00000000
	.long	0x3FFF0000,0xB7CFAB28,0x7E9F7B36,0x00000000
	.long	0x3FFF0000,0xB85ECC66,0xCB219835,0x00000000
	.long	0x3FFF0000,0xB8E4FD5A,0x20A593DA,0x00000000
|--K = 3
	.long	0x3FFF0000,0xB99F41F6,0x4AFF9BB5,0x00000000
	.long	0x3FFF0000,0xBA7F1E17,0x842BBE7B,0x00000000
	.long	0x3FFF0000,0xBB471285,0x7637E17D,0x00000000
	.long	0x3FFF0000,0xBBFABE8A,0x4788DF6F,0x00000000
	.long	0x3FFF0000,0xBC9D0FAD,0x2B689D79,0x00000000
	.long	0x3FFF0000,0xBD306A39,0x471ECD86,0x00000000
	.long	0x3FFF0000,0xBDB6C731,0x856AF18A,0x00000000
	.long	0x3FFF0000,0xBE31CAC5,0x02E80D70,0x00000000
	.long	0x3FFF0000,0xBEA2D55C,0xE33194E2,0x00000000
	.long	0x3FFF0000,0xBF0B10B7,0xC03128F0,0x00000000
	.long	0x3FFF0000,0xBF6B7A18,0xDACB778D,0x00000000
	.long	0x3FFF0000,0xBFC4EA46,0x63FA18F6,0x00000000
	.long	0x3FFF0000,0xC0181BDE,0x8B89A454,0x00000000
	.long	0x3FFF0000,0xC065B066,0xCFBF6439,0x00000000
	.long	0x3FFF0000,0xC0AE345F,0x56340AE6,0x00000000
	.long	0x3FFF0000,0xC0F22291,0x9CB9E6A7,0x00000000

|--SCRATCH SLOTS, ADDRESSED AS OFFSETS FROM %a6.
|--(FP_SCR1/FP_SCR2 ARE PRESUMABLY DEFINED IN fpsp.h -- NOT VISIBLE HERE.)
|--X HOLDS AN EXTENDED-PRECISION OPERAND: SIGN/EXPONENT WORD AT X,
|--THE 16-BIT PAD AT X+2, HIGH MANTISSA LONG AT X+4, LOW MANTISSA LONG AT X+8.
	.set	X,FP_SCR1
	.set	XDCARE,X+2
	.set	XFRAC,X+4
	.set	XFRACLO,X+8

|--ATANF HOLDS SIGN(F)*ATAN(|F|) IN THE SAME THREE-LONG LAYOUT.
	.set	ATANF,FP_SCR2
	.set	ATANFHI,ATANF+4
	.set	ATANFLO,ATANF+8


	| xref	t_frcinx
|xref	t_extdnrm

|--satand: ENTRY FOR A DENORMALIZED ARGUMENT. IT DOES NO WORK OF ITS OWN AND
|--BRANCHES STRAIGHT TO t_extdnrm, WHICH IS DEFINED ELSEWHERE.
	.global	satand
satand:
|--ENTRY POINT FOR ATAN(X) FOR DENORMALIZED ARGUMENT

	bra		t_extdnrm

|--satan: ARCTANGENT OF THE EXTENDED-PRECISION VALUE AT (%a0).
|--  IN:   %a0 = ADDRESS OF X (FINITE, NON-ZERO, NOT A NAN)
|--        %a6 = BASE FOR THE SCRATCH SLOTS X AND ATANF
|--        %d1 = VALUE WRITTEN TO FPCR BEFORE THE FINAL OPERATION
|--              (PRESUMABLY THE USER'S FPCR IMAGE SET UP BY THE CALLER)
|--  OUT:  %fp0 = ATAN(X); EVERY PATH LEAVES THROUGH bra t_frcinx
|--  USES: %d0, %a1, %fp1, %fp2, %fp3 AND THE SCRATCH SLOTS ARE OVERWRITTEN;
|--        %d2 IS SAVED ON THE STACK AND RESTORED.
	.global	satan
satan:
|--ENTRY POINT FOR ATAN(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S

	fmovex		(%a0),%fp0	| ...LOAD INPUT

|--BUILD |X| IN "COMPACT FORM" IN d0: SIGN/EXPONENT WORD IN THE UPPER HALF,
|--TOP 16 MANTISSA BITS IN THE LOWER HALF, THEN CLEAR THE SIGN BIT SO THE
|--SIGNED COMPARES BELOW ORDER BY MAGNITUDE.
	movel		(%a0),%d0
	movew		4(%a0),%d0
	fmovex		%fp0,X(%a6)
	andil		#0x7FFFFFFF,%d0

	cmpil		#0x3FFB8000,%d0		| ...|X| >= 1/16?
	bges		ATANOK1
	bra		ATANSM

ATANOK1:
	cmpil		#0x4002FFFF,%d0		| ...|X| < 16 ?
	bles		ATANMAIN
	bra		ATANBIG


|--THE MOST LIKELY CASE, |X| IN [1/16, 16). WE USE TABLE TECHNIQUE
|--THE IDEA IS ATAN(X) = ATAN(F) + ATAN( [X-F] / [1+XF] ).
|--SO IF F IS CHOSEN TO BE CLOSE TO X AND ATAN(F) IS STORED IN
|--A TABLE, ALL WE NEED IS TO APPROXIMATE ATAN(U) WHERE
|--U = (X-F)/(1+XF) IS SMALL (REMEMBER F IS CLOSE TO X). IT IS
|--TRUE THAT A DIVIDE IS NOW NEEDED, BUT THE APPROXIMATION FOR
|--ATAN(U) IS A VERY SHORT POLYNOMIAL AND THE INDEXING TO
|--FETCH F AND SAVING OF REGISTERS CAN BE ALL HIDDEN UNDER THE
|--DIVIDE. IN THE END THIS METHOD IS MUCH FASTER THAN A TRADITIONAL
|--ONE. NOTE ALSO THAT THE TRADITIONAL SCHEME THAT APPROXIMATES
|--ATAN(X) DIRECTLY WILL NEED TO USE A RATIONAL APPROXIMATION
|--(DIVISION NEEDED) ANYWAY BECAUSE A POLYNOMIAL APPROXIMATION
|--WILL INVOLVE A VERY LONG POLYNOMIAL.

|--NOW WE SEE X AS +-2^K * 1.BBBBBBB....B <- 1. + 63 BITS
|--WE CHOSE F TO BE +-2^K * 1.BBBB1
|--THAT IS IT MATCHES THE EXPONENT AND FIRST 5 BITS OF X, THE
|--SIXTH BIT IS SET TO BE 1. SINCE K = -4, -3, ..., 3, THERE
|--ARE ONLY 8 TIMES 16 = 2^7 = 128 |F|'S. SINCE ATAN(-|F|) IS
|-- -ATAN(|F|), WE NEED TO STORE ONLY ATAN(|F|).

ATANMAIN:

|--TURN THE COPY OF X IN THE SCRATCH SLOT INTO F IN PLACE: KEEP THE SIGN,
|--THE EXPONENT AND THE TOP 5 MANTISSA BITS, FORCE THE 6TH BIT TO 1 AND
|--ZERO EVERYTHING BELOW IT.
	movew		#0x0000,XDCARE(%a6)	| ...CLEAN UP X JUST IN CASE
	andil		#0xF8000000,XFRAC(%a6)	| ...FIRST 5 BITS
	oril		#0x04000000,XFRAC(%a6)	| ...SET 6-TH BIT TO 1
	movel		#0x00000000,XFRACLO(%a6)	| ...LOCATION OF X IS NOW F

	fmovex		%fp0,%fp1			| ...FP1 IS X
	fmulx		X(%a6),%fp1		| ...FP1 IS X*F, NOTE THAT X*F > 0
	fsubx		X(%a6),%fp0		| ...FP0 IS X-F
	fadds		#0x3F800000,%fp1		| ...FP1 IS 1 + X*F
	fdivx		%fp1,%fp0			| ...FP0 IS U = (X-F)/(1+X*F)

|--WHILE THE DIVISION IS TAKING ITS TIME, WE FETCH ATAN(|F|),
|--CREATE ATAN(F) AND STORE IT IN ATANF.
|--(THE ORIGINAL COMMENT ALSO SAID "SAVE REGISTERS FP2", BUT NO FLOATING-
|--POINT REGISTER IS SAVED HERE; ONLY d2 IS PUSHED AND POPPED.)

|--d0 STILL HOLDS |X| IN COMPACT FORM. THE SEQUENCE BELOW TURNS IT INTO THE
|--BYTE OFFSET 16 * (16*(K+4) + J) INTO ATANTBL, WHERE J IS THE 4 MANTISSA
|--BITS AFTER THE LEADING 1 (BITS 14..11 OF THE COMPACT FORM).
	movel		%d2,-(%a7)	| ...SAVE d2 TEMPORARILY
	movel		%d0,%d2		| ...THE EXPO AND 16 BITS OF X
	andil		#0x00007800,%d0	| ...4 VARYING BITS OF F'S FRACTION
	andil		#0x7FFF0000,%d2	| ...EXPONENT OF F
	subil		#0x3FFB0000,%d2	| ...K+4
	asrl		#1,%d2		| ...K+4 NOW SITS JUST ABOVE THE 4 FRACTION BITS
	addl		%d2,%d0		| ...THE 7 BITS IDENTIFYING F
	asrl		#7,%d0		| ...INDEX INTO TBL OF ATAN(|F|)
	lea		ATANTBL,%a1
	addal		%d0,%a1		| ...ADDRESS OF ATAN(|F|)
	movel		(%a1)+,ATANF(%a6)
	movel		(%a1)+,ATANFHI(%a6)
	movel		(%a1)+,ATANFLO(%a6)	| ...ATANF IS NOW ATAN(|F|)
	movel		X(%a6),%d0		| ...LOAD SIGN AND EXPO. AGAIN
	andil		#0x80000000,%d0	| ...SIGN(F)
	orl		%d0,ATANF(%a6)	| ...ATANF IS NOW SIGN(F)*ATAN(|F|)
	movel		(%a7)+,%d2	| ...RESTORE d2

|--THAT'S ALL I HAVE TO DO FOR NOW,
|--BUT ALAS, THE DIVIDE IS STILL CRANKING!

|--U IN FP0, WE ARE NOW READY TO COMPUTE ATAN(U) AS
|--U + A1*U*V*(A2 + V*(A3 + V)), V = U*U
|--THE POLYNOMIAL MAY LOOK STRANGE, BUT IS NEVERTHELESS CORRECT.
|--THE NATURAL FORM IS U + U*V*(A1 + V*(A2 + V*A3))
|--WHAT WE HAVE HERE IS MERELY A1 = A3, A2 = A1/A3, A3 = A2/A3.
|--THE REASON FOR THIS REARRANGEMENT IS TO MAKE THE INDEPENDENT
|--PARTS A1*U*V AND (A2 + ... STUFF) MORE LOAD-BALANCED


	fmovex		%fp0,%fp1
	fmulx		%fp1,%fp1		| ...FP1 IS V = U*U
	fmoved		ATANA3,%fp2
	faddx		%fp1,%fp2		| ...A3+V
	fmulx		%fp1,%fp2		| ...V*(A3+V)
	fmulx		%fp0,%fp1		| ...U*V
	faddd		ATANA2,%fp2	| ...A2+V*(A3+V)
	fmuld		ATANA1,%fp1	| ...A1*U*V
	fmulx		%fp2,%fp1		| ...A1*U*V*(A2+V*(A3+V))

	faddx		%fp1,%fp0		| ...ATAN(U), FP1 RELEASED
|--FPCR IS RELOADED FROM d1 BEFORE THE LAST ADD, SO ONLY THAT FINAL
|--OPERATION RUNS UNDER THE RESTORED CONTROL SETTINGS.
	fmovel		%d1,%FPCR		|restore users exceptions
	faddx		ATANF(%a6),%fp0	| ...ATAN(X)
	bra		t_frcinx

ATANBORS:
|--|X| IS IN d0 IN COMPACT FORM. FP1, d0 SAVED.
|--FP0 IS X AND |X| <= 1/16 OR |X| >= 16.
|--NOTE(review): NO BRANCH TO ATANBORS IS VISIBLE IN THIS FILE; satan
|--DISPATCHES DIRECTLY TO ATANSM / ATANBIG, SO THIS COMPARE LOOKS UNREACHABLE.
|--THE CONSTANT 0x3FFF8000 IS THE COMPACT FORM OF 1.0 (NOT 16); IT WOULD STILL
|--SEPARATE THE TWO RANGES NAMED ABOVE. CONFIRM BEFORE RELYING ON IT.
	cmpil		#0x3FFF8000,%d0
	bgt		ATANBIG	| ...I.E. |X| >= 16

ATANSM:
|--|X| < 1/16
|--IF |X| < 2^(-40), RETURN X AS ANSWER. OTHERWISE, APPROXIMATE
|--ATAN(X) BY X + X*Y*(B1+Y*(B2+Y*(B3+Y*(B4+Y*(B5+Y*B6)))))
|--WHICH IS X + X*Y*( [B1+Z*(B3+Z*B5)] + [Y*(B2+Z*(B4+Z*B6))] )
|--WHERE Y = X*X, AND Z = Y*Y.

	cmpil		#0x3FD78000,%d0		| ...COMPACT FORM OF 2^(-40)
	blt		ATANTINY
|--COMPUTE POLYNOMIAL
|--THE ODD-INDEX TERMS ACCUMULATE IN FP1 (VIA FP3) AND THE EVEN-INDEX TERMS
|--IN FP2, AS TWO INDEPENDENT CHAINS IN Z.
	fmulx		%fp0,%fp0	| ...FP0 IS Y = X*X


	movew		#0x0000,XDCARE(%a6)	| ...CLEAR THE PAD WORD OF THE SAVED X

	fmovex		%fp0,%fp1
	fmulx		%fp1,%fp1		| ...FP1 IS Z = Y*Y

	fmoved		ATANB6,%fp2
	fmoved		ATANB5,%fp3

	fmulx		%fp1,%fp2		| ...Z*B6
	fmulx		%fp1,%fp3		| ...Z*B5

	faddd		ATANB4,%fp2	| ...B4+Z*B6
	faddd		ATANB3,%fp3	| ...B3+Z*B5

	fmulx		%fp1,%fp2		| ...Z*(B4+Z*B6)
	fmulx		%fp3,%fp1		| ...Z*(B3+Z*B5)

	faddd		ATANB2,%fp2	| ...B2+Z*(B4+Z*B6)
	faddd		ATANB1,%fp1	| ...B1+Z*(B3+Z*B5)

	fmulx		%fp0,%fp2		| ...Y*(B2+Z*(B4+Z*B6))
	fmulx		X(%a6),%fp0		| ...X*Y

	faddx		%fp2,%fp1		| ...[B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]


	fmulx		%fp1,%fp0	| ...X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))])

|--AS IN ATANMAIN, FPCR IS RELOADED FROM d1 JUST BEFORE THE FINAL ADD.
	fmovel		%d1,%FPCR		|restore users exceptions
	faddx		X(%a6),%fp0		| ...X + X*Y*(...) = ATAN(X)

	bra		t_frcinx

ATANTINY:
|--|X| < 2^(-40), ATAN(X) = X
|--THE RESULT IS X ITSELF, RELOADED FROM THE SCRATCH SLOT AFTER FPCR HAS
|--BEEN RESTORED FROM d1.
	movew		#0x0000,XDCARE(%a6)

	fmovel		%d1,%FPCR		|restore users exceptions
	fmovex		X(%a6),%fp0	|last inst - possible exception set

	bra		t_frcinx

ATANBIG:
|--REACHED FROM satan WHEN |X| >= 16. ON ENTRY FP0 IS X, d0 IS |X| IN
|--COMPACT FORM, a0 STILL POINTS AT THE ORIGINAL OPERAND AND d1 HOLDS THE
|--VALUE TO BE WRITTEN BACK TO FPCR.
|--IF |X| > 2^(100), RETURN SIGN(X)*(PI/2 - TINY). OTHERWISE,
|--RETURN SIGN(X)*PI/2 + ATAN(-1/X).
	cmpil		#0x40638000,%d0		| ...COMPACT FORM OF 2^(100)
	bgt		ATANHUGE

|--APPROXIMATE ATAN(-1/X) BY
|--X'+X'*Y*(C1+Y*(C2+Y*(C3+Y*(C4+Y*C5)))), X' = -1/X, Y = X'*X'
|--THIS CAN BE RE-WRITTEN AS
|--X'+X'*Y*( [C1+Z*(C3+Z*C5)] + [Y*(C2+Z*C4)] ), Z = Y*Y.

	fmoves		#0xBF800000,%fp1	| ...LOAD -1
	fdivx		%fp0,%fp1		| ...FP1 IS -1/X


|--DIVIDE IS STILL CRANKING

	fmovex		%fp1,%fp0		| ...FP0 IS X'
	fmulx		%fp0,%fp0		| ...FP0 IS Y = X'*X'
	fmovex		%fp1,X(%a6)		| ...X IS REALLY X'

	fmovex		%fp0,%fp1
	fmulx		%fp1,%fp1		| ...FP1 IS Z = Y*Y

	fmoved		ATANC5,%fp3
	fmoved		ATANC4,%fp2

	fmulx		%fp1,%fp3		| ...Z*C5
	fmulx		%fp1,%fp2		| ...Z*C4

	faddd		ATANC3,%fp3	| ...C3+Z*C5
	faddd		ATANC2,%fp2	| ...C2+Z*C4

	fmulx		%fp3,%fp1		| ...Z*(C3+Z*C5), FP3 RELEASED
	fmulx		%fp0,%fp2		| ...Y*(C2+Z*C4)

	faddd		ATANC1,%fp1	| ...C1+Z*(C3+Z*C5)
	fmulx		X(%a6),%fp0		| ...X'*Y

	faddx		%fp2,%fp1		| ...[Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)]


	fmulx		%fp1,%fp0		| ...X'*Y*([C1+Z*(C3+Z*C5)]+[Y*(C2+Z*C4)])
	faddx		X(%a6),%fp0		| ...X' + X'*Y*(...) = ATAN(X')

|--FPCR IS RELOADED FROM d1 BEFORE THE LAST ADD OF +-PI/2.
	fmovel		%d1,%FPCR		|restore users exceptions

|--BIT 7 OF THE FIRST BYTE AT (a0) IS THE SIGN BIT OF THE ORIGINAL X.
	btstb		#7,(%a0)
	beqs		pos_big

neg_big:
	faddx		NPIBY2,%fp0		| ...-PI/2 + ATAN(-1/X)
	bra		t_frcinx

pos_big:
	faddx		PPIBY2,%fp0		| ...+PI/2 + ATAN(-1/X)
	bra		t_frcinx

ATANHUGE:
|--|X| > 2^(100): NO POLYNOMIAL IS EVALUATED.
|--RETURN SIGN(X)*(PIBY2 - TINY) = SIGN(X)*PIBY2 - SIGN(X)*TINY
|--THE SUBTRACTION OF TINY IS THE LAST OPERATION AND IS DONE AFTER FPCR HAS
|--BEEN RELOADED FROM d1.
	btstb		#7,(%a0)		| ...SIGN BIT OF THE ORIGINAL X
	beqs		pos_huge

neg_huge:
	fmovex		NPIBY2,%fp0
	fmovel		%d1,%fpcr
	fsubx		NTINY,%fp0		| ...-PI/2 - (-TINY)
	bra		t_frcinx

pos_huge:
	fmovex		PPIBY2,%fp0
	fmovel		%d1,%fpcr
	fsubx		PTINY,%fp0		| ...+PI/2 - TINY
	bra		t_frcinx

	|end