xref: /OK3568_Linux_fs/kernel/arch/m68k/fpsp040/ssin.S (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun|
2*4882a593Smuzhiyun|	ssin.sa 3.3 7/29/91
3*4882a593Smuzhiyun|
4*4882a593Smuzhiyun|	The entry point sSIN computes the sine of an input argument
5*4882a593Smuzhiyun|	sCOS computes the cosine, and sSINCOS computes both. The
6*4882a593Smuzhiyun|	corresponding entry points with a "d" computes the same
7*4882a593Smuzhiyun|	corresponding function values for denormalized inputs.
8*4882a593Smuzhiyun|
9*4882a593Smuzhiyun|	Input: Double-extended number X in location pointed to
10*4882a593Smuzhiyun|		by address register a0.
11*4882a593Smuzhiyun|
12*4882a593Smuzhiyun|	Output: The function value sin(X) or cos(X) returned in Fp0 if SIN or
13*4882a593Smuzhiyun|		COS is requested. Otherwise, for SINCOS, sin(X) is returned
14*4882a593Smuzhiyun|		in Fp0, and cos(X) is returned in Fp1.
15*4882a593Smuzhiyun|
16*4882a593Smuzhiyun|	Modifies: Fp0 for SIN or COS; both Fp0 and Fp1 for SINCOS.
17*4882a593Smuzhiyun|
18*4882a593Smuzhiyun|	Accuracy and Monotonicity: The returned result is within 1 ulp in
19*4882a593Smuzhiyun|		64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
20*4882a593Smuzhiyun|		result is subsequently rounded to double precision. The
21*4882a593Smuzhiyun|		result is provably monotonic in double precision.
22*4882a593Smuzhiyun|
23*4882a593Smuzhiyun|	Speed: The programs sSIN and sCOS take approximately 150 cycles for
24*4882a593Smuzhiyun|		input argument X such that |X| < 15Pi, which is the usual
25*4882a593Smuzhiyun|		situation. The speed for sSINCOS is approximately 190 cycles.
26*4882a593Smuzhiyun|
27*4882a593Smuzhiyun|	Algorithm:
28*4882a593Smuzhiyun|
29*4882a593Smuzhiyun|	SIN and COS:
30*4882a593Smuzhiyun|	1. If SIN is invoked, set AdjN := 0; otherwise, set AdjN := 1.
31*4882a593Smuzhiyun|
32*4882a593Smuzhiyun|	2. If |X| >= 15Pi or |X| < 2**(-40), go to 7.
33*4882a593Smuzhiyun|
34*4882a593Smuzhiyun|	3. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let
35*4882a593Smuzhiyun|		k = N mod 4, so in particular, k = 0,1,2,or 3. Overwrite
36*4882a593Smuzhiyun|		k by k := k + AdjN.
37*4882a593Smuzhiyun|
38*4882a593Smuzhiyun|	4. If k is even, go to 6.
39*4882a593Smuzhiyun|
40*4882a593Smuzhiyun|	5. (k is odd) Set j := (k-1)/2, sgn := (-1)**j. Return sgn*cos(r)
41*4882a593Smuzhiyun|		where cos(r) is approximated by an even polynomial in r,
42*4882a593Smuzhiyun|		1 + r*r*(B1+s*(B2+ ... + s*B8)),	s = r*r.
43*4882a593Smuzhiyun|		Exit.
44*4882a593Smuzhiyun|
45*4882a593Smuzhiyun|	6. (k is even) Set j := k/2, sgn := (-1)**j. Return sgn*sin(r)
46*4882a593Smuzhiyun|		where sin(r) is approximated by an odd polynomial in r
47*4882a593Smuzhiyun|		r + r*s*(A1+s*(A2+ ... + s*A7)),	s = r*r.
48*4882a593Smuzhiyun|		Exit.
49*4882a593Smuzhiyun|
50*4882a593Smuzhiyun|	7. If |X| > 1, go to 9.
51*4882a593Smuzhiyun|
52*4882a593Smuzhiyun|	8. (|X|<2**(-40)) If SIN is invoked, return X; otherwise return 1.
53*4882a593Smuzhiyun|
54*4882a593Smuzhiyun|	9. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 3.
55*4882a593Smuzhiyun|
56*4882a593Smuzhiyun|	SINCOS:
57*4882a593Smuzhiyun|	1. If |X| >= 15Pi or |X| < 2**(-40), go to 6.
58*4882a593Smuzhiyun|
59*4882a593Smuzhiyun|	2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let
60*4882a593Smuzhiyun|		k = N mod 4, so in particular, k = 0,1,2,or 3.
61*4882a593Smuzhiyun|
62*4882a593Smuzhiyun|	3. If k is even, go to 5.
63*4882a593Smuzhiyun|
64*4882a593Smuzhiyun|	4. (k is odd) Set j1 := (k-1)/2, j2 := j1 (EOR) (k mod 2), i.e.
65*4882a593Smuzhiyun|		j1 exclusive or with the l.s.b. of k.
66*4882a593Smuzhiyun|		sgn1 := (-1)**j1, sgn2 := (-1)**j2.
67*4882a593Smuzhiyun|		SIN(X) = sgn1 * cos(r) and COS(X) = sgn2*sin(r) where
68*4882a593Smuzhiyun|		sin(r) and cos(r) are computed as odd and even polynomials
69*4882a593Smuzhiyun|		in r, respectively. Exit
70*4882a593Smuzhiyun|
71*4882a593Smuzhiyun|	5. (k is even) Set j1 := k/2, sgn1 := (-1)**j1.
72*4882a593Smuzhiyun|		SIN(X) = sgn1 * sin(r) and COS(X) = sgn1*cos(r) where
73*4882a593Smuzhiyun|		sin(r) and cos(r) are computed as odd and even polynomials
74*4882a593Smuzhiyun|		in r, respectively. Exit
75*4882a593Smuzhiyun|
76*4882a593Smuzhiyun|	6. If |X| > 1, go to 8.
77*4882a593Smuzhiyun|
78*4882a593Smuzhiyun|	7. (|X|<2**(-40)) SIN(X) = X and COS(X) = 1. Exit.
79*4882a593Smuzhiyun|
80*4882a593Smuzhiyun|	8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 2.
81*4882a593Smuzhiyun|
82*4882a593Smuzhiyun
83*4882a593Smuzhiyun|		Copyright (C) Motorola, Inc. 1990
84*4882a593Smuzhiyun|			All Rights Reserved
85*4882a593Smuzhiyun|
86*4882a593Smuzhiyun|       For details on the license for this file, please see the
87*4882a593Smuzhiyun|       file, README, in this same directory.
88*4882a593Smuzhiyun
89*4882a593Smuzhiyun|SSIN	idnt	2,1 | Motorola 040 Floating Point Software Package
90*4882a593Smuzhiyun
91*4882a593Smuzhiyun	|section	8
92*4882a593Smuzhiyun
93*4882a593Smuzhiyun#include "fpsp.h"
94*4882a593Smuzhiyun
95*4882a593SmuzhiyunBOUNDS1:	.long 0x3FD78000,0x4004BC7E
96*4882a593SmuzhiyunTWOBYPI:	.long 0x3FE45F30,0x6DC9C883
97*4882a593Smuzhiyun
98*4882a593SmuzhiyunSINA7:	.long 0xBD6AAA77,0xCCC994F5
99*4882a593SmuzhiyunSINA6:	.long 0x3DE61209,0x7AAE8DA1
100*4882a593Smuzhiyun
101*4882a593SmuzhiyunSINA5:	.long 0xBE5AE645,0x2A118AE4
102*4882a593SmuzhiyunSINA4:	.long 0x3EC71DE3,0xA5341531
103*4882a593Smuzhiyun
104*4882a593SmuzhiyunSINA3:	.long 0xBF2A01A0,0x1A018B59,0x00000000,0x00000000
105*4882a593Smuzhiyun
106*4882a593SmuzhiyunSINA2:	.long 0x3FF80000,0x88888888,0x888859AF,0x00000000
107*4882a593Smuzhiyun
108*4882a593SmuzhiyunSINA1:	.long 0xBFFC0000,0xAAAAAAAA,0xAAAAAA99,0x00000000
109*4882a593Smuzhiyun
110*4882a593SmuzhiyunCOSB8:	.long 0x3D2AC4D0,0xD6011EE3
111*4882a593SmuzhiyunCOSB7:	.long 0xBDA9396F,0x9F45AC19
112*4882a593Smuzhiyun
113*4882a593SmuzhiyunCOSB6:	.long 0x3E21EED9,0x0612C972
114*4882a593SmuzhiyunCOSB5:	.long 0xBE927E4F,0xB79D9FCF
115*4882a593Smuzhiyun
116*4882a593SmuzhiyunCOSB4:	.long 0x3EFA01A0,0x1A01D423,0x00000000,0x00000000
117*4882a593Smuzhiyun
118*4882a593SmuzhiyunCOSB3:	.long 0xBFF50000,0xB60B60B6,0x0B61D438,0x00000000
119*4882a593Smuzhiyun
120*4882a593SmuzhiyunCOSB2:	.long 0x3FFA0000,0xAAAAAAAA,0xAAAAAB5E
121*4882a593SmuzhiyunCOSB1:	.long 0xBF000000
122*4882a593Smuzhiyun
123*4882a593SmuzhiyunINVTWOPI: .long 0x3FFC0000,0xA2F9836E,0x4E44152A
124*4882a593Smuzhiyun
125*4882a593SmuzhiyunTWOPI1:	.long 0x40010000,0xC90FDAA2,0x00000000,0x00000000
126*4882a593SmuzhiyunTWOPI2:	.long 0x3FDF0000,0x85A308D4,0x00000000,0x00000000
127*4882a593Smuzhiyun
128*4882a593Smuzhiyun	|xref	PITBL
129*4882a593Smuzhiyun
130*4882a593Smuzhiyun	.set	INARG,FP_SCR4
131*4882a593Smuzhiyun
132*4882a593Smuzhiyun	.set	X,FP_SCR5
133*4882a593Smuzhiyun	.set	XDCARE,X+2
134*4882a593Smuzhiyun	.set	XFRAC,X+4
135*4882a593Smuzhiyun
136*4882a593Smuzhiyun	.set	RPRIME,FP_SCR1
137*4882a593Smuzhiyun	.set	SPRIME,FP_SCR2
138*4882a593Smuzhiyun
139*4882a593Smuzhiyun	.set	POSNEG1,L_SCR1
140*4882a593Smuzhiyun	.set	TWOTO63,L_SCR1
141*4882a593Smuzhiyun
142*4882a593Smuzhiyun	.set	ENDFLAG,L_SCR2
143*4882a593Smuzhiyun	.set	N,L_SCR2
144*4882a593Smuzhiyun
145*4882a593Smuzhiyun	.set	ADJN,L_SCR3
146*4882a593Smuzhiyun
147*4882a593Smuzhiyun	| xref	t_frcinx
148*4882a593Smuzhiyun	|xref	t_extdnrm
149*4882a593Smuzhiyun	|xref	sto_cos
150*4882a593Smuzhiyun
151*4882a593Smuzhiyun	.global	ssind
152*4882a593Smuzhiyunssind:
153*4882a593Smuzhiyun|--SIN(X) = X FOR DENORMALIZED X
154*4882a593Smuzhiyun	bra		t_extdnrm
155*4882a593Smuzhiyun
156*4882a593Smuzhiyun	.global	scosd
157*4882a593Smuzhiyunscosd:
158*4882a593Smuzhiyun|--COS(X) = 1 FOR DENORMALIZED X
159*4882a593Smuzhiyun
160*4882a593Smuzhiyun	fmoves		#0x3F800000,%fp0
161*4882a593Smuzhiyun|
162*4882a593Smuzhiyun|	9D25B Fix: Sometimes the previous fmove.s sets fpsr bits
163*4882a593Smuzhiyun|
164*4882a593Smuzhiyun	fmovel		#0,%fpsr
165*4882a593Smuzhiyun|
166*4882a593Smuzhiyun	bra		t_frcinx
167*4882a593Smuzhiyun
168*4882a593Smuzhiyun	.global	ssin
169*4882a593Smuzhiyunssin:
170*4882a593Smuzhiyun|--SET ADJN TO 0
171*4882a593Smuzhiyun	movel		#0,ADJN(%a6)
172*4882a593Smuzhiyun	bras		SINBGN
173*4882a593Smuzhiyun
174*4882a593Smuzhiyun	.global	scos
175*4882a593Smuzhiyunscos:
176*4882a593Smuzhiyun|--SET ADJN TO 1
177*4882a593Smuzhiyun	movel		#1,ADJN(%a6)
178*4882a593Smuzhiyun
179*4882a593SmuzhiyunSINBGN:
180*4882a593Smuzhiyun|--SAVE FPCR, FP1. CHECK IF |X| IS TOO SMALL OR LARGE
181*4882a593Smuzhiyun
182*4882a593Smuzhiyun	fmovex		(%a0),%fp0	| ...LOAD INPUT
183*4882a593Smuzhiyun
184*4882a593Smuzhiyun	movel		(%a0),%d0
185*4882a593Smuzhiyun	movew		4(%a0),%d0
186*4882a593Smuzhiyun	fmovex		%fp0,X(%a6)
187*4882a593Smuzhiyun	andil		#0x7FFFFFFF,%d0		| ...COMPACTIFY X
188*4882a593Smuzhiyun
189*4882a593Smuzhiyun	cmpil		#0x3FD78000,%d0		| ...|X| >= 2**(-40)?
190*4882a593Smuzhiyun	bges		SOK1
191*4882a593Smuzhiyun	bra		SINSM
192*4882a593Smuzhiyun
193*4882a593SmuzhiyunSOK1:
194*4882a593Smuzhiyun	cmpil		#0x4004BC7E,%d0		| ...|X| < 15 PI?
195*4882a593Smuzhiyun	blts		SINMAIN
196*4882a593Smuzhiyun	bra		REDUCEX
197*4882a593Smuzhiyun
198*4882a593SmuzhiyunSINMAIN:
199*4882a593Smuzhiyun|--THIS IS THE USUAL CASE, |X| <= 15 PI.
200*4882a593Smuzhiyun|--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
201*4882a593Smuzhiyun	fmovex		%fp0,%fp1
202*4882a593Smuzhiyun	fmuld		TWOBYPI,%fp1	| ...X*2/PI
203*4882a593Smuzhiyun
204*4882a593Smuzhiyun|--HIDE THE NEXT THREE INSTRUCTIONS
205*4882a593Smuzhiyun	lea		PITBL+0x200,%a1 | ...TABLE OF N*PI/2, N = -32,...,32
206*4882a593Smuzhiyun
207*4882a593Smuzhiyun
208*4882a593Smuzhiyun|--FP1 IS NOW READY
209*4882a593Smuzhiyun	fmovel		%fp1,N(%a6)		| ...CONVERT TO INTEGER
210*4882a593Smuzhiyun
211*4882a593Smuzhiyun	movel		N(%a6),%d0
212*4882a593Smuzhiyun	asll		#4,%d0
213*4882a593Smuzhiyun	addal		%d0,%a1	| ...A1 IS THE ADDRESS OF N*PIBY2
214*4882a593Smuzhiyun|				...WHICH IS IN TWO PIECES Y1 & Y2
215*4882a593Smuzhiyun
216*4882a593Smuzhiyun	fsubx		(%a1)+,%fp0	| ...X-Y1
217*4882a593Smuzhiyun|--HIDE THE NEXT ONE
218*4882a593Smuzhiyun	fsubs		(%a1),%fp0	| ...FP0 IS R = (X-Y1)-Y2
219*4882a593Smuzhiyun
220*4882a593SmuzhiyunSINCONT:
221*4882a593Smuzhiyun|--continuation from REDUCEX
222*4882a593Smuzhiyun
223*4882a593Smuzhiyun|--GET N+ADJN AND SEE IF SIN(R) OR COS(R) IS NEEDED
224*4882a593Smuzhiyun	movel		N(%a6),%d0
225*4882a593Smuzhiyun	addl		ADJN(%a6),%d0	| ...SEE IF D0 IS ODD OR EVEN
226*4882a593Smuzhiyun	rorl		#1,%d0	| ...D0 WAS ODD IFF D0 IS NEGATIVE
227*4882a593Smuzhiyun	cmpil		#0,%d0
228*4882a593Smuzhiyun	blt		COSPOLY
229*4882a593Smuzhiyun
230*4882a593SmuzhiyunSINPOLY:
231*4882a593Smuzhiyun|--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J.
232*4882a593Smuzhiyun|--THEN WE RETURN	SGN*SIN(R). SGN*SIN(R) IS COMPUTED BY
233*4882a593Smuzhiyun|--R' + R'*S*(A1 + S(A2 + S(A3 + S(A4 + ... + SA7)))), WHERE
234*4882a593Smuzhiyun|--R' = SGN*R, S=R*R. THIS CAN BE REWRITTEN AS
235*4882a593Smuzhiyun|--R' + R'*S*( [A1+T(A3+T(A5+TA7))] + [S(A2+T(A4+TA6))])
236*4882a593Smuzhiyun|--WHERE T=S*S.
237*4882a593Smuzhiyun|--NOTE THAT A3 THROUGH A7 ARE STORED IN DOUBLE PRECISION
238*4882a593Smuzhiyun|--WHILE A1 AND A2 ARE IN DOUBLE-EXTENDED FORMAT.
239*4882a593Smuzhiyun	fmovex		%fp0,X(%a6)	| ...X IS R
240*4882a593Smuzhiyun	fmulx		%fp0,%fp0	| ...FP0 IS S
241*4882a593Smuzhiyun|---HIDE THE NEXT TWO WHILE WAITING FOR FP0
242*4882a593Smuzhiyun	fmoved		SINA7,%fp3
243*4882a593Smuzhiyun	fmoved		SINA6,%fp2
244*4882a593Smuzhiyun|--FP0 IS NOW READY
245*4882a593Smuzhiyun	fmovex		%fp0,%fp1
246*4882a593Smuzhiyun	fmulx		%fp1,%fp1	| ...FP1 IS T
247*4882a593Smuzhiyun|--HIDE THE NEXT TWO WHILE WAITING FOR FP1
248*4882a593Smuzhiyun
249*4882a593Smuzhiyun	rorl		#1,%d0
250*4882a593Smuzhiyun	andil		#0x80000000,%d0
251*4882a593Smuzhiyun|				...LEAST SIG. BIT OF D0 IN SIGN POSITION
252*4882a593Smuzhiyun	eorl		%d0,X(%a6)	| ...X IS NOW R'= SGN*R
253*4882a593Smuzhiyun
254*4882a593Smuzhiyun	fmulx		%fp1,%fp3	| ...TA7
255*4882a593Smuzhiyun	fmulx		%fp1,%fp2	| ...TA6
256*4882a593Smuzhiyun
257*4882a593Smuzhiyun	faddd		SINA5,%fp3 | ...A5+TA7
258*4882a593Smuzhiyun	faddd		SINA4,%fp2 | ...A4+TA6
259*4882a593Smuzhiyun
260*4882a593Smuzhiyun	fmulx		%fp1,%fp3	| ...T(A5+TA7)
261*4882a593Smuzhiyun	fmulx		%fp1,%fp2	| ...T(A4+TA6)
262*4882a593Smuzhiyun
263*4882a593Smuzhiyun	faddd		SINA3,%fp3 | ...A3+T(A5+TA7)
264*4882a593Smuzhiyun	faddx		SINA2,%fp2 | ...A2+T(A4+TA6)
265*4882a593Smuzhiyun
266*4882a593Smuzhiyun	fmulx		%fp3,%fp1	| ...T(A3+T(A5+TA7))
267*4882a593Smuzhiyun
268*4882a593Smuzhiyun	fmulx		%fp0,%fp2	| ...S(A2+T(A4+TA6))
269*4882a593Smuzhiyun	faddx		SINA1,%fp1 | ...A1+T(A3+T(A5+TA7))
270*4882a593Smuzhiyun	fmulx		X(%a6),%fp0	| ...R'*S
271*4882a593Smuzhiyun
272*4882a593Smuzhiyun	faddx		%fp2,%fp1	| ...[A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))]
273*4882a593Smuzhiyun|--FP3 RELEASED, RESTORE NOW AND TAKE SOME ADVANTAGE OF HIDING
274*4882a593Smuzhiyun|--FP2 RELEASED, RESTORE NOW AND TAKE FULL ADVANTAGE OF HIDING
275*4882a593Smuzhiyun
276*4882a593Smuzhiyun
277*4882a593Smuzhiyun	fmulx		%fp1,%fp0		| ...SIN(R')-R'
278*4882a593Smuzhiyun|--FP1 RELEASED.
279*4882a593Smuzhiyun
280*4882a593Smuzhiyun	fmovel		%d1,%FPCR		|restore users exceptions
281*4882a593Smuzhiyun	faddx		X(%a6),%fp0		|last inst - possible exception set
282*4882a593Smuzhiyun	bra		t_frcinx
283*4882a593Smuzhiyun
284*4882a593Smuzhiyun
285*4882a593SmuzhiyunCOSPOLY:
286*4882a593Smuzhiyun|--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J.
287*4882a593Smuzhiyun|--THEN WE RETURN	SGN*COS(R). SGN*COS(R) IS COMPUTED BY
288*4882a593Smuzhiyun|--SGN + S'*(B1 + S(B2 + S(B3 + S(B4 + ... + SB8)))), WHERE
289*4882a593Smuzhiyun|--S=R*R AND S'=SGN*S. THIS CAN BE REWRITTEN AS
290*4882a593Smuzhiyun|--SGN + S'*([B1+T(B3+T(B5+TB7))] + [S(B2+T(B4+T(B6+TB8)))])
291*4882a593Smuzhiyun|--WHERE T=S*S.
292*4882a593Smuzhiyun|--NOTE THAT B4 THROUGH B8 ARE STORED IN DOUBLE PRECISION
293*4882a593Smuzhiyun|--WHILE B2 AND B3 ARE IN DOUBLE-EXTENDED FORMAT, B1 IS -1/2
294*4882a593Smuzhiyun|--AND IS THEREFORE STORED AS SINGLE PRECISION.
295*4882a593Smuzhiyun
296*4882a593Smuzhiyun	fmulx		%fp0,%fp0	| ...FP0 IS S
297*4882a593Smuzhiyun|---HIDE THE NEXT TWO WHILE WAITING FOR FP0
298*4882a593Smuzhiyun	fmoved		COSB8,%fp2
299*4882a593Smuzhiyun	fmoved		COSB7,%fp3
300*4882a593Smuzhiyun|--FP0 IS NOW READY
301*4882a593Smuzhiyun	fmovex		%fp0,%fp1
302*4882a593Smuzhiyun	fmulx		%fp1,%fp1	| ...FP1 IS T
303*4882a593Smuzhiyun|--HIDE THE NEXT TWO WHILE WAITING FOR FP1
304*4882a593Smuzhiyun	fmovex		%fp0,X(%a6)	| ...X IS S
305*4882a593Smuzhiyun	rorl		#1,%d0
306*4882a593Smuzhiyun	andil		#0x80000000,%d0
307*4882a593Smuzhiyun|			...LEAST SIG. BIT OF D0 IN SIGN POSITION
308*4882a593Smuzhiyun
309*4882a593Smuzhiyun	fmulx		%fp1,%fp2	| ...TB8
310*4882a593Smuzhiyun|--HIDE THE NEXT TWO WHILE WAITING FOR THE XU
311*4882a593Smuzhiyun	eorl		%d0,X(%a6)	| ...X IS NOW S'= SGN*S
312*4882a593Smuzhiyun	andil		#0x80000000,%d0
313*4882a593Smuzhiyun
314*4882a593Smuzhiyun	fmulx		%fp1,%fp3	| ...TB7
315*4882a593Smuzhiyun|--HIDE THE NEXT TWO WHILE WAITING FOR THE XU
316*4882a593Smuzhiyun	oril		#0x3F800000,%d0	| ...D0 IS SGN IN SINGLE
317*4882a593Smuzhiyun	movel		%d0,POSNEG1(%a6)
318*4882a593Smuzhiyun
319*4882a593Smuzhiyun	faddd		COSB6,%fp2 | ...B6+TB8
320*4882a593Smuzhiyun	faddd		COSB5,%fp3 | ...B5+TB7
321*4882a593Smuzhiyun
322*4882a593Smuzhiyun	fmulx		%fp1,%fp2	| ...T(B6+TB8)
323*4882a593Smuzhiyun	fmulx		%fp1,%fp3	| ...T(B5+TB7)
324*4882a593Smuzhiyun
325*4882a593Smuzhiyun	faddd		COSB4,%fp2 | ...B4+T(B6+TB8)
326*4882a593Smuzhiyun	faddx		COSB3,%fp3 | ...B3+T(B5+TB7)
327*4882a593Smuzhiyun
328*4882a593Smuzhiyun	fmulx		%fp1,%fp2	| ...T(B4+T(B6+TB8))
329*4882a593Smuzhiyun	fmulx		%fp3,%fp1	| ...T(B3+T(B5+TB7))
330*4882a593Smuzhiyun
331*4882a593Smuzhiyun	faddx		COSB2,%fp2 | ...B2+T(B4+T(B6+TB8))
332*4882a593Smuzhiyun	fadds		COSB1,%fp1 | ...B1+T(B3+T(B5+TB7))
333*4882a593Smuzhiyun
334*4882a593Smuzhiyun	fmulx		%fp2,%fp0	| ...S(B2+T(B4+T(B6+TB8)))
335*4882a593Smuzhiyun|--FP3 RELEASED, RESTORE NOW AND TAKE SOME ADVANTAGE OF HIDING
336*4882a593Smuzhiyun|--FP2 RELEASED.
337*4882a593Smuzhiyun
338*4882a593Smuzhiyun
339*4882a593Smuzhiyun	faddx		%fp1,%fp0
340*4882a593Smuzhiyun|--FP1 RELEASED
341*4882a593Smuzhiyun
342*4882a593Smuzhiyun	fmulx		X(%a6),%fp0
343*4882a593Smuzhiyun
344*4882a593Smuzhiyun	fmovel		%d1,%FPCR		|restore users exceptions
345*4882a593Smuzhiyun	fadds		POSNEG1(%a6),%fp0	|last inst - possible exception set
346*4882a593Smuzhiyun	bra		t_frcinx
347*4882a593Smuzhiyun
348*4882a593Smuzhiyun
349*4882a593SmuzhiyunSINBORS:
350*4882a593Smuzhiyun|--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
351*4882a593Smuzhiyun|--IF |X| < 2**(-40), RETURN X OR 1.
352*4882a593Smuzhiyun	cmpil		#0x3FFF8000,%d0
353*4882a593Smuzhiyun	bgts		REDUCEX
354*4882a593Smuzhiyun
355*4882a593Smuzhiyun
356*4882a593SmuzhiyunSINSM:
357*4882a593Smuzhiyun	movel		ADJN(%a6),%d0
358*4882a593Smuzhiyun	cmpil		#0,%d0
359*4882a593Smuzhiyun	bgts		COSTINY
360*4882a593Smuzhiyun
361*4882a593SmuzhiyunSINTINY:
362*4882a593Smuzhiyun	movew		#0x0000,XDCARE(%a6)	| ...JUST IN CASE
363*4882a593Smuzhiyun	fmovel		%d1,%FPCR		|restore users exceptions
364*4882a593Smuzhiyun	fmovex		X(%a6),%fp0		|last inst - possible exception set
365*4882a593Smuzhiyun	bra		t_frcinx
366*4882a593Smuzhiyun
367*4882a593Smuzhiyun
368*4882a593SmuzhiyunCOSTINY:
369*4882a593Smuzhiyun	fmoves		#0x3F800000,%fp0
370*4882a593Smuzhiyun
371*4882a593Smuzhiyun	fmovel		%d1,%FPCR		|restore users exceptions
372*4882a593Smuzhiyun	fsubs		#0x00800000,%fp0	|last inst - possible exception set
373*4882a593Smuzhiyun	bra		t_frcinx
374*4882a593Smuzhiyun
375*4882a593Smuzhiyun
376*4882a593SmuzhiyunREDUCEX:
377*4882a593Smuzhiyun|--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
378*4882a593Smuzhiyun|--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
379*4882a593Smuzhiyun|--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
380*4882a593Smuzhiyun
381*4882a593Smuzhiyun	fmovemx	%fp2-%fp5,-(%a7)	| ...save FP2 through FP5
382*4882a593Smuzhiyun	movel		%d2,-(%a7)
383*4882a593Smuzhiyun        fmoves         #0x00000000,%fp1
384*4882a593Smuzhiyun|--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that
385*4882a593Smuzhiyun|--there is a danger of unwanted overflow in first LOOP iteration.  In this
386*4882a593Smuzhiyun|--case, reduce argument by one remainder step to make subsequent reduction
387*4882a593Smuzhiyun|--safe.
388*4882a593Smuzhiyun	cmpil	#0x7ffeffff,%d0		|is argument dangerously large?
389*4882a593Smuzhiyun	bnes	LOOP
390*4882a593Smuzhiyun	movel	#0x7ffe0000,FP_SCR2(%a6)	|yes
391*4882a593Smuzhiyun|					;create 2**16383*PI/2
392*4882a593Smuzhiyun	movel	#0xc90fdaa2,FP_SCR2+4(%a6)
393*4882a593Smuzhiyun	clrl	FP_SCR2+8(%a6)
394*4882a593Smuzhiyun	ftstx	%fp0			|test sign of argument
395*4882a593Smuzhiyun	movel	#0x7fdc0000,FP_SCR3(%a6)	|create low half of 2**16383*
396*4882a593Smuzhiyun|					;PI/2 at FP_SCR3
397*4882a593Smuzhiyun	movel	#0x85a308d3,FP_SCR3+4(%a6)
398*4882a593Smuzhiyun	clrl   FP_SCR3+8(%a6)
399*4882a593Smuzhiyun	fblt	red_neg
400*4882a593Smuzhiyun	orw	#0x8000,FP_SCR2(%a6)	|positive arg
401*4882a593Smuzhiyun	orw	#0x8000,FP_SCR3(%a6)
402*4882a593Smuzhiyunred_neg:
403*4882a593Smuzhiyun	faddx  FP_SCR2(%a6),%fp0		|high part of reduction is exact
404*4882a593Smuzhiyun	fmovex  %fp0,%fp1		|save high result in fp1
405*4882a593Smuzhiyun	faddx  FP_SCR3(%a6),%fp0		|low part of reduction
406*4882a593Smuzhiyun	fsubx  %fp0,%fp1			|determine low component of result
407*4882a593Smuzhiyun	faddx  FP_SCR3(%a6),%fp1		|fp0/fp1 are reduced argument.
408*4882a593Smuzhiyun
409*4882a593Smuzhiyun|--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
410*4882a593Smuzhiyun|--integer quotient will be stored in N
411*4882a593Smuzhiyun|--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)
412*4882a593Smuzhiyun
413*4882a593SmuzhiyunLOOP:
414*4882a593Smuzhiyun	fmovex		%fp0,INARG(%a6)	| ...+-2**K * F, 1 <= F < 2
415*4882a593Smuzhiyun	movew		INARG(%a6),%d0
416*4882a593Smuzhiyun        movel          %d0,%a1		| ...save a copy of D0
417*4882a593Smuzhiyun	andil		#0x00007FFF,%d0
418*4882a593Smuzhiyun	subil		#0x00003FFF,%d0	| ...D0 IS K
419*4882a593Smuzhiyun	cmpil		#28,%d0
420*4882a593Smuzhiyun	bles		LASTLOOP
421*4882a593SmuzhiyunCONTLOOP:
422*4882a593Smuzhiyun	subil		#27,%d0	 | ...D0 IS L := K-27
423*4882a593Smuzhiyun	movel		#0,ENDFLAG(%a6)
424*4882a593Smuzhiyun	bras		WORK
425*4882a593SmuzhiyunLASTLOOP:
426*4882a593Smuzhiyun	clrl		%d0		| ...D0 IS L := 0
427*4882a593Smuzhiyun	movel		#1,ENDFLAG(%a6)
428*4882a593Smuzhiyun
429*4882a593SmuzhiyunWORK:
430*4882a593Smuzhiyun|--FIND THE REMAINDER OF (R,r) W.R.T.	2**L * (PI/2). L IS SO CHOSEN
431*4882a593Smuzhiyun|--THAT	INT( X * (2/PI) / 2**(L) ) < 2**29.
432*4882a593Smuzhiyun
433*4882a593Smuzhiyun|--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
434*4882a593Smuzhiyun|--2**L * (PIby2_1), 2**L * (PIby2_2)
435*4882a593Smuzhiyun
436*4882a593Smuzhiyun	movel		#0x00003FFE,%d2	| ...BIASED EXPO OF 2/PI
437*4882a593Smuzhiyun	subl		%d0,%d2		| ...BIASED EXPO OF 2**(-L)*(2/PI)
438*4882a593Smuzhiyun
439*4882a593Smuzhiyun	movel		#0xA2F9836E,FP_SCR1+4(%a6)
440*4882a593Smuzhiyun	movel		#0x4E44152A,FP_SCR1+8(%a6)
441*4882a593Smuzhiyun	movew		%d2,FP_SCR1(%a6)	| ...FP_SCR1 is 2**(-L)*(2/PI)
442*4882a593Smuzhiyun
443*4882a593Smuzhiyun	fmovex		%fp0,%fp2
444*4882a593Smuzhiyun	fmulx		FP_SCR1(%a6),%fp2
445*4882a593Smuzhiyun|--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
446*4882a593Smuzhiyun|--FLOATING POINT FORMAT, THE TWO FMOVE'S	FMOVE.L FP <--> N
447*4882a593Smuzhiyun|--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
448*4882a593Smuzhiyun|--(SIGN(INARG)*2**63	+	FP2) - SIGN(INARG)*2**63 WILL GIVE
449*4882a593Smuzhiyun|--US THE DESIRED VALUE IN FLOATING POINT.
450*4882a593Smuzhiyun
451*4882a593Smuzhiyun|--HIDE SIX CYCLES OF INSTRUCTION
452*4882a593Smuzhiyun        movel		%a1,%d2
453*4882a593Smuzhiyun        swap		%d2
454*4882a593Smuzhiyun	andil		#0x80000000,%d2
455*4882a593Smuzhiyun	oril		#0x5F000000,%d2	| ...D2 IS SIGN(INARG)*2**63 IN SGL
456*4882a593Smuzhiyun	movel		%d2,TWOTO63(%a6)
457*4882a593Smuzhiyun
458*4882a593Smuzhiyun	movel		%d0,%d2
459*4882a593Smuzhiyun	addil		#0x00003FFF,%d2	| ...BIASED EXPO OF 2**L * (PI/2)
460*4882a593Smuzhiyun
461*4882a593Smuzhiyun|--FP2 IS READY
462*4882a593Smuzhiyun	fadds		TWOTO63(%a6),%fp2	| ...THE FRACTIONAL PART OF FP1 IS ROUNDED
463*4882a593Smuzhiyun
464*4882a593Smuzhiyun|--HIDE 4 CYCLES OF INSTRUCTION; creating 2**(L)*Piby2_1  and  2**(L)*Piby2_2
465*4882a593Smuzhiyun        movew		%d2,FP_SCR2(%a6)
466*4882a593Smuzhiyun	clrw           FP_SCR2+2(%a6)
467*4882a593Smuzhiyun	movel		#0xC90FDAA2,FP_SCR2+4(%a6)
468*4882a593Smuzhiyun	clrl		FP_SCR2+8(%a6)		| ...FP_SCR2 is  2**(L) * Piby2_1
469*4882a593Smuzhiyun
470*4882a593Smuzhiyun|--FP2 IS READY
471*4882a593Smuzhiyun	fsubs		TWOTO63(%a6),%fp2		| ...FP2 is N
472*4882a593Smuzhiyun
473*4882a593Smuzhiyun	addil		#0x00003FDD,%d0
474*4882a593Smuzhiyun        movew		%d0,FP_SCR3(%a6)
475*4882a593Smuzhiyun	clrw           FP_SCR3+2(%a6)
476*4882a593Smuzhiyun	movel		#0x85A308D3,FP_SCR3+4(%a6)
477*4882a593Smuzhiyun	clrl		FP_SCR3+8(%a6)		| ...FP_SCR3 is 2**(L) * Piby2_2
478*4882a593Smuzhiyun
479*4882a593Smuzhiyun	movel		ENDFLAG(%a6),%d0
480*4882a593Smuzhiyun
481*4882a593Smuzhiyun|--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
482*4882a593Smuzhiyun|--P2 = 2**(L) * Piby2_2
483*4882a593Smuzhiyun	fmovex		%fp2,%fp4
484*4882a593Smuzhiyun	fmulx		FP_SCR2(%a6),%fp4		| ...W = N*P1
485*4882a593Smuzhiyun	fmovex		%fp2,%fp5
486*4882a593Smuzhiyun	fmulx		FP_SCR3(%a6),%fp5		| ...w = N*P2
487*4882a593Smuzhiyun	fmovex		%fp4,%fp3
488*4882a593Smuzhiyun|--we want P+p = W+w  but  |p| <= half ulp of P
489*4882a593Smuzhiyun|--Then, we need to compute  A := R-P   and  a := r-p
490*4882a593Smuzhiyun	faddx		%fp5,%fp3			| ...FP3 is P
491*4882a593Smuzhiyun	fsubx		%fp3,%fp4			| ...W-P
492*4882a593Smuzhiyun
493*4882a593Smuzhiyun	fsubx		%fp3,%fp0			| ...FP0 is A := R - P
494*4882a593Smuzhiyun        faddx		%fp5,%fp4			| ...FP4 is p = (W-P)+w
495*4882a593Smuzhiyun
496*4882a593Smuzhiyun	fmovex		%fp0,%fp3			| ...FP3 A
497*4882a593Smuzhiyun	fsubx		%fp4,%fp1			| ...FP1 is a := r - p
498*4882a593Smuzhiyun
499*4882a593Smuzhiyun|--Now we need to normalize (A,a) to  "new (R,r)" where R+r = A+a but
500*4882a593Smuzhiyun|--|r| <= half ulp of R.
501*4882a593Smuzhiyun	faddx		%fp1,%fp0			| ...FP0 is R := A+a
502*4882a593Smuzhiyun|--No need to calculate r if this is the last loop
503*4882a593Smuzhiyun	cmpil		#0,%d0
504*4882a593Smuzhiyun	bgt		RESTORE
505*4882a593Smuzhiyun
506*4882a593Smuzhiyun|--Need to calculate r
507*4882a593Smuzhiyun	fsubx		%fp0,%fp3			| ...A-R
508*4882a593Smuzhiyun	faddx		%fp3,%fp1			| ...FP1 is r := (A-R)+a
509*4882a593Smuzhiyun	bra		LOOP
510*4882a593Smuzhiyun
511*4882a593SmuzhiyunRESTORE:
512*4882a593Smuzhiyun        fmovel		%fp2,N(%a6)
513*4882a593Smuzhiyun	movel		(%a7)+,%d2
514*4882a593Smuzhiyun	fmovemx	(%a7)+,%fp2-%fp5
515*4882a593Smuzhiyun
516*4882a593Smuzhiyun
517*4882a593Smuzhiyun	movel		ADJN(%a6),%d0
518*4882a593Smuzhiyun	cmpil		#4,%d0
519*4882a593Smuzhiyun
520*4882a593Smuzhiyun	blt		SINCONT
521*4882a593Smuzhiyun	bras		SCCONT
522*4882a593Smuzhiyun
523*4882a593Smuzhiyun	.global	ssincosd
524*4882a593Smuzhiyunssincosd:
525*4882a593Smuzhiyun|--SIN AND COS OF X FOR DENORMALIZED X
526*4882a593Smuzhiyun
527*4882a593Smuzhiyun	fmoves		#0x3F800000,%fp1
528*4882a593Smuzhiyun	bsr		sto_cos		|store cosine result
529*4882a593Smuzhiyun	bra		t_extdnrm
530*4882a593Smuzhiyun
531*4882a593Smuzhiyun	.global	ssincos
532*4882a593Smuzhiyunssincos:
533*4882a593Smuzhiyun|--SET ADJN TO 4
534*4882a593Smuzhiyun	movel		#4,ADJN(%a6)
535*4882a593Smuzhiyun
536*4882a593Smuzhiyun	fmovex		(%a0),%fp0	| ...LOAD INPUT
537*4882a593Smuzhiyun
538*4882a593Smuzhiyun	movel		(%a0),%d0
539*4882a593Smuzhiyun	movew		4(%a0),%d0
540*4882a593Smuzhiyun	fmovex		%fp0,X(%a6)
541*4882a593Smuzhiyun	andil		#0x7FFFFFFF,%d0		| ...COMPACTIFY X
542*4882a593Smuzhiyun
543*4882a593Smuzhiyun	cmpil		#0x3FD78000,%d0		| ...|X| >= 2**(-40)?
544*4882a593Smuzhiyun	bges		SCOK1
545*4882a593Smuzhiyun	bra		SCSM
546*4882a593Smuzhiyun
547*4882a593SmuzhiyunSCOK1:
548*4882a593Smuzhiyun	cmpil		#0x4004BC7E,%d0		| ...|X| < 15 PI?
549*4882a593Smuzhiyun	blts		SCMAIN
550*4882a593Smuzhiyun	bra		REDUCEX
551*4882a593Smuzhiyun
552*4882a593Smuzhiyun
553*4882a593SmuzhiyunSCMAIN:
554*4882a593Smuzhiyun|--THIS IS THE USUAL CASE, |X| <= 15 PI.
555*4882a593Smuzhiyun|--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
556*4882a593Smuzhiyun	fmovex		%fp0,%fp1
557*4882a593Smuzhiyun	fmuld		TWOBYPI,%fp1	| ...X*2/PI
558*4882a593Smuzhiyun
559*4882a593Smuzhiyun|--HIDE THE NEXT THREE INSTRUCTIONS
560*4882a593Smuzhiyun	lea		PITBL+0x200,%a1 | ...TABLE OF N*PI/2, N = -32,...,32
561*4882a593Smuzhiyun
562*4882a593Smuzhiyun
563*4882a593Smuzhiyun|--FP1 IS NOW READY
564*4882a593Smuzhiyun	fmovel		%fp1,N(%a6)		| ...CONVERT TO INTEGER
565*4882a593Smuzhiyun
566*4882a593Smuzhiyun	movel		N(%a6),%d0
567*4882a593Smuzhiyun	asll		#4,%d0
568*4882a593Smuzhiyun	addal		%d0,%a1		| ...ADDRESS OF N*PIBY2, IN Y1, Y2
569*4882a593Smuzhiyun
570*4882a593Smuzhiyun	fsubx		(%a1)+,%fp0	| ...X-Y1
571*4882a593Smuzhiyun        fsubs		(%a1),%fp0	| ...FP0 IS R = (X-Y1)-Y2
572*4882a593Smuzhiyun
573*4882a593SmuzhiyunSCCONT:
574*4882a593Smuzhiyun|--continuation point from REDUCEX
575*4882a593Smuzhiyun
576*4882a593Smuzhiyun|--HIDE THE NEXT TWO
577*4882a593Smuzhiyun	movel		N(%a6),%d0
578*4882a593Smuzhiyun	rorl		#1,%d0
579*4882a593Smuzhiyun
580*4882a593Smuzhiyun	cmpil		#0,%d0		| ...D0 < 0 IFF N IS ODD
581*4882a593Smuzhiyun	bge		NEVEN
582*4882a593Smuzhiyun
583*4882a593SmuzhiyunNODD:
584*4882a593Smuzhiyun|--REGISTERS SAVED SO FAR: D0, A0, FP2.
585*4882a593Smuzhiyun
586*4882a593Smuzhiyun	fmovex		%fp0,RPRIME(%a6)
587*4882a593Smuzhiyun	fmulx		%fp0,%fp0	 | ...FP0 IS S = R*R
588*4882a593Smuzhiyun	fmoved		SINA7,%fp1	| ...A7
589*4882a593Smuzhiyun	fmoved		COSB8,%fp2	| ...B8
590*4882a593Smuzhiyun	fmulx		%fp0,%fp1	 | ...SA7
591*4882a593Smuzhiyun	movel		%d2,-(%a7)
592*4882a593Smuzhiyun	movel		%d0,%d2
593*4882a593Smuzhiyun	fmulx		%fp0,%fp2	 | ...SB8
594*4882a593Smuzhiyun	rorl		#1,%d2
595*4882a593Smuzhiyun	andil		#0x80000000,%d2
596*4882a593Smuzhiyun
597*4882a593Smuzhiyun	faddd		SINA6,%fp1	| ...A6+SA7
598*4882a593Smuzhiyun	eorl		%d0,%d2
599*4882a593Smuzhiyun	andil		#0x80000000,%d2
600*4882a593Smuzhiyun	faddd		COSB7,%fp2	| ...B7+SB8
601*4882a593Smuzhiyun
602*4882a593Smuzhiyun	fmulx		%fp0,%fp1	 | ...S(A6+SA7)
603*4882a593Smuzhiyun	eorl		%d2,RPRIME(%a6)
604*4882a593Smuzhiyun	movel		(%a7)+,%d2
605*4882a593Smuzhiyun	fmulx		%fp0,%fp2	 | ...S(B7+SB8)
606*4882a593Smuzhiyun	rorl		#1,%d0
607*4882a593Smuzhiyun	andil		#0x80000000,%d0
608*4882a593Smuzhiyun
609*4882a593Smuzhiyun	faddd		SINA5,%fp1	| ...A5+S(A6+SA7)
610*4882a593Smuzhiyun	movel		#0x3F800000,POSNEG1(%a6)
611*4882a593Smuzhiyun	eorl		%d0,POSNEG1(%a6)
612*4882a593Smuzhiyun	faddd		COSB6,%fp2	| ...B6+S(B7+SB8)
613*4882a593Smuzhiyun
614*4882a593Smuzhiyun	fmulx		%fp0,%fp1	 | ...S(A5+S(A6+SA7))
615*4882a593Smuzhiyun	fmulx		%fp0,%fp2	 | ...S(B6+S(B7+SB8))
616*4882a593Smuzhiyun	fmovex		%fp0,SPRIME(%a6)
617*4882a593Smuzhiyun
618*4882a593Smuzhiyun	faddd		SINA4,%fp1	| ...A4+S(A5+S(A6+SA7))
619*4882a593Smuzhiyun	eorl		%d0,SPRIME(%a6)
620*4882a593Smuzhiyun	faddd		COSB5,%fp2	| ...B5+S(B6+S(B7+SB8))
621*4882a593Smuzhiyun
622*4882a593Smuzhiyun	fmulx		%fp0,%fp1	 | ...S(A4+...)
623*4882a593Smuzhiyun	fmulx		%fp0,%fp2	 | ...S(B5+...)
624*4882a593Smuzhiyun
625*4882a593Smuzhiyun	faddd		SINA3,%fp1	| ...A3+S(A4+...)
626*4882a593Smuzhiyun	faddd		COSB4,%fp2	| ...B4+S(B5+...)
627*4882a593Smuzhiyun
628*4882a593Smuzhiyun	fmulx		%fp0,%fp1	 | ...S(A3+...)
629*4882a593Smuzhiyun	fmulx		%fp0,%fp2	 | ...S(B4+...)
630*4882a593Smuzhiyun
631*4882a593Smuzhiyun	faddx		SINA2,%fp1	| ...A2+S(A3+...)
632*4882a593Smuzhiyun	faddx		COSB3,%fp2	| ...B3+S(B4+...)
633*4882a593Smuzhiyun
634*4882a593Smuzhiyun	fmulx		%fp0,%fp1	 | ...S(A2+...)
635*4882a593Smuzhiyun	fmulx		%fp0,%fp2	 | ...S(B3+...)
636*4882a593Smuzhiyun
637*4882a593Smuzhiyun	faddx		SINA1,%fp1	| ...A1+S(A2+...)
638*4882a593Smuzhiyun	faddx		COSB2,%fp2	| ...B2+S(B3+...)
639*4882a593Smuzhiyun
640*4882a593Smuzhiyun	fmulx		%fp0,%fp1	 | ...S(A1+...)
641*4882a593Smuzhiyun	fmulx		%fp2,%fp0	 | ...S(B2+...)
642*4882a593Smuzhiyun
643*4882a593Smuzhiyun
644*4882a593Smuzhiyun
645*4882a593Smuzhiyun	fmulx		RPRIME(%a6),%fp1	| ...R'S(A1+...)
646*4882a593Smuzhiyun	fadds		COSB1,%fp0	| ...B1+S(B2...)
647*4882a593Smuzhiyun	fmulx		SPRIME(%a6),%fp0	| ...S'(B1+S(B2+...))
648*4882a593Smuzhiyun
649*4882a593Smuzhiyun	movel		%d1,-(%sp)	|restore users mode & precision
650*4882a593Smuzhiyun	andil		#0xff,%d1		|mask off all exceptions
651*4882a593Smuzhiyun	fmovel		%d1,%FPCR
652*4882a593Smuzhiyun	faddx		RPRIME(%a6),%fp1	| ...COS(X)
653*4882a593Smuzhiyun	bsr		sto_cos		|store cosine result
654*4882a593Smuzhiyun	fmovel		(%sp)+,%FPCR	|restore users exceptions
655*4882a593Smuzhiyun	fadds		POSNEG1(%a6),%fp0	| ...SIN(X)
656*4882a593Smuzhiyun
657*4882a593Smuzhiyun	bra		t_frcinx
658*4882a593Smuzhiyun
659*4882a593Smuzhiyun
660*4882a593SmuzhiyunNEVEN:
661*4882a593Smuzhiyun|--REGISTERS SAVED SO FAR: FP2.
662*4882a593Smuzhiyun
663*4882a593Smuzhiyun	fmovex		%fp0,RPRIME(%a6)
664*4882a593Smuzhiyun	fmulx		%fp0,%fp0	 | ...FP0 IS S = R*R
665*4882a593Smuzhiyun	fmoved		COSB8,%fp1			| ...B8
666*4882a593Smuzhiyun	fmoved		SINA7,%fp2			| ...A7
667*4882a593Smuzhiyun	fmulx		%fp0,%fp1	 | ...SB8
668*4882a593Smuzhiyun	fmovex		%fp0,SPRIME(%a6)
669*4882a593Smuzhiyun	fmulx		%fp0,%fp2	 | ...SA7
670*4882a593Smuzhiyun	rorl		#1,%d0
671*4882a593Smuzhiyun	andil		#0x80000000,%d0
672*4882a593Smuzhiyun	faddd		COSB7,%fp1	| ...B7+SB8
673*4882a593Smuzhiyun	faddd		SINA6,%fp2	| ...A6+SA7
674*4882a593Smuzhiyun	eorl		%d0,RPRIME(%a6)
675*4882a593Smuzhiyun	eorl		%d0,SPRIME(%a6)
676*4882a593Smuzhiyun	fmulx		%fp0,%fp1	 | ...S(B7+SB8)
677*4882a593Smuzhiyun	oril		#0x3F800000,%d0
678*4882a593Smuzhiyun	movel		%d0,POSNEG1(%a6)
679*4882a593Smuzhiyun	fmulx		%fp0,%fp2	 | ...S(A6+SA7)
680*4882a593Smuzhiyun
681*4882a593Smuzhiyun	faddd		COSB6,%fp1	| ...B6+S(B7+SB8)
682*4882a593Smuzhiyun	faddd		SINA5,%fp2	| ...A5+S(A6+SA7)
683*4882a593Smuzhiyun
684*4882a593Smuzhiyun	fmulx		%fp0,%fp1	 | ...S(B6+S(B7+SB8))
685*4882a593Smuzhiyun	fmulx		%fp0,%fp2	 | ...S(A5+S(A6+SA7))
686*4882a593Smuzhiyun
687*4882a593Smuzhiyun	faddd		COSB5,%fp1	| ...B5+S(B6+S(B7+SB8))
688*4882a593Smuzhiyun	faddd		SINA4,%fp2	| ...A4+S(A5+S(A6+SA7))
689*4882a593Smuzhiyun
690*4882a593Smuzhiyun	fmulx		%fp0,%fp1	 | ...S(B5+...)
691*4882a593Smuzhiyun	fmulx		%fp0,%fp2	 | ...S(A4+...)
692*4882a593Smuzhiyun
693*4882a593Smuzhiyun	faddd		COSB4,%fp1	| ...B4+S(B5+...)
694*4882a593Smuzhiyun	faddd		SINA3,%fp2	| ...A3+S(A4+...)
695*4882a593Smuzhiyun
696*4882a593Smuzhiyun	fmulx		%fp0,%fp1	 | ...S(B4+...)
697*4882a593Smuzhiyun	fmulx		%fp0,%fp2	 | ...S(A3+...)
698*4882a593Smuzhiyun
699*4882a593Smuzhiyun	faddx		COSB3,%fp1	| ...B3+S(B4+...)
700*4882a593Smuzhiyun	faddx		SINA2,%fp2	| ...A2+S(A3+...)
701*4882a593Smuzhiyun
702*4882a593Smuzhiyun	fmulx		%fp0,%fp1	 | ...S(B3+...)
703*4882a593Smuzhiyun	fmulx		%fp0,%fp2	 | ...S(A2+...)
704*4882a593Smuzhiyun
705*4882a593Smuzhiyun	faddx		COSB2,%fp1	| ...B2+S(B3+...)
706*4882a593Smuzhiyun	faddx		SINA1,%fp2	| ...A1+S(A2+...)
707*4882a593Smuzhiyun
708*4882a593Smuzhiyun	fmulx		%fp0,%fp1	 | ...S(B2+...)
709*4882a593Smuzhiyun	fmulx		%fp2,%fp0	 | ...s(a1+...)
710*4882a593Smuzhiyun
711*4882a593Smuzhiyun
712*4882a593Smuzhiyun
713*4882a593Smuzhiyun	fadds		COSB1,%fp1	| ...B1+S(B2...)
714*4882a593Smuzhiyun	fmulx		RPRIME(%a6),%fp0	| ...R'S(A1+...)
715*4882a593Smuzhiyun	fmulx		SPRIME(%a6),%fp1	| ...S'(B1+S(B2+...))
716*4882a593Smuzhiyun
717*4882a593Smuzhiyun	movel		%d1,-(%sp)	|save users mode & precision
718*4882a593Smuzhiyun	andil		#0xff,%d1		|mask off all exceptions
719*4882a593Smuzhiyun	fmovel		%d1,%FPCR
720*4882a593Smuzhiyun	fadds		POSNEG1(%a6),%fp1	| ...COS(X)
721*4882a593Smuzhiyun	bsr		sto_cos		|store cosine result
722*4882a593Smuzhiyun	fmovel		(%sp)+,%FPCR	|restore users exceptions
723*4882a593Smuzhiyun	faddx		RPRIME(%a6),%fp0	| ...SIN(X)
724*4882a593Smuzhiyun
725*4882a593Smuzhiyun	bra		t_frcinx
726*4882a593Smuzhiyun
727*4882a593SmuzhiyunSCBORS:
728*4882a593Smuzhiyun	cmpil		#0x3FFF8000,%d0
729*4882a593Smuzhiyun	bgt		REDUCEX
730*4882a593Smuzhiyun
731*4882a593Smuzhiyun
732*4882a593SmuzhiyunSCSM:
733*4882a593Smuzhiyun	movew		#0x0000,XDCARE(%a6)
734*4882a593Smuzhiyun	fmoves		#0x3F800000,%fp1
735*4882a593Smuzhiyun
736*4882a593Smuzhiyun	movel		%d1,-(%sp)	|save users mode & precision
737*4882a593Smuzhiyun	andil		#0xff,%d1		|mask off all exceptions
738*4882a593Smuzhiyun	fmovel		%d1,%FPCR
739*4882a593Smuzhiyun	fsubs		#0x00800000,%fp1
740*4882a593Smuzhiyun	bsr		sto_cos		|store cosine result
741*4882a593Smuzhiyun	fmovel		(%sp)+,%FPCR	|restore users exceptions
742*4882a593Smuzhiyun	fmovex		X(%a6),%fp0
743*4882a593Smuzhiyun	bra		t_frcinx
744*4882a593Smuzhiyun
745*4882a593Smuzhiyun	|end
746