xref: /OK3568_Linux_fs/kernel/arch/m68k/fpsp040/sacos.S (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun|
2*4882a593Smuzhiyun|	sacos.sa 3.3 12/19/90
3*4882a593Smuzhiyun|
4*4882a593Smuzhiyun|	Description: The entry point sAcos computes the inverse cosine of
5*4882a593Smuzhiyun|		an input argument; sAcosd does the same except for denormalized
6*4882a593Smuzhiyun|		input.
7*4882a593Smuzhiyun|
8*4882a593Smuzhiyun|	Input: Double-extended number X in location pointed to
9*4882a593Smuzhiyun|		by address register a0.
10*4882a593Smuzhiyun|
11*4882a593Smuzhiyun|	Output: The value arccos(X) returned in floating-point register Fp0.
12*4882a593Smuzhiyun|
13*4882a593Smuzhiyun|	Accuracy and Monotonicity: The returned result is within 3 ulps in
14*4882a593Smuzhiyun|		64 significant bits, i.e. within 0.5001 ulp to 53 bits if the
15*4882a593Smuzhiyun|		result is subsequently rounded to double precision. The
16*4882a593Smuzhiyun|		result is provably monotonic in double precision.
17*4882a593Smuzhiyun|
18*4882a593Smuzhiyun|	Speed: The program sACOS takes approximately 310 cycles.
19*4882a593Smuzhiyun|
20*4882a593Smuzhiyun|	Algorithm:
21*4882a593Smuzhiyun|
22*4882a593Smuzhiyun|	ACOS
23*4882a593Smuzhiyun|	1. If |X| >= 1, go to 3.
24*4882a593Smuzhiyun|
25*4882a593Smuzhiyun|	2. (|X| < 1) Calculate acos(X) by
26*4882a593Smuzhiyun|		z := (1-X) / (1+X)
27*4882a593Smuzhiyun|		acos(X) = 2 * atan( sqrt(z) ).
28*4882a593Smuzhiyun|		Exit.
29*4882a593Smuzhiyun|
30*4882a593Smuzhiyun|	3. If |X| > 1, go to 5.
31*4882a593Smuzhiyun|
32*4882a593Smuzhiyun|	4. (|X| = 1) If X > 0, return 0. Otherwise, return Pi. Exit.
33*4882a593Smuzhiyun|
34*4882a593Smuzhiyun|	5. (|X| > 1) Generate an invalid operation by 0 * infinity.
35*4882a593Smuzhiyun|		Exit.
36*4882a593Smuzhiyun|
37*4882a593Smuzhiyun
38*4882a593Smuzhiyun|		Copyright (C) Motorola, Inc. 1990
39*4882a593Smuzhiyun|			All Rights Reserved
40*4882a593Smuzhiyun|
41*4882a593Smuzhiyun|       For details on the license for this file, please see the
42*4882a593Smuzhiyun|       file, README, in this same directory.
43*4882a593Smuzhiyun
44*4882a593Smuzhiyun|SACOS	idnt	2,1 | Motorola 040 Floating Point Software Package
45*4882a593Smuzhiyun
46*4882a593Smuzhiyun	|section	8
47*4882a593Smuzhiyun
|--Constants loaded with fmovex by the code below.  Each entry is four
|--longs: the first holds the sign/exponent in its upper 16 bits, the next
|--two hold the 64-bit fraction, and the fourth is zero
|--(NOTE(review): presumably padding to 16 bytes -- confirm).
|--PI and PIBY2 share the same fraction; only the exponent differs by one
|--(0x4000 vs 0x3FFF), i.e. PIBY2 = PI/2.
48*4882a593SmuzhiyunPI:	.long 0x40000000,0xC90FDAA2,0x2168C235,0x00000000
49*4882a593SmuzhiyunPIBY2:	.long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
50*4882a593Smuzhiyun
51*4882a593Smuzhiyun	|xref	t_operr
52*4882a593Smuzhiyun	|xref	t_frcinx
53*4882a593Smuzhiyun	|xref	satan
54*4882a593Smuzhiyun
|--sacosd: entry point for denormalized X (see the file header).
|--The input is not examined here: the routine loads d1 into the FPCR,
|--places the PIBY2 constant in fp0 and leaves through t_frcinx, which is
|--defined outside this file
|--(NOTE(review): presumably it flags the result as inexact -- confirm).
55*4882a593Smuzhiyun	.global	sacosd
56*4882a593Smuzhiyunsacosd:
57*4882a593Smuzhiyun|--ACOS(X) = PI/2 FOR DENORMALIZED X
58*4882a593Smuzhiyun	fmovel		%d1,%fpcr		| ...load user's rounding mode/precision
59*4882a593Smuzhiyun	fmovex		PIBY2,%fp0		| ...fp0 := PI/2 (extended-precision constant)
60*4882a593Smuzhiyun	bra		t_frcinx		| ...branch, not bsr: control does not come back here
61*4882a593Smuzhiyun
|--sacos: inverse cosine of the extended-precision X at (a0); result in fp0.
|--Register use visible in this routine:
|--	a0  = pointer to X.  The |X| < 1 path overwrites X in memory with
|--	      SQRT((1-X)/(1+X)) before calling satan.
|--	d0  = scratch: sign/exponent word of X packed with the upper
|--	      16 bits of its fraction.
|--	d1  = value loaded into the FPCR before returning (the user's
|--	      control word).  The |X| < 1 path pushes it, clears d1 for
|--	      the satan call, and pops the saved copy straight into the
|--	      FPCR; d1 itself is not reloaded by this routine.
|--	fp1 = scratch on the |X| < 1 path.
|--Exits: bra t_frcinx (|X| < 1 and X = -1), fbgt t_operr (|X| > 1),
|--	  rts (X = +1, exact zero result).
62*4882a593Smuzhiyun	.global	sacos
63*4882a593Smuzhiyunsacos:
64*4882a593Smuzhiyun	fmovex		(%a0),%fp0	| ...LOAD INPUT
65*4882a593Smuzhiyun
66*4882a593Smuzhiyun	movel		(%a0),%d0		| ...pack exponent with upper 16 fraction
67*4882a593Smuzhiyun	movew		4(%a0),%d0		| ...low word of d0 := upper 16 bits of the fraction
68*4882a593Smuzhiyun	andil		#0x7FFFFFFF,%d0		| ...drop the sign bit: d0 now describes |X|
69*4882a593Smuzhiyun	cmpil		#0x3FFF8000,%d0		| ...0x3FFF8000 is 1.0 in this packed form
70*4882a593Smuzhiyun	bges		ACOSBIG			| ...|X| >= 1 is handled separately
71*4882a593Smuzhiyun
72*4882a593Smuzhiyun|--THIS IS THE USUAL CASE, |X| < 1
73*4882a593Smuzhiyun|--ACOS(X) = 2 * ATAN(	SQRT( (1-X)/(1+X) )	)
74*4882a593Smuzhiyun
75*4882a593Smuzhiyun	fmoves		#0x3F800000,%fp1	| ...fp1 := 1.0 (single-precision immediate)
76*4882a593Smuzhiyun	faddx		%fp0,%fp1		| ...1+X
77*4882a593Smuzhiyun	fnegx		%fp0		| ... -X
78*4882a593Smuzhiyun	fadds		#0x3F800000,%fp0	| ...1-X
79*4882a593Smuzhiyun	fdivx		%fp1,%fp0		| ...(1-X)/(1+X)
80*4882a593Smuzhiyun	fsqrtx		%fp0		| ...SQRT((1-X)/(1+X))
|--Store the intermediate back over X.  NOTE(review): presumably satan
|--reads its argument from (a0), like this routine does -- confirm.
81*4882a593Smuzhiyun	fmovemx	%fp0-%fp0,(%a0)	| ...overwrite input
82*4882a593Smuzhiyun	movel		%d1,-(%sp)	|save original users fpcr
|--d1 is zeroed for the duration of the satan call.  NOTE(review):
|--presumably so satan computes with a default control word -- confirm.
83*4882a593Smuzhiyun	clrl		%d1
84*4882a593Smuzhiyun	bsr		satan		| ...ATAN(SQRT([1-X]/[1+X]))
85*4882a593Smuzhiyun	fmovel		(%sp)+,%fpcr	|restore users exceptions
|--The doubling is done after the user's FPCR is back in effect, so this
|--last operation is the one carried out under the user's control word.
86*4882a593Smuzhiyun	faddx		%fp0,%fp0		| ...2 * ATAN( STUFF )
87*4882a593Smuzhiyun	bra		t_frcinx
88*4882a593Smuzhiyun
|--|X| >= 1: separate |X| > 1 (operand error) from |X| = 1.
|--NOTE(review): fbgt is not taken on an unordered compare, so a NaN
|--reaching this point would fall through to the |X| = 1 code; presumably
|--NaN inputs are dispatched before this routine is called -- confirm.
89*4882a593SmuzhiyunACOSBIG:
90*4882a593Smuzhiyun	fabsx		%fp0
91*4882a593Smuzhiyun	fcmps		#0x3F800000,%fp0	| ...compare |X| with 1.0
92*4882a593Smuzhiyun	fbgt		t_operr		|cause an operr exception
93*4882a593Smuzhiyun
94*4882a593Smuzhiyun|--|X| = 1, ACOS(X) = 0 OR PI
|--Re-pack X without masking the sign: bit 31 of d0 is the sign of X, so a
|--signed "greater than zero" test selects X = +1.
95*4882a593Smuzhiyun	movel		(%a0),%d0		| ...pack exponent with upper 16 fraction
96*4882a593Smuzhiyun	movew		4(%a0),%d0
97*4882a593Smuzhiyun	cmpl		#0,%d0		|D0 has original exponent+fraction
98*4882a593Smuzhiyun	bgts		ACOSP1
99*4882a593Smuzhiyun
100*4882a593Smuzhiyun|--X = -1
101*4882a593Smuzhiyun|Returns PI and inexact exception
102*4882a593Smuzhiyun	fmovex		PI,%fp0
103*4882a593Smuzhiyun	fmovel		%d1,%FPCR
|--0x00800000 is the smallest normalized single-precision value; adding it
|--to PI cannot be represented exactly, which is what raises inexact.
104*4882a593Smuzhiyun	fadds		#0x00800000,%fp0	|cause an inexact exception to be put
105*4882a593Smuzhiyun|					;into the 040 - will not trap until next
106*4882a593Smuzhiyun|					;fp inst.
107*4882a593Smuzhiyun	bra		t_frcinx
108*4882a593Smuzhiyun
|--X = +1: the result is an exact +0, so return directly with rts instead
|--of going through t_frcinx.
109*4882a593SmuzhiyunACOSP1:
110*4882a593Smuzhiyun	fmovel		%d1,%FPCR
111*4882a593Smuzhiyun	fmoves		#0x00000000,%fp0	| ...fp0 := +0.0
112*4882a593Smuzhiyun	rts				|Facos ; of +1 is exact
113*4882a593Smuzhiyun
114*4882a593Smuzhiyun	|end
115