xref: /OK3568_Linux_fs/kernel/arch/m68k/fpsp040/stanh.S (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun|
2*4882a593Smuzhiyun|	stanh.sa 3.1 12/10/90
3*4882a593Smuzhiyun|
4*4882a593Smuzhiyun|	The entry point sTanh computes the hyperbolic tangent of
5*4882a593Smuzhiyun|	an input argument; sTanhd does the same except for denormalized
6*4882a593Smuzhiyun|	input.
7*4882a593Smuzhiyun|
8*4882a593Smuzhiyun|	Input: Double-extended number X in location pointed to
9*4882a593Smuzhiyun|		by address register a0.
10*4882a593Smuzhiyun|
11*4882a593Smuzhiyun|	Output: The value tanh(X) returned in floating-point register Fp0.
12*4882a593Smuzhiyun|
|	Accuracy and Monotonicity: The returned result is within 3 ulps in
|		64 significant bits, i.e. within 0.5001 ulp to 53 bits if the
|		result is subsequently rounded to double precision. The
|		result is provably monotonic in double precision.
17*4882a593Smuzhiyun|
18*4882a593Smuzhiyun|	Speed: The program stanh takes approximately 270 cycles.
19*4882a593Smuzhiyun|
20*4882a593Smuzhiyun|	Algorithm:
21*4882a593Smuzhiyun|
22*4882a593Smuzhiyun|	TANH
23*4882a593Smuzhiyun|	1. If |X| >= (5/2) log2 or |X| <= 2**(-40), go to 3.
24*4882a593Smuzhiyun|
|	2. (2**(-40) < |X| < (5/2) log2) Calculate tanh(X) by
|		sgn := sign(X), Y := 2|X|, z := expm1(Y), and
|		tanh(X) = sgn*( z/(2+z) ).
|		Exit.
29*4882a593Smuzhiyun|
30*4882a593Smuzhiyun|	3. (|X| <= 2**(-40) or |X| >= (5/2) log2). If |X| < 1,
31*4882a593Smuzhiyun|		go to 7.
32*4882a593Smuzhiyun|
33*4882a593Smuzhiyun|	4. (|X| >= (5/2) log2) If |X| >= 50 log2, go to 6.
34*4882a593Smuzhiyun|
|	5. ((5/2) log2 <= |X| < 50 log2) Calculate tanh(X) by
|		sgn := sign(X), Y := 2|X|, z := exp(Y),
|		tanh(X) = sgn - [ sgn*2/(1+z) ].
|		Exit.
39*4882a593Smuzhiyun|
40*4882a593Smuzhiyun|	6. (|X| >= 50 log2) Tanh(X) = +-1 (round to nearest). Thus, we
41*4882a593Smuzhiyun|		calculate Tanh(X) by
42*4882a593Smuzhiyun|		sgn := sign(X), Tiny := 2**(-126),
43*4882a593Smuzhiyun|		tanh(X) := sgn - sgn*Tiny.
44*4882a593Smuzhiyun|		Exit.
45*4882a593Smuzhiyun|
46*4882a593Smuzhiyun|	7. (|X| < 2**(-40)). Tanh(X) = X.	Exit.
47*4882a593Smuzhiyun|
48*4882a593Smuzhiyun
49*4882a593Smuzhiyun|		Copyright (C) Motorola, Inc. 1990
50*4882a593Smuzhiyun|			All Rights Reserved
51*4882a593Smuzhiyun|
52*4882a593Smuzhiyun|       For details on the license for this file, please see the
53*4882a593Smuzhiyun|       file, README, in this same directory.
54*4882a593Smuzhiyun
55*4882a593Smuzhiyun|STANH	idnt	2,1 | Motorola 040 Floating Point Software Package
56*4882a593Smuzhiyun
57*4882a593Smuzhiyun	|section	8
58*4882a593Smuzhiyun
59*4882a593Smuzhiyun#include "fpsp.h"
60*4882a593Smuzhiyun
|--Scratch locations addressed off %a6.  FP_SCR5, FP_SCR6 and L_SCR3 are
|--not defined in this file (presumably they come from fpsp.h).
	.equ	X,FP_SCR5		| ...working copy of the argument, later Y = 2|X|
	.equ	XDCARE,X+2		| ...word of X that TANHSM clears before reloading X
	.equ	XFRAC,X+4		| ...not referenced by the code in this file
	.equ	V,FP_SCR6		| ...divisor SIGN(X)*(Z+2) on the main path

	.equ	SGN,L_SCR3		| ...longword reduced to the sign bit of X
68*4882a593Smuzhiyun
|--Lower/upper bound pair read by the cmp2l in stanh.  Both use the same
|--compact layout stanh builds in %d0: sign/exponent word in the upper
|--half, leading 16 fraction bits in the lower half.
BOUNDS1:
	.long	0x3FD78000		| ...2^(-40)
	.long	0x3FFFDDCE		| ...(5/2)LOG2
70*4882a593Smuzhiyun
71*4882a593Smuzhiyun	|xref	t_frcinx
72*4882a593Smuzhiyun	|xref	t_extdnrm
73*4882a593Smuzhiyun	|xref	setox
74*4882a593Smuzhiyun	|xref	setoxm1
75*4882a593Smuzhiyun
|--stanhd: entry point for a denormalized X.
|--TANH(X) = X for such inputs, so control goes straight to t_extdnrm.
	.globl	stanhd
stanhd:
	bra		t_extdnrm
81*4882a593Smuzhiyun
|--stanh: hyperbolic tangent of the extended-precision X at (%a0).
|--	In:	%a0 -> X
|--		%d1 =  value loaded into FPCR just before the last FP
|--		       instruction (described in this file as restoring
|--		       the user's exceptions)
|--	Out:	%fp0 = tanh(X); every path leaves through t_frcinx
|--	Uses:	%d0, %fp1, the X / SGN / V scratch slots off %a6, and the
|--		operand at (%a0), which is overwritten with Y = 2|X|
	.global	stanh
stanh:
	fmovex		(%a0),%fp0		| ...LOAD INPUT
	fmovex		%fp0,X(%a6)		| ...working copy of X

|--Build the compact form of X in %d0: sign/exponent word in the upper
|--half, leading 16 bits of the fraction in the lower half.  The signed
|--value replaces the first longword of X; %d0 then carries |X|.
	movel		(%a0),%d0
	movew		4(%a0),%d0
	movel		%d0,X(%a6)
	andl		#0x7FFFFFFF,%d0	| ...strip the sign
	cmp2l		BOUNDS1(%pc),%d0	| ...2**(-40) < |X| < (5/2)LOG2 ?
	bcss		TANHBORS		| ...out of range, sort it out there

|--THIS IS THE USUAL CASE
|--With Y = 2|X| and Z = EXPM1(Y):  TANH(X) = SIGN(X) * Z / (Z+2).
	movel		X(%a6),%d0		| ...compact X, sign included
	movel		%d0,SGN(%a6)
	andl		#0x80000000,SGN(%a6)	| ...SGN keeps the sign bit only
	andl		#0x7FFF0000,%d0	| ...exponent of |X|, low word zeroed
	addl		#0x00010000,%d0	| ...EXPONENT OF 2|X|
	movel		%d0,X(%a6)		| ...first longword of X now belongs to Y
	fmovex		X(%a6),%fp0		| ...FP0 IS Y = 2|X|

|--Call setoxm1 on Y with %d1 cleared; the incoming %d1 waits on the stack.
|--NOTE(review): setoxm1 presumably takes its operand through %a0 the way
|--stanh does, which is why Y is written to (%a0) first - confirm in setox.S.
	movel		%d1,-(%a7)
	clrl		%d1
	fmovemx	%fp0-%fp0,(%a0)	| ...Y replaces X at (%a0)
	bsr		setoxm1		| ...FP0 IS Z = EXPM1(Y)
	movel		(%a7)+,%d1

|--V = SIGN(X)*(Z+2): store Z+2, then XOR the sign bit into its first
|--longword in memory.
	movel		SGN(%a6),%d0
	fmovex		%fp0,%fp1
	fadds		#0x40000000,%fp1	| ...Z+2  (2.0 IN SGL FMT)
	fmovex		%fp1,V(%a6)
	eorl		%d0,V(%a6)

	fmovel		%d1,%FPCR		| ...restore users exceptions
	fdivx		V(%a6),%fp0		| ...Z / [SIGN(X)*(Z+2)]
	bra		t_frcinx
120*4882a593Smuzhiyun
|--TANHBORS: reached from stanh when the cmp2l range check fails.
|--%d0 still holds the compact |X|.  Anything below 1.0 goes to TANHSM,
|--anything above 50 LOG2 goes to TANHHUGE, the rest is handled here.
TANHBORS:
	cmpl		#0x3FFF8000,%d0	| ...|X| < 1 ?
	blt		TANHSM
	cmpl		#0x40048AA1,%d0	| ...|X| > 50 LOG2 ?
	bgt		TANHHUGE

|-- (5/2) LOG2 < |X| < 50 LOG2.
|--TANH(X) = 1 - (2/[EXP(2X)+1]).  With Y = 2|X| and SGN = SIGN(X):
|--TANH(X) = SGN - SGN*2/[EXP(Y)+1].
	movel		X(%a6),%d0		| ...compact X, sign included
	movel		%d0,SGN(%a6)
	andl		#0x80000000,SGN(%a6)	| ...SGN keeps the sign bit only
	andl		#0x7FFF0000,%d0
	addl		#0x00010000,%d0	| ...EXPO OF 2|X|
	movel		%d0,X(%a6)		| ...Y = 2|X|
	movel		SGN(%a6),%d0		| ...%d0 holds SGN at the call below
	fmovex		X(%a6),%fp0		| ...FP0 IS Y = 2|X|

|--Call setox on Y with %d1 cleared; the incoming %d1 waits on the stack.
	movel		%d1,-(%a7)
	clrl		%d1
	fmovemx	%fp0-%fp0,(%a0)	| ...Y replaces X at (%a0)
	bsr		setox			| ...FP0 IS EXP(Y)
	movel		(%a7)+,%d1

|--FP1 = -SIGN(X)*2 / [EXP(Y)+1]
	movel		SGN(%a6),%d0
	eorl		#0xC0000000,%d0	| ...-SIGN(X)*2 IN SGL FMT
	fadds		#0x3F800000,%fp0	| ...EXP(Y)+1
	fmoves		%d0,%fp1
	fdivx		%fp0,%fp1		| ...-SIGN(X)2 / [EXP(Y)+1 ]

|--FP0 = SGN, i.e. +-1.0 built as a single-precision pattern
	movel		SGN(%a6),%d0
	orl		#0x3F800000,%d0
	fmoves		%d0,%fp0		| ...SGN IN SGL FMT

	fmovel		%d1,%FPCR		| ...restore users exceptions
	faddx		%fp1,%fp0		| ...SGN - SGN*2/[EXP(Y)+1]
	bra		t_frcinx
161*4882a593Smuzhiyun
|--TANHSM: |X| < 2**(-40), TANH(X) = X.
|--stanh stored the compact form of X over the first longword of the X
|--slot, which put fraction bits into the word at XDCARE.  Clear that
|--word, then reload X with the incoming %d1 already in FPCR.
TANHSM:
	movew		#0,XDCARE(%a6)

	fmovel		%d1,%FPCR		| ...restore users exceptions
	fmovex		X(%a6),%fp0		| ...last inst - possible exception set
	bra		t_frcinx
169*4882a593Smuzhiyun
|--TANHHUGE: |X| > 50 LOG2.
|--RETURN SGN(X) - SGN(X)*EPS with EPS = 2**(-126); both operands are
|--assembled in %d0 as single-precision bit patterns, and the add runs
|--after the incoming %d1 has been written to FPCR.
TANHHUGE:
	movel		X(%a6),%d0
	andl		#0x80000000,%d0	| ...sign bit of X
	orl		#0x3F800000,%d0	| ...SGN(X)*1.0 IN SGL FMT
	fmoves		%d0,%fp0

	andl		#0x80000000,%d0	| ...back to the bare sign bit
	eorl		#0x80800000,%d0	| ...-SIGN(X)*EPS

	fmovel		%d1,%FPCR		| ...restore users exceptions
	fadds		%d0,%fp0		| ...SGN(X) - SGN(X)*EPS
	bra		t_frcinx
183*4882a593Smuzhiyun
184*4882a593Smuzhiyun	|end
185