xref: /OK3568_Linux_fs/kernel/arch/m68k/fpsp040/scosh.S (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun|
2*4882a593Smuzhiyun|	scosh.sa 3.1 12/10/90
3*4882a593Smuzhiyun|
4*4882a593Smuzhiyun|	The entry point sCosh computes the hyperbolic cosine of
5*4882a593Smuzhiyun|	an input argument; sCoshd does the same except for denormalized
6*4882a593Smuzhiyun|	input.
7*4882a593Smuzhiyun|
8*4882a593Smuzhiyun|	Input: Double-extended number X in location pointed to
9*4882a593Smuzhiyun|		by address register a0.
10*4882a593Smuzhiyun|
11*4882a593Smuzhiyun|	Output: The value cosh(X) returned in floating-point register Fp0.
12*4882a593Smuzhiyun|
13*4882a593Smuzhiyun|	Accuracy and Monotonicity: The returned result is within 3 ulps in
14*4882a593Smuzhiyun|		64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
15*4882a593Smuzhiyun|		result is subsequently rounded to double precision. The
16*4882a593Smuzhiyun|		result is provably monotonic in double precision.
17*4882a593Smuzhiyun|
18*4882a593Smuzhiyun|	Speed: The program sCOSH takes approximately 250 cycles.
19*4882a593Smuzhiyun|
20*4882a593Smuzhiyun|	Algorithm:
21*4882a593Smuzhiyun|
22*4882a593Smuzhiyun|	COSH
23*4882a593Smuzhiyun|	1. If |X| > 16380 log2, go to 3.
24*4882a593Smuzhiyun|
25*4882a593Smuzhiyun|	2. (|X| <= 16380 log2) Cosh(X) is obtained by the formulae
26*4882a593Smuzhiyun|		y = |X|, z = exp(Y), and
27*4882a593Smuzhiyun|		cosh(X) = (1/2)*( z + 1/z ).
28*4882a593Smuzhiyun|		Exit.
29*4882a593Smuzhiyun|
30*4882a593Smuzhiyun|	3. (|X| > 16380 log2). If |X| > 16480 log2, go to 5.
31*4882a593Smuzhiyun|
32*4882a593Smuzhiyun|	4. (16380 log2 < |X| <= 16480 log2)
33*4882a593Smuzhiyun|		cosh(X) = sign(X) * exp(|X|)/2.
34*4882a593Smuzhiyun|		However, invoking exp(|X|) may cause premature overflow.
35*4882a593Smuzhiyun|		Thus, we calculate sinh(X) as follows:
36*4882a593Smuzhiyun|		Y	:= |X|
37*4882a593Smuzhiyun|		Fact	:=	2**(16380)
38*4882a593Smuzhiyun|		Y'	:= Y - 16381 log2
39*4882a593Smuzhiyun|		cosh(X) := Fact * exp(Y').
40*4882a593Smuzhiyun|		Exit.
41*4882a593Smuzhiyun|
42*4882a593Smuzhiyun|	5. (|X| > 16480 log2) sinh(X) must overflow. Return
43*4882a593Smuzhiyun|		Huge*Huge to generate overflow and an infinity with
44*4882a593Smuzhiyun|		the appropriate sign. Huge is the largest finite number in
45*4882a593Smuzhiyun|		extended format. Exit.
46*4882a593Smuzhiyun|
47*4882a593Smuzhiyun|
48*4882a593Smuzhiyun
49*4882a593Smuzhiyun|		Copyright (C) Motorola, Inc. 1990
50*4882a593Smuzhiyun|			All Rights Reserved
51*4882a593Smuzhiyun|
52*4882a593Smuzhiyun|       For details on the license for this file, please see the
53*4882a593Smuzhiyun|       file, README, in this same directory.
54*4882a593Smuzhiyun
55*4882a593Smuzhiyun|SCOSH	idnt	2,1 | Motorola 040 Floating Point Software Package
56*4882a593Smuzhiyun
57*4882a593Smuzhiyun	|section	8
58*4882a593Smuzhiyun
59*4882a593Smuzhiyun	|xref	t_ovfl
60*4882a593Smuzhiyun	|xref	t_frcinx
61*4882a593Smuzhiyun	|xref	setox
62*4882a593Smuzhiyun
63*4882a593SmuzhiyunT1:	.long 0x40C62D38,0xD3D64634 | ... 16381 LOG2 LEAD
64*4882a593SmuzhiyunT2:	.long 0x3D6F90AE,0xB1E75CC7 | ... 16381 LOG2 TRAIL
65*4882a593Smuzhiyun
66*4882a593SmuzhiyunTWO16380: .long 0x7FFB0000,0x80000000,0x00000000,0x00000000
67*4882a593Smuzhiyun
68*4882a593Smuzhiyun	.global	scoshd
69*4882a593Smuzhiyunscoshd:
70*4882a593Smuzhiyun|--COSH(X) = 1 FOR DENORMALIZED X
71*4882a593Smuzhiyun
72*4882a593Smuzhiyun	fmoves		#0x3F800000,%fp0
73*4882a593Smuzhiyun
74*4882a593Smuzhiyun	fmovel		%d1,%FPCR
75*4882a593Smuzhiyun	fadds		#0x00800000,%fp0
76*4882a593Smuzhiyun	bra		t_frcinx
77*4882a593Smuzhiyun
78*4882a593Smuzhiyun	.global	scosh
79*4882a593Smuzhiyunscosh:
80*4882a593Smuzhiyun	fmovex		(%a0),%fp0	| ...LOAD INPUT
81*4882a593Smuzhiyun
82*4882a593Smuzhiyun	movel		(%a0),%d0
83*4882a593Smuzhiyun	movew		4(%a0),%d0
84*4882a593Smuzhiyun	andil		#0x7FFFFFFF,%d0
85*4882a593Smuzhiyun	cmpil		#0x400CB167,%d0
86*4882a593Smuzhiyun	bgts		COSHBIG
87*4882a593Smuzhiyun
88*4882a593Smuzhiyun|--THIS IS THE USUAL CASE, |X| < 16380 LOG2
89*4882a593Smuzhiyun|--COSH(X) = (1/2) * ( EXP(X) + 1/EXP(X) )
90*4882a593Smuzhiyun
91*4882a593Smuzhiyun	fabsx		%fp0		| ...|X|
92*4882a593Smuzhiyun
93*4882a593Smuzhiyun	movel		%d1,-(%sp)
94*4882a593Smuzhiyun	clrl		%d1
95*4882a593Smuzhiyun	fmovemx	%fp0-%fp0,(%a0)	|pass parameter to setox
96*4882a593Smuzhiyun	bsr		setox		| ...FP0 IS EXP(|X|)
97*4882a593Smuzhiyun	fmuls		#0x3F000000,%fp0	| ...(1/2)EXP(|X|)
98*4882a593Smuzhiyun	movel		(%sp)+,%d1
99*4882a593Smuzhiyun
100*4882a593Smuzhiyun	fmoves		#0x3E800000,%fp1	| ...(1/4)
101*4882a593Smuzhiyun	fdivx		%fp0,%fp1		| ...1/(2 EXP(|X|))
102*4882a593Smuzhiyun
103*4882a593Smuzhiyun	fmovel		%d1,%FPCR
104*4882a593Smuzhiyun	faddx		%fp1,%fp0
105*4882a593Smuzhiyun
106*4882a593Smuzhiyun	bra		t_frcinx
107*4882a593Smuzhiyun
108*4882a593SmuzhiyunCOSHBIG:
109*4882a593Smuzhiyun	cmpil		#0x400CB2B3,%d0
110*4882a593Smuzhiyun	bgts		COSHHUGE
111*4882a593Smuzhiyun
112*4882a593Smuzhiyun	fabsx		%fp0
113*4882a593Smuzhiyun	fsubd		T1(%pc),%fp0		| ...(|X|-16381LOG2_LEAD)
114*4882a593Smuzhiyun	fsubd		T2(%pc),%fp0		| ...|X| - 16381 LOG2, ACCURATE
115*4882a593Smuzhiyun
116*4882a593Smuzhiyun	movel		%d1,-(%sp)
117*4882a593Smuzhiyun	clrl		%d1
118*4882a593Smuzhiyun	fmovemx	%fp0-%fp0,(%a0)
119*4882a593Smuzhiyun	bsr		setox
120*4882a593Smuzhiyun	fmovel		(%sp)+,%fpcr
121*4882a593Smuzhiyun
122*4882a593Smuzhiyun	fmulx		TWO16380(%pc),%fp0
123*4882a593Smuzhiyun	bra		t_frcinx
124*4882a593Smuzhiyun
125*4882a593SmuzhiyunCOSHHUGE:
126*4882a593Smuzhiyun	fmovel		#0,%fpsr		|clr N bit if set by source
127*4882a593Smuzhiyun	bclrb		#7,(%a0)		|always return positive value
128*4882a593Smuzhiyun	fmovemx	(%a0),%fp0-%fp0
129*4882a593Smuzhiyun	bra		t_ovfl
130*4882a593Smuzhiyun
131*4882a593Smuzhiyun	|end
132