xref: /OK3568_Linux_fs/kernel/arch/m68k/fpsp040/slog2.S (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun|
2*4882a593Smuzhiyun|	slog2.sa 3.1 12/10/90
3*4882a593Smuzhiyun|
4*4882a593Smuzhiyun|       The entry point slog10 computes the base-10
5*4882a593Smuzhiyun|	logarithm of an input argument X.
6*4882a593Smuzhiyun|	slog10d does the same except the input value is a
7*4882a593Smuzhiyun|	denormalized number.
8*4882a593Smuzhiyun|	sLog2 and sLog2d are the base-2 analogues.
9*4882a593Smuzhiyun|
10*4882a593Smuzhiyun|       INPUT:	Double-extended value in memory location pointed to
11*4882a593Smuzhiyun|		by address register a0.
12*4882a593Smuzhiyun|
13*4882a593Smuzhiyun|       OUTPUT: log_10(X) or log_2(X) returned in floating-point
14*4882a593Smuzhiyun|		register fp0.
15*4882a593Smuzhiyun|
16*4882a593Smuzhiyun|       ACCURACY and MONOTONICITY: The returned result is within 1.7
17*4882a593Smuzhiyun|		ulps in 64 significant bits, i.e. within 0.5003 ulp
18*4882a593Smuzhiyun|		to 53 bits if the result is subsequently rounded
19*4882a593Smuzhiyun|		to double precision. The result is provably monotonic
20*4882a593Smuzhiyun|		in double precision.
21*4882a593Smuzhiyun|
22*4882a593Smuzhiyun|       SPEED:	Two timings are measured, both in the copy-back mode.
23*4882a593Smuzhiyun|		The first one is measured when the function is invoked
24*4882a593Smuzhiyun|		the first time (so the instructions and data are not
25*4882a593Smuzhiyun|		in cache), and the second one is measured when the
26*4882a593Smuzhiyun|		function is reinvoked at the same input argument.
27*4882a593Smuzhiyun|
28*4882a593Smuzhiyun|       ALGORITHM and IMPLEMENTATION NOTES:
29*4882a593Smuzhiyun|
30*4882a593Smuzhiyun|       slog10d:
31*4882a593Smuzhiyun|
32*4882a593Smuzhiyun|       Step 0.   If X < 0, create a NaN and raise the invalid operation
33*4882a593Smuzhiyun|                 flag. Otherwise, save FPCR in D1; set FpCR to default.
34*4882a593Smuzhiyun|       Notes:    Default means round-to-nearest mode, no floating-point
35*4882a593Smuzhiyun|                 traps, and precision control = double extended.
36*4882a593Smuzhiyun|
37*4882a593Smuzhiyun|       Step 1.   Call slognd to obtain Y = log(X), the natural log of X.
38*4882a593Smuzhiyun|       Notes:    Even if X is denormalized, log(X) is always normalized.
39*4882a593Smuzhiyun|
40*4882a593Smuzhiyun|       Step 2.   Compute log_10(X) = log(X) * (1/log(10)).
41*4882a593Smuzhiyun|            2.1  Restore the user FPCR
42*4882a593Smuzhiyun|            2.2  Return ans := Y * INV_L10.
43*4882a593Smuzhiyun|
44*4882a593Smuzhiyun|
45*4882a593Smuzhiyun|       slog10:
46*4882a593Smuzhiyun|
47*4882a593Smuzhiyun|       Step 0.   If X < 0, create a NaN and raise the invalid operation
48*4882a593Smuzhiyun|                 flag. Otherwise, save FPCR in D1; set FpCR to default.
49*4882a593Smuzhiyun|       Notes:    Default means round-to-nearest mode, no floating-point
50*4882a593Smuzhiyun|                 traps, and precision control = double extended.
51*4882a593Smuzhiyun|
52*4882a593Smuzhiyun|       Step 1.   Call sLogN to obtain Y = log(X), the natural log of X.
53*4882a593Smuzhiyun|
54*4882a593Smuzhiyun|       Step 2.   Compute log_10(X) = log(X) * (1/log(10)).
55*4882a593Smuzhiyun|            2.1  Restore the user FPCR
56*4882a593Smuzhiyun|            2.2  Return ans := Y * INV_L10.
57*4882a593Smuzhiyun|
58*4882a593Smuzhiyun|
59*4882a593Smuzhiyun|       sLog2d:
60*4882a593Smuzhiyun|
61*4882a593Smuzhiyun|       Step 0.   If X < 0, create a NaN and raise the invalid operation
62*4882a593Smuzhiyun|                 flag. Otherwise, save FPCR in D1; set FpCR to default.
63*4882a593Smuzhiyun|       Notes:    Default means round-to-nearest mode, no floating-point
64*4882a593Smuzhiyun|                 traps, and precision control = double extended.
65*4882a593Smuzhiyun|
66*4882a593Smuzhiyun|       Step 1.   Call slognd to obtain Y = log(X), the natural log of X.
67*4882a593Smuzhiyun|       Notes:    Even if X is denormalized, log(X) is always normalized.
68*4882a593Smuzhiyun|
69*4882a593Smuzhiyun|       Step 2.   Compute log_2(X) = log(X) * (1/log(2)).
70*4882a593Smuzhiyun|            2.1  Restore the user FPCR
71*4882a593Smuzhiyun|            2.2  Return ans := Y * INV_L2.
72*4882a593Smuzhiyun|
73*4882a593Smuzhiyun|
74*4882a593Smuzhiyun|       sLog2:
75*4882a593Smuzhiyun|
76*4882a593Smuzhiyun|       Step 0.   If X < 0, create a NaN and raise the invalid operation
77*4882a593Smuzhiyun|                 flag. Otherwise, save FPCR in D1; set FpCR to default.
78*4882a593Smuzhiyun|       Notes:    Default means round-to-nearest mode, no floating-point
79*4882a593Smuzhiyun|                 traps, and precision control = double extended.
80*4882a593Smuzhiyun|
81*4882a593Smuzhiyun|       Step 1.   If X is not an integer power of two, i.e., X != 2^k,
82*4882a593Smuzhiyun|                 go to Step 3.
83*4882a593Smuzhiyun|
84*4882a593Smuzhiyun|       Step 2.   Return k.
85*4882a593Smuzhiyun|            2.1  Get integer k, X = 2^k.
86*4882a593Smuzhiyun|            2.2  Restore the user FPCR.
87*4882a593Smuzhiyun|            2.3  Return ans := convert-to-double-extended(k).
88*4882a593Smuzhiyun|
89*4882a593Smuzhiyun|       Step 3.   Call sLogN to obtain Y = log(X), the natural log of X.
90*4882a593Smuzhiyun|
91*4882a593Smuzhiyun|       Step 4.   Compute log_2(X) = log(X) * (1/log(2)).
92*4882a593Smuzhiyun|            4.1  Restore the user FPCR
93*4882a593Smuzhiyun|            4.2  Return ans := Y * INV_L2.
94*4882a593Smuzhiyun|
95*4882a593Smuzhiyun
96*4882a593Smuzhiyun|		Copyright (C) Motorola, Inc. 1990
97*4882a593Smuzhiyun|			All Rights Reserved
98*4882a593Smuzhiyun|
99*4882a593Smuzhiyun|       For details on the license for this file, please see the
100*4882a593Smuzhiyun|       file, README, in this same directory.
101*4882a593Smuzhiyun
102*4882a593Smuzhiyun|SLOG2    idnt    2,1 | Motorola 040 Floating Point Software Package
103*4882a593Smuzhiyun
104*4882a593Smuzhiyun	|section	8
105*4882a593Smuzhiyun
106*4882a593Smuzhiyun	|xref	t_frcinx
107*4882a593Smuzhiyun	|xref	t_operr
108*4882a593Smuzhiyun	|xref	slogn
109*4882a593Smuzhiyun	|xref	slognd
110*4882a593Smuzhiyun
111*4882a593SmuzhiyunINV_L10:  .long 0x3FFD0000,0xDE5BD8A9,0x37287195,0x00000000	| 1/log(10): fmulx source operand in slog10/slog10d; 4th longword is zero
112*4882a593Smuzhiyun
113*4882a593SmuzhiyunINV_L2:   .long 0x3FFF0000,0xB8AA3B29,0x5C17F0BC,0x00000000	| 1/log(2): fmulx source operand in slog2/slog2d; 4th longword is zero
114*4882a593Smuzhiyun
115*4882a593Smuzhiyun	.global	slog10d
116*4882a593Smuzhiyunslog10d:
117*4882a593Smuzhiyun|--entry point for Log10(X), X is denormalized
|--a0 points at X.  A negative X leaves through the shared "invalid" exit.
|--Otherwise the incoming d1 is pushed, d1 is cleared for the slognd call,
|--and the pushed value is loaded into FPCR before the final multiply.
|--NOTE(review): d1 presumably carries the caller's FPCR on entry, since it
|--is the value written back to FPCR -- confirm against the dispatcher.
|--Result: fp0 = (fp0 as left by slognd) * INV_L10, then branch to t_frcinx.
118*4882a593Smuzhiyun	movel		(%a0),%d0		| first longword of X; N flag = bit 31 (sign)
119*4882a593Smuzhiyun	blt		invalid			| X < 0: operand error path
120*4882a593Smuzhiyun	movel		%d1,-(%sp)		| push incoming d1; popped into FPCR below
121*4882a593Smuzhiyun	clrl		%d1			| d1 = 0 for the duration of slognd
122*4882a593Smuzhiyun	bsr		slognd			| ...log(X), X denorm.
123*4882a593Smuzhiyun	fmovel		(%sp)+,%fpcr		| FPCR <- pushed value, so the fmulx runs under it
124*4882a593Smuzhiyun	fmulx		INV_L10,%fp0		| fp0 = log(X) * (1/log(10))
125*4882a593Smuzhiyun	bra		t_frcinx		| common exit, defined outside this file
126*4882a593Smuzhiyun
127*4882a593Smuzhiyun	.global	slog10
128*4882a593Smuzhiyunslog10:
129*4882a593Smuzhiyun|--entry point for Log10(X), X is normalized
|--Same sequence as slog10d, except the natural log comes from slogn.
|--a0 points at X; the incoming d1 is pushed, cleared for the call, and the
|--pushed value is loaded into FPCR before the multiply by INV_L10.
|--NOTE(review): d1 presumably carries the caller's FPCR on entry -- confirm.
130*4882a593Smuzhiyun
131*4882a593Smuzhiyun	movel		(%a0),%d0		| first longword of X; N flag = bit 31 (sign)
132*4882a593Smuzhiyun	blt		invalid			| X < 0: operand error path
133*4882a593Smuzhiyun	movel		%d1,-(%sp)		| push incoming d1; popped into FPCR below
134*4882a593Smuzhiyun	clrl		%d1			| d1 = 0 for the duration of slogn
135*4882a593Smuzhiyun	bsr		slogn			| ...log(X), X normal.
136*4882a593Smuzhiyun	fmovel		(%sp)+,%fpcr		| FPCR <- pushed value, so the fmulx runs under it
137*4882a593Smuzhiyun	fmulx		INV_L10,%fp0		| fp0 = log(X) * (1/log(10))
138*4882a593Smuzhiyun	bra		t_frcinx		| common exit, defined outside this file
139*4882a593Smuzhiyun
140*4882a593Smuzhiyun
141*4882a593Smuzhiyun	.global	slog2d
142*4882a593Smuzhiyunslog2d:
143*4882a593Smuzhiyun|--entry point for Log2(X), X is denormalized
|--Same sequence as slog10d with INV_L2 as the multiplier.  There is no
|--power-of-two shortcut on this path; every non-negative input goes
|--through slognd.
|--NOTE(review): d1 presumably carries the caller's FPCR on entry -- confirm.
144*4882a593Smuzhiyun
145*4882a593Smuzhiyun	movel		(%a0),%d0		| first longword of X; N flag = bit 31 (sign)
146*4882a593Smuzhiyun	blt		invalid			| X < 0: operand error path
147*4882a593Smuzhiyun	movel		%d1,-(%sp)		| push incoming d1; popped into FPCR below
148*4882a593Smuzhiyun	clrl		%d1			| d1 = 0 for the duration of slognd
149*4882a593Smuzhiyun	bsr		slognd			| ...log(X), X denorm.
150*4882a593Smuzhiyun	fmovel		(%sp)+,%fpcr		| FPCR <- pushed value, so the fmulx runs under it
151*4882a593Smuzhiyun	fmulx		INV_L2,%fp0		| fp0 = log(X) * (1/log(2))
152*4882a593Smuzhiyun	bra		t_frcinx		| common exit, defined outside this file
153*4882a593Smuzhiyun
154*4882a593Smuzhiyun	.global	slog2
155*4882a593Smuzhiyunslog2:
156*4882a593Smuzhiyun|--entry point for Log2(X), X is normalized
|--a0 points at X.  A negative X leaves through the shared "invalid" exit.
|--If the 64 bits at 4(a0)..11(a0) are zero apart from bit 31 of the first
|--longword, X is treated as 2^k and k is returned directly as an integer
|--converted to floating point.  Any other X falls to "continue", which
|--computes slogn(X) * INV_L2.
|--Note the two paths restore FPCR differently: the 2^k path writes d1
|--straight to FPCR, while "continue" pushes d1 and pops it into FPCR.
157*4882a593Smuzhiyun	movel		(%a0),%d0		| first longword of X; N flag = bit 31 (sign)
158*4882a593Smuzhiyun	blt		invalid			| X < 0: operand error path
159*4882a593Smuzhiyun
160*4882a593Smuzhiyun	movel		8(%a0),%d0		| third longword of X (low fraction bits)
161*4882a593Smuzhiyun	bnes		continue		| ...X is not 2^k
162*4882a593Smuzhiyun
163*4882a593Smuzhiyun	movel		4(%a0),%d0		| second longword of X (high fraction bits)
164*4882a593Smuzhiyun	andl		#0x7FFFFFFF,%d0		| ignore bit 31, keep the remaining 31 bits
165*4882a593Smuzhiyun	tstl		%d0
166*4882a593Smuzhiyun	bnes		continue		| some other fraction bit is set: not 2^k
167*4882a593Smuzhiyun
168*4882a593Smuzhiyun|--X = 2^k.
169*4882a593Smuzhiyun	movew		(%a0),%d0		| first word of X into the low word of d0 (upper word is 0 here)
170*4882a593Smuzhiyun	andl		#0x00007FFF,%d0		| keep the low 15 bits: the exponent field
171*4882a593Smuzhiyun	subl		#0x3FFF,%d0		| k = exponent field - 0x3FFF
172*4882a593Smuzhiyun	fmovel		%d1,%fpcr		| FPCR <- d1 (d1 was never pushed on this path)
173*4882a593Smuzhiyun	fmovel		%d0,%fp0		| fp0 = k, converted from a long integer
174*4882a593Smuzhiyun	bra		t_frcinx		| common exit, defined outside this file
175*4882a593Smuzhiyun
176*4882a593Smuzhiyuncontinue:
|--General case: same push / clear / call / pop / multiply sequence as the
|--other entry points, with slogn and INV_L2.
177*4882a593Smuzhiyun	movel		%d1,-(%sp)		| push incoming d1; popped into FPCR below
178*4882a593Smuzhiyun	clrl		%d1			| d1 = 0 for the duration of slogn
179*4882a593Smuzhiyun	bsr		slogn			| ...log(X), X normal.
180*4882a593Smuzhiyun	fmovel		(%sp)+,%fpcr		| FPCR <- pushed value, so the fmulx runs under it
181*4882a593Smuzhiyun	fmulx		INV_L2,%fp0		| fp0 = log(X) * (1/log(2))
182*4882a593Smuzhiyun	bra		t_frcinx		| common exit, defined outside this file
183*4882a593Smuzhiyun
184*4882a593Smuzhiyuninvalid:
|--Shared target of the "blt invalid" sign checks in all four entry points.
|--NOTE(review): t_operr is defined outside this file; presumably it builds
|--the NaN result and raises the invalid-operation flag -- confirm there.
185*4882a593Smuzhiyun	bra		t_operr			| hand off to the operand-error handler
186*4882a593Smuzhiyun
187*4882a593Smuzhiyun	|end
188