xref: /OK3568_Linux_fs/kernel/arch/m68k/fpsp040/round.S (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun|
2*4882a593Smuzhiyun|	round.sa 3.4 7/29/91
3*4882a593Smuzhiyun|
4*4882a593Smuzhiyun|	handle rounding and normalization tasks
5*4882a593Smuzhiyun|
6*4882a593Smuzhiyun|
7*4882a593Smuzhiyun|
8*4882a593Smuzhiyun|		Copyright (C) Motorola, Inc. 1990
9*4882a593Smuzhiyun|			All Rights Reserved
10*4882a593Smuzhiyun|
11*4882a593Smuzhiyun|       For details on the license for this file, please see the
12*4882a593Smuzhiyun|       file, README, in this same directory.
13*4882a593Smuzhiyun
14*4882a593Smuzhiyun|ROUND	idnt    2,1 | Motorola 040 Floating Point Software Package
15*4882a593Smuzhiyun
16*4882a593Smuzhiyun	|section	8
17*4882a593Smuzhiyun
18*4882a593Smuzhiyun#include "fpsp.h"
19*4882a593Smuzhiyun
|
|	round --- round result according to precision/mode
|
| Entry:
|	a0		points to the operand in the internal extended format
|	d1 (high word)	rounding precision: 0 = ext, 1 = sgl, 2 = dbl
|	d1 (low word)	rounding mode: 0 = RN, 1 = RZ, 2 = RM, 3 = RP
|	d0{31:29}	guard, round and sticky bits (extended)
|
| Exit:
|	The operand at (a0) is rounded in place; a0 itself is unchanged.
|	The result is not typed - the tag field is invalid - and it is
|	still in the internal extended format.
|	inex2/ainex are set in USER_FPSR(a6) when any of g,r,s was set.
|	d1 is returned with its words swapped (precision in the low
|	word) and a1 is used as scratch.
|	NOTE(review): the historical header stated that the g-r-s bits in
|	d0 are cleared on return; nothing in this routine clears d0, so
|	callers should not rely on its value - confirm against callers.
|
	.global	round
round:
	bsrs	ext_grs			|d0 = g,r,s for the selected precision
	tstl	%d0			|any of g,r,s set?
	beq	rnd_zero		|no: result is exact, only the bits
|					;below the precision limit have to
|					;be forced to zero (plain truncate)
rnd_cont:
|
| Inexact result: flag it, then dispatch on the rounding mode.
|
	orl	#inx2a_mask,USER_FPSR(%a6) |set inex2/ainex
	lea	mode_tab,%a1
	movel	(%a1,%d1.w*4),%a1	|d1.w = rounding mode
	jmp	(%a1)
|
| Jump table indexed by rounding mode in d1.w.  Everything reached
| through this table may assume grs != 0.
|
mode_tab:
	.long	rnd_near		|0: round to nearest
	.long	rnd_zero		|1: round toward zero
	.long	rnd_mnus		|2: round toward minus infinity
	.long	rnd_plus		|3: round toward plus infinity
|
|	ROUND PLUS INFINITY
|
|	Negative operands are truncated, positive ones get 1 added to l.
|
rnd_plus:
	swap	%d1			|d1.w = rounding precision
	tstb	LOCAL_SGN(%a0)		|check the sign
	bmi	truncate		|negative: truncate
	bras	rnd_away
|
|	ROUND MINUS INFINITY
|
|	Positive operands are truncated, negative ones get 1 added to l.
|
rnd_mnus:
	swap	%d1			|d1.w = rounding precision
	tstb	LOCAL_SGN(%a0)		|check the sign
	bpl	truncate		|positive: truncate
rnd_away:
	moveql	#-1,%d0			|d0 = $ffffffff so that the add
|					;routines never clear the l-bit
	bras	rnd_add_l
|
|	ROUND ZERO
|
|	Always truncate.  Also used by round for the exact case.
|
rnd_zero:
	swap	%d1			|d1.w = rounding precision
	bra	truncate
|
|	ROUND NEAREST
|
|	If g=1 add 1 to l; if in addition r=s=0 (a tie) the add routine
|	clears l afterwards, which gives round-to-even.
|
rnd_near:
	swap	%d1			|d1.w = rounding precision
	asll	#1,%d0			|g -> carry, d0 keeps r and s
	bcc	truncate		|g=0: truncate
rnd_add_l:
	lea	add_to_l,%a1
	movel	(%a1,%d1.w*4),%a1	|pick the add routine for the precision
	jmp	(%a1)
122*4882a593Smuzhiyun
|
|	ext_grs --- extract guard, round and sticky bits
|
| Input:	a0 =		operand in the internal extended format
|		d0 =		incoming (extended) guard,round,sticky
|		d1 =		PREC:ROUND
| Output:	d0{31:29} =	guard, round, sticky for the precision
|
| Called by the round subroutine only.  For extended precision the
| incoming d0 is returned untouched.  For single and double the
| guard and round bits are the two mantissa bits just below the
| precision limit; sticky is the OR of every mantissa bit below them
| and of the incoming d0.  All registers except d0 are kept intact:
| d1 is swapped to reach the precision and swapped back on exit,
| d2/d3 are saved on the stack.
|
ext_grs:
	swap	%d1			|d1.w = rounding precision
	tstw	%d1			|extended precision?
	beqs	end_ext_grs		|yes: d0 already holds g,r,s

sgl_or_dbl:
	moveml	%d2/%d3,-(%a7)		|make some temp registers
	cmpiw	#1,%d1
	bnes	grs_dbl
grs_sgl:
	bfextu	LOCAL_HI(%a0){#24:#2},%d3	|g-r are the 2 bits right of
|					;the sgl precision limit
	moveql	#30,%d2
	lsll	%d2,%d3			|move g-r to the top of d3
	bftst	LOCAL_HI(%a0){#26:#6}	|rest of the ms mantissa
	bnes	st_stky			|any bit set: sticky
	tstl	LOCAL_LO(%a0)		|whole ls mantissa
	bnes	st_stky			|any bit set: sticky
	bras	chk_in_grs
grs_dbl:
	bfextu	LOCAL_LO(%a0){#21:#2},%d3	|g-r are the 2 bits right of
|					;the dbl precision limit
	moveql	#30,%d2
	lsll	%d2,%d3			|move g-r to the top of d3
	bftst	LOCAL_LO(%a0){#23:#9}	|rest of the ls mantissa
	bnes	st_stky			|any bit set: sticky
chk_in_grs:
	tstl	%d0			|incoming g,r,s all clear?
	beqs	end_sd			|yes: sticky stays clear
st_stky:
	bset	#rnd_stky_bit,%d3
end_sd:
	movel	%d3,%d0			|return grs in d0
	moveml	(%a7)+,%d2/%d3		|restore scratch registers
end_ext_grs:
	swap	%d1			|restore d1 to original
	rts
177*4882a593Smuzhiyun
|*******************  Local Equates
|
| Value of one unit in the last place (the l-bit) inside the mantissa
| longword that holds it: LOCAL_HI for single, LOCAL_LO for double.
|
	.equ	ad_1_sgl,0x00000100	|  l-bit increment, sgl precision
	.equ	ad_1_dbl,0x00000800	|  l-bit increment, dbl precision


| Jump table for adding 1 to the l-bit, indexed by rounding precision

add_to_l:
	.long	add_ext			|0: extended
	.long	add_sgl			|1: single
	.long	add_dbl			|2: double
	.long	add_dbl			|3: also handled as double
|
|	ADD SINGLE
|
| Add 1 to the single precision l-bit of the operand at (a0), then
| clear every mantissa bit below single precision.
|
| Input:	a0 = operand (internal extended format)
|		d0 = zero when the l-bit must be cleared after the add
|		     (round-to-nearest tie: r=s=0), nonzero otherwise
|
| sgl_done is also entered directly through the trunct table to
| truncate without adding.
|
add_sgl:
	addl	#ad_1_sgl,LOCAL_HI(%a0)
	bccs	scc_clr			|no mantissa overflow
	roxrw  LOCAL_HI(%a0)		|carry out: rotate it back in through X,
	roxrw  LOCAL_HI+2(%a0)		|one word at a time, shifting right by 1
	addw	#0x1,LOCAL_EX(%a0)	|and incr exponent
scc_clr:
	tstl	%d0			|test for rs = 0
	bnes	sgl_done
	andiw  #0xfe00,LOCAL_HI+2(%a0)	|tie: clear the l-bit (round to even)
sgl_done:
	andil	#0xffffff00,LOCAL_HI(%a0) |truncate bits beyond sgl limit
	clrl	LOCAL_LO(%a0)		|clear the ls mantissa
	rts
207*4882a593Smuzhiyun
|
|	ADD EXTENDED
|
| Add 1 to the extended precision l-bit (bit 0 of LOCAL_LO) of the
| operand at (a0).
|
| Input:	a0 = operand (internal extended format)
|		d0 = zero when the l-bit must be cleared after the add
|		     (round-to-nearest tie: r=s=0), nonzero otherwise
|
add_ext:
	addql  #1,LOCAL_LO(%a0)		|add 1 to l-bit
	bccs	xcc_clr			|test for carry out
	addql  #1,LOCAL_HI(%a0)		|propagate carry
	bccs	xcc_clr
	roxrw  LOCAL_HI(%a0)		|mantissa wrapped to 0: rotate the
	roxrw  LOCAL_HI+2(%a0)		|carry back in through X, one word
	roxrw	LOCAL_LO(%a0)		|at a time, across all 64 bits
	roxrw	LOCAL_LO+2(%a0)
	addw	#0x1,LOCAL_EX(%a0)	|and inc exp
xcc_clr:
	tstl	%d0			|test rs = 0
	bnes	add_ext_done
	andib	#0xfe,LOCAL_LO+3(%a0)	|tie: clear the l bit (round to even)
add_ext_done:
	rts
|
|	ADD DOUBLE
|
| Add 1 to the double precision l-bit of the operand at (a0), then
| clear every mantissa bit below double precision.
|
| Input:	a0 = operand (internal extended format)
|		d0 = zero when the l-bit must be cleared after the add
|		     (round-to-nearest tie: r=s=0), nonzero otherwise
|
| dbl_done is also entered directly through the trunct table to
| truncate without adding.
|
add_dbl:
	addl	#ad_1_dbl,LOCAL_LO(%a0)
	bccs	dcc_clr
	addql	#1,LOCAL_HI(%a0)		|propagate carry
	bccs	dcc_clr
	roxrw	LOCAL_HI(%a0)		|mantissa wrapped to 0: rotate the
	roxrw	LOCAL_HI+2(%a0)		|carry back in through X, one word
	roxrw	LOCAL_LO(%a0)		|at a time, across all 64 bits
	roxrw	LOCAL_LO+2(%a0)
	addw	#0x1,LOCAL_EX(%a0)	|incr exponent
dcc_clr:
	tstl	%d0			|test for rs = 0
	bnes	dbl_done
	andiw	#0xf000,LOCAL_LO+2(%a0)	|tie: clear the l-bit (round to even)

dbl_done:
	andil	#0xfffff800,LOCAL_LO(%a0) |truncate bits beyond dbl limit
	rts
248*4882a593Smuzhiyun
| Empty return stub.  NOTE(review): nothing visible in this file
| branches to this label - confirm whether it is still needed.
error:
	rts
|
| truncate --- clear all mantissa bits below the rounding precision
|
| Input:	a0   = operand (internal extended format)
|		d1.w = rounding precision (index into trunct)
| a1 is used as scratch.  Extended precision has nothing to clear
| and returns immediately through end_rnd.
|
truncate:
	lea	trunct,%a1
	movel	(%a1,%d1.w*4),%a1	|handler for this precision
	jmp	(%a1)

end_rnd:
	rts

| Truncation handlers indexed by rounding precision

trunct:
	.long	end_rnd			|0: extended
	.long	sgl_done		|1: single
	.long	dbl_done		|2: double
	.long	dbl_done		|3: also handled as double
267*4882a593Smuzhiyun
268*4882a593Smuzhiyun|
269*4882a593Smuzhiyun|	NORMALIZE
270*4882a593Smuzhiyun|
271*4882a593Smuzhiyun| These routines (nrm_zero & nrm_set) normalize the unnorm.  This
272*4882a593Smuzhiyun| is done by shifting the mantissa left while decrementing the
273*4882a593Smuzhiyun| exponent.
274*4882a593Smuzhiyun|
275*4882a593Smuzhiyun| NRM_SET shifts and decrements until there is a 1 set in the integer
276*4882a593Smuzhiyun| bit of the mantissa (msb in d1).
277*4882a593Smuzhiyun|
278*4882a593Smuzhiyun| NRM_ZERO shifts and decrements until there is a 1 set in the integer
279*4882a593Smuzhiyun| bit of the mantissa (msb in d1) unless this would mean the exponent
280*4882a593Smuzhiyun| would go less than 0.  In that case the number becomes a denorm - the
281*4882a593Smuzhiyun| exponent (d0) is set to 0 and the mantissa (d1 & d2) is not
282*4882a593Smuzhiyun| normalized.
283*4882a593Smuzhiyun|
284*4882a593Smuzhiyun| Note that both routines have been optimized (for the worst case) and
285*4882a593Smuzhiyun| therefore do not have the easy to follow decrement/shift loop.
286*4882a593Smuzhiyun|
|	NRM_ZERO
|
|	Distance to first 1 bit in mantissa = X
|	Distance to 0 from exponent = Y
|	If Y >= 64 or X <= Y
|	Then
|	  nrm_set
|	Else
|	  shift mantissa left by Y
|	  set exponent = 0
|
| Input:
|	a0 points to the operand; its LOCAL_EX, LOCAL_HI and LOCAL_LO
|	fields are read and rewritten in place.
| Output:
|	operand at (a0) normalized, or left as a denorm with exponent 0.
|	d0/d1 are used as scratch; d2/d3/d5/d6 are saved and restored
|	on the paths that use them.
|
| NOTE(review): the historical header listed FP_SCR1 as input and
| L_SCR1{4} as output; the code below only works through a0 and
| never touches L_SCR1 - confirm before relying on either.
|
| Register shift counts are taken modulo 64 and a count of 32 or
| more leaves zero in a 32-bit register.  The two-register shift in
| "greater" is therefore only valid for Y < 32; larger Y (reachable
| only when the ms mantissa is zero) is handled by nz_long_shift,
| otherwise the whole mantissa would be shifted out and lost.
|
	.global	nrm_zero
nrm_zero:
	movew	LOCAL_EX(%a0),%d0
	cmpw   #64,%d0          |see if exp >= 64
	bmis	d0_less
	bsr	nrm_set		|exp >= 64 > X so exp won't go below 0
	rts
d0_less:
	moveml	%d2/%d3/%d5/%d6,-(%a7)
	movel	LOCAL_HI(%a0),%d1
	movel	LOCAL_LO(%a0),%d2

	bfffo	%d1{#0:#32},%d3	|get the distance to the first 1
|				;in ms mant
	beqs	ms_clr		|branch if no bits were set
	cmpw	%d3,%d0		|if X>Y
	bmis	greater		|then exp will go past 0 (neg) if
|				;it is just shifted
	bsr	nrm_set		|else exp won't go past 0
	moveml	(%a7)+,%d2/%d3/%d5/%d6
	rts
greater:
	cmpw	#32,%d0		|shift by a whole longword or more?
	bges	nz_long_shift	|yes: the 64-bit shift below can't do it
	movel	%d2,%d6		|save ls mant in d6
	lsll	%d0,%d2		|shift ls mant by count
	lsll	%d0,%d1		|shift ms mant by count
	movel	#32,%d5
	subl	%d0,%d5		|make op a denorm by shifting bits
	lsrl	%d5,%d6		|by the number in the exp, then
|				;set exp = 0.
	orl	%d6,%d1		|shift the ls mant bits into the ms mant
nz_store:
	movel	#0,%d0		|same as if decremented exp to 0
|				;while shifting
	movew	%d0,LOCAL_EX(%a0)
	movel	%d1,LOCAL_HI(%a0)
	movel	%d2,LOCAL_LO(%a0)
	moveml	(%a7)+,%d2/%d3/%d5/%d6
	rts
|
| 32 <= Y < X: the ms mantissa is zero and the first 1 of the ls
| mantissa is more than Y-32 bits down, so the ls mantissa moves up
| into the ms mantissa, shifted left by Y-32, without losing bits.
|
nz_long_shift:
	subw	#32,%d0		|d0 = Y - 32
	movel	%d2,%d1		|ls mant becomes the ms mant
	lsll	%d0,%d1		|shifted by what is left of the count
	clrl	%d2		|nothing remains in the ls mant
	bras	nz_store
ms_clr:
	bfffo	%d2{#0:#32},%d3	|check if any bits set in ls mant
	beqs	all_clr		|branch if none set
	addw	#32,%d3		|X counts the 32 zero bits of ms mant too
	cmpw	%d3,%d0		|if X>Y
	bmis	greater		|then branch
	bsr	nrm_set		|else exp won't go past 0
	moveml	(%a7)+,%d2/%d3/%d5/%d6
	rts
all_clr:
	movew	#0,LOCAL_EX(%a0)	|no mantissa bits set. Set exp = 0.
	moveml	(%a7)+,%d2/%d3/%d5/%d6
	rts
|
|	NRM_SET
|
| Shift the 64-bit mantissa of the operand at (a0) left until its
| first 1 bit sits in the top bit of LOCAL_HI, and subtract the
| shift count from LOCAL_EX.  No check is made for the exponent
| going below zero.
|
| Input:	a0 = operand (LOCAL_EX, LOCAL_HI, LOCAL_LO are used)
| Output:	operand at (a0) updated in place
|		d0/d1 are used as scratch and not restored; d6/d7 are
|		saved on the stack and restored.
|
	.global	nrm_set
nrm_set:
	movel	%d7,-(%a7)
	bfffo	LOCAL_HI(%a0){#0:#32},%d7 |find first 1 in ms mant to d7)
	beqs	lower		|branch if ms mant is all 0's

	movel	%d6,-(%a7)

	subw	%d7,LOCAL_EX(%a0)	|sub exponent by count
	movel	LOCAL_HI(%a0),%d0	|d0 has ms mant
	movel	LOCAL_LO(%a0),%d1 |d1 has ls mant

	lsll	%d7,%d0		|shift first 1 to j bit position
	movel	%d1,%d6		|copy ls mant into d6
	lsll	%d7,%d6		|shift ls mant by count
	movel	%d6,LOCAL_LO(%a0)	|store ls mant into memory
	moveql	#32,%d6
	subl	%d7,%d6		|d6 = 32 - count
	lsrl	%d6,%d1		|shift off all bits but those that will
|				;be shifted into ms mant
	orl	%d1,%d0		|shift the ls mant bits into the ms mant
	movel	%d0,LOCAL_HI(%a0)	|store ms mant into memory
	moveml	(%a7)+,%d7/%d6	|restore registers (d6 was pushed last,
|				;so it is popped first, then d7)
	rts

|
| We get here if ms mant was = 0, and we assume ls mant has bits
| set (otherwise this would have been tagged a zero not a denorm).
| Only d7 has been pushed at this point.
|
lower:
	movew	LOCAL_EX(%a0),%d0	|d0 has exponent
	movel	LOCAL_LO(%a0),%d1	|d1 has ls mant
	subw	#32,%d0		|account for ms mant being all zeros
	bfffo	%d1{#0:#32},%d7	|find first 1 in ls mant to d7)
	subw	%d7,%d0		|subtract shift count from exp
	lsll	%d7,%d1		|shift first 1 to integer bit in ms mant
	movew	%d0,LOCAL_EX(%a0)	|store exp
	movel	%d1,LOCAL_HI(%a0)	|store ms mant
	clrl	LOCAL_LO(%a0)	|clear ls mant
	movel	(%a7)+,%d7
	rts
|
|	denorm --- denormalize an intermediate result
|
|	Used by underflow.
|
| Input:
|	a0	 points to the operand to be denormalized
|		 (in the internal extended format)
|
|	d0:	 rounding precision (0 = ext, 1 = sgl, otherwise dbl)
| Output:
|	a0	 points to the denormalized result
|		 (in the internal extended format)
|
|	d0	is guard,round,sticky
|	d1	is used as scratch
|
|	inex2/ainex are set in USER_FPSR(a6) when mantissa bits were
|	shifted out.
|
| d0 comes into this routine with the rounding precision.  d1 is
| loaded with the denormalized exponent threshold for that precision
| and the operand is shifted right by dnrm_lp until its exponent
| equals the threshold.  For sgl/dbl, when the exponent is 67 or more
| below the threshold every mantissa bit would be shifted past g,r,s;
| the result is then forced to a zero mantissa with only sticky
| reflecting whether anything was lost.
|

	.global	denorm
denorm:
	btstb	#6,LOCAL_EX(%a0)	|check for exponents between $7fff-$4000
	beqs	no_sgn_ext
	bsetb	#7,LOCAL_EX(%a0)	|sign extend if it is so
no_sgn_ext:

	cmpib	#0,%d0		|if 0 then extended precision
	bnes	not_ext		|else branch

	clrl	%d1		|load d1 with ext threshold
	clrl	%d0		|clear the sticky flag
	bsr	dnrm_lp		|denormalize the number
	tstb	%d1		|check for inex
	beq	no_inex		|if clr, no inex
	bras	dnrm_inex	|if set, set inex

not_ext:
	cmpil	#1,%d0		|if 1 then single precision
	beqs	load_sgl	|else must be 2, double prec

load_dbl:
	movew	#dbl_thresh,%d1	|put copy of threshold in d1
	movel	%d1,%d0		|copy d1 into d0
	subw	LOCAL_EX(%a0),%d0	|diff = threshold - exp
	cmpw	#67,%d0		|if diff >= 67 (mant + grs bits)
	bpls	chk_stky	|then branch (all bits would be
|				; shifted off in denorm routine)
	clrl	%d0		|else clear the sticky flag
	bsr	dnrm_lp		|denormalize the number
	tstb	%d1		|check flag
	beqs	no_inex		|if clr, no inex
	bras	dnrm_inex	|if set, set inex

load_sgl:
	movew	#sgl_thresh,%d1	|put copy of threshold in d1
	movel	%d1,%d0		|copy d1 into d0
	subw	LOCAL_EX(%a0),%d0	|diff = threshold - exp
	cmpw	#67,%d0		|if diff >= 67 (mant + grs bits)
	bpls	chk_stky	|then branch (all bits would be
|				; shifted off in denorm routine)
	clrl	%d0		|else clear the sticky flag
	bsr	dnrm_lp		|denormalize the number
	tstb	%d1		|check flag
	beqs	no_inex		|if clr, no inex
	bras	dnrm_inex	|if set, set inex

chk_stky:
	tstl	LOCAL_HI(%a0)	|check for any bits set
	bnes	set_stky
	tstl	LOCAL_LO(%a0)	|check for any bits set
	bnes	set_stky
	clrl	%d0		|mantissa is zero, nothing is lost:
|				; return g,r,s = 0.  d0 still holds
|				; the threshold/exponent difference
|				; here and must not be handed back
|				; as guard,round,sticky.
	bras	clr_mant
set_stky:
	orl	#inx2a_mask,USER_FPSR(%a6) |set inex2/ainex
	movel	#0x20000000,%d0	|set sticky bit in return value
clr_mant:
	movew	%d1,LOCAL_EX(%a0)		|load exp with threshold
	movel	#0,LOCAL_HI(%a0)	|clear the ms mantissa
	movel	#0,LOCAL_LO(%a0)		|clear the ls mantissa
	rts
dnrm_inex:
	orl	#inx2a_mask,USER_FPSR(%a6) |set inex2/ainex
no_inex:
	rts
483*4882a593Smuzhiyun
|
|	dnrm_lp --- denormalize exponent/mantissa to specified threshold
|
| Input:
|	a0		points to the operand to be denormalized
|	d0{31:29}	initial guard,round,sticky
|	d1{15:0}	denormalization threshold
| Output:
|	a0		points to the denormalized operand
|	d0{31:29}	final guard,round,sticky
|	d1.b		inexact flag:  all ones means inexact result
|
| The LOCAL_LO and LOCAL_GRS parts of the value are copied to FP_SCR2
| so that bfext can be used to extract the new low part of the mantissa.
| Dnrm_lp can be called with a0 pointing to ETEMP or WBTEMP and there
| is no LOCAL_GRS scratch word following it on the fsave frame.
|
| When the E3 bit of E_BYTE(a6) is set, the incoming d0 is ignored and
| the initial g,r,s are taken from WBTEMP_GRS(a6) instead.
|
| The shift amount is threshold - exponent; it selects one of no_lp
| (<= 0), case_1 (1..31), case_2 (32..63) or case_3 (>= 64).
|
	.global	dnrm_lp
dnrm_lp:
	movel	%d2,-(%sp)		|save d2 for temp use
	btstb	#E3,E_BYTE(%a6)		|test for type E3 exception
	beqs	not_E3			|not type E3 exception
	bfextu	WBTEMP_GRS(%a6){#6:#3},%d2	|extract guard,round, sticky  bit
	movel	#29,%d0
	lsll	%d0,%d2			|shift g,r,s to their positions
	movel	%d2,%d0			|d0{31:29} = g,r,s from WBTEMP
not_E3:
	movel	(%sp)+,%d2		|restore d2
	movel	LOCAL_LO(%a0),FP_SCR2+LOCAL_LO(%a6)	|scratch copy of ls mant
	movel	%d0,FP_SCR2+LOCAL_GRS(%a6)	|followed by the initial g,r,s
	movel	%d1,%d0			|copy the denorm threshold
	subw	LOCAL_EX(%a0),%d1		|d1 = threshold - uns exponent
	bles	no_lp			|d1 <= 0
	cmpw	#32,%d1
	blts	case_1			|0 < d1 < 32
	cmpw	#64,%d1
	blts	case_2			|32 <= d1 < 64
	bra	case_3			|d1 >= 64
|
| No denormalization necessary: the exponent is already at or above
| the threshold.  The operand is left untouched.
|
no_lp:
	clrb	%d1			|set no inex2 reported
	movel	FP_SCR2+LOCAL_GRS(%a6),%d0	|restore original g,r,s
	rts
|
| case (0<d1<32)
|
| Shift the mantissa right by d1 bits.  On entry d0 = threshold and
| d1 = shift count.  Each new longword is taken with a 32-bit field
| extract that starts (32 - d1) bits into the longword above it, so
| it straddles two neighbouring longwords.  The bits that fall out
| of LOCAL_LO land in the top of d0 as the new g,r,s; anything below
| them, and any original g,r,s, is folded into sticky.
|
case_1:
	movel	%d2,-(%sp)
	movew	%d0,LOCAL_EX(%a0)		|exponent = denorm threshold
	movel	#32,%d0
	subw	%d1,%d0			|d0 = 32 - d1
	bfextu	LOCAL_EX(%a0){%d0:#32},%d2	|last d1 bits of the first
|					;longword + top of LOCAL_HI
	bfextu	%d2{%d1:%d0},%d2		|d2 = new LOCAL_HI (drops the bits
|					;that came from the first longword)
	bfextu	LOCAL_HI(%a0){%d0:#32},%d1	|d1 = new LOCAL_LO
	bfextu	FP_SCR2+LOCAL_LO(%a6){%d0:#32},%d0	|d0 = new G,R,S
	movel	%d2,LOCAL_HI(%a0)		|store new LOCAL_HI
	movel	%d1,LOCAL_LO(%a0)		|store new LOCAL_LO
	clrb	%d1			|assume exact
	bftst	%d0{#2:#30}		|anything below the new G and R?
	beqs	c1nstky
	bsetl	#rnd_stky_bit,%d0	|yes: set sticky
	st	%d1			|and report inexact
c1nstky:
	movel	FP_SCR2+LOCAL_GRS(%a6),%d2	|restore original g,r,s
	andil	#0xe0000000,%d2		|clear all but G,R,S
	tstl	%d2			|test if original G,R,S are clear
	beqs	grs_clear
	orl	#0x20000000,%d0		|set sticky bit in d0
grs_clear:
	andil	#0xe0000000,%d0		|clear all but G,R,S
	movel	(%sp)+,%d2
	rts
|
| case (32<=d1<64)
|
| Shift the mantissa right by d1 bits where at least one whole
| longword is shifted: LOCAL_HI becomes 0, the new LOCAL_LO comes from
| the old LOCAL_HI and the new g,r,s from the bits leaving the old
| LOCAL_HI.  On entry d0 = threshold and d1 = shift count.  Any
| remaining lower bit, the old LOCAL_LO bits tested below, or any
| original g,r,s sets sticky.
|
case_2:
	movel	%d2,-(%sp)
	movew	%d0,LOCAL_EX(%a0)		|unsigned exponent = threshold
	subw	#32,%d1			|d1 now between 0 and 31
	movel	#32,%d0
	subw	%d1,%d0			|d0 = 32 - d1
	bfextu	LOCAL_EX(%a0){%d0:#32},%d2	|last d1 bits of the first
|					;longword + top of LOCAL_HI
	bfextu	%d2{%d1:%d0},%d2		|d2 = new LOCAL_LO
	bfextu	LOCAL_HI(%a0){%d0:#32},%d1	|d1 = new G,R,S
	bftst	%d1{#2:#30}		|anything below the new G and R?
	bnes	c2_sstky		|bra if sticky bit to be set
	bftst	FP_SCR2+LOCAL_LO(%a6){%d0:#32}	|bits from the saved ls mant
	bnes	c2_sstky		|bra if sticky bit to be set
	movel	%d1,%d0
	clrb	%d1			|exact
	bras	end_c2
c2_sstky:
	movel	%d1,%d0
	bsetl	#rnd_stky_bit,%d0
	st	%d1			|report inexact
end_c2:
	clrl	LOCAL_HI(%a0)		|store LOCAL_HI = 0
	movel	%d2,LOCAL_LO(%a0)		|store LOCAL_LO
	movel	FP_SCR2+LOCAL_GRS(%a6),%d2	|restore original g,r,s
	andil	#0xe0000000,%d2		|clear all but G,R,S
	tstl	%d2			|test if original G,R,S are clear
	beqs	clear_grs
	orl	#0x20000000,%d0		|set sticky bit in d0
clear_grs:
	andil	#0xe0000000,%d0		|get rid of all but G,R,S
	movel	(%sp)+,%d2
	rts
|
| d1 >= 64 Force the exponent to be the denorm threshold with the
| correct sign.
|
| On entry d0 = threshold and d1 = shift count.  The whole mantissa
| is shifted out, so LOCAL_HI and LOCAL_LO always end up zero.  Only
| shifts of exactly 64 or 65 leave mantissa bits in the G/R
| positions; anything larger returns sticky alone.
|
case_3:
	movew	%d0,LOCAL_EX(%a0)
	tstw	LOCAL_SGN(%a0)
	bges	c3con
c3neg:
	orl	#0x80000000,LOCAL_EX(%a0)	|sign word negative: set the top
|					;bit of the longword at LOCAL_EX
c3con:
	cmpw	#64,%d1
	beqs	sixty_four
	cmpw	#65,%d1
	beqs	sixty_five
|
| Shift value is out of range.  Set d1 for inex2 flag and
| return a zero with the given threshold.
|
	clrl	LOCAL_HI(%a0)
	clrl	LOCAL_LO(%a0)
	movel	#0x20000000,%d0		|sticky only
	st	%d1
	rts

| Shift by exactly 64: the top two bits of LOCAL_HI become G and R,
| the other 30 bits (d1) take part in the sticky test.
sixty_four:
	movel	LOCAL_HI(%a0),%d0
	bfextu	%d0{#2:#30},%d1
	andil	#0xc0000000,%d0
	bras	c3com

| Shift by exactly 65: G is 0, the top bit of LOCAL_HI becomes R,
| the other 31 bits (d1) take part in the sticky test.
sixty_five:
	movel	LOCAL_HI(%a0),%d0
	bfextu	%d0{#1:#31},%d1
	andil	#0x80000000,%d0
	lsrl	#1,%d0			|shift high bit into R bit

| Sticky/inexact if any remaining LOCAL_HI bit, any LOCAL_LO bit or
| the top byte of the saved original g,r,s is nonzero.
c3com:
	tstl	%d1
	bnes	c3ssticky
	tstl	LOCAL_LO(%a0)
	bnes	c3ssticky
	tstb	FP_SCR2+LOCAL_GRS(%a6)
	bnes	c3ssticky
	clrb	%d1			|exact
	bras	c3end

c3ssticky:
	bsetl	#rnd_stky_bit,%d0
	st	%d1			|report inexact
c3end:
	clrl	LOCAL_HI(%a0)
	clrl	LOCAL_LO(%a0)
	rts
647*4882a593Smuzhiyun
648*4882a593Smuzhiyun	|end
649