xref: /OK3568_Linux_fs/kernel/arch/m68k/fpsp040/x_unfl.S (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun|
2*4882a593Smuzhiyun|	x_unfl.sa 3.4 7/1/91
3*4882a593Smuzhiyun|
4*4882a593Smuzhiyun|	fpsp_unfl --- FPSP handler for underflow exception
5*4882a593Smuzhiyun|
6*4882a593Smuzhiyun| Trap disabled results
7*4882a593Smuzhiyun|	For 881/2 compatibility, sw must denormalize the intermediate
8*4882a593Smuzhiyun| result, then store the result.  Denormalization is accomplished
9*4882a593Smuzhiyun| by taking the intermediate result (which is always normalized) and
10*4882a593Smuzhiyun| shifting the mantissa right while incrementing the exponent until
11*4882a593Smuzhiyun| it is equal to the denormalized exponent for the destination
12*4882a593Smuzhiyun| format.  After denormalization, the result is rounded to the
13*4882a593Smuzhiyun| destination format.
14*4882a593Smuzhiyun|
15*4882a593Smuzhiyun| Trap enabled results
16*4882a593Smuzhiyun|	All trap disabled code applies.	In addition the exceptional
17*4882a593Smuzhiyun| operand needs to be made available to the user with a bias of $6000
18*4882a593Smuzhiyun| added to the exponent.
19*4882a593Smuzhiyun|
20*4882a593Smuzhiyun
21*4882a593Smuzhiyun|		Copyright (C) Motorola, Inc. 1990
22*4882a593Smuzhiyun|			All Rights Reserved
23*4882a593Smuzhiyun|
24*4882a593Smuzhiyun|       For details on the license for this file, please see the
25*4882a593Smuzhiyun|       file, README, in this same directory.
26*4882a593Smuzhiyun
27*4882a593SmuzhiyunX_UNFL:	|idnt    2,1 | Motorola 040 Floating Point Software Package
28*4882a593Smuzhiyun
29*4882a593Smuzhiyun	|section	8
30*4882a593Smuzhiyun
31*4882a593Smuzhiyun#include "fpsp.h"
32*4882a593Smuzhiyun
33*4882a593Smuzhiyun	|xref	denorm
34*4882a593Smuzhiyun	|xref	round
35*4882a593Smuzhiyun	|xref	store
36*4882a593Smuzhiyun	|xref	g_rndpr
37*4882a593Smuzhiyun	|xref	g_opcls
38*4882a593Smuzhiyun	|xref	g_dfmtou
39*4882a593Smuzhiyun	|xref	real_unfl
40*4882a593Smuzhiyun	|xref	real_inex
41*4882a593Smuzhiyun	|xref	fpsp_done
42*4882a593Smuzhiyun	|xref	b1238_fix
43*4882a593Smuzhiyun
|
| fpsp_unfl --- entry point of the underflow exception handler
|
| Sets up the %a6 work frame, saves the FPU state frame and the
| registers this handler uses, then calls unf_res to denormalize,
| round and store the intermediate result.  When the underflow trap
| is enabled in the FPCR the saved state is restored and control is
| passed to real_unfl; otherwise execution continues at ck_inex.
|
44*4882a593Smuzhiyun	.global	fpsp_unfl
45*4882a593Smuzhiyunfpsp_unfl:
46*4882a593Smuzhiyun	link		%a6,#-LOCAL_SIZE
47*4882a593Smuzhiyun	fsave		-(%a7)
| Save d0-d1/a0-a1, fp0-fp3 and the FPU control registers in the
| USER_* slots of the frame; every exit path reloads them from there.
48*4882a593Smuzhiyun	moveml		%d0-%d1/%a0-%a1,USER_DA(%a6)
49*4882a593Smuzhiyun	fmovemx	%fp0-%fp3,USER_FP0(%a6)
50*4882a593Smuzhiyun	fmoveml	%fpcr/%fpsr/%fpiar,USER_FPCR(%a6)
51*4882a593Smuzhiyun
52*4882a593Smuzhiyun|
53*4882a593Smuzhiyun	bsrl		unf_res	|denormalize, round & store interm op
54*4882a593Smuzhiyun|
55*4882a593Smuzhiyun| If underflow exceptions are not enabled, check for inexact
56*4882a593Smuzhiyun| exception
57*4882a593Smuzhiyun|
58*4882a593Smuzhiyun	btstb		#unfl_bit,FPCR_ENABLE(%a6)
59*4882a593Smuzhiyun	beqs		ck_inex
60*4882a593Smuzhiyun
| Underflow trap is enabled.  The fixup below is only performed when
| E3 is set in E_BYTE; btst leaves Z set when the bit is clear, so the
| beq skips it in that case.
61*4882a593Smuzhiyun	btstb		#E3,E_BYTE(%a6)
62*4882a593Smuzhiyun	beqs		no_e3_1
63*4882a593Smuzhiyun|
64*4882a593Smuzhiyun| Clear dirty bit on dest register in the frame before branching
65*4882a593Smuzhiyun| to b1238_fix.
66*4882a593Smuzhiyun|
67*4882a593Smuzhiyun	bfextu		CMDREG3B(%a6){#6:#3},%d0	|get dest reg no
68*4882a593Smuzhiyun	bclrb		%d0,FPR_DIRTY_BITS(%a6)	|clr dest dirty bit
69*4882a593Smuzhiyun	bsrl		b1238_fix		|test for bug1238 case
| Copy the saved FPSR into FPSR_SHADOW and OR sx_mask into the long
| word at E_BYTE.
| NOTE(review): presumably this prepares the state frame for the
| frestore below; confirm against fpsp.h and b1238_fix.
70*4882a593Smuzhiyun	movel		USER_FPSR(%a6),FPSR_SHADOW(%a6)
71*4882a593Smuzhiyun	orl		#sx_mask,E_BYTE(%a6)
72*4882a593Smuzhiyunno_e3_1:
| Undo the entry sequence in reverse order (registers, FPU state
| frame, work frame) and hand over to real_unfl.
73*4882a593Smuzhiyun	moveml		USER_DA(%a6),%d0-%d1/%a0-%a1
74*4882a593Smuzhiyun	fmovemx	USER_FP0(%a6),%fp0-%fp3
75*4882a593Smuzhiyun	fmoveml	USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
76*4882a593Smuzhiyun	frestore	(%a7)+
77*4882a593Smuzhiyun	unlk		%a6
78*4882a593Smuzhiyun	bral		real_unfl
79*4882a593Smuzhiyun|
80*4882a593Smuzhiyun| It is possible to have either inex2 or inex1 exceptions with the
81*4882a593Smuzhiyun| unfl.  If the inex enable bit is set in the FPCR, and either
82*4882a593Smuzhiyun| inex2 or inex1 occurred, we must clean up and branch to the
83*4882a593Smuzhiyun| real inex handler.
84*4882a593Smuzhiyun|
| Reached from fpsp_unfl when the underflow trap is disabled.  %d0 is
| scratch here: it receives the enable byte ANDed with the reported
| exception byte, masked to the two low-order bits.  A zero result
| means no enabled inexact exception was reported and the handler
| finishes at unfl_done.
|
85*4882a593Smuzhiyunck_inex:
86*4882a593Smuzhiyun	moveb		FPCR_ENABLE(%a6),%d0
87*4882a593Smuzhiyun	andb		FPSR_EXCEPT(%a6),%d0
88*4882a593Smuzhiyun	andib		#0x3,%d0
89*4882a593Smuzhiyun	beqs		unfl_done
90*4882a593Smuzhiyun
91*4882a593Smuzhiyun|
92*4882a593Smuzhiyun| Inexact enabled and reported, and we must take an inexact exception
93*4882a593Smuzhiyun|
94*4882a593Smuzhiyuntake_inex:
| The fixup below is only performed when E3 is set in E_BYTE.
95*4882a593Smuzhiyun	btstb		#E3,E_BYTE(%a6)
96*4882a593Smuzhiyun	beqs		no_e3_2
97*4882a593Smuzhiyun|
98*4882a593Smuzhiyun| Clear dirty bit on dest register in the frame before branching
99*4882a593Smuzhiyun| to b1238_fix.
100*4882a593Smuzhiyun|
101*4882a593Smuzhiyun	bfextu		CMDREG3B(%a6){#6:#3},%d0	|get dest reg no
102*4882a593Smuzhiyun	bclrb		%d0,FPR_DIRTY_BITS(%a6)	|clr dest dirty bit
103*4882a593Smuzhiyun	bsrl		b1238_fix		|test for bug1238 case
| Copy the saved FPSR into FPSR_SHADOW and OR sx_mask into the long
| word at E_BYTE.
104*4882a593Smuzhiyun	movel		USER_FPSR(%a6),FPSR_SHADOW(%a6)
105*4882a593Smuzhiyun	orl		#sx_mask,E_BYTE(%a6)
106*4882a593Smuzhiyunno_e3_2:
| Write INEX_VEC into the byte at EXC_VEC+1.
| NOTE(review): presumably this retargets the exception stack frame
| to the inexact vector before entering real_inex; confirm against
| the EXC_VEC definition in fpsp.h.
107*4882a593Smuzhiyun	moveb		#INEX_VEC,EXC_VEC+1(%a6)
| Reload the saved registers, restore the FPU state frame, drop the
| work frame and hand over to real_inex.
108*4882a593Smuzhiyun	moveml         USER_DA(%a6),%d0-%d1/%a0-%a1
109*4882a593Smuzhiyun	fmovemx        USER_FP0(%a6),%fp0-%fp3
110*4882a593Smuzhiyun	fmoveml        USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
111*4882a593Smuzhiyun	frestore        (%a7)+
112*4882a593Smuzhiyun	unlk            %a6
113*4882a593Smuzhiyun	bral		real_inex
114*4882a593Smuzhiyun
|
| unfl_done --- exit taken when neither the underflow trap nor an
| enabled inexact exception has to be delivered.  Both paths below
| reload the saved registers and leave through fpsp_done.
|
115*4882a593Smuzhiyununfl_done:
| bclr tests the bit before clearing it: Z is set when E3 was already
| clear, so the beq is taken when E3 was NOT set (the e1_set path).
116*4882a593Smuzhiyun	bclrb		#E3,E_BYTE(%a6)
117*4882a593Smuzhiyun	beqs		e1_set		|E3 was clear, take the E1 exit
118*4882a593Smuzhiyun|
119*4882a593Smuzhiyun| Clear dirty bit on dest register in the frame before branching
120*4882a593Smuzhiyun| to b1238_fix.
121*4882a593Smuzhiyun|
122*4882a593Smuzhiyun	bfextu		CMDREG3B(%a6){#6:#3},%d0		|get dest reg no
123*4882a593Smuzhiyun	bclrb		%d0,FPR_DIRTY_BITS(%a6)	|clr dest dirty bit
124*4882a593Smuzhiyun	bsrl		b1238_fix		|test for bug1238 case
| Copy the saved FPSR into FPSR_SHADOW and OR sx_mask into the long
| word at E_BYTE, then restore registers and the FPU state frame.
125*4882a593Smuzhiyun	movel		USER_FPSR(%a6),FPSR_SHADOW(%a6)
126*4882a593Smuzhiyun	orl		#sx_mask,E_BYTE(%a6)
127*4882a593Smuzhiyun	moveml		USER_DA(%a6),%d0-%d1/%a0-%a1
128*4882a593Smuzhiyun	fmovemx	USER_FP0(%a6),%fp0-%fp3
129*4882a593Smuzhiyun	fmoveml	USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
130*4882a593Smuzhiyun	frestore	(%a7)+
131*4882a593Smuzhiyun	unlk		%a6
132*4882a593Smuzhiyun	bral		fpsp_done
| E3 was clear.  The registers are reloaded but no frestore is
| executed on this path; unlk reloads the stack pointer from %a6 and
| thereby releases the stack space of the state frame pushed by the
| fsave at entry.
| NOTE(review): the state frame is discarded rather than restored
| here; confirm that this is intended for the E1 case.
133*4882a593Smuzhiyune1_set:
134*4882a593Smuzhiyun	moveml		USER_DA(%a6),%d0-%d1/%a0-%a1
135*4882a593Smuzhiyun	fmovemx	USER_FP0(%a6),%fp0-%fp3
136*4882a593Smuzhiyun	fmoveml	USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
137*4882a593Smuzhiyun	unlk		%a6
138*4882a593Smuzhiyun	bral		fpsp_done
139*4882a593Smuzhiyun|
140*4882a593Smuzhiyun|	unf_res --- underflow result calculation
141*4882a593Smuzhiyun|
| Denormalizes the exceptional operand, rounds it and passes the
| result to store.  All data is addressed through the %a6 work
| frame, so the frame must be set up by the caller (fpsp_unfl calls
| this right after saving the user state).
|
| Outline:
|	1. g_rndpr supplies the rounding precision, which is parked on
|	   the stack as the upper word of a long for the later merge
|	   into %d1.
|	2. %a0 is pointed at WBTEMP (E3 set) or FPTEMP (E3 clear).
|	3. The operand sign is moved from LOCAL_EX to LOCAL_SGN.
|	4. denorm, round and store are called, in that order.
|	5. For opclasses other than 3 the z and neg bits of FPSR_CC
|	   may be set from the result.
|	6. aunfl is set in FPSR_AEXCEPT when inex2 is set in
|	   FPSR_EXCEPT.
|
| Registers written directly by this routine: %d0, %d1, %a0.  What
| the called helpers clobber is not visible from this file.
|
142*4882a593Smuzhiyununf_res:
143*4882a593Smuzhiyun	bsrl		g_rndpr		|returns RND_PREC in d0 0=ext,
144*4882a593Smuzhiyun|					;1=sgl, 2=dbl
145*4882a593Smuzhiyun|					;we need the RND_PREC in the
146*4882a593Smuzhiyun|					;upper word for round
| Two word pushes build one long on the stack: the zero pushed first
| ends up as the low word, RND_PREC pushed second as the high word.
146*4882a593Smuzhiyun	movew		#0,-(%a7)
147*4882a593Smuzhiyun	movew		%d0,-(%a7)	|copy RND_PREC to stack
148*4882a593Smuzhiyun|
149*4882a593Smuzhiyun|
150*4882a593Smuzhiyun| If the exception bit set is E3, the exceptional operand from the
151*4882a593Smuzhiyun| fpu is in WBTEMP; else it is in FPTEMP.
152*4882a593Smuzhiyun|
153*4882a593Smuzhiyun	btstb		#E3,E_BYTE(%a6)
154*4882a593Smuzhiyun	beqs		unf_E1
155*4882a593Smuzhiyununf_E3:
156*4882a593Smuzhiyun	lea		WBTEMP(%a6),%a0	|a0 now points to operand
157*4882a593Smuzhiyun|
158*4882a593Smuzhiyun| Test for fsgldiv and fsglmul.  If the inst was one of these, then
159*4882a593Smuzhiyun| force the precision to extended for the denorm routine (d0 = 0)
160*4882a593Smuzhiyun| and to single for the round routine (saved precision word = 1).
161*4882a593Smuzhiyun|
162*4882a593Smuzhiyun	movew		CMDREG3B(%a6),%d1	|check for fsgldiv or fsglmul
163*4882a593Smuzhiyun	andiw		#0x7f,%d1
164*4882a593Smuzhiyun	cmpiw		#0x30,%d1		|check for sgldiv
165*4882a593Smuzhiyun	beqs		unf_sgl
166*4882a593Smuzhiyun	cmpiw		#0x33,%d1		|check for sglmul
167*4882a593Smuzhiyun	bnes		unf_cont	|if not, use fpcr prec in round
168*4882a593Smuzhiyununf_sgl:
169*4882a593Smuzhiyun	clrl		%d0
| (%a7) is the precision word pushed above, i.e. the upper word of
| the long that is merged into %d1 before the call to round.
170*4882a593Smuzhiyun	movew		#0x1,(%a7)	|override g_rndpr precision
171*4882a593Smuzhiyun|					;force single
172*4882a593Smuzhiyun	bras		unf_cont
173*4882a593Smuzhiyununf_E1:
174*4882a593Smuzhiyun	lea		FPTEMP(%a6),%a0	|a0 now points to operand
175*4882a593Smuzhiyununf_cont:
| bclr sets Z from the old value of the bit, so the sne writes $ff to
| LOCAL_SGN when the operand was negative and $00 otherwise.
176*4882a593Smuzhiyun	bclrb		#sign_bit,LOCAL_EX(%a0)	|clear sign bit
177*4882a593Smuzhiyun	sne		LOCAL_SGN(%a0)		|store sign
178*4882a593Smuzhiyun
179*4882a593Smuzhiyun	bsrl		denorm		|returns denorm, a0 points to it
180*4882a593Smuzhiyun|
181*4882a593Smuzhiyun| WARNING:
182*4882a593Smuzhiyun|				;d0 has guard,round sticky bit
183*4882a593Smuzhiyun|				;make sure that it is not corrupted
184*4882a593Smuzhiyun|				;before it reaches the round subroutine
185*4882a593Smuzhiyun|				;also ensure that a0 isn't corrupted
186*4882a593Smuzhiyun
187*4882a593Smuzhiyun|
188*4882a593Smuzhiyun| Set up d1 for round subroutine d1 contains the PREC/MODE
189*4882a593Smuzhiyun| information respectively on upper/lower register halves.
190*4882a593Smuzhiyun|
191*4882a593Smuzhiyun	bfextu		FPCR_MODE(%a6){#2:#2},%d1	|get mode from FPCR
192*4882a593Smuzhiyun|						;mode in lower d1
| The add pops the long built at entry (precision in the upper word,
| zero in the lower word), which also rebalances the stack.
193*4882a593Smuzhiyun	addl		(%a7)+,%d1		|merge PREC/MODE
194*4882a593Smuzhiyun|
195*4882a593Smuzhiyun| WARNING: a0 and d0 are assumed to be intact between the denorm and
196*4882a593Smuzhiyun| round subroutines. All code between these two subroutines
197*4882a593Smuzhiyun| must not corrupt a0 and d0.
198*4882a593Smuzhiyun|
199*4882a593Smuzhiyun|
200*4882a593Smuzhiyun| Perform Round
201*4882a593Smuzhiyun|	Input:		a0 points to input operand
202*4882a593Smuzhiyun|			d0{31:29} has guard, round, sticky
203*4882a593Smuzhiyun|			d1{01:00} has rounding mode
204*4882a593Smuzhiyun|			d1{17:16} has rounding precision
205*4882a593Smuzhiyun|	Output:		a0 points to rounded operand
206*4882a593Smuzhiyun|
207*4882a593Smuzhiyun
208*4882a593Smuzhiyun	bsrl		round		|returns rounded denorm at (a0)
209*4882a593Smuzhiyun|
210*4882a593Smuzhiyun| Differentiate between store to memory vs. store to register
211*4882a593Smuzhiyun|
212*4882a593Smuzhiyununf_store:
213*4882a593Smuzhiyun	bsrl		g_opcls		|returns opclass in d0{2:0}
214*4882a593Smuzhiyun	cmpib		#0x3,%d0
215*4882a593Smuzhiyun	bnes		not_opc011
216*4882a593Smuzhiyun|
217*4882a593Smuzhiyun| At this point, a store to memory is pending
218*4882a593Smuzhiyun|
219*4882a593Smuzhiyunopc011:
| g_dfmtou returns the destination format in d0; a zero byte is
| treated as extended below.
220*4882a593Smuzhiyun	bsrl		g_dfmtou
221*4882a593Smuzhiyun	tstb		%d0
222*4882a593Smuzhiyun	beqs		ext_opc011	|If extended, do not subtract
223*4882a593Smuzhiyun|				;If destination format is sgl/dbl,
| The bmi below is taken when the top bit of the first mantissa byte
| is set, i.e. rounding produced a normalized value.
224*4882a593Smuzhiyun	tstb		LOCAL_HI(%a0)	|If rounded result is normal,don't
225*4882a593Smuzhiyun|					;subtract
226*4882a593Smuzhiyun	bmis		ext_opc011
227*4882a593Smuzhiyun	subqw		#1,LOCAL_EX(%a0)	|account for denorm bias vs.
228*4882a593Smuzhiyun|				;normalized bias
229*4882a593Smuzhiyun|				;          normalized   denormalized
230*4882a593Smuzhiyun|				;single       $7f           $7e
231*4882a593Smuzhiyun|				;double       $3ff          $3fe
232*4882a593Smuzhiyun|
233*4882a593Smuzhiyunext_opc011:
234*4882a593Smuzhiyun	bsrl		store		|stores to memory
235*4882a593Smuzhiyun	bras		unf_done	|finish up
236*4882a593Smuzhiyun
237*4882a593Smuzhiyun|
238*4882a593Smuzhiyun| At this point, a store to a float register is pending
239*4882a593Smuzhiyun|
240*4882a593Smuzhiyunnot_opc011:
241*4882a593Smuzhiyun	bsrl		store	|stores to float register
242*4882a593Smuzhiyun|				;a0 is not corrupted on a store to a
243*4882a593Smuzhiyun|				;float register.
244*4882a593Smuzhiyun|
245*4882a593Smuzhiyun| Set the condition codes according to result
246*4882a593Smuzhiyun|
| z is set only when both mantissa long words are zero; the bits are
| only ever set here, never cleared.
247*4882a593Smuzhiyun	tstl		LOCAL_HI(%a0)	|check upper mantissa
248*4882a593Smuzhiyun	bnes		ck_sgn
249*4882a593Smuzhiyun	tstl		LOCAL_LO(%a0)	|check lower mantissa
250*4882a593Smuzhiyun	bnes		ck_sgn
251*4882a593Smuzhiyun	bsetb		#z_bit,FPSR_CC(%a6) |set condition codes if zero
252*4882a593Smuzhiyunck_sgn:
253*4882a593Smuzhiyun	btstb		#sign_bit,LOCAL_EX(%a0)	|check the sign bit
254*4882a593Smuzhiyun	beqs		unf_done
255*4882a593Smuzhiyun	bsetb		#neg_bit,FPSR_CC(%a6)
256*4882a593Smuzhiyun
257*4882a593Smuzhiyun|
258*4882a593Smuzhiyun| Finish.  Accumulate aunfl only when the result was also inexact
259*4882a593Smuzhiyun| (inex2 set in FPSR_EXCEPT).
260*4882a593Smuzhiyununf_done:
261*4882a593Smuzhiyun	btstb		#inex2_bit,FPSR_EXCEPT(%a6)
262*4882a593Smuzhiyun	beqs		no_aunfl
263*4882a593Smuzhiyun	bsetb		#aunfl_bit,FPSR_AEXCEPT(%a6)
264*4882a593Smuzhiyunno_aunfl:
265*4882a593Smuzhiyun	rts
267*4882a593Smuzhiyun
268*4882a593Smuzhiyun	|end
269