/* xref: /OK3568_Linux_fs/kernel/arch/x86/math-emu/wm_shrx.S (revision 4882a59341e53eb6f0b4789bf948001014eff981) */
/* SPDX-License-Identifier: GPL-2.0 */
	.file	"wm_shrx.S"
/*---------------------------------------------------------------------------+
 |  wm_shrx.S                                                                |
 |                                                                           |
 | 64 bit right shift functions                                              |
 |                                                                           |
 | Copyright (C) 1992,1995                                                   |
 |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
 |                       Australia.  E-mail billm@jacobi.maths.monash.edu.au |
 |                                                                           |
 | Call from C as:                                                           |
 |   unsigned FPU_shrx(void *arg1, unsigned arg2)                            |
 | and                                                                       |
 |   unsigned FPU_shrxs(void *arg1, unsigned arg2)                           |
 |                                                                           |
 +---------------------------------------------------------------------------*/

#include "fpu_emu.h"
.text
/*---------------------------------------------------------------------------+
 |   unsigned FPU_shrx(void *arg1, unsigned arg2)                            |
 |                                                                           |
 |   Extended shift right function.                                          |
 |   Fastest for small shifts.                                               |
 |   Shifts the 64 bit quantity pointed to by the first arg (arg1)           |
 |   right by the number of bits specified by the second arg (arg2).        |
 |   Forms a 96 bit quantity from the 64 bit arg and eax:                    |
 |                [  64 bit arg ][ eax ]                                     |
 |            shift right  --------->                                        |
 |   The eax register is initialized to 0 before the shifting.               |
 |   Results returned in the 64 bit arg and eax.                             |
 |                                                                           |
 |   Register use:  esi -> the 64 bit arg (two 32-bit words, little endian), |
 |                  ecx = shift count, eax = 32-bit extension (result),      |
 |                  ebx/edx = scratch (ebx saved/restored where used).       |
 +---------------------------------------------------------------------------*/

SYM_FUNC_START(FPU_shrx)
	push	%ebp
	movl	%esp,%ebp
	pushl	%esi
	movl	PARAM2,%ecx	/* ecx = arg2, the shift count */
	movl	PARAM1,%esi	/* esi = arg1 -> 64 bit quantity */
	cmpl	$32,%ecx	/* shrd only works for 0..31 bits */
	jnc	L_more_than_31

/* Shift by [0..31] bits */
	pushl	%ebx
	movl	(%esi),%ebx	/* lsl */
	movl	4(%esi),%edx	/* msl */
	xorl	%eax,%eax	/* extension, zeroed before shifting */
	shrd	%cl,%ebx,%eax	/* low bits of lsl fall into the extension */
	shrd	%cl,%edx,%ebx
	shr	%cl,%edx
	movl	%ebx,(%esi)
	movl	%edx,4(%esi)
	popl	%ebx
	popl	%esi
	leave
	RET

L_more_than_31:
	cmpl	$64,%ecx
	jnc	L_more_than_63

/* Shift by [32..63] bits: lsl lands entirely in the extension */
	subb	$32,%cl
	movl	(%esi),%eax	/* lsl */
	movl	4(%esi),%edx	/* msl */
	shrd	%cl,%edx,%eax	/* eax = extension word of the result */
	shr	%cl,%edx
	movl	%edx,(%esi)
	movl	$0,4(%esi)
	popl	%esi
	leave
	RET

L_more_than_63:
	cmpl	$96,%ecx
	jnc	L_more_than_95

/* Shift by [64..95] bits: only (part of) msl survives, in the extension */
	subb	$64,%cl
	movl	4(%esi),%eax	/* msl */
	shr	%cl,%eax	/* eax = extension word of the result */
	xorl	%edx,%edx
	movl	%edx,(%esi)
	movl	%edx,4(%esi)
	popl	%esi
	leave
	RET

L_more_than_95:
/* Shift by [96..inf) bits: everything has been shifted out */
	xorl	%eax,%eax
	movl	%eax,(%esi)
	movl	%eax,4(%esi)
	popl	%esi
	leave
	RET
SYM_FUNC_END(FPU_shrx)

/*---------------------------------------------------------------------------+
 |   unsigned FPU_shrxs(void *arg1, unsigned arg2)                           |
 |                                                                           |
 |   Extended shift right function (optimized for small floating point      |
 |   integers).                                                              |
 |   Shifts the 64 bit quantity pointed to by the first arg (arg1)           |
 |   right by the number of bits specified by the second arg (arg2).        |
 |   Forms a 96 bit quantity from the 64 bit arg and eax:                    |
 |                [  64 bit arg ][ eax ]                                     |
 |            shift right  --------->                                        |
 |   The eax register is initialized to 0 before the shifting.               |
 |   The lower 8 bits of eax are lost and replaced by a flag which is        |
 |   set (to 0x01) if any bit, apart from the first one, is set in the       |
 |   part which has been shifted out of the arg.                             |
 |   Results returned in the 64 bit arg and eax.                             |
 |                                                                           |
 |   Register use:  esi -> the 64 bit arg, ecx = shift count,                |
 |                  eax = extension (al = sticky flag on return),            |
 |                  ebx/edx = scratch (ebx saved/restored).                  |
 +---------------------------------------------------------------------------*/
SYM_FUNC_START(FPU_shrxs)
	push	%ebp
	movl	%esp,%ebp
	pushl	%esi
	pushl	%ebx
	movl	PARAM2,%ecx	/* ecx = arg2, the shift count */
	movl	PARAM1,%esi	/* esi = arg1 -> 64 bit quantity */
	cmpl	$64,%ecx	/* shifts of 64 or more bits need the slow paths */
	jnc	Ls_more_than_63

	cmpl	$32,%ecx	/* shrd only works for 0..31 bits */
	jc	Ls_less_than_32

/* We got here without jumps by assuming that the most common requirement
   is for small integers */
/* Shift by [32..63] bits */
	subb	$32,%cl
	movl	(%esi),%eax	/* lsl */
	movl	4(%esi),%edx	/* msl */
	xorl	%ebx,%ebx	/* catches the bits shifted below the extension */
	shrd	%cl,%eax,%ebx
	shrd	%cl,%edx,%eax
	shr	%cl,%edx
	orl	%ebx,%ebx		/* test these 32 bits */
	setne	%bl
	test	$0x7fffffff,%eax	/* and 31 bits here */
	setne	%bh
	orw	%bx,%bx			/* Any of the 63 bit set ? */
	setne	%al		/* al = sticky flag (docs above: msb excluded) */
	movl	%edx,(%esi)
	movl	$0,4(%esi)
	popl	%ebx
	popl	%esi
	leave
	RET

/* Shift by [0..31] bits */
Ls_less_than_32:
	movl	(%esi),%ebx	/* lsl */
	movl	4(%esi),%edx	/* msl */
	xorl	%eax,%eax	/* extension */
	shrd	%cl,%ebx,%eax
	shrd	%cl,%edx,%ebx
	shr	%cl,%edx
	test	$0x7fffffff,%eax	/* only need to look at eax here */
	setne	%al		/* al = sticky flag */
	movl	%ebx,(%esi)
	movl	%edx,4(%esi)
	popl	%ebx
	popl	%esi
	leave
	RET

/* Shift by [64..95] bits */
Ls_more_than_63:
	cmpl	$96,%ecx
	jnc	Ls_more_than_95

	subb	$64,%cl
	movl	(%esi),%ebx	/* lsl */
	movl	4(%esi),%eax	/* msl */
	xorl	%edx,%edx	/* extension */
	shrd	%cl,%ebx,%edx
	shrd	%cl,%eax,%ebx
	shr	%cl,%eax
	orl	%ebx,%edx	/* any bit set below the extension window? */
	setne	%bl
	test	$0x7fffffff,%eax	/* only need to look at eax here */
	setne	%bh
	orw	%bx,%bx
	setne	%al		/* al = sticky flag */
	xorl	%edx,%edx
	movl	%edx,(%esi)	/* set to zero */
	movl	%edx,4(%esi)	/* set to zero */
	popl	%ebx
	popl	%esi
	leave
	RET

Ls_more_than_95:
/* Shift by [96..inf) bits: result is zero; sticky = any input bit set */
	xorl	%eax,%eax
	movl	(%esi),%ebx
	orl	4(%esi),%ebx
	setne	%al		/* al = sticky flag */
	xorl	%ebx,%ebx
	movl	%ebx,(%esi)
	movl	%ebx,4(%esi)
	popl	%ebx
	popl	%esi
	leave
	RET
SYM_FUNC_END(FPU_shrxs)