xref: /OK3568_Linux_fs/u-boot/arch/arm/lib/div64.S (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun/*
2*4882a593Smuzhiyun *  linux/arch/arm/lib/div64.S
3*4882a593Smuzhiyun *
4*4882a593Smuzhiyun *  Optimized computation of 64-bit dividend / 32-bit divisor
5*4882a593Smuzhiyun *
6*4882a593Smuzhiyun *  Author:	Nicolas Pitre
7*4882a593Smuzhiyun *  Created:	Oct 5, 2003
8*4882a593Smuzhiyun *  Copyright:	Monta Vista Software, Inc.
9*4882a593Smuzhiyun *
10*4882a593Smuzhiyun *  SPDX-License-Identifier:	GPL-2.0
11*4882a593Smuzhiyun */
12*4882a593Smuzhiyun
13*4882a593Smuzhiyun#include <linux/linkage.h>
14*4882a593Smuzhiyun#include <asm/assembler.h>
15*4882a593Smuzhiyun#ifdef __UBOOT__	/* when set, UNWIND(...) expands to nothing, so the unwind directives wrapped in it below are dropped */
16*4882a593Smuzhiyun#define UNWIND(x...)
17*4882a593Smuzhiyun#endif	/* __UBOOT__ -- otherwise UNWIND is presumably supplied by the included headers (TODO confirm) */
18*4882a593Smuzhiyun
19*4882a593Smuzhiyun#ifdef __ARMEB__	/* presumably the big-endian ARM predefine (TODO confirm): high word in the lower-numbered register */
20*4882a593Smuzhiyun#define xh r0	/* upper 32 bits of the dividend pair */
21*4882a593Smuzhiyun#define xl r1	/* lower 32 bits of the dividend pair */
22*4882a593Smuzhiyun#define yh r2	/* upper 32 bits of the result pair */
23*4882a593Smuzhiyun#define yl r3	/* lower 32 bits of the result pair */
24*4882a593Smuzhiyun#else	/* !__ARMEB__: low word in the lower-numbered register */
25*4882a593Smuzhiyun#define xl r0	/* lower 32 bits of the dividend pair */
26*4882a593Smuzhiyun#define xh r1	/* upper 32 bits of the dividend pair */
27*4882a593Smuzhiyun#define yl r2	/* lower 32 bits of the result pair */
28*4882a593Smuzhiyun#define yh r3	/* upper 32 bits of the result pair */
29*4882a593Smuzhiyun#endif	/* __ARMEB__ */
30*4882a593Smuzhiyun
31*4882a593Smuzhiyun/*
32*4882a593Smuzhiyun * __do_div64: perform a division with 64-bit dividend and 32-bit divisor.
33*4882a593Smuzhiyun *
34*4882a593Smuzhiyun * Note: The calling convention is totally non-standard, to allow optimal code.
35*4882a593Smuzhiyun *       This is meant to be used by do_div() from include/asm/div64.h only.
36*4882a593Smuzhiyun *
37*4882a593Smuzhiyun * Input parameters:
38*4882a593Smuzhiyun * 	xh-xl	= dividend (clobbered)
39*4882a593Smuzhiyun * 	r4	= divisor (preserved)
40*4882a593Smuzhiyun *
41*4882a593Smuzhiyun * Output values:
42*4882a593Smuzhiyun * 	yh-yl	= result
43*4882a593Smuzhiyun * 	xh	= remainder
44*4882a593Smuzhiyun *
45*4882a593Smuzhiyun * Clobbered regs: xl, ip
46*4882a593Smuzhiyun */
47*4882a593Smuzhiyun
48*4882a593Smuzhiyun.pushsection .text.__do_div64, "ax"
49*4882a593SmuzhiyunENTRY(__do_div64)
50*4882a593SmuzhiyunUNWIND(.fnstart)
51*4882a593Smuzhiyun	@ In: xh-xl = dividend, r4 = divisor.  Out: yh-yl = quotient, xh = remainder.
52*4882a593Smuzhiyun	@ Test for easy paths first: a divisor of 0 or 1, or a power of 2.
53*4882a593Smuzhiyun	subs	ip, r4, #1		@ ip = divisor - 1, flags updated
54*4882a593Smuzhiyun	bls	9f			@ divisor is 0 or 1
55*4882a593Smuzhiyun	tst	ip, r4			@ divisor & (divisor - 1)
56*4882a593Smuzhiyun	beq	8f			@ divisor is power of 2
57*4882a593Smuzhiyun
58*4882a593Smuzhiyun	@ See if we need to handle upper 32-bit result.
59*4882a593Smuzhiyun	cmp	xh, r4
60*4882a593Smuzhiyun	mov	yh, #0			@ upper result word starts at 0
61*4882a593Smuzhiyun	blo	3f			@ xh < divisor: upper result word stays 0
62*4882a593Smuzhiyun
63*4882a593Smuzhiyun	@ Align divisor with upper part of dividend.
64*4882a593Smuzhiyun	@ The aligned divisor is stored in yl preserving the original.
65*4882a593Smuzhiyun	@ The matching result bit (1 << shift) is stored in ip.
66*4882a593Smuzhiyun
67*4882a593Smuzhiyun#if __LINUX_ARM_ARCH__ >= 5
68*4882a593Smuzhiyun
69*4882a593Smuzhiyun	clz	yl, r4			@ leading zeros of divisor
70*4882a593Smuzhiyun	clz	ip, xh			@ leading zeros of upper dividend word
71*4882a593Smuzhiyun	sub	yl, yl, ip		@ yl = shift that lines up the top set bits
72*4882a593Smuzhiyun	mov	ip, #1
73*4882a593Smuzhiyun	mov	ip, ip, lsl yl		@ ip = 1 << shift (current result bit)
74*4882a593Smuzhiyun	mov	yl, r4, lsl yl		@ yl = divisor << shift
75*4882a593Smuzhiyun
76*4882a593Smuzhiyun#else
77*4882a593Smuzhiyun
78*4882a593Smuzhiyun	mov	yl, r4
79*4882a593Smuzhiyun	mov	ip, #1
80*4882a593Smuzhiyun1:	cmp	yl, #0x80000000		@ stop once the top bit of yl is set...
81*4882a593Smuzhiyun	cmpcc	yl, xh			@ ...or once yl >= xh
82*4882a593Smuzhiyun	movcc	yl, yl, lsl #1		@ otherwise double the aligned divisor
83*4882a593Smuzhiyun	movcc	ip, ip, lsl #1		@ ...and the matching result bit
84*4882a593Smuzhiyun	bcc	1b
85*4882a593Smuzhiyun
86*4882a593Smuzhiyun#endif
87*4882a593Smuzhiyun
88*4882a593Smuzhiyun	@ The division loop for needed upper bit positions.
89*4882a593Smuzhiyun 	@ Break out early if dividend reaches 0.
90*4882a593Smuzhiyun2:	cmp	xh, yl
91*4882a593Smuzhiyun	orrcs	yh, yh, ip		@ xh >= yl: set this result bit
92*4882a593Smuzhiyun	subscs	xh, xh, yl		@ ...and subtract; Z set if xh is now 0
93*4882a593Smuzhiyun	movsne	ip, ip, lsr #1		@ unless xh just hit 0: next lower bit, Z set if none left
94*4882a593Smuzhiyun	mov	yl, yl, lsr #1		@ halve the aligned divisor (flags untouched)
95*4882a593Smuzhiyun	bne	2b			@ loop while xh is nonzero and a bit remains
96*4882a593Smuzhiyun
97*4882a593Smuzhiyun	@ See if we need to handle lower 32-bit result.
98*4882a593Smuzhiyun3:	cmp	xh, #0
99*4882a593Smuzhiyun	mov	yl, #0			@ lower result word starts at 0
100*4882a593Smuzhiyun	cmpeq	xl, r4			@ only when xh is 0: compare xl with divisor
101*4882a593Smuzhiyun	movlo	xh, xl			@ xh is 0 and xl < divisor: remainder is xl
102*4882a593Smuzhiyun	retlo	lr			@ ...and we are done
103*4882a593Smuzhiyun
104*4882a593Smuzhiyun	@ The division loop for lower bit positions.
105*4882a593Smuzhiyun	@ Here we shift remainder bits leftwards rather than moving the
106*4882a593Smuzhiyun	@ divisor for comparisons, considering the carry-out bit as well.
107*4882a593Smuzhiyun	mov	ip, #0x80000000		@ start at result bit 31
108*4882a593Smuzhiyun4:	movs	xl, xl, lsl #1		@ top bit of xl goes to carry
109*4882a593Smuzhiyun	adcs	xh, xh, xh		@ xh = 2 * xh + carry; C = bit shifted out of xh
110*4882a593Smuzhiyun	beq	6f			@ low 32 bits of the shifted remainder are 0
111*4882a593Smuzhiyun	cmpcc	xh, r4			@ no carry-out: compare remainder with divisor
112*4882a593Smuzhiyun5:	orrcs	yl, yl, ip		@ carry-out or xh >= divisor: set this result bit
113*4882a593Smuzhiyun	subcs	xh, xh, r4		@ ...and subtract the divisor
114*4882a593Smuzhiyun	movs	ip, ip, lsr #1		@ next lower result bit, Z set if none left
115*4882a593Smuzhiyun	bne	4b			@ loop while a result bit remains
116*4882a593Smuzhiyun	ret	lr
117*4882a593Smuzhiyun
118*4882a593Smuzhiyun	@ The top part of remainder became zero.  If carry is set
119*4882a593Smuzhiyun	@ (the 33rd bit) this is a false positive so resume the loop.
120*4882a593Smuzhiyun	@ Otherwise, if lower part is also null then we are done.
121*4882a593Smuzhiyun6:	bcs	5b			@ carry still set: true remainder is 1 << 32
122*4882a593Smuzhiyun	cmp	xl, #0
123*4882a593Smuzhiyun	reteq	lr			@ nothing left: remainder xh is 0
124*4882a593Smuzhiyun
125*4882a593Smuzhiyun	@ We still have remainder bits in the low part.  Bring them up.
126*4882a593Smuzhiyun
127*4882a593Smuzhiyun#if __LINUX_ARM_ARCH__ >= 5
128*4882a593Smuzhiyun
129*4882a593Smuzhiyun	clz	xh, xl			@ xh is zero here, so reuse it for the shift count
130*4882a593Smuzhiyun	add	xh, xh, #1		@ count that moves the top set bit of xl out
131*4882a593Smuzhiyun	mov	xl, xl, lsl xh
132*4882a593Smuzhiyun	mov	ip, ip, lsr xh		@ skip the same number of result bits
133*4882a593Smuzhiyun
134*4882a593Smuzhiyun#else
135*4882a593Smuzhiyun
136*4882a593Smuzhiyun7:	movs	xl, xl, lsl #1		@ shift until a set bit falls into carry
137*4882a593Smuzhiyun	mov	ip, ip, lsr #1		@ one result bit skipped per shift
138*4882a593Smuzhiyun	bcc	7b
139*4882a593Smuzhiyun
140*4882a593Smuzhiyun#endif
141*4882a593Smuzhiyun
142*4882a593Smuzhiyun	@ Current remainder is now 1.  It is worthless to compare with
143*4882a593Smuzhiyun	@ divisor at this point since divisor can not be smaller than 3 here.
144*4882a593Smuzhiyun	@ If possible, branch for another shift in the division loop.
145*4882a593Smuzhiyun	@ If no bit position left then we are done.
146*4882a593Smuzhiyun	movs	ip, ip, lsr #1		@ next lower result bit, Z set if none left
147*4882a593Smuzhiyun	mov	xh, #1			@ the bit just shifted out of xl (flags untouched)
148*4882a593Smuzhiyun	bne	4b
149*4882a593Smuzhiyun	ret	lr
150*4882a593Smuzhiyun
151*4882a593Smuzhiyun8:	@ Division by a power of 2: determine what that divisor order is
152*4882a593Smuzhiyun	@ then simply shift values around
153*4882a593Smuzhiyun
154*4882a593Smuzhiyun#if __LINUX_ARM_ARCH__ >= 5
155*4882a593Smuzhiyun
156*4882a593Smuzhiyun	clz	ip, r4
157*4882a593Smuzhiyun	rsb	ip, ip, #31		@ ip = 31 - clz(divisor) = divisor order
158*4882a593Smuzhiyun
159*4882a593Smuzhiyun#else
160*4882a593Smuzhiyun
161*4882a593Smuzhiyun	mov	yl, r4			@ yl = working copy, narrowed step by step
162*4882a593Smuzhiyun	cmp	r4, #(1 << 16)
163*4882a593Smuzhiyun	mov	ip, #0			@ ip accumulates the order
164*4882a593Smuzhiyun	movhs	yl, yl, lsr #16
165*4882a593Smuzhiyun	movhs	ip, #16
166*4882a593Smuzhiyun
167*4882a593Smuzhiyun	cmp	yl, #(1 << 8)
168*4882a593Smuzhiyun	movhs	yl, yl, lsr #8
169*4882a593Smuzhiyun	addhs	ip, ip, #8
170*4882a593Smuzhiyun
171*4882a593Smuzhiyun	cmp	yl, #(1 << 4)
172*4882a593Smuzhiyun	movhs	yl, yl, lsr #4
173*4882a593Smuzhiyun	addhs	ip, ip, #4
174*4882a593Smuzhiyun
175*4882a593Smuzhiyun	cmp	yl, #(1 << 2)
176*4882a593Smuzhiyun	addhi	ip, ip, #3		@ yl is 8
177*4882a593Smuzhiyun	addls	ip, ip, yl, lsr #1	@ yl is 1, 2 or 4: add 0, 1 or 2
178*4882a593Smuzhiyun
179*4882a593Smuzhiyun#endif
180*4882a593Smuzhiyun
181*4882a593Smuzhiyun	mov	yh, xh, lsr ip		@ upper result word = xh >> order
182*4882a593Smuzhiyun	mov	yl, xl, lsr ip		@ lower result word, part coming from xl
183*4882a593Smuzhiyun	rsb	ip, ip, #32		@ ip = 32 - order, to merge the low bits of xh into yl
184*4882a593Smuzhiyun ARM(	orr	yl, yl, xh, lsl ip	)
185*4882a593Smuzhiyun THUMB(	lsl	xh, xh, ip		)
186*4882a593Smuzhiyun THUMB(	orr	yl, yl, xh		)
187*4882a593Smuzhiyun	mov	xh, xl, lsl ip		@ remainder = low order bits of xl:
188*4882a593Smuzhiyun	mov	xh, xh, lsr ip		@ shift them to the top, then back down
189*4882a593Smuzhiyun	ret	lr
190*4882a593Smuzhiyun
191*4882a593Smuzhiyun	@ eq -> division by 1: obvious enough...
192*4882a593Smuzhiyun9:	moveq	yl, xl			@ result = dividend
193*4882a593Smuzhiyun	moveq	yh, xh
194*4882a593Smuzhiyun	moveq	xh, #0			@ remainder = 0
195*4882a593Smuzhiyun	reteq	lr			@ ne (divisor is 0): fall through to Ldiv0_64
196*4882a593SmuzhiyunUNWIND(.fnend)
197*4882a593Smuzhiyun
198*4882a593SmuzhiyunUNWIND(.fnstart)
199*4882a593SmuzhiyunUNWIND(.pad #4)
200*4882a593SmuzhiyunUNWIND(.save {lr})
201*4882a593SmuzhiyunLdiv0_64:
202*4882a593Smuzhiyun	@ Division by 0: call __div0, then return zeros.
203*4882a593Smuzhiyun	str	lr, [sp, #-8]!		@ save lr, sp moves down by 8
204*4882a593Smuzhiyun	bl	__div0			@ handler defined outside this file
205*4882a593Smuzhiyun
206*4882a593Smuzhiyun	@ Result and remainder are all zero: as wrong as it could be...
207*4882a593Smuzhiyun	mov	yl, #0
208*4882a593Smuzhiyun	mov	yh, #0
209*4882a593Smuzhiyun	mov	xh, #0
210*4882a593Smuzhiyun	ldr	pc, [sp], #8		@ return through the saved lr, sp restored
211*4882a593Smuzhiyun
212*4882a593SmuzhiyunUNWIND(.fnend)
213*4882a593SmuzhiyunENDPROC(__do_div64)
214*4882a593Smuzhiyun.popsection
215