/*
 * Copyright 2010, Google Inc.
 *
 * Brought in from coreboot uldivmod.S
 *
 * SPDX-License-Identifier:     GPL-2.0
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

/*
 * A, Q = r0 + (r1 << 32)
 * B, R = r2 + (r3 << 32)
 * A / B = Q ... R
 */
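
/*
 * __aeabi_uldivmod is the EABI run-time helper for unsigned 64-bit
 * division: it divides A (r1:r0) by B (r3:r2) and returns the quotient
 * Q in r1:r0 and the remainder R in r3:r2.  The special cases (B == 0,
 * B a power of two, both operands fitting in 32 bits) are handled
 * first; everything else falls through to a shift-and-subtract long
 * division.
 */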

A_0	.req	r0
A_1	.req	r1
B_0	.req	r2
B_1	.req	r3
C_0	.req	r4
C_1	.req	r5
D_0	.req	r6
D_1	.req	r7

Q_0	.req	r0
Q_1	.req	r1
R_0	.req	r2
R_1	.req	r3

THUMB(
TMP	.req	r8
)
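
/*
 * Thumb-2 allows only constant shifts in a data-processing operand,
 * not a shift by register, so the ARM "orr Rd, Rn, Rm, lsr Rs" forms
 * below are split into a separate shift into TMP followed by an orr
 * when building for Thumb.
 */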

.pushsection .text.__aeabi_uldivmod, "ax"
ENTRY(__aeabi_uldivmod)

	stmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) lr}
	@ Test if B == 0
	orrs	ip, B_0, B_1		@ Z set -> B == 0
	beq	L_div_by_0
	@ Test if B is power of 2: (B & (B - 1)) == 0
	subs	C_0, B_0, #1
	sbc	C_1, B_1, #0
	tst	C_0, B_0
	tsteq	B_1, C_1
	beq	L_pow2
	@ Test if A_1 == B_1 == 0
	orrs	ip, A_1, B_1
	beq	L_div_32_32

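/*
 * General 64-by-64 long division: B and a quotient bit C (initially 1)
 * are first shifted left to line B up with A; the loop at L_subtract
 * then repeatedly subtracts B from A, ORs C into the accumulator D
 * whenever A >= B, and shifts B and C right by one, until A or C
 * reaches zero.  D ends up holding the quotient and A the remainder.
 */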
L_div_64_64:
/* CLZ only exists in ARM architecture version 5 and above. */
#ifdef HAVE_CLZ
	mov	C_0, #1
	mov	C_1, #0
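	@ clz of a 64-bit value is the clz of its high word, plus the clz
	@ of its low word when the high word is zero; the conditional
	@ clz/add pairs below implement exactly that.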
	@ D_0 = clz A
	teq	A_1, #0
	clz	D_0, A_1
	clzeq	ip, A_0
	addeq	D_0, D_0, ip
	@ D_1 = clz B
	teq	B_1, #0
	clz	D_1, B_1
	clzeq	ip, B_0
	addeq	D_1, D_1, ip
	@ if clz B - clz A > 0
	subs	D_0, D_1, D_0
	bls	L_done_shift
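	@ The 64-bit left shifts of B and C below must cope with shift
	@ amounts of 32 or more: D_1 = shift - 32 and ip = 32 - shift are
	@ precomputed, and the mi/pl conditions select the cross-word form
	@ (shift < 32) or the single-word form (shift >= 32).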
	@ B <<= (clz B - clz A)
	subs	D_1, D_0, #32
	rsb	ip, D_0, #32
	movmi	B_1, B_1, lsl D_0
ARM(	orrmi	B_1, B_1, B_0, lsr ip	)
THUMB(	lsrmi	TMP, B_0, ip		)
THUMB(	orrmi	B_1, B_1, TMP		)
	movpl	B_1, B_0, lsl D_1
	mov	B_0, B_0, lsl D_0
	@ C = 1 << (clz B - clz A)
	movmi	C_1, C_1, lsl D_0
ARM(	orrmi	C_1, C_1, C_0, lsr ip	)
THUMB(	lsrmi	TMP, C_0, ip		)
THUMB(	orrmi	C_1, C_1, TMP		)
	movpl	C_1, C_0, lsl D_1
	mov	C_0, C_0, lsl D_0
L_done_shift:
	mov	D_0, #0
	mov	D_1, #0
	@ C: current bit; D: result
#else
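	@ Without clz, align B with A by shifting B and the quotient bit C
	@ left: four bits at a time while the top nibble of B_1 is clear
	@ and B is still below A, then single bits until B's top bit is
	@ set or B >= A.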
	@ C: current bit; D: result
	mov	C_0, #1
	mov	C_1, #0
	mov	D_0, #0
	mov	D_1, #0
L_lsl_4:
	cmp	B_1, #0x10000000
	cmpcc	B_1, A_1
	cmpeq	B_0, A_0
	bcs	L_lsl_1
	@ B <<= 4
	mov	B_1, B_1, lsl #4
	orr	B_1, B_1, B_0, lsr #28
	mov	B_0, B_0, lsl #4
	@ C <<= 4
	mov	C_1, C_1, lsl #4
	orr	C_1, C_1, C_0, lsr #28
	mov	C_0, C_0, lsl #4
	b	L_lsl_4
L_lsl_1:
	cmp	B_1, #0x80000000
	cmpcc	B_1, A_1
	cmpeq	B_0, A_0
	bcs	L_subtract
	@ B <<= 1
	mov	B_1, B_1, lsl #1
	orr	B_1, B_1, B_0, lsr #31
	mov	B_0, B_0, lsl #1
	@ C <<= 1
	mov	C_1, C_1, lsl #1
	orr	C_1, C_1, C_0, lsr #31
	mov	C_0, C_0, lsl #1
	b	L_lsl_1
#endif
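	@ Main division loop (the clz and non-clz paths both join here):
	@ each pass subtracts B from A and ORs the current bit C into the
	@ result D when A >= B, then halves C and B.  The loop ends when A
	@ or C reaches zero, leaving the quotient in D and the remainder
	@ in A.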
L_subtract:
	@ if A >= B
	cmp	A_1, B_1
	cmpeq	A_0, B_0
	bcc	L_update
	@ A -= B
	subs	A_0, A_0, B_0
	sbc	A_1, A_1, B_1
	@ D |= C
	orr	D_0, D_0, C_0
	orr	D_1, D_1, C_1
L_update:
	@ if A == 0: break
	orrs	ip, A_1, A_0
	beq	L_exit
	@ C >>= 1
	movs	C_1, C_1, lsr #1
	movs	C_0, C_0, rrx
	@ if C == 0: break
	orrs	ip, C_1, C_0
	beq	L_exit
	@ B >>= 1
	movs	B_1, B_1, lsr #1
	mov	B_0, B_0, rrx
	b	L_subtract
L_exit:
	@ Note: A, B & Q, R are aliases
	mov	R_0, A_0
	mov	R_1, A_1
	mov	Q_0, D_0
	mov	Q_1, D_1
	ldmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}

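/*
 * Both operands fit in 32 bits, so let the 32-bit helper do the work:
 * __aeabi_uidivmod returns the quotient in r0 and the remainder in r1,
 * leaving only the high words to clear and the remainder to move into
 * place.
 */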
L_div_32_32:
	@ Note:	A_0 &	r0 are aliases
	@	Q_1	r1
	mov	r1, B_0
	bl	__aeabi_uidivmod
	mov	R_0, r1
	mov	R_1, #0
	mov	Q_1, #0
	ldmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}

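/*
 * B is a non-zero power of two.  C still holds B - 1 from the
 * power-of-two test above, so the remainder is simply A & (B - 1) and
 * the quotient is A shifted right by log2(B).
 */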
L_pow2:
#ifdef HAVE_CLZ
	@ Note: A, B and Q, R are aliases
	@ R = A & (B - 1)
	and	C_0, A_0, C_0
	and	C_1, A_1, C_1
	@ Q = A >> log2(B)
	@ Note: B must not be 0 here!
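	@ D_0 = log2(B): 31 - clz(B_0) when the set bit is in the low
	@ word, otherwise 32 + (31 - clz(B_1)).  D_1 = 32 - D_0 is only
	@ needed in the low-word case, to pull the low bits of A_1 down
	@ into A_0.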
	clz	D_0, B_0
	add	D_1, D_0, #1
	rsbs	D_0, D_0, #31
	bpl	L_1
	clz	D_0, B_1
	rsb	D_0, D_0, #31
	mov	A_0, A_1, lsr D_0
	add	D_0, D_0, #32
L_1:
	movpl	A_0, A_0, lsr D_0
ARM(	orrpl	A_0, A_0, A_1, lsl D_1	)
THUMB(	lslpl	TMP, A_1, D_1		)
THUMB(	orrpl	A_0, A_0, TMP		)
	mov	A_1, A_1, lsr D_0
	@ Move C back to R
	mov	R_0, C_0
	mov	R_1, C_1
	ldmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}
#else
	@ Note: A, B and Q, R are aliases
	@ R = A & (B - 1)
	and	C_0, A_0, C_0
	and	C_1, A_1, C_1
	@ Q = A >> log2(B)
	@ Note: B must not be 0 here!
	@ Compute log2(B), the index of B's single set bit, in D_0.
	mov	D_0, #0
	orrs	B_0, B_0, B_0
	@ If the set bit is in B's high word (B_0 == 0), shift A and B right by 32.
	moveq	A_0, A_1
	moveq	A_1, #0
	moveq	B_0, B_1
	@ Find the position of the set bit within B_0.
	movs	B_1, B_0, lsl #16
	addeq	D_0, #16
	moveq	B_0, B_0, lsr #16
	tst	B_0, #0xff
	addeq	D_0, #8
	moveq	B_0, B_0, lsr #8
	tst	B_0, #0xf
	addeq	D_0, #4
	moveq	B_0, B_0, lsr #4
	tst	B_0, #0x3
	addeq	D_0, #2
	moveq	B_0, B_0, lsr #2
	tst	B_0, #0x1
	addeq	D_0, #1
	@ Shift A to the right by the appropriate amount.
	rsb	D_1, D_0, #32
	mov	Q_0, A_0, lsr D_0
ARM(	orr	Q_0, Q_0, A_1, lsl D_1	)
THUMB(	lsl	A_1, D_1		)
THUMB(	orr	Q_0, A_1		)
	mov	Q_1, A_1, lsr D_0
	@ Move C to R
	mov	R_0, C_0
	mov	R_1, C_1
	ldmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}
#endif

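/*
 * Division by zero: report it through __div0 and, should that return,
 * hand back an all-zero quotient and remainder.
 */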
L_div_by_0:
	bl	__div0
	@ As wrong as it could be
	mov	Q_0, #0
	mov	Q_1, #0
	mov	R_0, #0
	mov	R_1, #0
	ldmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}
ENDPROC(__aeabi_uldivmod)
.popsection