/* xref: /rk3399_rockchip-uboot/arch/arm/lib/uldivmod.S (revision 11b1a9b2c0fecb7334ccb23f29200da0be0cc156) */
/*
 * Copyright 2010, Google Inc.
 *
 * Brought in from coreboot uldivmod.S
 *
 * SPDX-License-Identifier:     GPL-2.0
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

/*
 * Instruction-set selection helpers.
 *
 * ARM(x)   expands to x: the wrapped text is assembled as-is.
 * THUMB(x) expands to nothing: the wrapped text is dropped.
 *
 * We don't use Thumb instructions for now, so only the ARM() variants of
 * the paired instruction sequences in this file are emitted.
 */
#define ARM(x...)	x
#define THUMB(x...)

/*
 * Register allocation for __aeabi_uldivmod.
 *
 * A, Q = r0 + (r1 << 32)	dividend on entry, quotient on exit
 * B, R = r2 + (r3 << 32)	divisor on entry, remainder on exit
 * A / B = Q ... R
 *
 * The _0 suffix names the low 32-bit word and _1 the high word.
 * Q aliases A and R aliases B, so results overwrite the inputs.
 *
 * C = r4 + (r5 << 32) and D = r6 + (r7 << 32) are 64-bit working values.
 * TMP (r8) is an extra scratch register that only exists in Thumb builds.
 */

A_0	.req	r0
A_1	.req	r1
B_0	.req	r2
B_1	.req	r3
C_0	.req	r4
C_1	.req	r5
D_0	.req	r6
D_1	.req	r7

Q_0	.req	r0
Q_1	.req	r1
R_0	.req	r2
R_1	.req	r3

THUMB(
TMP	.req	r8
)

/*
 * __aeabi_uldivmod - unsigned 64-bit division with remainder.
 *
 * In:	A (dividend) = r0 + (r1 << 32)
 *	B (divisor)  = r2 + (r3 << 32)
 * Out:	Q (quotient)  = r0 + (r1 << 32)
 *	R (remainder) = r2 + (r3 << 32)
 *
 * Clobbers ip and the condition flags. r4-r8 are saved and restored.
 *
 * Strategy:
 *  - B == 0: call __div0; if it returns, Q and R are both set to 0.
 *  - B is a power of two: R = A & (B - 1), Q = A >> log2(B).
 *  - Both high words are zero: defer to the 32-bit __aeabi_uidivmod.
 *  - Otherwise: shift-and-subtract long division. B is aligned under A
 *    while C tracks the matching quotient bit, then B and C are shifted
 *    right one bit per step and the quotient is accumulated in D.
 *
 * Six registers (24 bytes) are pushed so that sp keeps the 8-byte
 * alignment the AAPCS requires at the calls to __aeabi_uidivmod and
 * __div0. r8 is the Thumb scratch register TMP; in ARM builds it is
 * saved purely as alignment padding.
 */
ENTRY(__aeabi_uldivmod)
	stmfd	sp!, {r4, r5, r6, r7, r8, lr}
	@ Test if B == 0
	orrs	ip, B_0, B_1		@ Z set -> B == 0
	beq	L_div_by_0
	@ Test if B is power of 2: (B & (B - 1)) == 0
	@ C = B - 1 stays live: L_pow2 reuses it as the remainder mask.
	subs	C_0, B_0, #1
	sbc	C_1, B_1, #0
	tst	C_0, B_0
	tsteq	B_1, C_1
	beq	L_pow2
	@ Test if A_1 == B_1 == 0
	orrs	ip, A_1, B_1
	beq	L_div_32_32

L_div_64_64:
/* CLZ only exists in ARM architecture version 5 and above. */
#ifdef HAVE_CLZ
	mov	C_0, #1
	mov	C_1, #0
	@ D_0 = clz A (64-bit: clz of the high word, plus clz of the low
	@ word when the high word is zero)
	teq	A_1, #0
	clz	D_0, A_1
	clzeq	ip, A_0
	addeq	D_0, D_0, ip
	@ D_1 = clz B
	teq	B_1, #0
	clz	D_1, B_1
	clzeq	ip, B_0
	addeq	D_1, D_1, ip
	@ if clz B - clz A > 0
	subs	D_0, D_1, D_0
	bls	L_done_shift
	@ B <<= (clz B - clz A)
	@ D_1 = shift - 32; its sign (MI/PL) selects the shift < 32 or
	@ shift >= 32 variant for both B and C below.
	subs	D_1, D_0, #32
	rsb	ip, D_0, #32
	movmi	B_1, B_1, lsl D_0
ARM(	orrmi	B_1, B_1, B_0, lsr ip	)
THUMB(	lsrmi	TMP, B_0, ip		)
THUMB(	orrmi	B_1, B_1, TMP		)
	movpl	B_1, B_0, lsl D_1
	mov	B_0, B_0, lsl D_0
	@ C = 1 << (clz B - clz A)
	movmi	C_1, C_1, lsl D_0
ARM(	orrmi	C_1, C_1, C_0, lsr ip	)
THUMB(	lsrmi	TMP, C_0, ip		)
THUMB(	orrmi	C_1, C_1, TMP		)
	movpl	C_1, C_0, lsl D_1
	mov	C_0, C_0, lsl D_0
L_done_shift:
	mov	D_0, #0
	mov	D_1, #0
	@ C: current bit; D: result
#else
	@ C: current bit; D: result
	mov	C_0, #1
	mov	C_1, #0
	mov	D_0, #0
	mov	D_1, #0
	@ Align B under A four bits at a time. Stop once B >= A or once
	@ another shift by 4 would push bits out of the top of B.
L_lsl_4:
	cmp	B_1, #0x10000000
	cmpcc	B_1, A_1
	cmpeq	B_0, A_0
	bcs	L_lsl_1
	@ B <<= 4
	mov	B_1, B_1, lsl #4
	orr	B_1, B_1, B_0, lsr #28
	mov	B_0, B_0, lsl #4
	@ C <<= 4
	mov	C_1, C_1, lsl #4
	orr	C_1, C_1, C_0, lsr #28
	mov	C_0, C_0, lsl #4
	b	L_lsl_4
	@ Finish the alignment one bit at a time. Stop once B >= A or
	@ once the top bit of B is set.
L_lsl_1:
	cmp	B_1, #0x80000000
	cmpcc	B_1, A_1
	cmpeq	B_0, A_0
	bcs	L_subtract
	@ B <<= 1
	mov	B_1, B_1, lsl #1
	orr	B_1, B_1, B_0, lsr #31
	mov	B_0, B_0, lsl #1
	@ C <<= 1
	mov	C_1, C_1, lsl #1
	orr	C_1, C_1, C_0, lsr #31
	mov	C_0, C_0, lsl #1
	b	L_lsl_1
#endif
L_subtract:
	@ if A >= B
	cmp	A_1, B_1
	cmpeq	A_0, B_0
	bcc	L_update
	@ A -= B
	subs	A_0, A_0, B_0
	sbc	A_1, A_1, B_1
	@ D |= C
	orr	D_0, D_0, C_0
	orr	D_1, D_1, C_1
L_update:
	@ if A == 0: break
	orrs	ip, A_1, A_0
	beq	L_exit
	@ C >>= 1 (the bit shifted out of C_1 enters C_0 through rrx)
	movs	C_1, C_1, lsr #1
	movs	C_0, C_0, rrx
	@ if C == 0: break
	orrs	ip, C_1, C_0
	beq	L_exit
	@ B >>= 1
	movs	B_1, B_1, lsr #1
	mov	B_0, B_0, rrx
	b	L_subtract
L_exit:
	@ Note: A, B & Q, R are aliases
	@ What is left of A is the remainder; D holds the quotient.
	@ R must be written before Q because Q overwrites A.
	mov	R_0, A_0
	mov	R_1, A_1
	mov	Q_0, D_0
	mov	Q_1, D_1
	ldmfd	sp!, {r4, r5, r6, r7, r8, pc}

L_div_32_32:
	@ Both high words are zero: use the 32-bit divide helper.
	@ Note:	A_0 &	r0 are aliases
	@	Q_1	r1
	@ r0 already holds the dividend; the divisor goes in r1.
	mov	r1, B_0
	bl	__aeabi_uidivmod
	@ The helper leaves the quotient in r0 (= Q_0) and the remainder
	@ in r1; widen both results to 64 bits.
	mov	R_0, r1
	mov	R_1, #0
	mov	Q_1, #0
	ldmfd	sp!, {r4, r5, r6, r7, r8, pc}

L_pow2:
#ifdef HAVE_CLZ
	@ Note: A, B and Q, R are aliases
	@ R = A & (B - 1)	(C still holds B - 1 from the pow2 test)
	and	C_0, A_0, C_0
	and	C_1, A_1, C_1
	@ Q = A >> log2(B)
	@ Note: B must not be 0 here!
	@ D_0 = 31 - clz(B_0): log2(B) when the set bit is in B_0,
	@ negative (MI) when B_0 == 0. D_1 = 32 - log2(B).
	clz	D_0, B_0
	add	D_1, D_0, #1
	rsbs	D_0, D_0, #31
	bpl	L_1
	@ The set bit is in B_1: Q_0 = A_1 >> log2(B_1). D_0 is then
	@ raised by 32 so the final shift of A_1 below yields 0.
	@ None of these instructions touch the flags, so MI still holds
	@ at L_1 and the PL instructions there are skipped.
	clz	D_0, B_1
	rsb	D_0, D_0, #31
	mov	A_0, A_1, lsr D_0
	add	D_0, D_0, #32
L_1:
	movpl	A_0, A_0, lsr D_0
ARM(	orrpl	A_0, A_0, A_1, lsl D_1	)
THUMB(	lslpl	TMP, A_1, D_1		)
THUMB(	orrpl	A_0, A_0, TMP		)
	mov	A_1, A_1, lsr D_0
	@ Mov back C to R
	mov	R_0, C_0
	mov	R_1, C_1
	ldmfd	sp!, {r4, r5, r6, r7, r8, pc}
#else
	@ Note: A, B and Q, R are aliases
	@ R = A & (B - 1)	(C still holds B - 1 from the pow2 test)
	and	C_0, A_0, C_0
	and	C_1, A_1, C_1
	@ Q = A >> log2(B)
	@ Note: B must not be 0 here!
	@ Count the trailing zeroes in B; as B is a power of two this
	@ count equals log2(B). D_0 accumulates the count.
	mov	D_0, #0
	orrs	B_0, B_0, B_0
	@ If the low word of B is zero (B >= 1 << 32), divide A and B
	@ by 1 << 32.
	moveq	A_0, A_1
	moveq	A_1, #0
	moveq	B_0, B_1
	@ Count the remaining trailing zeroes in B_0 by binary search:
	@ 16, 8, 4, 2, then 1 bit(s). B_1 is only a scratch destination
	@ for the flag-setting shift.
	movs	B_1, B_0, lsl #16
	addeq	D_0, D_0, #16
	moveq	B_0, B_0, lsr #16
	tst	B_0, #0xff
	addeq	D_0, D_0, #8
	moveq	B_0, B_0, lsr #8
	tst	B_0, #0xf
	addeq	D_0, D_0, #4
	moveq	B_0, B_0, lsr #4
	tst	B_0, #0x3
	addeq	D_0, D_0, #2
	moveq	B_0, B_0, lsr #2
	tst	B_0, #0x1
	addeq	D_0, D_0, #1
	@ Shift A to the right by the appropriate amount.
	@ D_1 = 32 - shift; a register shift by 32 yields 0, so a shift
	@ count of 0 is handled as well.
	rsb	D_1, D_0, #32
	mov	Q_0, A_0, lsr D_0
	orr	Q_0, Q_0, A_1, lsl D_1
	mov	Q_1, A_1, lsr D_0
	@ Move C to R
	mov	R_0, C_0
	mov	R_1, C_1
	ldmfd	sp!, {r4, r5, r6, r7, r8, pc}
#endif

L_div_by_0:
	bl	__div0
	@ As wrong as it could be: if __div0 returns, report Q = R = 0.
	mov	Q_0, #0
	mov	Q_1, #0
	mov	R_0, #0
	mov	R_1, #0
	ldmfd	sp!, {r4, r5, r6, r7, r8, pc}
ENDPROC(__aeabi_uldivmod)