xref: /rk3399_rockchip-uboot/arch/sh/lib/udivsi3_i4i-Os.S (revision 9b5b60a05cb8bba2d135439419b2030764e359bd)
1*5f91a3adSMasahiro Yamada/* Copyright (C) 2006 Free Software Foundation, Inc.
2*5f91a3adSMasahiro Yamada
3*5f91a3adSMasahiro Yamada * SPDX-License-Identifier:	GPL-2.0+
4*5f91a3adSMasahiro Yamada */
5*5f91a3adSMasahiro Yamada
6*5f91a3adSMasahiro Yamada/* Moderately Space-optimized libgcc routines for the Renesas SH /
7*5f91a3adSMasahiro Yamada   STMicroelectronics ST40 CPUs.
8*5f91a3adSMasahiro Yamada   Contributed by J"orn Rennecke joern.rennecke@st.com.  */
9*5f91a3adSMasahiro Yamada
10*5f91a3adSMasahiro Yamada/* Size: 186 bytes jointly for udivsi3_i4i and sdivsi3_i4i
11*5f91a3adSMasahiro Yamada   sh4-200 run times:
12*5f91a3adSMasahiro Yamada   udiv small divisor: 55 cycles
13*5f91a3adSMasahiro Yamada   udiv large divisor: 52 cycles
14*5f91a3adSMasahiro Yamada   sdiv small divisor, positive result: 59 cycles
15*5f91a3adSMasahiro Yamada   sdiv large divisor, positive result: 56 cycles
16*5f91a3adSMasahiro Yamada   sdiv small divisor, negative result: 65 cycles (*)
17*5f91a3adSMasahiro Yamada   sdiv large divisor, negative result: 62 cycles (*)
18*5f91a3adSMasahiro Yamada   (*): r2 is restored in the delay slot of the final jmp @r2 and has a
19*5f91a3adSMasahiro Yamada        lingering latency of two more cycles.  */
/* __udivsi3_i4i (alias __udivsi3_i4): unsigned 32-bit division built from
   single-bit div1 steps.
   In:   r4 = dividend, r5 = divisor.
   Out:  r0 = quotient.
   r4 and r5 are pushed on entry and popped before returning.  r1 is
   overwritten with the return address, because the bsr calls to
   div6/div7/divx3 overwrite PR.  The T bit is left modified.
   A divisor that fits in 16 bits is handled here in two passes of
   16 div1 steps each; any other divisor goes to large_divisor.  */
20*5f91a3adSMasahiro Yamada	.balign 4
21*5f91a3adSMasahiro Yamada	.global	__udivsi3_i4i
22*5f91a3adSMasahiro Yamada	.global	__udivsi3_i4
23*5f91a3adSMasahiro Yamada	.set	__udivsi3_i4, __udivsi3_i4i
24*5f91a3adSMasahiro Yamada	.type	__udivsi3_i4i, @function
25*5f91a3adSMasahiro Yamada	.type	__sdivsi3_i4i, @function
26*5f91a3adSMasahiro Yamada__udivsi3_i4i:
27*5f91a3adSMasahiro Yamada	sts pr,r1	/* r1 = return address; the bsr calls below overwrite PR */
28*5f91a3adSMasahiro Yamada	mov.l r4,@-r15	/* push caller's r4 */
29*5f91a3adSMasahiro Yamada	extu.w r5,r0	/* r0 = low 16 bits of the divisor */
30*5f91a3adSMasahiro Yamada	cmp/eq r5,r0	/* T = 1 if the divisor fits in 16 bits */
31*5f91a3adSMasahiro Yamada	swap.w r4,r0	/* r0 = dividend with its 16-bit halves swapped */
32*5f91a3adSMasahiro Yamada	shlr16 r4	/* r4 = upper 16 bits of the dividend */
33*5f91a3adSMasahiro Yamada	bf/s large_divisor	/* T = 0: divisor needs more than 16 bits */
34*5f91a3adSMasahiro Yamada	div0u	/* delay slot, runs on both paths: start an unsigned division */
35*5f91a3adSMasahiro Yamada	mov.l r5,@-r15	/* push caller's r5 */
36*5f91a3adSMasahiro Yamada	shll16 r5	/* place the 16-bit divisor in the upper half of r5 */
37*5f91a3adSMasahiro Yamadasdiv_small_divisor:	/* also reached from __sdivsi3_i4i */
38*5f91a3adSMasahiro Yamada	div1 r5,r4	/* first pass: 16 div1 steps (2 inline + 6 in div6, twice) */
39*5f91a3adSMasahiro Yamada	bsr div6
40*5f91a3adSMasahiro Yamada	div1 r5,r4	/* delay slot of bsr */
41*5f91a3adSMasahiro Yamada	div1 r5,r4
42*5f91a3adSMasahiro Yamada	bsr div6
43*5f91a3adSMasahiro Yamada	div1 r5,r4	/* delay slot of bsr */
44*5f91a3adSMasahiro Yamada	xtrct r4,r0	/* r0 = low half of r4 (first-pass quotient bits) : low half of dividend */
45*5f91a3adSMasahiro Yamada	xtrct r0,r4	/* r4 = low half of dividend : upper half of r4 */
46*5f91a3adSMasahiro Yamada	bsr div7	/* second pass: 16 div1 steps (7 + 1 + 1 + 7) */
47*5f91a3adSMasahiro Yamada	swap.w r4,r4	/* delay slot: r4 = first-pass upper half : low half of dividend */
48*5f91a3adSMasahiro Yamada	div1 r5,r4
49*5f91a3adSMasahiro Yamada	bsr div7
50*5f91a3adSMasahiro Yamada	div1 r5,r4	/* delay slot of bsr */
51*5f91a3adSMasahiro Yamada	xtrct r4,r0	/* r0 = second-pass quotient bits : first-pass quotient bits */
52*5f91a3adSMasahiro Yamada	mov.l @r15+,r5	/* pop caller's r5 */
53*5f91a3adSMasahiro Yamada	swap.w r0,r0	/* first-pass bits to the upper half, second-pass bits to the lower */
54*5f91a3adSMasahiro Yamada	mov.l @r15+,r4	/* pop caller's r4 */
55*5f91a3adSMasahiro Yamada	jmp @r1	/* r1 = return address, or negate_result when set up by __sdivsi3_i4i */
56*5f91a3adSMasahiro Yamada	rotcl r0	/* delay slot: shift the last quotient bit from T into r0 */
/* div7 / div6: helpers that run 7 (div7) or 6 (div6) consecutive
   div1 r5,r4 steps and return with rts.  They are called with bsr, which
   is why the entry points save PR in another register first.  */
57*5f91a3adSMasahiro Yamadadiv7:
58*5f91a3adSMasahiro Yamada	div1 r5,r4	/* extra step for div7, then fall through into div6 */
59*5f91a3adSMasahiro Yamadadiv6:
60*5f91a3adSMasahiro Yamada	            div1 r5,r4; div1 r5,r4; div1 r5,r4	/* ';' separates statements: steps 1-3 */
61*5f91a3adSMasahiro Yamada	div1 r5,r4; div1 r5,r4; rts;        div1 r5,r4	/* steps 4-5, rts, step 6 in the rts delay slot */
62*5f91a3adSMasahiro Yamada
/* divx3: helper for the large-divisor path.  Runs three rotcl r0 / div1 r5,r4
   pairs and returns with rts; the third div1 sits in the rts delay slot.
   Each rotcl rotates r0 left through T, and each div1 uses and updates T,
   so bits pass between r0 and the division one at a time.  */
63*5f91a3adSMasahiro Yamadadivx3:
64*5f91a3adSMasahiro Yamada	rotcl r0
65*5f91a3adSMasahiro Yamada	div1 r5,r4
66*5f91a3adSMasahiro Yamada	rotcl r0
67*5f91a3adSMasahiro Yamada	div1 r5,r4
68*5f91a3adSMasahiro Yamada	rotcl r0
69*5f91a3adSMasahiro Yamada	rts
70*5f91a3adSMasahiro Yamada	div1 r5,r4	/* delay slot of rts */
71*5f91a3adSMasahiro Yamada
/* large_divisor / sdiv_large_divisor: division path for a divisor that does
   not fit in 16 bits.
   On entry r0 holds the dividend with its halves swapped, r4 holds the
   upper 16 bits of the dividend, r5 holds the divisor and div0u has run.
   large_divisor is the unsigned entry and still has to push r5;
   sdiv_large_divisor is the signed entry, where r5 is already on the stack.
   Performs 16 rotcl/div1 pairs, restores r5 and r4, and leaves through
   jmp @r1 with the quotient in r0.  */
72*5f91a3adSMasahiro Yamadalarge_divisor:
73*5f91a3adSMasahiro Yamada	mov.l r5,@-r15	/* push caller's r5 (unsigned entry only) */
74*5f91a3adSMasahiro Yamadasdiv_large_divisor:
75*5f91a3adSMasahiro Yamada	xor r4,r0	/* clear the low half of r0; low dividend half stays in bits 31..16 */
76*5f91a3adSMasahiro Yamada	.rept 4	/* 4 x (1 pair here + 3 pairs in divx3) = 16 rotcl/div1 pairs */
77*5f91a3adSMasahiro Yamada	rotcl r0
78*5f91a3adSMasahiro Yamada	bsr divx3
79*5f91a3adSMasahiro Yamada	div1 r5,r4	/* delay slot of bsr */
80*5f91a3adSMasahiro Yamada	.endr
81*5f91a3adSMasahiro Yamada	mov.l @r15+,r5	/* pop caller's r5 */
82*5f91a3adSMasahiro Yamada	mov.l @r15+,r4	/* pop caller's r4 */
83*5f91a3adSMasahiro Yamada	jmp @r1	/* r1 = return address, or negate_result when set up by __sdivsi3_i4i */
84*5f91a3adSMasahiro Yamada	rotcl r0	/* delay slot: shift the last quotient bit from T into r0 */
85*5f91a3adSMasahiro Yamada
/* __sdivsi3_i4i (aliases __sdivsi3_i4, __sdivsi3): signed 32-bit division.
   In:   r4 = dividend, r5 = divisor.
   Out:  r0 = quotient.
   Negates whichever operands are negative, then reuses the unsigned paths
   sdiv_small_divisor / sdiv_large_divisor, which pop r5 and r4 and leave
   through jmp @r1.
   If the operand signs match, r1 holds the return address.  If they differ,
   r1 points at negate_result, the return address is kept in r2 and the
   caller's r2 is parked in MACL (so MACL is overwritten on that path);
   negate_result negates the quotient, restores r2 and returns.  */
86*5f91a3adSMasahiro Yamada	.global	__sdivsi3_i4i
87*5f91a3adSMasahiro Yamada	.global __sdivsi3_i4
88*5f91a3adSMasahiro Yamada	.global __sdivsi3
89*5f91a3adSMasahiro Yamada	.set	__sdivsi3_i4, __sdivsi3_i4i
90*5f91a3adSMasahiro Yamada	.set	__sdivsi3, __sdivsi3_i4i
91*5f91a3adSMasahiro Yamada__sdivsi3_i4i:
92*5f91a3adSMasahiro Yamada	mov.l r4,@-r15	/* push caller's r4 */
93*5f91a3adSMasahiro Yamada	cmp/pz r5	/* T = 1 if divisor >= 0 */
94*5f91a3adSMasahiro Yamada	mov.l r5,@-r15	/* push caller's r5 */
95*5f91a3adSMasahiro Yamada	bt/s pos_divisor
96*5f91a3adSMasahiro Yamada	cmp/pz r4	/* delay slot, runs on both paths: T = 1 if dividend >= 0 */
97*5f91a3adSMasahiro Yamada	neg r5,r5	/* divisor was negative: r5 = -r5 */
98*5f91a3adSMasahiro Yamada	extu.w r5,r0	/* r0 = low 16 bits of the negated divisor */
99*5f91a3adSMasahiro Yamada	bt/s neg_result	/* dividend >= 0, divisor < 0: quotient must be negated */
100*5f91a3adSMasahiro Yamada	cmp/eq r5,r0	/* delay slot: T = 1 if the divisor fits in 16 bits */
101*5f91a3adSMasahiro Yamada	neg r4,r4	/* both operands negative: r4 = -r4 */
102*5f91a3adSMasahiro Yamadapos_result:
103*5f91a3adSMasahiro Yamada	swap.w r4,r0	/* r0 = dividend with halves swapped, as the shared paths expect */
104*5f91a3adSMasahiro Yamada	bra sdiv_check_divisor
105*5f91a3adSMasahiro Yamada	sts pr,r1	/* delay slot: r1 = return address, so the shared exit returns directly */
106*5f91a3adSMasahiro Yamadapos_divisor:
107*5f91a3adSMasahiro Yamada	extu.w r5,r0	/* r0 = low 16 bits of the divisor */
108*5f91a3adSMasahiro Yamada	bt/s pos_result	/* dividend >= 0 as well: no sign fixup needed */
109*5f91a3adSMasahiro Yamada	cmp/eq r5,r0	/* delay slot: T = 1 if the divisor fits in 16 bits */
110*5f91a3adSMasahiro Yamada	neg r4,r4	/* dividend was negative: r4 = -r4 */
111*5f91a3adSMasahiro Yamadaneg_result:
112*5f91a3adSMasahiro Yamada	mova negate_result,r0	/* r0 = address of negate_result */
113*5f91a3adSMasahiro Yamada	;	/* NOTE(review): ';' is a statement separator in this file (see div6), so this looks like an empty statement -- confirm */
114*5f91a3adSMasahiro Yamada	mov r0,r1	/* shared exit (jmp @r1) will land in negate_result */
115*5f91a3adSMasahiro Yamada	swap.w r4,r0	/* r0 = dividend with halves swapped, as the shared paths expect */
116*5f91a3adSMasahiro Yamada	lds r2,macl	/* park caller's r2 in MACL */
117*5f91a3adSMasahiro Yamada	sts pr,r2	/* r2 = real return address */
118*5f91a3adSMasahiro Yamadasdiv_check_divisor:
119*5f91a3adSMasahiro Yamada	shlr16 r4	/* r4 = upper 16 bits of the dividend */
120*5f91a3adSMasahiro Yamada	bf/s sdiv_large_divisor	/* T comes from the cmp/eq in the delay slots above */
121*5f91a3adSMasahiro Yamada	div0u	/* delay slot, runs on both paths: start an unsigned division */
122*5f91a3adSMasahiro Yamada	bra sdiv_small_divisor
123*5f91a3adSMasahiro Yamada	shll16 r5	/* delay slot: place the 16-bit divisor in the upper half of r5 */
124*5f91a3adSMasahiro Yamada	.balign 4	/* aligns negate_result, the target of the mova above */
125*5f91a3adSMasahiro Yamadanegate_result:
126*5f91a3adSMasahiro Yamada	neg r0,r0	/* quotient = -quotient */
127*5f91a3adSMasahiro Yamada	jmp @r2	/* return to the caller */
128*5f91a3adSMasahiro Yamada	sts macl,r2	/* delay slot: restore caller's r2 from MACL */
129