xref: /OK3568_Linux_fs/u-boot/arch/sh/lib/udivsi3_i4i-Os.S (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun/* Copyright (C) 2006 Free Software Foundation, Inc.
2*4882a593Smuzhiyun
3*4882a593Smuzhiyun * SPDX-License-Identifier:	GPL-2.0+
4*4882a593Smuzhiyun */
5*4882a593Smuzhiyun
6*4882a593Smuzhiyun/* Moderately Space-optimized libgcc routines for the Renesas SH /
7*4882a593Smuzhiyun   STMicroelectronics ST40 CPUs.
8*4882a593Smuzhiyun   Contributed by J"orn Rennecke joern.rennecke@st.com.  */
9*4882a593Smuzhiyun
10*4882a593Smuzhiyun/* Size: 186 bytes jointly for udivsi3_i4i and sdivsi3_i4i
11*4882a593Smuzhiyun   sh4-200 run times:
12*4882a593Smuzhiyun   udiv small divisor: 55 cycles
13*4882a593Smuzhiyun   udiv large divisor: 52 cycles
14*4882a593Smuzhiyun   sdiv small divisor, positive result: 59 cycles
15*4882a593Smuzhiyun   sdiv large divisor, positive result: 56 cycles
16*4882a593Smuzhiyun   sdiv small divisor, negative result: 65 cycles (*)
17*4882a593Smuzhiyun   sdiv large divisor, negative result: 62 cycles (*)
18*4882a593Smuzhiyun   (*): r2 is restored in the delay slot of the returning jmp and has a
19*4882a593Smuzhiyun        lingering latency of two more cycles.  */
/*
 * __udivsi3_i4i (alias __udivsi3_i4): unsigned 32-bit division.
 *
 * In:   r4 = dividend, r5 = divisor.  Both are pushed on the stack and
 *       popped again before returning.
 * Out:  r0 = quotient.
 * Uses: r1 (keeps the return address, because the bsr calls below
 *       overwrite pr), pr, the T bit and the division state that
 *       div0u/div1 maintain.
 *
 * The entry code tests whether the divisor fits in 16 bits.  If it does,
 * execution falls through to the code at sdiv_small_divisor, which runs
 * two stages of 16 div1 steps each; otherwise it branches to
 * large_divisor.  There is no check for a zero divisor in this code.
 */
20*4882a593Smuzhiyun	.balign 4
21*4882a593Smuzhiyun	.global	__udivsi3_i4i
22*4882a593Smuzhiyun	.global	__udivsi3_i4
23*4882a593Smuzhiyun	.set	__udivsi3_i4, __udivsi3_i4i	/* second name for the same entry point */
24*4882a593Smuzhiyun	.type	__udivsi3_i4i, @function
25*4882a593Smuzhiyun	.type	__sdivsi3_i4i, @function
26*4882a593Smuzhiyun__udivsi3_i4i:
27*4882a593Smuzhiyun	sts pr,r1	/* r1 = return address; the bsr calls below overwrite pr */
28*4882a593Smuzhiyun	mov.l r4,@-r15	/* push r4 so it can be restored on exit */
29*4882a593Smuzhiyun	extu.w r5,r0	/* r0 = low 16 bits of the divisor, zero-extended */
30*4882a593Smuzhiyun	cmp/eq r5,r0	/* T = 1 iff the divisor fits in 16 bits */
31*4882a593Smuzhiyun	swap.w r4,r0	/* r0 = dividend with its 16-bit halves exchanged */
32*4882a593Smuzhiyun	shlr16 r4	/* r4 = upper 16 bits of the dividend */
33*4882a593Smuzhiyun	bf/s large_divisor	/* T = 0: the divisor has bits set above bit 15 */
34*4882a593Smuzhiyun	div0u	/* delay slot, runs on both paths: set up an unsigned divide */
35*4882a593Smuzhiyun	mov.l r5,@-r15	/* push r5 (the large_divisor path does its own push) */
36*4882a593Smuzhiyun	shll16 r5	/* move the 16-bit divisor into the upper half of r5 */
/* __sdivsi3_i4i joins here with r4/r5 already pushed, r5 already shifted
   and r0/r4 prepared the same way as above.  */
37*4882a593Smuzhiyunsdiv_small_divisor:
38*4882a593Smuzhiyun	div1 r5,r4	/* first stage: 16 div1 steps = 2 x (1 + delay slot + 6 in div6) */
39*4882a593Smuzhiyun	bsr div6
40*4882a593Smuzhiyun	div1 r5,r4	/* delay slot */
41*4882a593Smuzhiyun	div1 r5,r4
42*4882a593Smuzhiyun	bsr div6
43*4882a593Smuzhiyun	div1 r5,r4	/* delay slot */
44*4882a593Smuzhiyun	xtrct r4,r0	/* r0 = (r4 << 16) | (r0 >> 16) */
45*4882a593Smuzhiyun	xtrct r0,r4	/* r4 = (r0 << 16) | (r4 >> 16) */
46*4882a593Smuzhiyun	bsr div7	/* second stage: 16 div1 steps = 7 + (1 + delay slot + 7) */
47*4882a593Smuzhiyun	swap.w r4,r4	/* delay slot: exchange the halves of r4 before the next steps */
48*4882a593Smuzhiyun	div1 r5,r4
49*4882a593Smuzhiyun	bsr div7
50*4882a593Smuzhiyun	div1 r5,r4	/* delay slot */
51*4882a593Smuzhiyun	xtrct r4,r0	/* r0 = (r4 << 16) | (r0 >> 16) */
52*4882a593Smuzhiyun	mov.l @r15+,r5	/* restore the caller's r5 */
53*4882a593Smuzhiyun	swap.w r0,r0	/* exchange the halves of r0 */
54*4882a593Smuzhiyun	mov.l @r15+,r4	/* restore the caller's r4 */
55*4882a593Smuzhiyun	jmp @r1	/* r1 = saved pr, or negate_result when entered from neg_result */
56*4882a593Smuzhiyun	rotcl r0	/* delay slot: rotate the final T bit into bit 0 of r0 */
/*
 * div7 / div6: helpers that run seven (div7) or six (div6) div1 steps on
 * r4 with divisor r5 and then return through pr.  They are reached with
 * bsr from the code at sdiv_small_divisor.  div7 is one extra step that
 * falls through into div6.
 */
57*4882a593Smuzhiyundiv7:
58*4882a593Smuzhiyun	div1 r5,r4	/* the seventh step; falls through into div6 */
59*4882a593Smuzhiyundiv6:
60*4882a593Smuzhiyun	            div1 r5,r4; div1 r5,r4; div1 r5,r4	/* steps 1-3 */
61*4882a593Smuzhiyun	div1 r5,r4; div1 r5,r4; rts;        div1 r5,r4	/* steps 4-5, then step 6 in the rts delay slot */
62*4882a593Smuzhiyun
/*
 * divx3: helper that runs three rotcl/div1 pairs and returns through pr;
 * the third div1 sits in the rts delay slot.  Each rotcl r0 moves the top
 * bit of r0 into T and the previous T into bit 0 of r0, and the div1 that
 * follows uses that T.  Called with bsr from the .rept sequence under
 * sdiv_large_divisor.
 */
63*4882a593Smuzhiyundivx3:
64*4882a593Smuzhiyun	rotcl r0
65*4882a593Smuzhiyun	div1 r5,r4
66*4882a593Smuzhiyun	rotcl r0
67*4882a593Smuzhiyun	div1 r5,r4
68*4882a593Smuzhiyun	rotcl r0
69*4882a593Smuzhiyun	rts
70*4882a593Smuzhiyun	div1 r5,r4	/* delay slot: third div1 of the group */
71*4882a593Smuzhiyun
/*
 * large_divisor: path taken when the divisor does not fit in 16 bits.
 *
 * On entry r0 holds the dividend with its halves exchanged and r4 holds
 * the upper 16 bits of the dividend (both prepared by the entry code),
 * and div0u has already run in the branch delay slot.
 * __udivsi3_i4i enters at large_divisor; __sdivsi3_i4i has already pushed
 * r5 and enters at sdiv_large_divisor.
 */
72*4882a593Smuzhiyunlarge_divisor:
73*4882a593Smuzhiyun	mov.l r5,@-r15	/* push r5; the signed entry point has already done this */
74*4882a593Smuzhiyunsdiv_large_divisor:
75*4882a593Smuzhiyun	xor r4,r0	/* clears the low half of r0: low 16 dividend bits stay in bits 31..16 */
76*4882a593Smuzhiyun	.rept 4	/* 4 x (1 pair here + 3 pairs in divx3) = 16 rotcl/div1 pairs */
77*4882a593Smuzhiyun	rotcl r0
78*4882a593Smuzhiyun	bsr divx3
79*4882a593Smuzhiyun	div1 r5,r4	/* delay slot: pairs with the rotcl above */
80*4882a593Smuzhiyun	.endr
81*4882a593Smuzhiyun	mov.l @r15+,r5	/* restore the caller's r5 */
82*4882a593Smuzhiyun	mov.l @r15+,r4	/* restore the caller's r4 */
83*4882a593Smuzhiyun	jmp @r1	/* r1 = saved pr, or negate_result when entered from neg_result */
84*4882a593Smuzhiyun	rotcl r0	/* delay slot: rotate the final T bit into bit 0 of r0 */
85*4882a593Smuzhiyun
/*
 * __sdivsi3_i4i (aliases __sdivsi3_i4 and __sdivsi3): signed 32-bit
 * division built on the unsigned code of __udivsi3_i4i.
 *
 * In:   r4 = dividend, r5 = divisor.  Both are pushed here and popped by
 *       the shared unsigned code just before it jumps through r1.
 * Out:  r0 = quotient.
 *
 * Negative operands are negated, then control transfers to
 * sdiv_small_divisor or sdiv_large_divisor.  How the shared code returns
 * depends on the sign of the result:
 *   - positive result: r1 = pr, so the shared code returns to the caller;
 *   - negative result: r1 = address of negate_result, r2 = pr, and the
 *     caller's r2 is parked in macl until negate_result restores it.
 */
86*4882a593Smuzhiyun	.global	__sdivsi3_i4i
87*4882a593Smuzhiyun	.global __sdivsi3_i4
88*4882a593Smuzhiyun	.global __sdivsi3
89*4882a593Smuzhiyun	.set	__sdivsi3_i4, __sdivsi3_i4i
90*4882a593Smuzhiyun	.set	__sdivsi3, __sdivsi3_i4i
91*4882a593Smuzhiyun__sdivsi3_i4i:
92*4882a593Smuzhiyun	mov.l r4,@-r15	/* push r4 (original, possibly negative, dividend) */
93*4882a593Smuzhiyun	cmp/pz r5	/* T = 1 iff divisor >= 0 */
94*4882a593Smuzhiyun	mov.l r5,@-r15	/* push r5 (original divisor) */
95*4882a593Smuzhiyun	bt/s pos_divisor	/* branches on the divisor sign test above */
96*4882a593Smuzhiyun	cmp/pz r4	/* delay slot: T = 1 iff dividend >= 0 */
97*4882a593Smuzhiyun	neg r5,r5	/* divisor was negative: negate it */
98*4882a593Smuzhiyun	extu.w r5,r0	/* r0 = low 16 bits of the negated divisor */
99*4882a593Smuzhiyun	bt/s neg_result	/* dividend >= 0, divisor < 0: quotient must be negated */
100*4882a593Smuzhiyun	cmp/eq r5,r0	/* delay slot: T = 1 iff the negated divisor fits in 16 bits */
101*4882a593Smuzhiyun	neg r4,r4	/* both operands negative: negate the dividend too */
102*4882a593Smuzhiyunpos_result:
103*4882a593Smuzhiyun	swap.w r4,r0	/* r0 = dividend with its halves exchanged, as in __udivsi3_i4i */
104*4882a593Smuzhiyun	bra sdiv_check_divisor
105*4882a593Smuzhiyun	sts pr,r1	/* delay slot: r1 = return address for the shared code */
106*4882a593Smuzhiyunpos_divisor:
107*4882a593Smuzhiyun	extu.w r5,r0	/* r0 = low 16 bits of the divisor */
108*4882a593Smuzhiyun	bt/s pos_result	/* dividend >= 0 as well: nothing to negate */
109*4882a593Smuzhiyun	cmp/eq r5,r0	/* delay slot: T = 1 iff the divisor fits in 16 bits */
110*4882a593Smuzhiyun	neg r4,r4	/* dividend < 0, divisor >= 0: negate it and fall into neg_result */
111*4882a593Smuzhiyunneg_result:
112*4882a593Smuzhiyun	mova negate_result,r0	/* r0 = address of negate_result */
113*4882a593Smuzhiyun	;
114*4882a593Smuzhiyun	mov r0,r1	/* the shared code's final jmp @r1 will land on negate_result */
115*4882a593Smuzhiyun	swap.w r4,r0	/* r0 = dividend with its halves exchanged */
116*4882a593Smuzhiyun	lds r2,macl	/* park the caller's r2 in macl */
117*4882a593Smuzhiyun	sts pr,r2	/* r2 = return address, used by negate_result */
118*4882a593Smuzhiyunsdiv_check_divisor:
119*4882a593Smuzhiyun	shlr16 r4	/* r4 = upper 16 bits of the (possibly negated) dividend */
120*4882a593Smuzhiyun	bf/s sdiv_large_divisor	/* uses T from the cmp/eq r5,r0 executed on the way here */
121*4882a593Smuzhiyun	div0u	/* delay slot, runs on both paths: set up an unsigned divide */
122*4882a593Smuzhiyun	bra sdiv_small_divisor
123*4882a593Smuzhiyun	shll16 r5	/* delay slot: move the 16-bit divisor into the upper half of r5 */
/*
 * negate_result: epilogue for a negative signed quotient.  Reached from
 * the shared code's final jmp @r1 after neg_result stored this address in
 * r1.  It negates r0, returns through r2 and restores the caller's r2
 * from macl.
 * NOTE(review): the .balign 4 is presumably needed because the address
 * is formed with mova, which produces a longword-aligned address;
 * confirm against the SH instruction manual.
 */
124*4882a593Smuzhiyun	.balign 4
125*4882a593Smuzhiyunnegate_result:
126*4882a593Smuzhiyun	neg r0,r0	/* negate the unsigned quotient */
127*4882a593Smuzhiyun	jmp @r2	/* return; r2 was loaded from pr at neg_result */
128*4882a593Smuzhiyun	sts macl,r2	/* delay slot: restore the caller's r2 */
129