/*
 * (C) Copyright 2004, Psyent Corporation <www.psyent.com>
 * Scott McNutt <smcnutt@psyent.com>
 *
 * SPDX-License-Identifier:	GPL-2.0+
 */

#include <asm-offsets.h>
#include <config.h>
#include <version.h>

/*************************************************************************
 * RESTART
 *
 * _start is the reset entry point. In order, it:
 *   1. disables interrupts (status <- 0, later ienable <- 0),
 *   2. initializes the instruction cache and then the data cache,
 *   3. copies the image [_start, _edata) to its link address when it is
 *      currently executing from a different address,
 *   4. jumps to the link-address copy,
 *   5. installs the exception trampoline at CONFIG_SYS_EXCEPTION_ADDR
 *      (unless CONFIG_ROM_STUBS is defined),
 *   6. sets up the initial stack/frame pointer and calls board_init_f(0).
 ************************************************************************/

	.text
	.global _start

_start:
	wrctl	status, r0		/* Disable interrupts */
	/* ICACHE INIT -- only the icache line at the reset address
	 * is invalidated at reset. So the init must stay within
	 * the cache line size (8 words): the eight instructions from
	 * _start through "br _except_end" are exactly that, so do not
	 * insert anything between them. If GERMS is used, we'll
	 * just be invalidating the cache a second time. If cache
	 * is not implemented, initi behaves as nop.
	 *
	 * r4 = icache line size
	 * r5 = address, stepping down from the icache size to line size
	 */
	ori	r4, r0, %lo(CONFIG_SYS_ICACHELINE_SIZE)
	movhi	r5, %hi(CONFIG_SYS_ICACHE_SIZE)
	ori	r5, r5, %lo(CONFIG_SYS_ICACHE_SIZE)
0:	initi	r5
	sub	r5, r5, r4
	bgt	r5, r0, 0b
	br	_except_end	/* Skip the tramp */

	/* EXCEPTION TRAMPOLINE -- the following gets copied
	 * to the exception address (below), but is otherwise at the
	 * default exception vector offset (0x0020).
	 */
_except_start:
	movhi	et, %hi(_exception)
	ori	et, et, %lo(_exception)
	jmp	et
_except_end:

	/* INTERRUPTS -- for now, all interrupts masked and globally
	 * disabled.
	 */
	wrctl	ienable, r0		/* All disabled	*/

	/* DCACHE INIT -- if dcache not implemented, initd behaves as
	 * nop.
	 *
	 * r4 = dcache line size, r5 = dcache size,
	 * r6 = address, stepping up from 0 while below the dcache size
	 */
	movhi	r4, %hi(CONFIG_SYS_DCACHELINE_SIZE)
	ori	r4, r4, %lo(CONFIG_SYS_DCACHELINE_SIZE)
	movhi	r5, %hi(CONFIG_SYS_DCACHE_SIZE)
	ori	r5, r5, %lo(CONFIG_SYS_DCACHE_SIZE)
	mov	r6, r0
1:	initd	0(r6)
	add	r6, r6, r4
	bltu	r6, r5, 1b

	/* RELOCATE CODE, DATA & COMMAND TABLE -- the following code
	 * assumes code, data and the command table are all
	 * contiguous. This lets us relocate everything as a single
	 * block. Make sure the linker script matches this ;-)
	 *
	 * nextpc yields the run-time address of _cur; subtracting the
	 * link-time offset (_cur - _start) gives the run-time _start.
	 */
	nextpc	r4
_cur:	movhi	r5, %hi(_cur - _start)
	ori	r5, r5, %lo(_cur - _start)
	sub	r4, r4, r5		/* r4 <- cur _start */
	mov	r8, r4			/* NOTE(review): r8 is not read again in this file */
	movhi	r5, %hi(_start)
	ori	r5, r5, %lo(_start)	/* r5 <- linked _start */
	beq	r4, r5, 3f		/* Already at link address */

	/* Word copy: r4 = source, r5 = destination, r6 = end of
	 * destination (_edata). The exit test is an ordered compare so
	 * the loop still terminates if _edata is not a whole number of
	 * words past _start (an equality test would step over it).
	 */
	movhi	r6, %hi(_edata)
	ori	r6, r6, %lo(_edata)
2:	ldwio	r7, 0(r4)
	addi	r4, r4, 4
	stwio	r7, 0(r5)
	addi	r5, r5, 4
	bltu	r5, r6, 2b
3:

	/* JUMP TO RELOC ADDR -- continue at the link-address copy */
	movhi	r4, %hi(_reloc)
	ori	r4, r4, %lo(_reloc)
	jmp	r4
_reloc:

	/* COPY EXCEPTION TRAMPOLINE -- copy the tramp to the
	 * exception address. Define CONFIG_ROM_STUBS to prevent
	 * the copy (e.g. exception in flash or in other
	 * software/firmware component).
	 *
	 * r4 = source (_except_start), r5 = source end (_except_end),
	 * r6 = destination (CONFIG_SYS_EXCEPTION_ADDR)
	 */
#if !defined(CONFIG_ROM_STUBS)
	movhi	r4, %hi(_except_start)
	ori	r4, r4, %lo(_except_start)
	movhi	r5, %hi(_except_end)
	ori	r5, r5, %lo(_except_end)
	movhi	r6, %hi(CONFIG_SYS_EXCEPTION_ADDR)
	ori	r6, r6, %lo(CONFIG_SYS_EXCEPTION_ADDR)
	beq	r4, r6, 7f	/* Skip if at proper addr */

6:	ldwio	r7, 0(r4)
	stwio	r7, 0(r6)
	addi	r4, r4, 4
	addi	r6, r6, 4
	bne	r4, r5, 6b
7:
#endif

	/* STACK INIT -- zero top two words for call back chain.
	 */
	movhi	sp, %hi(CONFIG_SYS_INIT_SP)
	ori	sp, sp, %lo(CONFIG_SYS_INIT_SP)
	addi	sp, sp, -8
	stw	r0, 0(sp)
	stw	r0, 4(sp)
	mov	fp, sp

	/*
	 * Call board_init_f -- never returns
	 */
	mov	r4, r0			/* First argument is 0 */
	movhi	r2, %hi(board_init_f@h)
	ori	r2, r2, %lo(board_init_f@h)
	callr	r2

	/* NEVER RETURNS -- but branch to the _start just
	 * in case ;-)
	 */
	br	_start



/*
 * relocate_code -- Nios2 handles the relocation above. But
 * the generic board code monkeys with the heap, stack, etc.
 * (it makes some assumptions that may not be appropriate
 * for Nios). Nevertheless, we capitulate here.
 *
 * We'll call the board_init_r from here since this isn't
 * supposed to return.
 *
 * void relocate_code (ulong sp, gd_t *global_data,
 *			ulong reloc_addr)
 *			__attribute__ ((noreturn));
 *
 * In:	r4 = sp, r5 = global_data, r6 = reloc_addr
 * Out:	does not return; calls board_init_r with r4 = global_data and
 *	r5 = reloc_addr.
 * Scratch: r8, r9 (so the outgoing argument registers are not disturbed
 *	while BSS is cleared).
 */
	.text
	.global relocate_code

relocate_code:
	mov	sp, r4		/* Set the new sp */
	mov	r4, r5		/* arg 1 <- global_data */
	mov	r5, r6		/* arg 2 <- reloc_addr */

	/*
	 * ZERO BSS/SBSS -- bss and sbss are assumed to be adjacent
	 * and between __bss_start and __bss_end.
	 *
	 * r8 = cursor, r9 = end. Ordered compares are used so that an
	 * empty range is skipped and a range that is not a whole number
	 * of words still terminates.
	 */
	movhi	r8, %hi(__bss_start)
	ori	r8, r8, %lo(__bss_start)
	movhi	r9, %hi(__bss_end)
	ori	r9, r9, %lo(__bss_end)
	bgeu	r8, r9, 5f

4:	stwio	r0, 0(r8)
	addi	r8, r8, 4
	bltu	r8, r9, 4b
5:

	movhi	r8, %hi(board_init_r@h)
	ori	r8, r8, %lo(board_init_r@h)
	callr	r8
	/* Not expected to be reached. callr has overwritten ra, so this
	 * cannot return to relocate_code's caller.
	 */
	ret

/*
 * dly_clks -- Nios2 (like Nios1) doesn't have a timebase in
 * the core. For simple delay loops, we do our best by counting
 * instruction cycles.
 *
 * Instruction performance varies based on the core. For cores
 * with icache and static/dynamic branch prediction (II/f, II/s):
 *
 *	Normal ALU (e.g. add, cmp, etc):	1 cycle
 *	Branch (correctly predicted, taken):	2 cycles
 *	Negative offset is predicted (II/s).
 *
 * For cores without icache and no branch prediction (II/e):
 *
 *	Normal ALU (e.g. add, cmp, etc):	6 cycles
 *	Branch (no prediction):			6 cycles
 *
 * For simplicity, if an instruction cache is implemented we
 * assume II/f or II/s. Otherwise, we use the II/e.
 *
 * In:	r4 = number of clocks to wait. The loop test is a signed
 *	compare against zero, so a value with the top bit set
 *	returns after a single pass.
 * Out:	nothing. r4 is clobbered (left negative).
 */
	.text
	.globl dly_clks

dly_clks:
0:
#if (CONFIG_SYS_ICACHE_SIZE > 0)
	subi	r4, r4, 3		/* 3 clocks/loop	*/
#else
	subi	r4, r4, 12		/* 12 clocks/loop	*/
#endif
	bge	r4, r0, 0b		/* Loop until the count goes negative */
	ret

/*
 * version_string -- exported, NUL-terminated copy of
 * U_BOOT_VERSION_STRING, placed in .data.
 * (U_BOOT_VERSION_STRING is presumably provided by <version.h>.)
 */
	.data
	.globl	version_string

version_string:
	.ascii U_BOOT_VERSION_STRING, "\0"

215