/* SPDX-License-Identifier: BSD-2-Clause */
/*
 * Copyright (c) 2015, Linaro Limited
 */

#include <platform_config.h>

#include <arm64_macros.S>
#include <arm.h>
#include <asm.S>
#include <generated/asm-defines.h>
#include <keep.h>
#include <sm/optee_smc.h>
#include <sm/teesmc_opteed.h>
#include <sm/teesmc_opteed_macros.h>

	/*
	 * Set up SP_EL0 and SP_EL1, SP will be set to SP_EL0.
	 * SP_EL0 is assigned stack_tmp_export + cpu_id * stack_tmp_stride
	 * SP_EL1 is assigned thread_core_local[cpu_id]
	 */
	.macro set_sp
		bl	__get_core_pos
		cmp	x0, #CFG_TEE_CORE_NB_CORE
		/* Unsupported CPU, park it before it breaks something */
		bge	unhandled_cpu
		adr	x1, stack_tmp_stride
		ldr	w1, [x1]
		mul	x1, x0, x1
		adrp	x0, stack_tmp_export
		add	x0, x0, :lo12:stack_tmp_export
		ldr	x0, [x0]
		msr	spsel, #0
		add	sp, x1, x0
		bl	thread_get_core_local
		msr	spsel, #1
		mov	sp, x0
		msr	spsel, #0
	.endm

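	/*
	 * Configure SCTLR_EL1 for early boot: enable the I-cache (SCTLR_I),
	 * SP alignment checking (SCTLR_SA) and SPAN, plus WXN when
	 * CFG_CORE_RWDATA_NOEXEC is set and alignment fault checking
	 * (SCTLR_A) when CFG_SCTLR_ALIGNMENT_CHECK is set.
	 */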
	.macro set_sctlr_el1
		mrs	x0, sctlr_el1
		orr	x0, x0, #SCTLR_I
		orr	x0, x0, #SCTLR_SA
		orr	x0, x0, #SCTLR_SPAN
#if defined(CFG_CORE_RWDATA_NOEXEC)
		orr	x0, x0, #SCTLR_WXN
#endif
#if defined(CFG_SCTLR_ALIGNMENT_CHECK)
		orr	x0, x0, #SCTLR_A
#else
		bic	x0, x0, #SCTLR_A
#endif
		msr	sctlr_el1, x0
	.endm

FUNC _start , :
	mov	x19, x0		/* Save pagable part address */
#if defined(CFG_DT_ADDR)
	ldr	x20, =CFG_DT_ADDR
#else
	mov	x20, x2		/* Save DT address */
#endif

	adr	x0, reset_vect_table
	msr	vbar_el1, x0
	isb

	set_sctlr_el1
	isb

#ifdef CFG_WITH_PAGER
	/*
	 * Move init code into correct location and move hashes to a
	 * temporary safe location until the heap is initialized.
	 *
	 * The binary is built as:
	 * [Pager code, rodata and data] : In correct location
	 * [Init code and rodata] : Should be copied to __init_start
	 * [struct boot_embdata + data] : Should be saved before
	 * initializing pager, first uint32_t tells the length of the data
	 */
	adr	x0, __init_start	/* dst */
	adr	x1, __data_end		/* src */
	adr	x2, __init_end
	sub	x2, x2, x0		/* init len */
	ldr	w4, [x1, x2]		/* length of hashes etc */
	add	x2, x2, x4		/* length of init and hashes etc */
	/* Copy backwards (as memmove) in case we're overlapping */
	add	x0, x0, x2		/* __init_start + len */
	add	x1, x1, x2		/* __data_end + len */
	adr	x3, cached_mem_end
	str	x0, [x3]
	adr	x2, __init_start
copy_init:
	ldp	x3, x4, [x1, #-16]!
	stp	x3, x4, [x0, #-16]!
	cmp	x0, x2
	b.gt	copy_init
#else
	/*
	 * The binary is built as:
	 * [Core, rodata and data] : In correct location
	 * [struct boot_embdata + data] : Should be moved to __end, first
	 * uint32_t tells the length of the struct + data
	 */
	adr_l	x0, __end		/* dst */
	adr_l	x1, __data_end		/* src */
	ldr	w2, [x1]		/* struct boot_embdata::total_len */
	/* Copy backwards (as memmove) in case we're overlapping */
	add	x0, x0, x2
	add	x1, x1, x2
	adr	x3, cached_mem_end
	str	x0, [x3]
	adr_l	x2, __end

copy_init:
	ldp	x3, x4, [x1, #-16]!
	stp	x3, x4, [x0, #-16]!
	cmp	x0, x2
	b.gt	copy_init
#endif
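	/*
	 * At this point cached_mem_end holds the end address of the data
	 * copied above; it bounds the D-cache maintenance further down.
	 */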

	/*
	 * Clear .bss, this code obviously depends on the linker keeping
	 * start/end of .bss at least 8 byte aligned.
	 */
	adr_l	x0, __bss_start
	adr_l	x1, __bss_end
clear_bss:
	str	xzr, [x0], #8
	cmp	x0, x1
	b.lt	clear_bss

#ifdef CFG_VIRTUALIZATION
	/*
	 * Clear .nex_bss, this code obviously depends on the linker keeping
	 * start/end of .nex_bss at least 8 byte aligned.
	 */
	adr	x0, __nex_bss_start
	adr	x1, __nex_bss_end
clear_nex_bss:
	str	xzr, [x0], #8
	cmp	x0, x1
	b.lt	clear_nex_bss
#endif

	/* Set up SP_EL0 and SP_EL1, SP will be set to SP_EL0 */
	set_sp

	bl	thread_init_thread_core_local

	/* Enable aborts now that we can receive exceptions */
	msr	daifclr, #DAIFBIT_ABT

	/*
	 * Invalidate dcache for all memory used during initialization to
	 * avoid nasty surprises when the cache is turned on. We must not
	 * invalidate memory not used by OP-TEE since we may invalidate
	 * entries used by, for instance, ARM Trusted Firmware.
	 */
	adr_l	x0, __text_start
	ldr	x1, cached_mem_end
	sub	x1, x1, x0
	bl	dcache_cleaninv_range

	/* Enable Console */
	bl	console_init

#ifdef CFG_CORE_ASLR
	mov	x0, x20
	bl	get_aslr_seed
#else
	mov	x0, #0
#endif
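	/*
	 * x0 now holds the ASLR seed (or 0); it is passed together with
	 * x1 = &boot_mmu_config to core_init_mmu_map() below.
	 */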

	adr	x1, boot_mmu_config
	bl	core_init_mmu_map

#ifdef CFG_CORE_ASLR
	/*
	 * Process the relocation information again, updating for the new
	 * offset. We're doing this now, before the MMU is enabled, as some
	 * of the memory will become write protected.
	 */
	ldr	x0, boot_mmu_config + CORE_MMU_CONFIG_LOAD_OFFSET
	/*
	 * Update the cached_mem_end address with the load offset since it
	 * was calculated before relocation.
	 */
	adr	x5, cached_mem_end
	ldr	x6, [x5]
	add	x6, x6, x0
	str	x6, [x5]
	bl	relocate
#endif

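	/*
	 * __get_core_pos() returns the core position in x0, which becomes
	 * the core_pos argument to enable_mmu().
	 */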
	bl	__get_core_pos
	bl	enable_mmu
#ifdef CFG_CORE_ASLR
	/*
	 * Reinitialize the console, since register_serial_console() has
	 * previously registered a PA and with ASLR the VA is different
	 * from the PA.
	 */
	bl	console_init
#endif

#ifdef CFG_VIRTUALIZATION
	/*
	 * Initialize the partition tables for each partition to
	 * default_partition, which has now been relocated to a different VA
	 */
	bl	core_mmu_set_default_prtn_tbl
#endif

	mov	x0, x19		/* pagable part address */
	mov	x1, #-1
	bl	boot_init_primary_early
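	/*
	 * Without CFG_VIRTUALIZATION, boot_init_primary_late() below runs
	 * on the boot thread stack (threads[0].stack_va_end); the temporary
	 * stack pointer is kept in x21 and restored afterwards.
	 */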
#ifndef CFG_VIRTUALIZATION
	mov	x21, sp
	adr_l	x0, threads
	ldr	x0, [x0, #THREAD_CTX_STACK_VA_END]
	mov	sp, x0
#endif
	mov	x0, x20		/* DT address */
	bl	boot_init_primary_late
#ifndef CFG_VIRTUALIZATION
	mov	sp, x21
#endif

	/*
	 * In case we've touched memory that secondary CPUs will use before
	 * they have turned on their D-cache, clean and invalidate the
	 * D-cache before exiting to normal world.
	 */
	adr_l	x0, __text_start
	ldr	x1, cached_mem_end
	sub	x1, x1, x0
	bl	dcache_cleaninv_range

	/*
	 * Clear the current thread id now to allow the thread to be reused
	 * on the next entry. Matches thread_init_boot_thread() in boot.c.
	 */
#ifndef CFG_VIRTUALIZATION
	bl	thread_clr_boot_thread
#endif

#ifdef CFG_CORE_FFA
	adr	x0, cpu_on_handler
	/*
	 * Compensate for the load offset since cpu_on_handler() is
	 * called with MMU off.
	 */
	ldr	x1, boot_mmu_config + CORE_MMU_CONFIG_LOAD_OFFSET
	sub	x0, x0, x1
	bl	ffa_secondary_cpu_boot_req
	b	thread_ffa_msg_wait
#else
	/*
	 * Pass the vector address returned from main_init
	 * Compensate for the load offset since cpu_on_handler() is
	 * called with MMU off.
	 */
	ldr	x0, boot_mmu_config + CORE_MMU_CONFIG_LOAD_OFFSET
	adr	x1, thread_vector_table
	sub	x1, x1, x0
	mov	x0, #TEESMC_OPTEED_RETURN_ENTRY_DONE
	smc	#0
	b	.	/* SMC should not return */
#endif
END_FUNC _start
DECLARE_KEEP_INIT _start

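	/*
	 * End address of the memory touched during early init, stored by
	 * _start and used to bound the D-cache clean/invalidate ranges.
	 */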
	.balign	8
LOCAL_DATA cached_mem_end , :
	.skip	8
END_DATA cached_mem_end

#ifdef CFG_CORE_ASLR
LOCAL_FUNC relocate , :
	/* x0 holds load offset */
#ifdef CFG_WITH_PAGER
	adr_l	x6, __init_end
#else
	adr_l	x6, __end
#endif
	ldp	w2, w3, [x6, #BOOT_EMBDATA_RELOC_OFFSET]

	mov_imm	x1, TEE_RAM_START
	add	x2, x2, x6	/* start of relocations */
	add	x3, x3, x2	/* end of relocations */

	/*
	 * Relocations are not formatted as Rela64, instead they are in a
	 * compressed format created by get_reloc_bin() in
	 * scripts/gen_tee_bin.py
	 *
	 * All the R_AARCH64_RELATIVE relocations are translated into a
	 * list of 32-bit offsets from TEE_RAM_START. At each such address
	 * there is a 64-bit value which is increased with the load offset.
	 */

#ifdef CFG_WITH_PAGER
	/*
	 * With pager enabled we can only relocate the pager and init
	 * parts, the rest has to be done when a page is populated.
	 */
	sub	x6, x6, x1
#endif

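	/*
	 * Loop registers: x2 = next entry, x3 = end of the entries,
	 * x1 = TEE_RAM_START and x0 = load offset. Each 32-bit entry is the
	 * offset of a 64-bit value to be adjusted by the load offset.
	 */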
	b	2f
	/* Loop over the relocation addresses and process all entries */
1:	ldr	w4, [x2], #4
#ifdef CFG_WITH_PAGER
	/* Skip too large addresses */
	cmp	x4, x6
	b.ge	2f
#endif
	add	x4, x4, x1
	ldr	x5, [x4]
	add	x5, x5, x0
	str	x5, [x4]

2:	cmp	x2, x3
	b.ne	1b

	ret
END_FUNC relocate
#endif

/*
 * void enable_mmu(unsigned long core_pos);
 *
 * This function depends on being mapped within the identity map, where
 * the physical and virtual addresses are the same. After the MMU has been
 * enabled the instruction pointer will be updated to execute at the new
 * offset instead. Stack pointers and the return address are updated.
 */
LOCAL_FUNC enable_mmu , : , .identity_map
	adr	x1, boot_mmu_config
	load_xregs x1, 0, 2, 6
	/*
	 * x0 = core_pos
	 * x2 = tcr_el1
	 * x3 = mair_el1
	 * x4 = ttbr0_el1_base
	 * x5 = ttbr0_core_offset
	 * x6 = load_offset
	 */
	msr	tcr_el1, x2
	msr	mair_el1, x3

	/*
	 * ttbr0_el1 = ttbr0_el1_base + ttbr0_core_offset * core_pos
	 */
	madd	x1, x5, x0, x4
	msr	ttbr0_el1, x1
	msr	ttbr1_el1, xzr
	isb

	/* Invalidate TLB */
	tlbi	vmalle1

	/*
	 * Make sure translation table writes have drained into memory and
	 * the TLB invalidation is complete.
	 */
	dsb	sy
	isb

	/* Enable the MMU */
	mrs	x1, sctlr_el1
	orr	x1, x1, #SCTLR_M
	msr	sctlr_el1, x1
	isb

	/* Update vbar */
	mrs	x1, vbar_el1
	add	x1, x1, x6
	msr	vbar_el1, x1
	isb

	/* Invalidate instruction cache and branch predictor */
	ic	iallu
	isb

	/* Enable I and D cache */
	mrs	x1, sctlr_el1
	orr	x1, x1, #SCTLR_I
	orr	x1, x1, #SCTLR_C
	msr	sctlr_el1, x1
	isb

	/* Adjust stack pointers and return address */
	msr	spsel, #1
	add	sp, sp, x6
	msr	spsel, #0
	add	sp, sp, x6
	add	x30, x30, x6

	ret
END_FUNC enable_mmu

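	/*
	 * struct core_mmu_config, filled in by core_init_mmu_map() and
	 * consumed by enable_mmu() above; _start also reads the load
	 * offset field from it.
	 */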
	.balign	8
DATA boot_mmu_config , : /* struct core_mmu_config */
	.skip	CORE_MMU_CONFIG_SIZE
END_DATA boot_mmu_config

FUNC cpu_on_handler , :
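	/*
	 * Save the two arguments and the return address in callee-saved
	 * registers so they survive the MMU and stack setup below; x0/x1
	 * are then handed on to boot_cpu_on_handler().
	 */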
	mov	x19, x0
	mov	x20, x1
	mov	x21, x30

	adr	x0, reset_vect_table
	msr	vbar_el1, x0
	isb

	set_sctlr_el1
	isb

	/* Enable aborts now that we can receive exceptions */
	msr	daifclr, #DAIFBIT_ABT

	bl	__get_core_pos
	bl	enable_mmu

	/* Set up SP_EL0 and SP_EL1, SP will be set to SP_EL0 */
	set_sp

	mov	x0, x19
	mov	x1, x20
#ifdef CFG_CORE_FFA
	bl	boot_cpu_on_handler
	b	thread_ffa_msg_wait
#else
	mov	x30, x21
	b	boot_cpu_on_handler
#endif
END_FUNC cpu_on_handler
DECLARE_KEEP_PAGER cpu_on_handler

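	/*
	 * Park CPUs with a core position beyond CFG_TEE_CORE_NB_CORE, as
	 * detected in set_sp above.
	 */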
LOCAL_FUNC unhandled_cpu , :
	wfi
	b	unhandled_cpu
END_FUNC unhandled_cpu

	/*
	 * This macro verifies that a given vector doesn't exceed the
	 * architectural limit of 32 instructions. It is meant to be placed
	 * immediately after the last instruction in the vector and takes
	 * the vector entry as its parameter.
	 */
	.macro check_vector_size since
	  .if (. - \since) > (32 * 4)
	    .error "Vector exceeds 32 instructions"
	  .endif
	.endm

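	/*
	 * The exception vector table must be 2 KiB aligned (.align 11) as
	 * required by VBAR_EL1; each entry below is 128 bytes (.align 7),
	 * that is, at most 32 A64 instructions.
	 */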
	.section .identity_map, "ax", %progbits
	.align	11
LOCAL_FUNC reset_vect_table , :, .identity_map
	/* -----------------------------------------------------
	 * Current EL with SP0 : 0x0 - 0x180
	 * -----------------------------------------------------
	 */
SynchronousExceptionSP0:
	b	SynchronousExceptionSP0
	check_vector_size SynchronousExceptionSP0

	.align	7
IrqSP0:
	b	IrqSP0
	check_vector_size IrqSP0

	.align	7
FiqSP0:
	b	FiqSP0
	check_vector_size FiqSP0

	.align	7
SErrorSP0:
	b	SErrorSP0
	check_vector_size SErrorSP0

	/* -----------------------------------------------------
	 * Current EL with SPx: 0x200 - 0x380
	 * -----------------------------------------------------
	 */
	.align	7
SynchronousExceptionSPx:
	b	SynchronousExceptionSPx
	check_vector_size SynchronousExceptionSPx

	.align	7
IrqSPx:
	b	IrqSPx
	check_vector_size IrqSPx

	.align	7
FiqSPx:
	b	FiqSPx
	check_vector_size FiqSPx

	.align	7
SErrorSPx:
	b	SErrorSPx
	check_vector_size SErrorSPx

	/* -----------------------------------------------------
	 * Lower EL using AArch64 : 0x400 - 0x580
	 * -----------------------------------------------------
	 */
	.align	7
SynchronousExceptionA64:
	b	SynchronousExceptionA64
	check_vector_size SynchronousExceptionA64

	.align	7
IrqA64:
	b	IrqA64
	check_vector_size IrqA64

	.align	7
FiqA64:
	b	FiqA64
	check_vector_size FiqA64

	.align	7
SErrorA64:
	b	SErrorA64
	check_vector_size SErrorA64

	/* -----------------------------------------------------
	 * Lower EL using AArch32 : 0x600 - 0x780
	 * -----------------------------------------------------
	 */
	.align	7
SynchronousExceptionA32:
	b	SynchronousExceptionA32
	check_vector_size SynchronousExceptionA32

	.align	7
IrqA32:
	b	IrqA32
	check_vector_size IrqA32

	.align	7
FiqA32:
	b	FiqA32
	check_vector_size FiqA32

	.align	7
SErrorA32:
	b	SErrorA32
	check_vector_size SErrorA32

END_FUNC reset_vect_table
562