/* SPDX-License-Identifier: BSD-2-Clause */
/*
 * Copyright (c) 2015, Linaro Limited
 * Copyright (c) 2021, Arm Limited
 */

#include <platform_config.h>

#include <arm64_macros.S>
#include <arm.h>
#include <asm.S>
#include <generated/asm-defines.h>
#include <keep.h>
#include <sm/optee_smc.h>
#include <sm/teesmc_opteed.h>
#include <sm/teesmc_opteed_macros.h>

	/*
	 * Setup SP_EL0 and SP_EL1, SP will be set to SP_EL0.
	 * SP_EL0 is assigned stack_tmp_export + cpu_id * stack_tmp_stride
	 * SP_EL1 is assigned thread_core_local[cpu_id]
	 */
	.macro set_sp
		bl	__get_core_pos
		cmp	x0, #CFG_TEE_CORE_NB_CORE
		/* Unsupported CPU, park it before it breaks something */
		bge	unhandled_cpu
		adr	x1, stack_tmp_stride
		ldr	w1, [x1]
		mul	x1, x0, x1
		adrp	x0, stack_tmp_export
		add	x0, x0, :lo12:stack_tmp_export
		ldr	x0, [x0]
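		/*
		 * SPSel selects the active stack pointer: 0 selects SP_EL0
		 * and 1 selects SP_ELx (SP_EL1 at EL1).
		 */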
		msr	spsel, #0
		add	sp, x1, x0
		bl	thread_get_core_local
		msr	spsel, #1
		mov	sp, x0
		msr	spsel, #0
	.endm

	.macro set_sctlr_el1
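		/*
		 * Bits set below: I = instruction cache enable, SA = SP
		 * alignment check, SPAN = leave PSTATE.PAN unchanged on
		 * exceptions taken to EL1, WXN = writable implies
		 * non-executable, A = alignment fault checking.
		 */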
		mrs	x0, sctlr_el1
		orr	x0, x0, #SCTLR_I
		orr	x0, x0, #SCTLR_SA
		orr	x0, x0, #SCTLR_SPAN
#if defined(CFG_CORE_RWDATA_NOEXEC)
		orr	x0, x0, #SCTLR_WXN
#endif
#if defined(CFG_SCTLR_ALIGNMENT_CHECK)
		orr	x0, x0, #SCTLR_A
#else
		bic	x0, x0, #SCTLR_A
#endif
		msr	sctlr_el1, x0
	.endm

FUNC _start , :
	mov	x19, x0		/* Save pagable part address */
#if defined(CFG_DT_ADDR)
	ldr	x20, =CFG_DT_ADDR
#else
	mov	x20, x2		/* Save DT address */
#endif

	adr	x0, reset_vect_table
	msr	vbar_el1, x0
	isb

	set_sctlr_el1
	isb

#ifdef CFG_WITH_PAGER
	/*
	 * Move init code into correct location and move hashes to a
	 * temporary safe location until the heap is initialized.
	 *
	 * The binary is built as:
	 * [Pager code, rodata and data] : In correct location
	 * [Init code and rodata] : Should be copied to __init_start
	 * [struct boot_embdata + data] : Should be saved before
	 * initializing pager, first uint32_t tells the length of the data
	 */
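	/*
	 * In the loaded image the init part and the boot_embdata blob
	 * are placed directly after __data_end, which is why __data_end
	 * is used as the copy source below.
	 */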
	adr	x0, __init_start	/* dst */
	adr	x1, __data_end		/* src */
	adr	x2, __init_end
	sub	x2, x2, x0		/* init len */
	ldr	w4, [x1, x2]		/* length of hashes etc */
	add	x2, x2, x4		/* length of init and hashes etc */
	/* Copy backwards (as memmove) in case we're overlapping */
	add	x0, x0, x2		/* __init_start + len */
	add	x1, x1, x2		/* __data_end + len */
	adr	x3, cached_mem_end
	str	x0, [x3]
	adr	x2, __init_start
copy_init:
	ldp	x3, x4, [x1, #-16]!
	stp	x3, x4, [x0, #-16]!
	cmp	x0, x2
	b.gt	copy_init
#else
	/*
	 * The binary is built as:
	 * [Core, rodata and data] : In correct location
	 * [struct boot_embdata + data] : Should be moved to __end, first
	 * uint32_t tells the length of the struct + data
	 */
	adr_l	x0, __end		/* dst */
	adr_l	x1, __data_end		/* src */
	ldr	w2, [x1]		/* struct boot_embdata::total_len */
	/* Copy backwards (as memmove) in case we're overlapping */
	add	x0, x0, x2
	add	x1, x1, x2
	adr	x3, cached_mem_end
	str	x0, [x3]
	adr_l	x2, __end

copy_init:
	ldp	x3, x4, [x1, #-16]!
	stp	x3, x4, [x0, #-16]!
	cmp	x0, x2
	b.gt	copy_init
#endif

	/*
	 * Clear .bss, this code obviously depends on the linker keeping
	 * start/end of .bss at least 8 byte aligned.
	 */
	adr_l	x0, __bss_start
	adr_l	x1, __bss_end
clear_bss:
	str	xzr, [x0], #8
	cmp	x0, x1
	b.lt	clear_bss

#ifdef CFG_VIRTUALIZATION
	/*
	 * Clear .nex_bss, this code obviously depends on the linker keeping
	 * start/end of .nex_bss at least 8 byte aligned.
	 */
	adr	x0, __nex_bss_start
	adr	x1, __nex_bss_end
clear_nex_bss:
	str	xzr, [x0], #8
	cmp	x0, x1
	b.lt	clear_nex_bss
#endif

	/* Setup SP_EL0 and SP_EL1, SP will be set to SP_EL0 */
	set_sp

	bl	thread_init_thread_core_local

	/* Enable aborts now that we can receive exceptions */
	msr	daifclr, #DAIFBIT_ABT

	/*
	 * Invalidate dcache for all memory used during initialization to
	 * avoid nasty surprises when the cache is turned on. We must not
	 * invalidate memory not used by OP-TEE since we may invalidate
	 * entries used by, for instance, ARM Trusted Firmware.
	 */
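	/* dcache_cleaninv_range(addr, size): x0 = start VA, x1 = size in bytes */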
	adr_l	x0, __text_start
	ldr	x1, cached_mem_end
	sub	x1, x1, x0
	bl	dcache_cleaninv_range

	/* Enable Console */
	bl	console_init

#ifdef CFG_CORE_ASLR
	mov	x0, x20
	bl	get_aslr_seed
#else
	mov	x0, #0
#endif

	adr	x1, boot_mmu_config
	bl	core_init_mmu_map

#ifdef CFG_CORE_ASLR
	/*
	 * Process the relocation information again, updating for the new
	 * offset. We do this now, before the MMU is enabled, as some of
	 * the memory will become write protected.
	 */
	ldr	x0, boot_mmu_config + CORE_MMU_CONFIG_LOAD_OFFSET
	/*
	 * Update cached_mem_end address with load offset since it was
	 * calculated before relocation.
	 */
	adr	x5, cached_mem_end
	ldr	x6, [x5]
	add	x6, x6, x0
	str	x6, [x5]
	bl	relocate
#endif

	bl	__get_core_pos
	bl	enable_mmu
#ifdef CFG_CORE_ASLR
	/*
	 * Reinitialize console, since register_serial_console() has
	 * previously registered a PA and with ASLR the VA is different
	 * from the PA.
	 */
	bl	console_init
#endif

#ifdef CFG_VIRTUALIZATION
	/*
	 * Initialize the partition tables for each partition to
	 * default_partition, which has now been relocated to a different VA
	 */
	bl	core_mmu_set_default_prtn_tbl
#endif

	mov	x0, x19		/* pagable part address */
	mov	x1, #-1
	bl	boot_init_primary_early
#ifndef CFG_VIRTUALIZATION
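	/*
	 * Temporarily switch to the boot thread stack (threads[0]) while
	 * boot_init_primary_late() runs, the temporary stack is restored
	 * below.
	 */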
	mov	x21, sp
	adr_l	x0, threads
	ldr	x0, [x0, #THREAD_CTX_STACK_VA_END]
	mov	sp, x0
#endif
	mov	x0, x20		/* DT address */
	bl	boot_init_primary_late
#ifndef CFG_VIRTUALIZATION
	mov	sp, x21
#endif

	/*
	 * In case we've touched memory that secondary CPUs will use before
	 * they have turned on their D-cache, clean and invalidate the
	 * D-cache before exiting to normal world.
	 */
	adr_l	x0, __text_start
	ldr	x1, cached_mem_end
	sub	x1, x1, x0
	bl	dcache_cleaninv_range


	/*
	 * Clear the current thread id now to allow the thread to be
	 * reused on the next entry. Matches the call to
	 * thread_init_boot_thread() in boot.c.
	 */
#ifndef CFG_VIRTUALIZATION
	bl	thread_clr_boot_thread
#endif

#ifdef CFG_CORE_FFA
	adr	x0, cpu_on_handler
	/*
	 * Compensate for the load offset since cpu_on_handler() is
	 * called with MMU off.
	 */
	ldr	x1, boot_mmu_config + CORE_MMU_CONFIG_LOAD_OFFSET
	sub	x0, x0, x1
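	/* x0 is now the pre-relocation (physical) address of cpu_on_handler */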
	bl	ffa_secondary_cpu_ep_register
	b	thread_ffa_msg_wait
#else
	/*
	 * Pass the address of thread_vector_table.
	 * Compensate for the load offset since cpu_on_handler() is
	 * called with MMU off.
	 */
	ldr	x0, boot_mmu_config + CORE_MMU_CONFIG_LOAD_OFFSET
	adr	x1, thread_vector_table
	sub	x1, x1, x0
	mov	x0, #TEESMC_OPTEED_RETURN_ENTRY_DONE
	smc	#0
	b	.	/* SMC should not return */
#endif
END_FUNC _start
DECLARE_KEEP_INIT _start

	.balign	8
LOCAL_DATA cached_mem_end , :
	.skip	8
END_DATA cached_mem_end

#ifdef CFG_CORE_ASLR
LOCAL_FUNC relocate , :
	/* x0 holds load offset */
#ifdef CFG_WITH_PAGER
	adr_l	x6, __init_end
#else
	adr_l	x6, __end
#endif
	ldp	w2, w3, [x6, #BOOT_EMBDATA_RELOC_OFFSET]

	mov_imm	x1, TEE_RAM_START
	add	x2, x2, x6	/* start of relocations */
	add	x3, x3, x2	/* end of relocations */

	/*
	 * Relocations are not formatted as Rela64; instead they are in a
	 * compressed format created by get_reloc_bin() in
	 * scripts/gen_tee_bin.py
	 *
	 * All the R_AARCH64_RELATIVE relocations are translated into a
	 * list of 32-bit offsets from TEE_RAM_START. Each offset points
	 * out a 64-bit value which is increased with the load offset.
	 */
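	/*
	 * For example, a 32-bit entry of 0x2000 means that the 64-bit
	 * value at TEE_RAM_START + 0x2000 is increased by the load offset.
	 */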

#ifdef CFG_WITH_PAGER
	/*
	 * With pager enabled we can only relocate the pager and init
	 * parts, the rest has to be done when a page is populated.
	 */
	sub	x6, x6, x1
#endif

	b	2f
	/* Loop over the relocation addresses and process all entries */
1:	ldr	w4, [x2], #4
#ifdef CFG_WITH_PAGER
	/* Skip too large addresses */
	cmp	x4, x6
	b.ge	2f
#endif
	add	x4, x4, x1
	ldr	x5, [x4]
	add	x5, x5, x0
	str	x5, [x4]

2:	cmp	x2, x3
	b.ne	1b

	ret
END_FUNC relocate
#endif

/*
 * void enable_mmu(unsigned long core_pos);
 *
 * This function depends on being mapped within the identity map, where
 * the physical and virtual addresses are the same. After the MMU has
 * been enabled the instruction pointer is updated to execute at the new
 * offset instead. Stack pointers and the return address are updated.
 */
LOCAL_FUNC enable_mmu , : , .identity_map
	adr	x1, boot_mmu_config
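	/* Load x2..x6 from boot_mmu_config (struct core_mmu_config) */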
	load_xregs x1, 0, 2, 6
	/*
	 * x0 = core_pos
	 * x2 = tcr_el1
	 * x3 = mair_el1
	 * x4 = ttbr0_el1_base
	 * x5 = ttbr0_core_offset
	 * x6 = load_offset
	 */
	msr	tcr_el1, x2
	msr	mair_el1, x3

	/*
	 * ttbr0_el1 = ttbr0_el1_base + ttbr0_core_offset * core_pos
	 */
	madd	x1, x5, x0, x4
	msr	ttbr0_el1, x1
	msr	ttbr1_el1, xzr
	isb

	/* Invalidate TLB */
	tlbi	vmalle1

	/*
	 * Make sure translation table writes have drained into memory and
	 * the TLB invalidation is complete.
	 */
	dsb	sy
	isb

	/* Enable the MMU */
	mrs	x1, sctlr_el1
	orr	x1, x1, #SCTLR_M
	msr	sctlr_el1, x1
	isb

	/* Update vbar */
	mrs	x1, vbar_el1
	add	x1, x1, x6
	msr	vbar_el1, x1
	isb

	/* Invalidate instruction cache and branch predictor */
	ic	iallu
	isb

	/* Enable I and D cache */
	mrs	x1, sctlr_el1
	orr	x1, x1, #SCTLR_I
	orr	x1, x1, #SCTLR_C
	msr	sctlr_el1, x1
	isb

	/* Adjust stack pointers and return address */
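	/* With SPSel = 1 "sp" refers to SP_EL1, with SPSel = 0 to SP_EL0 */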
	msr	spsel, #1
	add	sp, sp, x6
	msr	spsel, #0
	add	sp, sp, x6
	add	x30, x30, x6

	ret
END_FUNC enable_mmu

	.balign	8
DATA boot_mmu_config , : /* struct core_mmu_config */
	.skip	CORE_MMU_CONFIG_SIZE
END_DATA boot_mmu_config

FUNC cpu_on_handler , :
	mov	x19, x0
	mov	x20, x1
	mov	x21, x30

	adr	x0, reset_vect_table
	msr	vbar_el1, x0
	isb

	set_sctlr_el1
	isb

	/* Enable aborts now that we can receive exceptions */
	msr	daifclr, #DAIFBIT_ABT

	bl	__get_core_pos
	bl	enable_mmu

	/* Setup SP_EL0 and SP_EL1, SP will be set to SP_EL0 */
	set_sp

	mov	x0, x19
	mov	x1, x20
#ifdef CFG_CORE_FFA
	bl	boot_cpu_on_handler
	b	thread_ffa_msg_wait
#else
	mov	x30, x21
	b	boot_cpu_on_handler
#endif
END_FUNC cpu_on_handler
DECLARE_KEEP_PAGER cpu_on_handler

LOCAL_FUNC unhandled_cpu , :
	wfi
	b	unhandled_cpu
END_FUNC unhandled_cpu

	/*
	 * This macro verifies that a given vector doesn't exceed the
	 * architectural limit of 32 instructions. This is meant to be placed
	 * immediately after the last instruction in the vector. It takes the
	 * vector entry as the parameter.
	 */
	.macro check_vector_size since
	  .if (. - \since) > (32 * 4)
	    .error "Vector exceeds 32 instructions"
	  .endif
	.endm

	.section .identity_map, "ax", %progbits
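	/* VBAR_EL1 requires the vector table to be 2 KiB (2^11) aligned */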
	.align	11
LOCAL_FUNC reset_vect_table , :, .identity_map
	/* -----------------------------------------------------
	 * Current EL with SP0 : 0x0 - 0x180
	 * -----------------------------------------------------
	 */
SynchronousExceptionSP0:
	b	SynchronousExceptionSP0
	check_vector_size SynchronousExceptionSP0

	.align	7
IrqSP0:
	b	IrqSP0
	check_vector_size IrqSP0

	.align	7
FiqSP0:
	b	FiqSP0
	check_vector_size FiqSP0

	.align	7
SErrorSP0:
	b	SErrorSP0
	check_vector_size SErrorSP0

	/* -----------------------------------------------------
	 * Current EL with SPx: 0x200 - 0x380
	 * -----------------------------------------------------
	 */
	.align	7
SynchronousExceptionSPx:
	b	SynchronousExceptionSPx
	check_vector_size SynchronousExceptionSPx

	.align	7
IrqSPx:
	b	IrqSPx
	check_vector_size IrqSPx

	.align	7
FiqSPx:
	b	FiqSPx
	check_vector_size FiqSPx

	.align	7
SErrorSPx:
	b	SErrorSPx
	check_vector_size SErrorSPx

	/* -----------------------------------------------------
	 * Lower EL using AArch64 : 0x400 - 0x580
	 * -----------------------------------------------------
	 */
	.align	7
SynchronousExceptionA64:
	b	SynchronousExceptionA64
	check_vector_size SynchronousExceptionA64

	.align	7
IrqA64:
	b	IrqA64
	check_vector_size IrqA64

	.align	7
FiqA64:
	b	FiqA64
	check_vector_size FiqA64

	.align	7
SErrorA64:
	b	SErrorA64
	check_vector_size SErrorA64

	/* -----------------------------------------------------
	 * Lower EL using AArch32 : 0x600 - 0x780
	 * -----------------------------------------------------
	 */
	.align	7
SynchronousExceptionA32:
	b	SynchronousExceptionA32
	check_vector_size SynchronousExceptionA32

	.align	7
IrqA32:
	b	IrqA32
	check_vector_size IrqA32

	.align	7
FiqA32:
	b	FiqA32
	check_vector_size FiqA32

	.align	7
SErrorA32:
	b	SErrorA32
	check_vector_size SErrorA32

END_FUNC reset_vect_table
