/* SPDX-License-Identifier: BSD-2-Clause */
/*
 * Copyright (c) 2015, Linaro Limited
 * Copyright (c) 2021, Arm Limited
 */

#include <platform_config.h>

#include <arm64_macros.S>
#include <arm.h>
#include <asm.S>
#include <generated/asm-defines.h>
#include <keep.h>
#include <kernel/thread_private.h>
#include <sm/optee_smc.h>
#include <sm/teesmc_opteed.h>
#include <sm/teesmc_opteed_macros.h>

	/*
	 * Set up SP_EL0 and SP_EL1, SP will be set to SP_EL0.
	 * SP_EL0 is assigned:
	 *   stack_tmp + (cpu_id + 1) * stack_tmp_stride - STACK_TMP_GUARD
	 * SP_EL1 is assigned thread_core_local[cpu_id]
	 */
	.macro set_sp
		bl	__get_core_pos
		cmp	x0, #CFG_TEE_CORE_NB_CORE
		/* Unsupported CPU, park it before it breaks something */
		bge	unhandled_cpu
		add	x0, x0, #1
		adr_l	x1, stack_tmp_stride
		ldr	w1, [x1]
		mul	x1, x0, x1

		/* x0 = stack_tmp - STACK_TMP_GUARD */
		adr_l	x2, stack_tmp_rel
		ldr	w0, [x2]
		add	x0, x0, x2

		msr	spsel, #0
		add	sp, x1, x0
		bl	thread_get_core_local
		msr	spsel, #1
		mov	sp, x0
		msr	spsel, #0
	.endm
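
	/*
	 * In C terms the macro above computes roughly (sketch only, using
	 * the symbols already referenced by this file):
	 *
	 *   size_t pos = __get_core_pos();
	 *   vaddr_t sp_el0 = (vaddr_t)stack_tmp - STACK_TMP_GUARD +
	 *		      (pos + 1) * stack_tmp_stride;
	 *   vaddr_t sp_el1 = (vaddr_t)thread_get_core_local();
	 */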

	.macro read_feat_mte reg
		mrs	\reg, id_aa64pfr1_el1
		ubfx	\reg, \reg, #ID_AA64PFR1_EL1_MTE_SHIFT, #4
	.endm
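
	/*
	 * The 4-bit ID_AA64PFR1_EL1.MTE field reads 0b0000 when MTE isn't
	 * implemented, 0b0001 for instruction-only MTE and 0b0010 or
	 * higher for full MTE (FEAT_MTE2 and later). The "cmp #1; b.ls"
	 * checks below therefore skip the MTE setup unless at least
	 * FEAT_MTE2 is present.
	 */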

	.macro set_sctlr_el1
		mrs	x0, sctlr_el1
		orr	x0, x0, #SCTLR_I
		orr	x0, x0, #SCTLR_SA
		orr	x0, x0, #SCTLR_SPAN
#if defined(CFG_CORE_RWDATA_NOEXEC)
		orr	x0, x0, #SCTLR_WXN
#endif
#if defined(CFG_SCTLR_ALIGNMENT_CHECK)
		orr	x0, x0, #SCTLR_A
#else
		bic	x0, x0, #SCTLR_A
#endif
#ifdef CFG_MEMTAG
		read_feat_mte x1
		cmp	w1, #1
		b.ls	111f
		orr	x0, x0, #(SCTLR_ATA | SCTLR_ATA0)
		bic	x0, x0, #SCTLR_TCF_MASK
		bic	x0, x0, #SCTLR_TCF0_MASK
111:
#endif
		msr	sctlr_el1, x0
	.endm

	.macro init_memtag_per_cpu
		read_feat_mte x0
		cmp	w0, #1
		b.ls	11f

#ifdef CFG_TEE_CORE_DEBUG
		/*
		 * This together with GCR_EL1.RRND = 0 will make the tags
		 * acquired with the irg instruction deterministic.
		 */
		mov_imm	x0, 0xcafe00
		msr	rgsr_el1, x0
		/* Avoid tag = 0x0 and 0xf */
		mov	x0, #0
#else
		/*
		 * Still avoid tag = 0x0 and 0xf as we use that tag for
		 * everything which isn't explicitly tagged. Set
		 * GCR_EL1.RRND = 1 to allow an implementation-specific
		 * method of generating the tags.
		 */
		mov	x0, #GCR_EL1_RRND
#endif
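		/*
		 * Bits 0 and 15 set below are part of the GCR_EL1.Exclude
		 * mask, excluding tags 0x0 and 0xf from random tag
		 * generation (irg) as described in the comments above.
		 */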
		orr	x0, x0, #1
		orr	x0, x0, #(1 << 15)
		msr	gcr_el1, x0

		/*
		 * Enable the tag checks on the current CPU.
		 *
		 * Depends on boot_init_memtag() having cleared tags for
		 * TEE core memory. Well, not really, addresses with the
		 * tag value 0b0000 will use unchecked access due to
		 * TCR_TCMA0.
		 */
		mrs	x0, tcr_el1
		orr	x0, x0, #TCR_TBI0
		orr	x0, x0, #TCR_TCMA0
		msr	tcr_el1, x0

		mrs	x0, sctlr_el1
		orr	x0, x0, #SCTLR_TCF_SYNC
		orr	x0, x0, #SCTLR_TCF0_SYNC
		msr	sctlr_el1, x0

		isb
11:
	.endm

FUNC _start , :
#if defined(CFG_CORE_SEL1_SPMC)
	/*
	 * With OP-TEE as SPMC at S-EL1 the SPMD (SPD_spmd) in TF-A passes
	 * the DTB in x0, the pageable part in x1, and the rest of the
	 * registers are unused
	 */
	mov	x19, x1		/* Save pageable part */
	mov	x20, x0		/* Save DT address */
#else
	mov	x19, x0		/* Save pageable part address */
#if defined(CFG_DT_ADDR)
	ldr	x20, =CFG_DT_ADDR
#else
	mov	x20, x2		/* Save DT address */
#endif
#endif

	adr	x0, reset_vect_table
	msr	vbar_el1, x0
	isb

	set_sctlr_el1
	isb

#ifdef CFG_WITH_PAGER
	/*
	 * Move init code into correct location and move hashes to a
	 * temporary safe location until the heap is initialized.
	 *
	 * The binary is built as:
	 * [Pager code, rodata and data] : In correct location
	 * [Init code and rodata] : Should be copied to __init_start
	 * [struct boot_embdata + data] : Should be saved before
	 * initializing pager, first uint32_t tells the length of the data
	 */
	adr	x0, __init_start	/* dst */
	adr	x1, __data_end		/* src */
	adr	x2, __init_end
	sub	x2, x2, x0		/* init len */
	ldr	w4, [x1, x2]		/* length of hashes etc */
	add	x2, x2, x4		/* length of init and hashes etc */
	/* Copy backwards (as memmove) in case we're overlapping */
	add	x0, x0, x2		/* __init_start + len */
	add	x1, x1, x2		/* __data_end + len */
	adr	x3, cached_mem_end
	str	x0, [x3]
	adr	x2, __init_start
copy_init:
	ldp	x3, x4, [x1, #-16]!
	stp	x3, x4, [x0, #-16]!
	cmp	x0, x2
	b.gt	copy_init
#else
	/*
	 * The binary is built as:
	 * [Core, rodata and data] : In correct location
	 * [struct boot_embdata + data] : Should be moved to __end, first
	 * uint32_t tells the length of the struct + data
	 */
	adr_l	x0, __end		/* dst */
	adr_l	x1, __data_end		/* src */
	ldr	w2, [x1]		/* struct boot_embdata::total_len */
	/* Copy backwards (as memmove) in case we're overlapping */
	add	x0, x0, x2
	add	x1, x1, x2
	adr	x3, cached_mem_end
	str	x0, [x3]
	adr_l	x2, __end

copy_init:
	ldp	x3, x4, [x1, #-16]!
	stp	x3, x4, [x0, #-16]!
	cmp	x0, x2
	b.gt	copy_init
#endif

	/*
	 * Clear .bss, this code obviously depends on the linker keeping
	 * start/end of .bss at least 8 byte aligned.
	 */
	adr_l	x0, __bss_start
	adr_l	x1, __bss_end
clear_bss:
	str	xzr, [x0], #8
	cmp	x0, x1
	b.lt	clear_bss

#ifdef CFG_VIRTUALIZATION
	/*
	 * Clear .nex_bss, this code obviously depends on the linker keeping
	 * start/end of .nex_bss at least 8 byte aligned.
	 */
	adr	x0, __nex_bss_start
	adr	x1, __nex_bss_end
clear_nex_bss:
	str	xzr, [x0], #8
	cmp	x0, x1
	b.lt	clear_nex_bss
#endif

	/* Setup SP_EL0 and SP_EL1, SP will be set to SP_EL0 */
	set_sp

	bl	thread_init_thread_core_local

	/* Enable aborts now that we can receive exceptions */
	msr	daifclr, #DAIFBIT_ABT

	/*
	 * Invalidate dcache for all memory used during initialization to
	 * avoid nasty surprises when the cache is turned on. We must not
	 * invalidate memory not used by OP-TEE since we may invalidate
	 * entries used by, for instance, ARM Trusted Firmware.
	 */
	adr_l	x0, __text_start
	ldr	x1, cached_mem_end
	sub	x1, x1, x0
	bl	dcache_cleaninv_range

	/* Enable Console */
	bl	console_init

#ifdef CFG_MEMTAG
	/*
	 * If FEAT_MTE2 is available, initialize the memtag callbacks.
	 * Tags for OP-TEE core memory are then cleared to make it safe to
	 * enable MEMTAG below.
	 */
	bl	boot_init_memtag
#endif

#ifdef CFG_CORE_ASLR
	mov	x0, x20
	bl	get_aslr_seed
#else
	mov	x0, #0
#endif

	adr	x1, boot_mmu_config
	bl	core_init_mmu_map

#ifdef CFG_CORE_ASLR
	/*
	 * Process relocation information again, this time updating for
	 * the new offset. We do this now, before the MMU is enabled, as
	 * some of the memory will become write protected.
	 */
	ldr	x0, boot_mmu_config + CORE_MMU_CONFIG_LOAD_OFFSET
	/*
	 * Update cached_mem_end address with load offset since it was
	 * calculated before relocation.
	 */
	adr	x5, cached_mem_end
	ldr	x6, [x5]
	add	x6, x6, x0
	str	x6, [x5]
	bl	relocate
#endif

	bl	__get_core_pos
	bl	enable_mmu
#ifdef CFG_CORE_ASLR
	/*
	 * Reinitialize console, since register_serial_console() has
	 * previously registered a PA and with ASLR the VA is different
	 * from the PA.
	 */
	bl	console_init
#endif

#ifdef CFG_VIRTUALIZATION
	/*
	 * Initialize partition tables for each partition to
	 * default_partition, which has now been relocated to a different VA
	 */
	bl	core_mmu_set_default_prtn_tbl
#endif

	mov	x0, x19		/* pageable part address */
	mov	x1, #-1
	bl	boot_init_primary_early

#ifdef CFG_MEMTAG
	init_memtag_per_cpu
#endif

#ifndef CFG_VIRTUALIZATION
	mov	x21, sp
	adr_l	x0, threads
	ldr	x0, [x0, #THREAD_CTX_STACK_VA_END]
	mov	sp, x0
	bl	thread_get_core_local
	mov	x22, x0
	str	wzr, [x22, #THREAD_CORE_LOCAL_FLAGS]
#endif
	mov	x0, x20		/* DT address */
	bl	boot_init_primary_late
#ifndef CFG_VIRTUALIZATION
	mov	x0, #THREAD_CLF_TMP
	str	w0, [x22, #THREAD_CORE_LOCAL_FLAGS]
	mov	sp, x21
#endif

	/*
	 * In case we've touched memory that secondary CPUs will use before
	 * they have turned on their D-cache, clean and invalidate the
	 * D-cache before exiting to normal world.
	 */
	adr_l	x0, __text_start
	ldr	x1, cached_mem_end
	sub	x1, x1, x0
	bl	dcache_cleaninv_range

	/*
	 * Clear current thread id now to allow the thread to be reused on
	 * next entry. Matches the thread_init_boot_thread() call in
	 * boot.c.
	 */
#ifndef CFG_VIRTUALIZATION
	bl	thread_clr_boot_thread
#endif

#ifdef CFG_CORE_FFA
	adr	x0, cpu_on_handler
	/*
	 * Compensate for the load offset since cpu_on_handler() is
	 * called with MMU off.
	 */
	ldr	x1, boot_mmu_config + CORE_MMU_CONFIG_LOAD_OFFSET
	sub	x0, x0, x1
	bl	thread_spmc_register_secondary_ep
	b	thread_ffa_msg_wait
#else
	/*
	 * Pass the vector address returned from main_init
	 * Compensate for the load offset since cpu_on_handler() is
	 * called with MMU off.
	 */
	ldr	x0, boot_mmu_config + CORE_MMU_CONFIG_LOAD_OFFSET
	adr	x1, thread_vector_table
	sub	x1, x1, x0
	mov	x0, #TEESMC_OPTEED_RETURN_ENTRY_DONE
	smc	#0
	/* SMC should not return */
	panic_at_smc_return
#endif
END_FUNC _start
DECLARE_KEEP_INIT _start

	.section .identity_map.data
	.balign	8
LOCAL_DATA cached_mem_end , :
	.skip	8
END_DATA cached_mem_end

#ifdef CFG_CORE_ASLR
LOCAL_FUNC relocate , :
	/* x0 holds load offset */
#ifdef CFG_WITH_PAGER
	adr_l	x6, __init_end
#else
	adr_l	x6, __end
#endif
	ldp	w2, w3, [x6, #BOOT_EMBDATA_RELOC_OFFSET]

	mov_imm	x1, TEE_RAM_START
	add	x2, x2, x6	/* start of relocations */
	add	x3, x3, x2	/* end of relocations */

	/*
	 * Relocations are not formatted as Rela64, instead they are in a
	 * compressed format created by get_reloc_bin() in
	 * scripts/gen_tee_bin.py
	 *
	 * All the R_AARCH64_RELATIVE relocations are translated into a
	 * list of 32-bit offsets from TEE_RAM_START. Each offset points
	 * to a 64-bit value which is increased by the load offset.
	 */
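
	/*
	 * In C terms the loop below does roughly (sketch only;
	 * reloc_start/reloc_end/load_offset correspond to x2/x3/x0,
	 * pager filtering omitted):
	 *
	 *   for (uint32_t *p = reloc_start; p < reloc_end; p++)
	 *	*(uint64_t *)(TEE_RAM_START + *p) += load_offset;
	 */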

#ifdef CFG_WITH_PAGER
	/*
	 * With pager enabled we can only relocate the pager and init
	 * parts, the rest has to be done when a page is populated.
	 */
	sub	x6, x6, x1
#endif

	b	2f
	/* Loop over the relocation addresses and process all entries */
1:	ldr	w4, [x2], #4
#ifdef CFG_WITH_PAGER
	/* Skip too large addresses */
	cmp	x4, x6
	b.ge	2f
#endif
	add	x4, x4, x1
	ldr	x5, [x4]
	add	x5, x5, x0
	str	x5, [x4]

2:	cmp	x2, x3
	b.ne	1b

	ret
END_FUNC relocate
#endif

/*
 * void enable_mmu(unsigned long core_pos);
 *
 * This function depends on being mapped within the identity map, where
 * the physical and virtual addresses are the same. After the MMU has
 * been enabled the instruction pointer is updated to execute at the new
 * offset instead. Stack pointers and the return address are updated
 * accordingly.
 */
LOCAL_FUNC enable_mmu , : , .identity_map
	adr	x1, boot_mmu_config
	load_xregs x1, 0, 2, 6
	/*
	 * x0 = core_pos
	 * x2 = tcr_el1
	 * x3 = mair_el1
	 * x4 = ttbr0_el1_base
	 * x5 = ttbr0_core_offset
	 * x6 = load_offset
	 */
	msr	tcr_el1, x2
	msr	mair_el1, x3

	/*
	 * ttbr0_el1 = ttbr0_el1_base + ttbr0_core_offset * core_pos
	 */
	madd	x1, x5, x0, x4
	msr	ttbr0_el1, x1
	msr	ttbr1_el1, xzr
	isb

	/* Invalidate TLB */
	tlbi	vmalle1

	/*
	 * Make sure translation table writes have drained into memory and
	 * the TLB invalidation is complete.
	 */
	dsb	sy
	isb

	/* Enable the MMU */
	mrs	x1, sctlr_el1
	orr	x1, x1, #SCTLR_M
	msr	sctlr_el1, x1
	isb

	/* Update vbar */
	mrs	x1, vbar_el1
	add	x1, x1, x6
	msr	vbar_el1, x1
	isb

	/* Invalidate instruction cache and branch predictor */
	ic	iallu
	isb

	/* Enable I and D cache */
	mrs	x1, sctlr_el1
	orr	x1, x1, #SCTLR_I
	orr	x1, x1, #SCTLR_C
	msr	sctlr_el1, x1
	isb

	/* Adjust stack pointers and return address */
	msr	spsel, #1
	add	sp, sp, x6
	msr	spsel, #0
	add	sp, sp, x6
	add	x30, x30, x6

	ret
END_FUNC enable_mmu

	.section .identity_map.data
	.balign	8
DATA boot_mmu_config , : /* struct core_mmu_config */
	.skip	CORE_MMU_CONFIG_SIZE
END_DATA boot_mmu_config
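
/*
 * boot_mmu_config is filled in by core_init_mmu_map() (called from
 * _start above) and is consumed by enable_mmu, which loads its fields
 * with load_xregs in the order listed in the register comment there.
 */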

FUNC cpu_on_handler , :
	mov	x19, x0
	mov	x20, x1
	mov	x21, x30

	adr	x0, reset_vect_table
	msr	vbar_el1, x0
	isb

	set_sctlr_el1
	isb

	/* Enable aborts now that we can receive exceptions */
	msr	daifclr, #DAIFBIT_ABT

	bl	__get_core_pos
	bl	enable_mmu

	/* Setup SP_EL0 and SP_EL1, SP will be set to SP_EL0 */
	set_sp

#ifdef CFG_MEMTAG
	init_memtag_per_cpu
#endif

	mov	x0, x19
	mov	x1, x20
#ifdef CFG_CORE_FFA
	bl	boot_cpu_on_handler
	b	thread_ffa_msg_wait
#else
	mov	x30, x21
	b	boot_cpu_on_handler
#endif
END_FUNC cpu_on_handler
DECLARE_KEEP_PAGER cpu_on_handler

LOCAL_FUNC unhandled_cpu , :
	wfi
	b	unhandled_cpu
END_FUNC unhandled_cpu

LOCAL_DATA stack_tmp_rel , :
	.word	stack_tmp - stack_tmp_rel - STACK_TMP_GUARD
END_DATA stack_tmp_rel
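
	/*
	 * stack_tmp_rel stores a PC-relative word rather than an absolute
	 * address so that set_sp works position-independently, e.g.
	 * before the MMU is enabled or any ASLR relocation has been
	 * applied: adding the address of stack_tmp_rel to the loaded
	 * word reconstructs stack_tmp - STACK_TMP_GUARD wherever the
	 * image is loaded.
	 */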

	/*
	 * This macro verifies that a given vector doesn't exceed the
	 * architectural limit of 32 instructions. It is meant to be placed
	 * immediately after the last instruction in the vector and takes
	 * the vector entry as its parameter.
	 */
	.macro check_vector_size since
	  .if (. - \since) > (32 * 4)
	    .error "Vector exceeds 32 instructions"
	  .endif
	.endm

	.section .identity_map, "ax", %progbits
	.align	11
LOCAL_FUNC reset_vect_table , :, .identity_map, , nobti
	/* -----------------------------------------------------
	 * Current EL with SP0 : 0x0 - 0x180
	 * -----------------------------------------------------
	 */
SynchronousExceptionSP0:
	b	SynchronousExceptionSP0
	check_vector_size SynchronousExceptionSP0

	.align	7
IrqSP0:
	b	IrqSP0
	check_vector_size IrqSP0

	.align	7
FiqSP0:
	b	FiqSP0
	check_vector_size FiqSP0

	.align	7
SErrorSP0:
	b	SErrorSP0
	check_vector_size SErrorSP0

	/* -----------------------------------------------------
	 * Current EL with SPx : 0x200 - 0x380
	 * -----------------------------------------------------
	 */
	.align	7
SynchronousExceptionSPx:
	b	SynchronousExceptionSPx
	check_vector_size SynchronousExceptionSPx

	.align	7
IrqSPx:
	b	IrqSPx
	check_vector_size IrqSPx

	.align	7
FiqSPx:
	b	FiqSPx
	check_vector_size FiqSPx

	.align	7
SErrorSPx:
	b	SErrorSPx
	check_vector_size SErrorSPx

	/* -----------------------------------------------------
	 * Lower EL using AArch64 : 0x400 - 0x580
	 * -----------------------------------------------------
	 */
	.align	7
SynchronousExceptionA64:
	b	SynchronousExceptionA64
	check_vector_size SynchronousExceptionA64

	.align	7
IrqA64:
	b	IrqA64
	check_vector_size IrqA64

	.align	7
FiqA64:
	b	FiqA64
	check_vector_size FiqA64

	.align	7
SErrorA64:
	b	SErrorA64
	check_vector_size SErrorA64

	/* -----------------------------------------------------
	 * Lower EL using AArch32 : 0x600 - 0x780
	 * -----------------------------------------------------
	 */
	.align	7
SynchronousExceptionA32:
	b	SynchronousExceptionA32
	check_vector_size SynchronousExceptionA32

	.align	7
IrqA32:
	b	IrqA32
	check_vector_size IrqA32

	.align	7
FiqA32:
	b	FiqA32
	check_vector_size FiqA32

	.align	7
SErrorA32:
	b	SErrorA32
	check_vector_size SErrorA32

END_FUNC reset_vect_table

BTI(emit_aarch64_feature_1_and     GNU_PROPERTY_AARCH64_FEATURE_1_BTI)