xref: /optee_os/core/arch/arm/kernel/entry_a64.S (revision 17a66904a791447da1356331f01e7c1ca25329be)
1/* SPDX-License-Identifier: BSD-2-Clause */
2/*
3 * Copyright (c) 2015-2022, Linaro Limited
4 * Copyright (c) 2021-2023, Arm Limited
5 */
6
7#include <platform_config.h>
8
9#include <arm64_macros.S>
10#include <arm.h>
11#include <asm.S>
12#include <generated/asm-defines.h>
13#include <keep.h>
14#include <kernel/thread_private.h>
15#include <mm/core_mmu.h>
16#include <sm/optee_smc.h>
17#include <sm/teesmc_opteed.h>
18#include <sm/teesmc_opteed_macros.h>
19
/*
 * set_sp - give the calling CPU its two stack pointers.
 *
 *   SP_EL0 = stack_tmp + (cpu_id + 1) * stack_tmp_stride - STACK_TMP_GUARD
 *   SP_EL1 = pointer returned by thread_get_core_local()
 *            (thread_core_local[cpu_id])
 *
 * cpu_id is the value returned by __get_core_pos(). A CPU with
 * cpu_id >= CFG_TEE_CORE_NB_CORE is sent to unhandled_cpu and never gets
 * past this macro.
 *
 * On exit SPSel is 0, so SP names SP_EL0. x0, x1, x2, x30 and the flags
 * are overwritten here; the two called functions may clobber more.
 */
.macro set_sp
	bl	__get_core_pos
	/*
	 * Unsupported CPU, park it before it breaks something. The core
	 * position is an index, so compare it unsigned: a value with the
	 * top bit set must be rejected too instead of passing as negative.
	 */
	cmp	x0, #CFG_TEE_CORE_NB_CORE
	b.hs	unhandled_cpu

	/* x1 = (cpu_id + 1) * stack_tmp_stride */
	add	x0, x0, #1
	adr_l	x1, stack_tmp_stride
	ldr	w1, [x1]
	mul	x1, x0, x1

	/*
	 * x0 = stack_tmp - STACK_TMP_GUARD
	 *
	 * stack_tmp_rel stores that address as a 32-bit offset from
	 * stack_tmp_rel itself. Load it sign-extended so the sum is still
	 * right if stack_tmp ends up linked below stack_tmp_rel.
	 */
	adr_l	x2, stack_tmp_rel
	ldrsw	x0, [x2]
	add	x0, x0, x2

	msr	spsel, #0
	add	sp, x1, x0
	bl	thread_get_core_local
	msr	spsel, #1
	mov	sp, x0
	msr	spsel, #0
.endm
48
/*
 * read_feat_mte reg - leave the 4-bit MTE field of ID_AA64PFR1_EL1 in
 * \reg, right aligned. Only \reg is modified, the flags are untouched.
 */
.macro read_feat_mte reg
	mrs	\reg, id_aa64pfr1_el1
	/* Move the field down to bit 0 and drop everything above it */
	lsr	\reg, \reg, #ID_AA64PFR1_EL1_MTE_SHIFT
	and	\reg, \reg, #0xf
.endm
53
/*
 * read_feat_pan reg - leave the 4-bit PAN field of ID_MMFR3_EL1 in \reg,
 * right aligned. Only \reg is modified, the flags are untouched.
 *
 * NOTE(review): ID_MMFR3_EL1 is the AArch64 view of the AArch32 ID_MMFR3
 * register. Confirm it reports PAN on cores that implement no AArch32
 * state; the AArch64 feature field is ID_AA64MMFR1_EL1.PAN. Any change
 * here presumably has to match the PAN detection done from C.
 */
.macro read_feat_pan reg
	mrs	\reg, id_mmfr3_el1
	/* Move the field down to bit 0 and drop everything above it */
	lsr	\reg, \reg, #ID_MMFR3_EL1_PAN_SHIFT
	and	\reg, \reg, #0xf
.endm
58
/*
 * set_sctlr_el1 - apply the static part of the SCTLR_EL1 configuration.
 *
 * Read-modify-write of SCTLR_EL1. I, SA and SPAN are always set, every
 * other bit handled here depends on the build configuration. With
 * CFG_MEMTAG the MTE related bits are only touched when the MTE field of
 * ID_AA64PFR1_EL1 is 2 or higher.
 *
 * Clobbers x0 and, with CFG_MEMTAG, also x1 and the flags. No barrier is
 * issued in here, the caller follows up with an isb.
 */
.macro set_sctlr_el1
	mrs	x0, sctlr_el1
	orr	x0, x0, #SCTLR_SA
	orr	x0, x0, #SCTLR_I
	orr	x0, x0, #SCTLR_SPAN
#ifdef CFG_SCTLR_ALIGNMENT_CHECK
	orr	x0, x0, #SCTLR_A
#else
	bic	x0, x0, #SCTLR_A
#endif
#ifdef CFG_CORE_RWDATA_NOEXEC
	orr	x0, x0, #SCTLR_WXN
#endif
#ifdef CFG_MEMTAG
	/* Leave the MTE controls alone unless the MTE field is >= 2 */
	read_feat_mte x1
	cmp	w1, #1
	b.ls	.Lsctlr_mte_done_\@
	/* Allocation tag access on, both tag check fault fields zeroed */
	bic	x0, x0, #SCTLR_TCF0_MASK
	bic	x0, x0, #SCTLR_TCF_MASK
	orr	x0, x0, #(SCTLR_ATA | SCTLR_ATA0)
.Lsctlr_mte_done_\@:
#endif
#if defined(CFG_CORE_PAUTH) && defined(CFG_CORE_BTI)
	orr	x0, x0, #SCTLR_BT1
#endif
#if defined(CFG_TA_PAUTH) && defined(CFG_TA_BTI)
	orr	x0, x0, #SCTLR_BT0
#endif
	msr	sctlr_el1, x0
.endm
89
/*
 * init_memtag_per_cpu - turn on memory tag checking on the calling CPU.
 *
 * Does nothing unless the MTE field of ID_AA64PFR1_EL1 is 2 or higher.
 * Otherwise GCR_EL1, TCR_EL1 and SCTLR_EL1 are updated (and RGSR_EL1 in
 * debug builds) and an isb is issued.
 *
 * Clobbers x0 and the flags.
 */
.macro init_memtag_per_cpu
	read_feat_mte x0
	cmp	w0, #1
	b.ls	.Lmemtag_done_\@

#ifdef CFG_TEE_CORE_DEBUG
	/*
	 * A fixed seed in RGSR_EL1 combined with GCR_EL1.RRND = 0 makes
	 * the tags handed out by the irg instruction deterministic.
	 */
	mov_imm	x0, 0xcafe00
	msr	rgsr_el1, x0
	mov	x0, xzr
#else
	/*
	 * GCR_EL1.RRND = 1 lets the implementation pick its own method of
	 * generating tags.
	 */
	mov	x0, #GCR_EL1_RRND
#endif
	/*
	 * Bits 0 and 15 keep tags 0x0 and 0xf from being generated, those
	 * are used for everything which isn't explicitly tagged.
	 */
	orr	x0, x0, #(1 << 15)
	orr	x0, x0, #1
	msr	gcr_el1, x0

	/*
	 * Enable the tag checks on the current CPU.
	 *
	 * This relies on boot_init_memtag() having cleared the tags for
	 * TEE core memory. Strictly speaking it doesn't have to, since
	 * addresses carrying tag 0b0000 are accessed unchecked because of
	 * TCR_TCMA0.
	 */
	mrs	x0, tcr_el1
	orr	x0, x0, #TCR_TCMA0
	orr	x0, x0, #TCR_TBI0
	msr	tcr_el1, x0

	mrs	x0, sctlr_el1
	orr	x0, x0, #SCTLR_TCF0_SYNC
	orr	x0, x0, #SCTLR_TCF_SYNC
	msr	sctlr_el1, x0

	isb
.Lmemtag_done_\@:
.endm
138
/*
 * init_pauth_per_cpu - install the APIA key of the calling CPU and set
 * SCTLR_EL1.ENIA.
 *
 * The two key halves are read from offset THREAD_CORE_LOCAL_KEYS relative
 * to SP_EL1, so SP_EL1 must already have been set up (see set_sp). SPSel
 * is 0 again on exit. Clobbers x0 and x1.
 */
.macro init_pauth_per_cpu
	/* x0 = high half, x1 = low half of the key */
	msr	spsel, #1
	ldp	x0, x1, [sp, #THREAD_CORE_LOCAL_KEYS]
	msr	spsel, #0

	write_apiakeylo x1
	write_apiakeyhi x0

	mrs	x0, sctlr_el1
	orr	x0, x0, #SCTLR_ENIA
	msr	sctlr_el1, x0
	isb
.endm
150
/*
 * init_pan - enable PAN on the calling CPU when it is implemented.
 *
 * If the PAN field read by read_feat_pan is non-zero, SCTLR_EL1.SPAN is
 * cleared and write_pan_enable is executed. Otherwise nothing is changed.
 *
 * Clobbers x0 and the flags. No barrier is issued in here.
 */
.macro init_pan
	read_feat_pan x0
	cmp	x0, #0
	b.eq	.Lpan_done_\@

	mrs	x0, sctlr_el1
	bic	x0, x0, #SCTLR_SPAN
	msr	sctlr_el1, x0
	write_pan_enable
.Lpan_done_\@:
.endm
161
/*
 * _start - entry point of the primary CPU.
 *
 * In:  x0-x3  boot arguments, handed on unchanged to boot_save_args()
 *
 * Does not return: the function ends either in thread_ffa_msg_wait
 * (CFG_CORE_FFA) or in an SMC carrying TEESMC_OPTEED_RETURN_ENTRY_DONE.
 *
 * x19-x22 hold the boot arguments until boot_save_args() has been called,
 * x23/x24 hold the temporary stack pointer and the core local pointer
 * around boot_init_primary_late().
 */
FUNC _start , :
	/*
	 * Temporary copy of boot argument registers, will be passed to
	 * boot_save_args() further down.
	 */
	mov	x19, x0
	mov	x20, x1
	mov	x21, x2
	mov	x22, x3

	adr	x0, reset_vect_table
	msr	vbar_el1, x0
	isb

	set_sctlr_el1
	isb

#ifdef CFG_PAN
	/*
	 * Has to come after set_sctlr_el1, same order as in
	 * cpu_on_handler: set_sctlr_el1 unconditionally sets
	 * SCTLR_EL1.SPAN and would otherwise undo the clearing of that bit
	 * done by init_pan.
	 */
	init_pan
#endif

#ifdef CFG_WITH_PAGER
	/*
	 * Move init code into correct location and move hashes to a
	 * temporary safe location until the heap is initialized.
	 *
	 * The binary is built as:
	 * [Pager code, rodata and data] : In correct location
	 * [Init code and rodata] : Should be copied to __init_start
	 * [struct boot_embdata + data] : Should be saved before
	 * initializing pager, first uint32_t tells the length of the data
	 */
	adr	x0, __init_start	/* dst */
	adr	x1, __data_end		/* src */
	adr	x2, __init_end
	sub	x2, x2, x0		/* init len */
	ldr	w4, [x1, x2]		/* length of hashes etc */
	add	x2, x2, x4		/* length of init and hashes etc */
	/* Copy backwards (as memmove) in case we're overlapping */
	add	x0, x0, x2		/* __init_start + len */
	add	x1, x1, x2		/* __data_end + len */
	adr	x3, cached_mem_end
	str	x0, [x3]
	adr	x2, __init_start
#else
	/*
	 * The binary is built as:
	 * [Core, rodata and data] : In correct location
	 * [struct boot_embdata + data] : Should be moved to __end, first
	 * uint32_t tells the length of the struct + data
	 */
	adr_l	x0, __end		/* dst */
	adr_l	x1, __data_end		/* src */
	ldr	w2, [x1]		/* struct boot_embdata::total_len */
	/* Copy backwards (as memmove) in case we're overlapping */
	add	x0, x0, x2
	add	x1, x1, x2
	adr	x3, cached_mem_end
	str	x0, [x3]
	adr_l	x2, __end
#endif

	/*
	 * Common to both layouts: x0 = end of dst, x1 = end of src and
	 * x2 = start of dst. Copies 16 bytes per round, from the top down.
	 * These are addresses, hence the unsigned condition.
	 */
copy_init:
	ldp	x3, x4, [x1, #-16]!
	stp	x3, x4, [x0, #-16]!
	cmp	x0, x2
	b.hi	copy_init

	/*
	 * Clear .bss, this code obviously depends on the linker keeping
	 * start/end of .bss at least 8 byte aligned.
	 */
	adr_l	x0, __bss_start
	adr_l	x1, __bss_end
clear_bss:
	str	xzr, [x0], #8
	cmp	x0, x1
	b.lo	clear_bss

#ifdef CFG_NS_VIRTUALIZATION
	/*
	 * Clear .nex_bss, this code obviously depends on the linker keeping
	 * start/end of .nex_bss at least 8 byte aligned.
	 */
	adr_l	x0, __nex_bss_start
	adr_l	x1, __nex_bss_end
clear_nex_bss:
	str	xzr, [x0], #8
	cmp	x0, x1
	b.lo	clear_nex_bss
#endif


#if defined(CFG_CORE_PHYS_RELOCATABLE)
	/*
	 * Save the base physical address, it will not change after this
	 * point.
	 */
	adr_l	x2, core_mmu_tee_load_pa
	adr	x1, _start		/* Load address */
	str	x1, [x2]

	mov_imm	x0, TEE_LOAD_ADDR	/* Compiled load address */
	sub	x0, x1, x0		/* Relocation offset */

	cbz	x0, 1f
	bl	relocate
1:
#endif

	/* Setup SP_EL0 and SP_EL1, SP will be set to SP_EL0 */
	set_sp

	bl	thread_init_thread_core_local

	/* Enable aborts now that we can receive exceptions */
	msr	daifclr, #DAIFBIT_ABT

	/*
	 * Clean and invalidate dcache for all memory used during
	 * initialization to avoid nasty surprises when the cache is turned
	 * on. We must not invalidate memory not used by OP-TEE since we
	 * may invalidate entries used by for instance ARM Trusted Firmware.
	 */
	adr_l	x0, __text_start
	ldr	x1, cached_mem_end
	sub	x1, x1, x0
	bl	dcache_cleaninv_range

	/* Enable Console */
	bl	console_init

	mov	x0, x19
	mov	x1, x20
	mov	x2, x21
	mov	x3, x22
	mov	x4, xzr
	bl	boot_save_args

#ifdef CFG_MEMTAG
	/*
	 * If FEAT_MTE2 is available, initializes the memtag callbacks.
	 * Tags for OP-TEE core memory are then cleared to make it safe to
	 * enable MEMTAG below.
	 */
	bl	boot_init_memtag
#endif

#ifdef CFG_CORE_ASLR
	bl	get_aslr_seed
#else
	mov	x0, #0
#endif

	adr	x1, boot_mmu_config
	bl	core_init_mmu_map

#ifdef CFG_CORE_ASLR
	/*
	 * Process relocation information again updating for the virtual
	 * map offset. We're doing this now before MMU is enabled as some
	 * of the memory will become write protected.
	 */
	ldr	x0, boot_mmu_config + CORE_MMU_CONFIG_MAP_OFFSET
	cbz	x0, 1f
	/*
	 * Update cached_mem_end address with load offset since it was
	 * calculated before relocation.
	 */
	adr	x5, cached_mem_end
	ldr	x6, [x5]
	add	x6, x6, x0
	str	x6, [x5]
	adr	x1, _start		/* Load address */
	bl	relocate
1:
#endif

	bl	__get_core_pos
	bl	enable_mmu
#ifdef CFG_CORE_ASLR
	/*
	 * Reinitialize console, since register_serial_console() has
	 * previously registered a PA and with ASLR the VA is different
	 * from the PA.
	 */
	bl	console_init
#endif

#ifdef CFG_NS_VIRTUALIZATION
	/*
	 * Initialize partition tables for each partition to
	 * default_partition which has been relocated now to a different VA
	 */
	bl	core_mmu_set_default_prtn_tbl
#endif

	bl	boot_init_primary_early

#ifdef CFG_MEMTAG
	init_memtag_per_cpu
#endif

#ifndef CFG_NS_VIRTUALIZATION
	/*
	 * Run boot_init_primary_late() on the stack found at
	 * THREAD_CTX_STACK_VA_END in the first entry of threads, with the
	 * core local flags cleared. x23 keeps the current stack pointer
	 * and x24 the core local pointer so both can be restored below.
	 */
	mov	x23, sp
	adr_l	x0, threads
	ldr	x0, [x0, #THREAD_CTX_STACK_VA_END]
	mov	sp, x0
	bl	thread_get_core_local
	mov	x24, x0
	str	wzr, [x24, #THREAD_CORE_LOCAL_FLAGS]
#endif
	bl	boot_init_primary_late
#ifdef CFG_CORE_PAUTH
	init_pauth_per_cpu
#endif

#ifndef CFG_NS_VIRTUALIZATION
	/* Back to the stack saved in x23, flagged as THREAD_CLF_TMP */
	mov	x0, #THREAD_CLF_TMP
	str	w0, [x24, #THREAD_CORE_LOCAL_FLAGS]
	mov	sp, x23
#endif

#ifdef _CFG_CORE_STACK_PROTECTOR
	/*
	 * Update stack canary value: fetch one canary of 8 bytes into a
	 * 16 byte scratch area on the stack and store it in
	 * __stack_chk_guard.
	 */
	sub	sp, sp, #0x10
	mov	x0, sp
	mov	x1, #1
	mov	x2, #0x8
	bl	plat_get_random_stack_canaries
	ldr	x0, [sp]
	adr_l	x5, __stack_chk_guard
	str	x0, [x5]
	add	sp, sp, #0x10
#endif

	/*
	 * In case we've touched memory that secondary CPUs will use before
	 * they have turned on their D-cache, clean and invalidate the
	 * D-cache before exiting to normal world.
	 */
	adr_l	x0, __text_start
	ldr	x1, cached_mem_end
	sub	x1, x1, x0
	bl	dcache_cleaninv_range


	/*
	 * Clear current thread id now to allow the thread to be reused on
	 * next entry. Matches the thread_init_boot_thread in
	 * boot.c.
	 */
#ifndef CFG_NS_VIRTUALIZATION
	bl	thread_clr_boot_thread
#endif

#ifdef CFG_CORE_FFA
	adr	x0, cpu_on_handler
	/*
	 * Compensate for the virtual map offset since cpu_on_handler() is
	 * called with MMU off.
	 */
	ldr	x1, boot_mmu_config + CORE_MMU_CONFIG_MAP_OFFSET
	sub	x0, x0, x1
	bl	thread_spmc_register_secondary_ep
	b	thread_ffa_msg_wait
#else
	/*
	 * Pass the address of thread_vector_table in x1. Compensate for
	 * the virtual map offset since cpu_on_handler() is called with MMU
	 * off.
	 */
	ldr	x0, boot_mmu_config + CORE_MMU_CONFIG_MAP_OFFSET
	adr	x1, thread_vector_table
	sub	x1, x1, x0
	mov	x0, #TEESMC_OPTEED_RETURN_ENTRY_DONE
	smc	#0
	/* SMC should not return */
	panic_at_smc_return
#endif
END_FUNC _start
DECLARE_KEEP_INIT _start
449
/*
 * cached_mem_end - one 8 byte slot in .identity_map.data.
 *
 * _start stores the end address of the memory it has copied during boot
 * here and later uses it as the upper bound for dcache_cleaninv_range().
 */
.section .identity_map.data
.p2align 3
LOCAL_DATA cached_mem_end , :
	.space	8
END_DATA cached_mem_end
455
#if defined(CFG_CORE_ASLR) || defined(CFG_CORE_PHYS_RELOCATABLE)
/*
 * relocate - add an offset to every 64-bit value named by the embedded
 * relocation list.
 *
 * In:  x0  offset to add
 *      x1  load address, the list entries are offsets from it
 *
 * Clobbers x2-x6 and the flags.
 *
 * Relocations are not formatted as Rela64, instead they are in a
 * compressed format created by get_reloc_bin() in
 * scripts/gen_tee_bin.py
 *
 * All the R_AARCH64_RELATIVE relocations are translated into a list of
 * 32-bit offsets from TEE_LOAD_ADDR. Each offset points out a 64-bit
 * value which is increased with the load offset.
 */
LOCAL_FUNC relocate , :
	/* x6 = address of the embedded data holding the list */
#ifdef CFG_WITH_PAGER
	adr_l	x6, __init_end
#else
	adr_l	x6, __end
#endif
	/* w2 = offset of the list, w3 = size of the list in bytes */
	ldp	w2, w3, [x6, #BOOT_EMBDATA_RELOC_OFFSET]

	add	x2, x2, x6	/* x2 = first entry */
	add	x3, x3, x2	/* x3 = one past the last entry */

#ifdef CFG_WITH_PAGER
	/*
	 * With pager enabled we can only relocate the pager and init
	 * parts, the rest has to be done when a page is populated.
	 * x6 = __init_end as an offset from the load address.
	 */
	sub	x6, x6, x1
#endif

	/* Walk the list, x2 is the cursor */
.Lreloc_next:
	cmp	x2, x3
	b.eq	.Lreloc_done

	ldr	w4, [x2], #4
#ifdef CFG_WITH_PAGER
	/* Skip too large addresses */
	cmp	x4, x6
	b.ge	.Lreloc_next
#endif
	add	x4, x4, x1
	ldr	x5, [x4]
	add	x5, x5, x0
	str	x5, [x4]
	b	.Lreloc_next

.Lreloc_done:
	ret
END_FUNC relocate
#endif
509
/*
 * void enable_mmu(unsigned long core_pos);
 *
 * Programs the translation registers from boot_mmu_config, turns on the
 * MMU and then the I and D caches.
 *
 * This function has to execute from the identity map, where physical and
 * virtual addresses are the same. Once the MMU is on, everything that
 * holds an address from before is moved by the load offset: VBAR_EL1,
 * SP_EL0, SP_EL1 and the return address. Execution thus continues at the
 * new offset when the function returns.
 *
 * Clobbers x1-x6. SPSel is 0 on return.
 */
LOCAL_FUNC enable_mmu , : , .identity_map
	adr	x1, boot_mmu_config
	load_xregs x1, 0, 2, 6
	/*
	 * x0 = core_pos
	 * x2 = tcr_el1
	 * x3 = mair_el1
	 * x4 = ttbr0_el1_base
	 * x5 = ttbr0_core_offset
	 * x6 = load_offset
	 */

	/* x1 = ttbr0_el1_base + ttbr0_core_offset * core_pos */
	madd	x1, x5, x0, x4

	msr	mair_el1, x3
	msr	tcr_el1, x2
	msr	ttbr0_el1, x1
	msr	ttbr1_el1, xzr
	isb

	/* Invalidate TLB */
	tlbi	vmalle1

	/*
	 * Make sure translation table writes have drained into memory and
	 * the TLB invalidation is complete.
	 */
	dsb	sy
	isb

	/* Enable the MMU */
	mrs	x1, sctlr_el1
	orr	x1, x1, #SCTLR_M
	msr	sctlr_el1, x1
	isb

	/* Move vbar by the load offset */
	mrs	x1, vbar_el1
	add	x1, x1, x6
	msr	vbar_el1, x1
	isb

	/* Invalidate instruction cache and branch predictor */
	ic	iallu
	isb

	/* Enable I and D cache */
	mrs	x1, sctlr_el1
	orr	x1, x1, #SCTLR_C
	orr	x1, x1, #SCTLR_I
	msr	sctlr_el1, x1
	isb

	/* Move the return address and both stack pointers as well */
	add	x30, x30, x6
	msr	spsel, #1
	add	sp, sp, x6
	msr	spsel, #0
	add	sp, sp, x6

	ret
END_FUNC enable_mmu
582
/*
 * boot_mmu_config - storage for a struct core_mmu_config, placed in
 * .identity_map.data.
 *
 * _start passes its address to core_init_mmu_map() and enable_mmu loads
 * the translation register values from it.
 */
.section .identity_map.data
.p2align 3
DATA boot_mmu_config , : /* struct core_mmu_config */
	.space	CORE_MMU_CONFIG_SIZE
END_DATA boot_mmu_config
588
/*
 * cpu_on_handler - entry point of a secondary CPU.
 *
 * In:  x0, x1  handed on unchanged to boot_cpu_on_handler()
 *      x30     return address of the caller
 *
 * With CFG_CORE_FFA the function does not return, it ends in
 * thread_ffa_msg_wait. Otherwise boot_cpu_on_handler() is tail called
 * with the original x30, so it returns straight to our caller.
 *
 * x19-x21 keep the incoming x0, x1 and x30 across the setup.
 */
FUNC cpu_on_handler , :
	mov	x21, lr
	mov	x20, x1
	mov	x19, x0

	adr	x0, reset_vect_table
	msr	vbar_el1, x0
	isb

	set_sctlr_el1
	isb

#ifdef CFG_PAN
	/* After set_sctlr_el1, which sets SCTLR_EL1.SPAN */
	init_pan
#endif

	/* Enable aborts now that we can receive exceptions */
	msr	daifclr, #DAIFBIT_ABT

	bl	__get_core_pos
	bl	enable_mmu

	/* Setup SP_EL0 and SP_EL1, SP will be set to SP_EL0 */
	set_sp

#ifdef CFG_MEMTAG
	init_memtag_per_cpu
#endif
#ifdef CFG_CORE_PAUTH
	init_pauth_per_cpu
#endif

	/* Restore the incoming arguments */
	mov	x1, x20
	mov	x0, x19
#ifdef CFG_CORE_FFA
	bl	boot_cpu_on_handler
	b	thread_ffa_msg_wait
#else
	mov	x30, x21
	b	boot_cpu_on_handler
#endif
END_FUNC cpu_on_handler
DECLARE_KEEP_PAGER cpu_on_handler
632
/*
 * unhandled_cpu - parking loop, never returns.
 *
 * Target of the bound check in set_sp: the CPU waits for an interrupt
 * and goes straight back to waiting whenever it wakes up.
 */
LOCAL_FUNC unhandled_cpu , :
0:	wfi
	b	0b
END_FUNC unhandled_cpu
637
/*
 * stack_tmp_rel - 32-bit distance from this word to
 * stack_tmp - STACK_TMP_GUARD.
 *
 * set_sp adds the address of stack_tmp_rel to the stored value to get the
 * address back without needing an absolute relocation.
 */
LOCAL_DATA stack_tmp_rel , :
	.long	stack_tmp - stack_tmp_rel - STACK_TMP_GUARD
END_DATA stack_tmp_rel
641
/*
 * check_vector_size since - build time check that a vector doesn't
 * exceed the architectural limit of 32 instructions.
 *
 * Place it immediately after the last instruction of the vector and pass
 * the label of the vector entry as \since. Assembly fails with an error
 * if more than 32 * 4 bytes have been emitted since that label. No code
 * is generated.
 */
.macro check_vector_size since
	.ifgt (. - \since) - (32 * 4)
	.error "Vector exceeds 32 instructions"
	.endif
.endm
653
/*
 * reset_vect_entry name - emit one entry of reset_vect_table.
 *
 * Aligns to the 128 byte vector size, defines the label \name, emits a
 * branch to itself and checks the size of the entry.
 */
.macro reset_vect_entry name
	.p2align 7
\name\():
	b	\name
	check_vector_size \name
.endm

/*
 * reset_vect_table - exception vector table installed in VBAR_EL1 by
 * _start and cpu_on_handler.
 *
 * The table is 2 KiB aligned and has 16 entries of 128 bytes each. Every
 * entry is a branch to itself, so any exception taken through this table
 * leaves the CPU spinning in the entry that was hit.
 */
.section .identity_map, "ax", %progbits
.align	11
LOCAL_FUNC reset_vect_table , :, .identity_map, , nobti
	/* -----------------------------------------------------
	 * Current EL with SP0 : 0x0 - 0x180
	 * -----------------------------------------------------
	 */
	/* First entry, sits right at the aligned start of the table */
SynchronousExceptionSP0:
	b	SynchronousExceptionSP0
	check_vector_size SynchronousExceptionSP0

	reset_vect_entry IrqSP0
	reset_vect_entry FiqSP0
	reset_vect_entry SErrorSP0

	/* -----------------------------------------------------
	 * Current EL with SPx: 0x200 - 0x380
	 * -----------------------------------------------------
	 */
	reset_vect_entry SynchronousExceptionSPx
	reset_vect_entry IrqSPx
	reset_vect_entry FiqSPx
	reset_vect_entry SErrorSPx

	/* -----------------------------------------------------
	 * Lower EL using AArch64 : 0x400 - 0x580
	 * -----------------------------------------------------
	 */
	reset_vect_entry SynchronousExceptionA64
	reset_vect_entry IrqA64
	reset_vect_entry FiqA64
	reset_vect_entry SErrorA64

	/* -----------------------------------------------------
	 * Lower EL using AArch32 : 0x600 - 0x780
	 * -----------------------------------------------------
	 */
	reset_vect_entry SynchronousExceptionA32
	reset_vect_entry IrqA32
	reset_vect_entry FiqA32
	reset_vect_entry SErrorA32

END_FUNC reset_vect_table
753
/*
 * Invokes emit_aarch64_feature_1_and with the BTI property flag, wrapped
 * in BTI(). NOTE(review): BTI() presumably expands to nothing unless the
 * core is built with BTI - confirm against its definition.
 */
BTI(emit_aarch64_feature_1_and GNU_PROPERTY_AARCH64_FEATURE_1_BTI)
755