/* SPDX-License-Identifier: BSD-2-Clause */
/*
 * Copyright (c) 2015-2022, Linaro Limited
 * Copyright (c) 2021, Arm Limited
 */

#include <platform_config.h>

#include <arm64_macros.S>
#include <arm.h>
#include <asm.S>
#include <generated/asm-defines.h>
#include <keep.h>
#include <kernel/thread_private.h>
#include <sm/optee_smc.h>
#include <sm/teesmc_opteed.h>
#include <sm/teesmc_opteed_macros.h>

	/*
	 * Set up SP_EL0 and SP_EL1, SP will be set to SP_EL0.
	 * SP_EL0 is assigned:
	 *   stack_tmp + (cpu_id + 1) * stack_tmp_stride - STACK_TMP_GUARD
	 * SP_EL1 is assigned thread_core_local[cpu_id]
	 */
	.macro set_sp
		bl	__get_core_pos
		cmp	x0, #CFG_TEE_CORE_NB_CORE
		/* Unsupported CPU, park it before it breaks something */
		bge	unhandled_cpu
		add	x0, x0, #1
		adr_l	x1, stack_tmp_stride
		ldr	w1, [x1]
		mul	x1, x0, x1

		/* x0 = stack_tmp - STACK_TMP_GUARD */
		adr_l	x2, stack_tmp_rel
		ldr	w0, [x2]
		add	x0, x0, x2

		msr	spsel, #0
		add	sp, x1, x0
		bl	thread_get_core_local
		msr	spsel, #1
		mov	sp, x0
		msr	spsel, #0
	.endm
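
	/*
	 * Roughly equivalent C for the computation above (a sketch only,
	 * assuming __get_core_pos() returns the current CPU index):
	 *
	 *   base = (vaddr_t)&stack_tmp_rel + stack_tmp_rel;
	 *          (this equals stack_tmp - STACK_TMP_GUARD, see the
	 *           stack_tmp_rel definition near the end of this file)
	 *   SP_EL0 = base + (core_pos + 1) * stack_tmp_stride;
	 *   SP_EL1 = (vaddr_t)thread_get_core_local();
	 */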

	.macro read_feat_mte reg
		mrs	\reg, id_aa64pfr1_el1
		ubfx	\reg, \reg, #ID_AA64PFR1_EL1_MTE_SHIFT, #4
	.endm

	.macro set_sctlr_el1
		mrs	x0, sctlr_el1
		orr	x0, x0, #SCTLR_I
		orr	x0, x0, #SCTLR_SA
		orr	x0, x0, #SCTLR_SPAN
#if defined(CFG_CORE_RWDATA_NOEXEC)
		orr	x0, x0, #SCTLR_WXN
#endif
#if defined(CFG_SCTLR_ALIGNMENT_CHECK)
		orr	x0, x0, #SCTLR_A
#else
		bic	x0, x0, #SCTLR_A
#endif
#ifdef CFG_MEMTAG
		read_feat_mte x1
		cmp	w1, #1
		b.ls	111f
		orr	x0, x0, #(SCTLR_ATA | SCTLR_ATA0)
		bic	x0, x0, #SCTLR_TCF_MASK
		bic	x0, x0, #SCTLR_TCF0_MASK
111:
#endif
#if defined(CFG_TA_PAUTH) && defined(CFG_TA_BTI)
		orr	x0, x0, #SCTLR_BT0
#endif
#if defined(CFG_CORE_PAUTH) && defined(CFG_CORE_BTI)
		orr	x0, x0, #SCTLR_BT1
#endif
		msr	sctlr_el1, x0
	.endm

	.macro init_memtag_per_cpu
		read_feat_mte x0
		cmp	w0, #1
		b.ls	11f

#ifdef CFG_TEE_CORE_DEBUG
		/*
		 * This together with GCR_EL1.RRND = 0 will make the tags
		 * acquired with the irg instruction deterministic.
		 */
		mov_imm	x0, 0xcafe00
		msr	rgsr_el1, x0
		/* Avoid tag = 0x0 and 0xf */
		mov	x0, #0
#else
		/*
		 * Still avoid tag = 0x0 and 0xf as we use that tag for
		 * everything which isn't explicitly tagged. Set
		 * GCR_EL1.RRND = 1 to allow an implementation specific
		 * method of generating the tags.
		 */
		mov	x0, #GCR_EL1_RRND
#endif
		/* Exclude tags 0x0 and 0xf (GCR_EL1.Exclude bits 0 and 15) */
		orr	x0, x0, #1
		orr	x0, x0, #(1 << 15)
		msr	gcr_el1, x0

		/*
		 * Enable the tag checks on the current CPU.
		 *
		 * This depends on boot_init_memtag() having cleared tags
		 * for TEE core memory. Strictly speaking it doesn't,
		 * since addresses with the tag value 0b0000 will use
		 * unchecked accesses due to TCR_TCMA0.
		 */
		mrs	x0, tcr_el1
		orr	x0, x0, #TCR_TBI0
		orr	x0, x0, #TCR_TCMA0
		msr	tcr_el1, x0

		mrs	x0, sctlr_el1
		orr	x0, x0, #SCTLR_TCF_SYNC
		orr	x0, x0, #SCTLR_TCF0_SYNC
		msr	sctlr_el1, x0

		isb
11:
	.endm

	.macro init_pauth_per_cpu
		msr	spsel, #1
		ldp	x0, x1, [sp, #THREAD_CORE_LOCAL_KEYS]
		msr	spsel, #0
		write_apiakeyhi x0
		write_apiakeylo x1
		mrs	x0, sctlr_el1
		orr	x0, x0, #SCTLR_ENIA
		msr	sctlr_el1, x0
		isb
	.endm
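
	/*
	 * Note: the pair loaded from THREAD_CORE_LOCAL_KEYS above is the
	 * per-core APIA key (hi/lo halves) kept in struct thread_core_local,
	 * reached through SP_EL1, and SCTLR_ENIA then enables pointer
	 * authentication with that key on this core.
	 */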

FUNC _start , :
#if defined(CFG_CORE_SEL1_SPMC)
	/*
	 * With OP-TEE as SPMC at S-EL1 the SPMD (SPD_spmd) in TF-A passes
	 * the DTB in x0, the pageable part in x1 and the rest of the
	 * registers are unused.
	 */
	mov	x19, x1		/* Save pageable part */
	mov	x20, x0		/* Save DT address */
#else
	mov	x19, x0		/* Save pageable part address */
#if defined(CFG_DT_ADDR)
	ldr	x20, =CFG_DT_ADDR
#else
	mov	x20, x2		/* Save DT address */
#endif
#endif

	adr	x0, reset_vect_table
	msr	vbar_el1, x0
	isb

	set_sctlr_el1
	isb

#ifdef CFG_WITH_PAGER
	/*
	 * Move the init code into its correct location and move the hashes
	 * to a temporary safe location until the heap is initialized.
	 *
	 * The binary is built as:
	 * [Pager code, rodata and data] : In correct location
	 * [Init code and rodata] : Should be copied to __init_start
	 * [struct boot_embdata + data] : Should be saved before
	 * initializing the pager, the first uint32_t tells the length of
	 * the data
	 */
	adr	x0, __init_start	/* dst */
	adr	x1, __data_end		/* src */
	adr	x2, __init_end
	sub	x2, x2, x0		/* init len */
	ldr	w4, [x1, x2]		/* length of hashes etc */
	add	x2, x2, x4		/* length of init and hashes etc */
	/* Copy backwards (as memmove) in case we're overlapping */
	add	x0, x0, x2		/* __init_start + len */
	add	x1, x1, x2		/* __data_end + len */
	adr	x3, cached_mem_end
	str	x0, [x3]
	adr	x2, __init_start
copy_init:
	ldp	x3, x4, [x1, #-16]!
	stp	x3, x4, [x0, #-16]!
	cmp	x0, x2
	b.gt	copy_init
#else
	/*
	 * The binary is built as:
	 * [Core, rodata and data] : In correct location
	 * [struct boot_embdata + data] : Should be moved to __end, the
	 * first uint32_t tells the length of the struct + data
	 */
	adr_l	x0, __end		/* dst */
	adr_l	x1, __data_end		/* src */
	ldr	w2, [x1]		/* struct boot_embdata::total_len */
	/* Copy backwards (as memmove) in case we're overlapping */
	add	x0, x0, x2
	add	x1, x1, x2
	adr	x3, cached_mem_end
	str	x0, [x3]
	adr_l	x2, __end

copy_init:
	ldp	x3, x4, [x1, #-16]!
	stp	x3, x4, [x0, #-16]!
	cmp	x0, x2
	b.gt	copy_init
#endif
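
	/*
	 * The copy loops above roughly correspond to the following C
	 * (a sketch only, with made-up variable names), copying 16 bytes
	 * at a time from the end towards the start so that overlapping
	 * source and destination ranges are handled like memmove():
	 *
	 *   while (dst_end > dst_start) {
	 *           src_end -= 16;
	 *           dst_end -= 16;
	 *           memcpy(dst_end, src_end, 16);
	 *   }
	 */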

	/*
	 * Clear .bss. This code obviously depends on the linker keeping
	 * the start/end of .bss at least 8 byte aligned.
	 */
	adr_l	x0, __bss_start
	adr_l	x1, __bss_end
clear_bss:
	str	xzr, [x0], #8
	cmp	x0, x1
	b.lt	clear_bss

#ifdef CFG_VIRTUALIZATION
	/*
	 * Clear .nex_bss. This code obviously depends on the linker keeping
	 * the start/end of .nex_bss at least 8 byte aligned.
	 */
	adr	x0, __nex_bss_start
	adr	x1, __nex_bss_end
clear_nex_bss:
	str	xzr, [x0], #8
	cmp	x0, x1
	b.lt	clear_nex_bss
#endif

	/* Setup SP_EL0 and SP_EL1, SP will be set to SP_EL0 */
	set_sp

	bl	thread_init_thread_core_local

	/* Enable aborts now that we can receive exceptions */
	msr	daifclr, #DAIFBIT_ABT

	/*
	 * Invalidate dcache for all memory used during initialization to
	 * avoid nasty surprises when the cache is turned on. We must not
	 * invalidate memory not used by OP-TEE since we may invalidate
	 * entries used by, for instance, ARM Trusted Firmware.
	 */
	adr_l	x0, __text_start
	ldr	x1, cached_mem_end
	sub	x1, x1, x0
	bl	dcache_cleaninv_range

	/* Enable Console */
	bl	console_init

#ifdef CFG_MEMTAG
	/*
	 * If FEAT_MTE2 is available, initialize the memtag callbacks.
	 * Tags for OP-TEE core memory are then cleared to make it safe to
	 * enable MEMTAG below.
	 */
	bl	boot_init_memtag
#endif

#ifdef CFG_CORE_ASLR
	mov	x0, x20
	bl	get_aslr_seed
#else
	mov	x0, #0
#endif

	adr	x1, boot_mmu_config
	bl	core_init_mmu_map

#ifdef CFG_CORE_ASLR
	/*
	 * Process the relocation information again, updating for the new
	 * offset. We're doing this now, before the MMU is enabled, as some
	 * of the memory will become write protected.
	 */
	ldr	x0, boot_mmu_config + CORE_MMU_CONFIG_LOAD_OFFSET
	/*
	 * Update the cached_mem_end address with the load offset since it
	 * was calculated before relocation.
	 */
	adr	x5, cached_mem_end
	ldr	x6, [x5]
	add	x6, x6, x0
	str	x6, [x5]
	bl	relocate
#endif

	bl	__get_core_pos
	bl	enable_mmu
#ifdef CFG_CORE_ASLR
	/*
	 * Reinitialize console, since register_serial_console() has
	 * previously registered a PA and with ASLR the VA is different
	 * from the PA.
	 */
	bl	console_init
#endif

#ifdef CFG_VIRTUALIZATION
	/*
	 * Initialize the partition tables for each partition to
	 * default_partition, which has now been relocated to a different VA
	 */
	bl	core_mmu_set_default_prtn_tbl
#endif

	mov	x0, x19		/* pageable part address */
	mov	x1, #-1
	bl	boot_init_primary_early

#ifdef CFG_MEMTAG
	init_memtag_per_cpu
#endif

#ifndef CFG_VIRTUALIZATION
	mov	x21, sp
	adr_l	x0, threads
	ldr	x0, [x0, #THREAD_CTX_STACK_VA_END]
	mov	sp, x0
	bl	thread_get_core_local
	mov	x22, x0
	str	wzr, [x22, #THREAD_CORE_LOCAL_FLAGS]
#endif
	mov	x0, x20		/* DT address */
	bl	boot_init_primary_late
#ifdef CFG_CORE_PAUTH
	init_pauth_per_cpu
#endif

#ifndef CFG_VIRTUALIZATION
	mov	x0, #THREAD_CLF_TMP
	str	w0, [x22, #THREAD_CORE_LOCAL_FLAGS]
	mov	sp, x21
#endif

#ifdef _CFG_CORE_STACK_PROTECTOR
	/* Update stack canary value */
	bl	plat_get_random_stack_canary
	adr_l	x5, __stack_chk_guard
	str	x0, [x5]
#endif

	/*
	 * In case we've touched memory that secondary CPUs will use before
	 * they have turned on their D-cache, clean and invalidate the
	 * D-cache before exiting to normal world.
	 */
	adr_l	x0, __text_start
	ldr	x1, cached_mem_end
	sub	x1, x1, x0
	bl	dcache_cleaninv_range

	/*
	 * Clear the current thread id now to allow the thread to be reused
	 * on the next entry. Matches the thread_init_boot_thread() call in
	 * boot.c.
	 */
#ifndef CFG_VIRTUALIZATION
	bl	thread_clr_boot_thread
#endif

#ifdef CFG_CORE_FFA
	adr	x0, cpu_on_handler
	/*
	 * Compensate for the load offset since cpu_on_handler() is
	 * called with MMU off.
	 */
	ldr	x1, boot_mmu_config + CORE_MMU_CONFIG_LOAD_OFFSET
	sub	x0, x0, x1
	bl	thread_spmc_register_secondary_ep
	b	thread_ffa_msg_wait
#else
	/*
	 * Pass the vector address returned from main_init
	 * Compensate for the load offset since cpu_on_handler() is
	 * called with MMU off.
	 */
	ldr	x0, boot_mmu_config + CORE_MMU_CONFIG_LOAD_OFFSET
	adr	x1, thread_vector_table
	sub	x1, x1, x0
	mov	x0, #TEESMC_OPTEED_RETURN_ENTRY_DONE
	smc	#0
	/* SMC should not return */
	panic_at_smc_return
#endif
END_FUNC _start
DECLARE_KEEP_INIT _start

	.section .identity_map.data
	.balign	8
LOCAL_DATA cached_mem_end , :
	.skip	8
END_DATA cached_mem_end
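
/*
 * cached_mem_end records the end address of the memory touched before the
 * D-cache is enabled (stored while copying the embedded data in _start).
 * It is used as the upper bound for the dcache_cleaninv_range() calls
 * above and, with CFG_CORE_ASLR, is adjusted by the load offset before
 * the MMU is enabled.
 */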

#ifdef CFG_CORE_ASLR
LOCAL_FUNC relocate , :
	/* x0 holds load offset */
#ifdef CFG_WITH_PAGER
	adr_l	x6, __init_end
#else
	adr_l	x6, __end
#endif
	ldp	w2, w3, [x6, #BOOT_EMBDATA_RELOC_OFFSET]

	mov_imm	x1, TEE_RAM_START
	add	x2, x2, x6	/* start of relocations */
	add	x3, x3, x2	/* end of relocations */

	/*
	 * Relocations are not formatted as Rela64, instead they are in a
	 * compressed format created by get_reloc_bin() in
	 * scripts/gen_tee_bin.py
	 *
	 * All the R_AARCH64_RELATIVE relocations are translated into a
	 * list of 32-bit offsets from TEE_RAM_START. Each offset points
	 * out a 64-bit value which is increased with the load offset.
	 */
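
	/*
	 * In rough C terms the loop below does the following (a sketch
	 * only; reloc_offset and reloc_len stand for the two 32-bit words
	 * read at BOOT_EMBDATA_RELOC_OFFSET above):
	 *
	 *   uint32_t *reloc = (uint32_t *)(embdata + reloc_offset);
	 *   uint32_t *end = (uint32_t *)((uint8_t *)reloc + reloc_len);
	 *
	 *   for (; reloc < end; reloc++)
	 *           *(uint64_t *)(TEE_RAM_START + *reloc) += load_offset;
	 */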

#ifdef CFG_WITH_PAGER
	/*
	 * With pager enabled we can only relocate the pager and init
	 * parts, the rest has to be done when a page is populated.
	 */
	sub	x6, x6, x1
#endif

	b	2f
	/* Loop over the relocation addresses and process all entries */
1:	ldr	w4, [x2], #4
#ifdef CFG_WITH_PAGER
	/* Skip too large addresses */
	cmp	x4, x6
	b.ge	2f
#endif
	add	x4, x4, x1
	ldr	x5, [x4]
	add	x5, x5, x0
	str	x5, [x4]

2:	cmp	x2, x3
	b.ne	1b

	ret
END_FUNC relocate
#endif

/*
 * void enable_mmu(unsigned long core_pos);
 *
 * This function depends on being mapped within the identity map where
 * the physical and virtual addresses are the same. After the MMU has been
 * enabled the instruction pointer will be updated to execute at the new
 * offset instead. Stack pointers and the return address are updated.
 */
LOCAL_FUNC enable_mmu , : , .identity_map
	adr	x1, boot_mmu_config
	load_xregs x1, 0, 2, 6
	/*
	 * x0 = core_pos
	 * x2 = tcr_el1
	 * x3 = mair_el1
	 * x4 = ttbr0_el1_base
	 * x5 = ttbr0_core_offset
	 * x6 = load_offset
	 */
	msr	tcr_el1, x2
	msr	mair_el1, x3

	/*
	 * ttbr0_el1 = ttbr0_el1_base + ttbr0_core_offset * core_pos
	 */
	madd	x1, x5, x0, x4
	msr	ttbr0_el1, x1
	msr	ttbr1_el1, xzr
	isb

	/* Invalidate TLB */
	tlbi	vmalle1

	/*
	 * Make sure translation table writes have drained into memory and
	 * the TLB invalidation is complete.
	 */
	dsb	sy
	isb

	/* Enable the MMU */
	mrs	x1, sctlr_el1
	orr	x1, x1, #SCTLR_M
	msr	sctlr_el1, x1
	isb

	/* Update vbar */
	mrs	x1, vbar_el1
	add	x1, x1, x6
	msr	vbar_el1, x1
	isb

	/* Invalidate instruction cache and branch predictor */
	ic	iallu
	isb

	/* Enable I and D cache */
	mrs	x1, sctlr_el1
	orr	x1, x1, #SCTLR_I
	orr	x1, x1, #SCTLR_C
	msr	sctlr_el1, x1
	isb

	/* Adjust stack pointers and return address */
	msr	spsel, #1
	add	sp, sp, x6
	msr	spsel, #0
	add	sp, sp, x6
	add	x30, x30, x6

	ret
END_FUNC enable_mmu
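
/*
 * Typical call sequence, as used in _start and cpu_on_handler: the core
 * position from __get_core_pos() is passed in x0 while still running from
 * the identity (PA == VA) mapping, and on return SP_EL0, SP_EL1 and the
 * return address have been rebased by load_offset so that execution
 * continues at the relocated virtual addresses.
 */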

	.section .identity_map.data
	.balign	8
DATA boot_mmu_config , : /* struct core_mmu_config */
	.skip	CORE_MMU_CONFIG_SIZE
END_DATA boot_mmu_config

FUNC cpu_on_handler , :
	mov	x19, x0
	mov	x20, x1
	mov	x21, x30

	adr	x0, reset_vect_table
	msr	vbar_el1, x0
	isb

	set_sctlr_el1
	isb

	/* Enable aborts now that we can receive exceptions */
	msr	daifclr, #DAIFBIT_ABT

	bl	__get_core_pos
	bl	enable_mmu

	/* Setup SP_EL0 and SP_EL1, SP will be set to SP_EL0 */
	set_sp

#ifdef CFG_MEMTAG
	init_memtag_per_cpu
#endif
#ifdef CFG_CORE_PAUTH
	init_pauth_per_cpu
#endif

	mov	x0, x19
	mov	x1, x20
#ifdef CFG_CORE_FFA
	bl	boot_cpu_on_handler
	b	thread_ffa_msg_wait
#else
	mov	x30, x21
	b	boot_cpu_on_handler
#endif
END_FUNC cpu_on_handler
DECLARE_KEEP_PAGER cpu_on_handler

LOCAL_FUNC unhandled_cpu , :
	wfi
	b	unhandled_cpu
END_FUNC unhandled_cpu

LOCAL_DATA stack_tmp_rel , :
	.word	stack_tmp - stack_tmp_rel - STACK_TMP_GUARD
END_DATA stack_tmp_rel
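
/*
 * stack_tmp_rel holds a PC-relative value: adding the loaded word to the
 * address of stack_tmp_rel itself, as done in the set_sp macro above,
 * yields stack_tmp - STACK_TMP_GUARD without needing an absolute address
 * before relocation has been processed.
 */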

	/*
	 * This macro verifies that a given vector doesn't exceed the
	 * architectural limit of 32 instructions. It is meant to be placed
	 * immediately after the last instruction in the vector and takes
	 * the vector entry as its parameter.
	 */
	.macro check_vector_size since
	  .if (. - \since) > (32 * 4)
	    .error "Vector exceeds 32 instructions"
	  .endif
	.endm
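
	/*
	 * For reference: 32 instructions * 4 bytes = 128 (0x80) bytes,
	 * which matches the ".align 7" (2^7 = 128) spacing between the
	 * vector entries below.
	 */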

	.section .identity_map, "ax", %progbits
	.align	11
LOCAL_FUNC reset_vect_table , :, .identity_map, , nobti
	/* -----------------------------------------------------
	 * Current EL with SP0 : 0x0 - 0x180
	 * -----------------------------------------------------
	 */
SynchronousExceptionSP0:
	b	SynchronousExceptionSP0
	check_vector_size SynchronousExceptionSP0

	.align	7
IrqSP0:
	b	IrqSP0
	check_vector_size IrqSP0

	.align	7
FiqSP0:
	b	FiqSP0
	check_vector_size FiqSP0

	.align	7
SErrorSP0:
	b	SErrorSP0
	check_vector_size SErrorSP0

	/* -----------------------------------------------------
	 * Current EL with SPx: 0x200 - 0x380
	 * -----------------------------------------------------
	 */
	.align	7
SynchronousExceptionSPx:
	b	SynchronousExceptionSPx
	check_vector_size SynchronousExceptionSPx

	.align	7
IrqSPx:
	b	IrqSPx
	check_vector_size IrqSPx

	.align	7
FiqSPx:
	b	FiqSPx
	check_vector_size FiqSPx

	.align	7
SErrorSPx:
	b	SErrorSPx
	check_vector_size SErrorSPx

	/* -----------------------------------------------------
	 * Lower EL using AArch64 : 0x400 - 0x580
	 * -----------------------------------------------------
	 */
	.align	7
SynchronousExceptionA64:
	b	SynchronousExceptionA64
	check_vector_size SynchronousExceptionA64

	.align	7
IrqA64:
	b	IrqA64
	check_vector_size IrqA64

	.align	7
FiqA64:
	b	FiqA64
	check_vector_size FiqA64

	.align	7
SErrorA64:
	b	SErrorA64
	check_vector_size SErrorA64

	/* -----------------------------------------------------
	 * Lower EL using AArch32 : 0x600 - 0x780
	 * -----------------------------------------------------
	 */
	.align	7
SynchronousExceptionA32:
	b	SynchronousExceptionA32
	check_vector_size SynchronousExceptionA32

	.align	7
IrqA32:
	b	IrqA32
	check_vector_size IrqA32

	.align	7
FiqA32:
	b	FiqA32
	check_vector_size FiqA32

	.align	7
SErrorA32:
	b	SErrorA32
	check_vector_size SErrorA32

END_FUNC reset_vect_table

BTI(emit_aarch64_feature_1_and     GNU_PROPERTY_AARCH64_FEATURE_1_BTI)