/* SPDX-License-Identifier: BSD-2-Clause */
/*
 * Copyright (c) 2015, Linaro Limited
 * Copyright (c) 2021, Arm Limited
 */

#include <platform_config.h>

#include <arm64_macros.S>
#include <arm.h>
#include <asm.S>
#include <generated/asm-defines.h>
#include <keep.h>
#include <kernel/thread_private.h>
#include <sm/optee_smc.h>
#include <sm/teesmc_opteed.h>
#include <sm/teesmc_opteed_macros.h>

	/*
	 * Set up SP_EL0 and SP_EL1; SP will be set to SP_EL0.
	 * SP_EL0 is assigned:
	 *   stack_tmp + (cpu_id + 1) * stack_tmp_stride - STACK_TMP_GUARD
	 * SP_EL1 is assigned thread_core_local[cpu_id]
	 */
	.macro set_sp
		bl	__get_core_pos
		cmp	x0, #CFG_TEE_CORE_NB_CORE
		/* Unsupported CPU, park it before it breaks something */
		bge	unhandled_cpu
		add	x0, x0, #1
		adr_l	x1, stack_tmp_stride
		ldr	w1, [x1]
		mul	x1, x0, x1

		/* x0 = stack_tmp - STACK_TMP_GUARD */
		adr_l	x2, stack_tmp_rel
		ldr	w0, [x2]
		add	x0, x0, x2

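		/*
		 * Note: with SPSel == 0 the writes to SP below go to
		 * SP_EL0, with SPSel == 1 they go to SP_EL1.
		 */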
		msr	spsel, #0
		add	sp, x1, x0
		bl	thread_get_core_local
		msr	spsel, #1
		mov	sp, x0
		msr	spsel, #0
	.endm

	.macro set_sctlr_el1
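		/*
		 * SCTLR_I: enable instruction cache
		 * SCTLR_SA: enable SP alignment checking
		 * SCTLR_SPAN: leave PSTATE.PAN unchanged on exceptions to EL1
		 * SCTLR_WXN: writable memory is never executable
		 * SCTLR_A: alignment fault checking
		 */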
		mrs	x0, sctlr_el1
		orr	x0, x0, #SCTLR_I
		orr	x0, x0, #SCTLR_SA
		orr	x0, x0, #SCTLR_SPAN
#if defined(CFG_CORE_RWDATA_NOEXEC)
		orr	x0, x0, #SCTLR_WXN
#endif
#if defined(CFG_SCTLR_ALIGNMENT_CHECK)
		orr	x0, x0, #SCTLR_A
#else
		bic	x0, x0, #SCTLR_A
#endif
		msr	sctlr_el1, x0
	.endm

FUNC _start , :
#if defined(CFG_CORE_SEL1_SPMC)
	/*
	 * With OP-TEE as SPMC at S-EL1 the SPMD (SPD_spmd) in TF-A passes
	 * the DTB in x0, the pageable part in x1, and the rest of the
	 * registers are unused.
	 */
	mov	x19, x1		/* Save pageable part */
	mov	x20, x0		/* Save DT address */
#else
	mov	x19, x0		/* Save pageable part address */
#if defined(CFG_DT_ADDR)
	ldr	x20, =CFG_DT_ADDR
#else
	mov	x20, x2		/* Save DT address */
#endif
#endif

	adr	x0, reset_vect_table
	msr	vbar_el1, x0
	isb

	set_sctlr_el1
	isb

#ifdef CFG_WITH_PAGER
	/*
	 * Move the init code into its correct location and move the hashes
	 * to a temporary safe location until the heap is initialized.
	 *
	 * The binary is built as:
	 * [Pager code, rodata and data] : In correct location
	 * [Init code and rodata] : Should be copied to __init_start
	 * [struct boot_embdata + data] : Should be saved before
	 * initializing the pager; the first uint32_t tells the length of
	 * the data
	 */
	adr	x0, __init_start	/* dst */
	adr	x1, __data_end		/* src */
	adr	x2, __init_end
	sub	x2, x2, x0		/* init len */
	ldr	w4, [x1, x2]		/* length of hashes etc */
	add	x2, x2, x4		/* length of init and hashes etc */
	/* Copy backwards (as memmove) in case we're overlapping */
	add	x0, x0, x2		/* __init_start + len */
	add	x1, x1, x2		/* __data_end + len */
	adr	x3, cached_mem_end
	str	x0, [x3]
	adr	x2, __init_start
copy_init:
	ldp	x3, x4, [x1, #-16]!
	stp	x3, x4, [x0, #-16]!
	cmp	x0, x2
	b.gt	copy_init
#else
	/*
	 * The binary is built as:
	 * [Core, rodata and data] : In correct location
	 * [struct boot_embdata + data] : Should be moved to __end; the
	 * first uint32_t tells the length of the struct + data
	 */
	adr_l	x0, __end		/* dst */
	adr_l	x1, __data_end		/* src */
	ldr	w2, [x1]		/* struct boot_embdata::total_len */
	/* Copy backwards (as memmove) in case we're overlapping */
	add	x0, x0, x2
	add	x1, x1, x2
	adr	x3, cached_mem_end
	str	x0, [x3]
	adr_l	x2, __end

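	/*
	 * Copy 16 bytes per iteration; the copied length is assumed to be
	 * a multiple of 16 bytes.
	 */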
copy_init:
	ldp	x3, x4, [x1, #-16]!
	stp	x3, x4, [x0, #-16]!
	cmp	x0, x2
	b.gt	copy_init
#endif

	/*
	 * Clear .bss. This code depends on the linker keeping the start
	 * and end of .bss at least 8 byte aligned.
	 */
	adr_l	x0, __bss_start
	adr_l	x1, __bss_end
clear_bss:
	str	xzr, [x0], #8
	cmp	x0, x1
	b.lt	clear_bss

#ifdef CFG_VIRTUALIZATION
	/*
	 * Clear .nex_bss. This code depends on the linker keeping the
	 * start and end of .nex_bss at least 8 byte aligned.
	 */
	adr	x0, __nex_bss_start
	adr	x1, __nex_bss_end
clear_nex_bss:
	str	xzr, [x0], #8
	cmp	x0, x1
	b.lt	clear_nex_bss
#endif

	/* Set up SP_EL0 and SP_EL1; SP will be set to SP_EL0 */
	set_sp

	bl	thread_init_thread_core_local

	/* Enable aborts now that we can receive exceptions */
	msr	daifclr, #DAIFBIT_ABT

	/*
	 * Invalidate the dcache for all memory used during initialization
	 * to avoid nasty surprises when the cache is turned on. We must
	 * not invalidate memory not used by OP-TEE since we may invalidate
	 * entries used by, for instance, Arm Trusted Firmware.
	 */
	adr_l	x0, __text_start
	ldr	x1, cached_mem_end
	sub	x1, x1, x0
	bl	dcache_cleaninv_range

	/* Enable Console */
	bl	console_init

#ifdef CFG_CORE_ASLR
	mov	x0, x20
	bl	get_aslr_seed
#else
	mov	x0, #0
#endif

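	/* x0 = ASLR seed (zero when CFG_CORE_ASLR is disabled) */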
	adr	x1, boot_mmu_config
	bl	core_init_mmu_map

#ifdef CFG_CORE_ASLR
	/*
	 * Process the relocation information again, updating for the new
	 * offset. We're doing this now, before the MMU is enabled, as some
	 * of the memory will become write-protected.
	 */
	ldr	x0, boot_mmu_config + CORE_MMU_CONFIG_LOAD_OFFSET
	/*
	 * Update the cached_mem_end address with the load offset since it
	 * was calculated before relocation.
	 */
	adr	x5, cached_mem_end
	ldr	x6, [x5]
	add	x6, x6, x0
	str	x6, [x5]
	bl	relocate
#endif

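	/* Enable the MMU, enable_mmu() expects the core position in x0 */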
	bl	__get_core_pos
	bl	enable_mmu
#ifdef CFG_CORE_ASLR
	/*
	 * Reinitialize console, since register_serial_console() has
	 * previously registered a PA and with ASLR the VA is different
	 * from the PA.
	 */
	bl	console_init
#endif

#ifdef CFG_VIRTUALIZATION
	/*
	 * Initialize the partition tables for each partition to
	 * default_partition, which has now been relocated to a different
	 * VA.
	 */
	bl	core_mmu_set_default_prtn_tbl
#endif

	mov	x0, x19		/* pageable part address */
	mov	x1, #-1
	bl	boot_init_primary_early
#ifndef CFG_VIRTUALIZATION
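	/*
	 * Switch to the boot thread's stack and clear the flags in this
	 * core's thread_core_local while boot_init_primary_late() runs;
	 * the temporary stack and the flags are restored below.
	 */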
	mov	x21, sp
	adr_l	x0, threads
	ldr	x0, [x0, #THREAD_CTX_STACK_VA_END]
	mov	sp, x0
	bl	thread_get_core_local
	mov	x22, x0
	str	wzr, [x22, #THREAD_CORE_LOCAL_FLAGS]
#endif
	mov	x0, x20		/* DT address */
	bl	boot_init_primary_late
#ifndef CFG_VIRTUALIZATION
	mov	x0, #THREAD_CLF_TMP
	str	w0, [x22, #THREAD_CORE_LOCAL_FLAGS]
	mov	sp, x21
#endif

	/*
	 * In case we've touched memory that secondary CPUs will use before
	 * they have turned on their D-cache, clean and invalidate the
	 * D-cache before exiting to normal world.
	 */
	adr_l	x0, __text_start
	ldr	x1, cached_mem_end
	sub	x1, x1, x0
	bl	dcache_cleaninv_range

	/*
	 * Clear the current thread id now to allow the thread to be reused
	 * on the next entry. Matches thread_init_boot_thread() in boot.c.
	 */
#ifndef CFG_VIRTUALIZATION
	bl	thread_clr_boot_thread
#endif

#ifdef CFG_CORE_FFA
	adr	x0, cpu_on_handler
	/*
	 * Compensate for the load offset since cpu_on_handler() is
	 * called with MMU off.
	 */
	ldr	x1, boot_mmu_config + CORE_MMU_CONFIG_LOAD_OFFSET
	sub	x0, x0, x1
	bl	thread_spmc_register_secondary_ep
	b	thread_ffa_msg_wait
#else
	/*
	 * Pass the address of thread_vector_table.
	 * Compensate for the load offset since cpu_on_handler() is
	 * called with MMU off.
	 */
	ldr	x0, boot_mmu_config + CORE_MMU_CONFIG_LOAD_OFFSET
	adr	x1, thread_vector_table
	sub	x1, x1, x0
	mov	x0, #TEESMC_OPTEED_RETURN_ENTRY_DONE
	smc	#0
	/* SMC should not return */
	panic_at_smc_return
#endif
END_FUNC _start
DECLARE_KEEP_INIT _start

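/*
 * cached_mem_end holds the end address of the memory used during
 * initialization; the range __text_start..cached_mem_end is what is
 * cleaned and invalidated from the D-cache above, and it is adjusted by
 * the load offset when ASLR is enabled.
 */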
	.balign	8
LOCAL_DATA cached_mem_end , :
	.skip	8
END_DATA cached_mem_end

#ifdef CFG_CORE_ASLR
LOCAL_FUNC relocate , :
	/* x0 holds load offset */
#ifdef CFG_WITH_PAGER
	adr_l	x6, __init_end
#else
	adr_l	x6, __end
#endif
	ldp	w2, w3, [x6, #BOOT_EMBDATA_RELOC_OFFSET]
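	/* w2 = offset of the relocation data, w3 = length of the data */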

	mov_imm	x1, TEE_RAM_START
	add	x2, x2, x6	/* start of relocations */
	add	x3, x3, x2	/* end of relocations */

	/*
	 * The relocations are not formatted as Rela64; instead they are in
	 * a compressed format created by get_reloc_bin() in
	 * scripts/gen_tee_bin.py
	 *
	 * All the R_AARCH64_RELATIVE relocations are translated into a
	 * list of 32-bit offsets from TEE_RAM_START. Each offset points
	 * out a 64-bit value which is increased by the load offset.
	 */

#ifdef CFG_WITH_PAGER
	/*
	 * With the pager enabled we can only relocate the pager and init
	 * parts; the rest has to be done when a page is populated.
	 */
	sub	x6, x6, x1
#endif

	b	2f
	/* Loop over the relocation addresses and process all entries */
1:	ldr	w4, [x2], #4
#ifdef CFG_WITH_PAGER
	/* Skip addresses that are too large */
	cmp	x4, x6
	b.ge	2f
#endif
	add	x4, x4, x1
	ldr	x5, [x4]
	add	x5, x5, x0
	str	x5, [x4]

2:	cmp	x2, x3
	b.ne	1b

	ret
END_FUNC relocate
#endif

/*
 * void enable_mmu(unsigned long core_pos);
 *
 * This function depends on being mapped within the identity map, where
 * the physical address and the virtual address are the same. After the
 * MMU has been enabled the instruction pointer is updated to execute at
 * the new offset instead. Stack pointers and the return address are
 * updated.
 */
LOCAL_FUNC enable_mmu , : , .identity_map
	adr	x1, boot_mmu_config
	load_xregs x1, 0, 2, 6
	/*
	 * x0 = core_pos
	 * x2 = tcr_el1
	 * x3 = mair_el1
	 * x4 = ttbr0_el1_base
	 * x5 = ttbr0_core_offset
	 * x6 = load_offset
	 */
	msr	tcr_el1, x2
	msr	mair_el1, x3

	/*
	 * ttbr0_el1 = ttbr0_el1_base + ttbr0_core_offset * core_pos
	 */
	madd	x1, x5, x0, x4
	msr	ttbr0_el1, x1
	msr	ttbr1_el1, xzr
	isb

	/* Invalidate TLB */
	tlbi	vmalle1

	/*
	 * Make sure translation table writes have drained into memory and
	 * the TLB invalidation is complete.
	 */
	dsb	sy
	isb

	/* Enable the MMU */
	mrs	x1, sctlr_el1
	orr	x1, x1, #SCTLR_M
	msr	sctlr_el1, x1
	isb

	/* Update vbar */
	mrs	x1, vbar_el1
	add	x1, x1, x6
	msr	vbar_el1, x1
	isb

	/* Invalidate instruction cache and branch predictor */
	ic	iallu
	isb

	/* Enable I and D cache */
	mrs	x1, sctlr_el1
	orr	x1, x1, #SCTLR_I
	orr	x1, x1, #SCTLR_C
	msr	sctlr_el1, x1
	isb

	/* Adjust stack pointers and return address */
	msr	spsel, #1
	add	sp, sp, x6
	msr	spsel, #0
	add	sp, sp, x6
	add	x30, x30, x6

	ret
END_FUNC enable_mmu

	.balign	8
DATA boot_mmu_config , : /* struct core_mmu_config */
	.skip	CORE_MMU_CONFIG_SIZE
END_DATA boot_mmu_config

FUNC cpu_on_handler , :
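	/* Save the arguments and the return address over the calls below */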
	mov	x19, x0
	mov	x20, x1
	mov	x21, x30

	adr	x0, reset_vect_table
	msr	vbar_el1, x0
	isb

	set_sctlr_el1
	isb

	/* Enable aborts now that we can receive exceptions */
	msr	daifclr, #DAIFBIT_ABT

	bl	__get_core_pos
	bl	enable_mmu

	/* Set up SP_EL0 and SP_EL1; SP will be set to SP_EL0 */
	set_sp

	mov	x0, x19
	mov	x1, x20
#ifdef CFG_CORE_FFA
	bl	boot_cpu_on_handler
	b	thread_ffa_msg_wait
#else
	mov	x30, x21
	b	boot_cpu_on_handler
#endif
END_FUNC cpu_on_handler
DECLARE_KEEP_PAGER cpu_on_handler

LOCAL_FUNC unhandled_cpu , :
	wfi
	b	unhandled_cpu
END_FUNC unhandled_cpu

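/*
 * stack_tmp_rel holds the offset from itself to the temporary stacks,
 * with STACK_TMP_GUARD already subtracted, so that set_sp can compute
 * the stack address PC-relatively, independent of the load offset.
 */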
LOCAL_DATA stack_tmp_rel , :
	.word	stack_tmp - stack_tmp_rel - STACK_TMP_GUARD
END_DATA stack_tmp_rel

	/*
	 * This macro verifies that a given vector doesn't exceed the
	 * architectural limit of 32 instructions. It is meant to be placed
	 * immediately after the last instruction in the vector and takes
	 * the vector entry as its parameter.
	 */
	.macro check_vector_size since
	  .if (. - \since) > (32 * 4)
	    .error "Vector exceeds 32 instructions"
	  .endif
	.endm

	.section .identity_map, "ax", %progbits
	.align	11
LOCAL_FUNC reset_vect_table , :, .identity_map, , nobti
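	/*
	 * Each entry below simply loops forever: no exceptions are
	 * expected to be handled while this early vector table is in use.
	 */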
	/* -----------------------------------------------------
	 * Current EL with SP0 : 0x0 - 0x180
	 * -----------------------------------------------------
	 */
SynchronousExceptionSP0:
	b	SynchronousExceptionSP0
	check_vector_size SynchronousExceptionSP0

	.align	7
IrqSP0:
	b	IrqSP0
	check_vector_size IrqSP0

	.align	7
FiqSP0:
	b	FiqSP0
	check_vector_size FiqSP0

	.align	7
SErrorSP0:
	b	SErrorSP0
	check_vector_size SErrorSP0

	/* -----------------------------------------------------
	 * Current EL with SPx: 0x200 - 0x380
	 * -----------------------------------------------------
	 */
	.align	7
SynchronousExceptionSPx:
	b	SynchronousExceptionSPx
	check_vector_size SynchronousExceptionSPx

	.align	7
IrqSPx:
	b	IrqSPx
	check_vector_size IrqSPx

	.align	7
FiqSPx:
	b	FiqSPx
	check_vector_size FiqSPx

	.align	7
SErrorSPx:
	b	SErrorSPx
	check_vector_size SErrorSPx

	/* -----------------------------------------------------
	 * Lower EL using AArch64 : 0x400 - 0x580
	 * -----------------------------------------------------
	 */
	.align	7
SynchronousExceptionA64:
	b	SynchronousExceptionA64
	check_vector_size SynchronousExceptionA64

	.align	7
IrqA64:
	b	IrqA64
	check_vector_size IrqA64

	.align	7
FiqA64:
	b	FiqA64
	check_vector_size FiqA64

	.align	7
SErrorA64:
	b	SErrorA64
	check_vector_size SErrorA64

	/* -----------------------------------------------------
	 * Lower EL using AArch32 : 0x600 - 0x780
	 * -----------------------------------------------------
	 */
	.align	7
SynchronousExceptionA32:
	b	SynchronousExceptionA32
	check_vector_size SynchronousExceptionA32

	.align	7
IrqA32:
	b	IrqA32
	check_vector_size IrqA32

	.align	7
FiqA32:
	b	FiqA32
	check_vector_size FiqA32

	.align	7
SErrorA32:
	b	SErrorA32
	check_vector_size SErrorA32

END_FUNC reset_vect_table

BTI(emit_aarch64_feature_1_and     GNU_PROPERTY_AARCH64_FEATURE_1_BTI)
