/* SPDX-License-Identifier: BSD-2-Clause */
/*
 * Copyright (c) 2015, Linaro Limited
 * Copyright (c) 2021, Arm Limited
 */

#include <platform_config.h>

#include <arm64_macros.S>
#include <arm.h>
#include <asm.S>
#include <generated/asm-defines.h>
#include <keep.h>
#include <kernel/thread_private.h>
#include <sm/optee_smc.h>
#include <sm/teesmc_opteed.h>
#include <sm/teesmc_opteed_macros.h>

	/*
	 * Set up SP_EL0 and SP_EL1, SP will be set to SP_EL0.
	 * SP_EL0 is assigned:
	 *   stack_tmp + (cpu_id + 1) * stack_tmp_stride - STACK_TMP_GUARD
	 * SP_EL1 is assigned thread_core_local[cpu_id]
	 */
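	/*
	 * Illustrative C sketch of what the macro computes (an assumption
	 * for documentation only, not part of the build):
	 *
	 *   size_t pos = __get_core_pos();
	 *   sp_el0 = (vaddr_t)stack_tmp + (pos + 1) * stack_tmp_stride -
	 *	      STACK_TMP_GUARD;
	 *   sp_el1 = (vaddr_t)thread_get_core_local();
	 *
	 * stack_tmp is located via the PC-relative stack_tmp_rel word
	 * further down in this file, so the computation is independent of
	 * the load address.
	 */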
	.macro set_sp
		bl	__get_core_pos
		cmp	x0, #CFG_TEE_CORE_NB_CORE
		/* Unsupported CPU, park it before it breaks something */
		bge	unhandled_cpu
		add	x0, x0, #1
		adr_l	x1, stack_tmp_stride
		ldr	w1, [x1]
		mul	x1, x0, x1

		/* x0 = stack_tmp - STACK_TMP_GUARD */
		adr_l	x2, stack_tmp_rel
		ldr	w0, [x2]
		add	x0, x0, x2

		msr	spsel, #0
		add	sp, x1, x0
		bl	thread_get_core_local
		msr	spsel, #1
		mov	sp, x0
		msr	spsel, #0
	.endm

	.macro set_sctlr_el1
		mrs	x0, sctlr_el1
		orr	x0, x0, #SCTLR_I	/* Enable I-cache */
		orr	x0, x0, #SCTLR_SA	/* Enable SP alignment check */
		orr	x0, x0, #SCTLR_SPAN	/* Leave PSTATE.PAN unchanged on exceptions */
#if defined(CFG_CORE_RWDATA_NOEXEC)
		orr	x0, x0, #SCTLR_WXN	/* Write permission implies XN */
#endif
#if defined(CFG_SCTLR_ALIGNMENT_CHECK)
		orr	x0, x0, #SCTLR_A	/* Enable alignment checks */
#else
		bic	x0, x0, #SCTLR_A	/* Disable alignment checks */
#endif
		msr	sctlr_el1, x0
	.endm

FUNC _start , :
#if defined(CFG_CORE_SEL1_SPMC)
	/*
	 * With OP-TEE as SPMC at S-EL1 the SPMD (SPD_spmd) in TF-A passes
	 * the DTB in x0, the pageable part in x1 and the rest of the
	 * registers are unused.
	 */
	mov	x19, x1		/* Save pageable part */
	mov	x20, x0		/* Save DT address */
#else
	mov	x19, x0		/* Save pageable part address */
#if defined(CFG_DT_ADDR)
	ldr	x20, =CFG_DT_ADDR
#else
	mov	x20, x2		/* Save DT address */
#endif
#endif

	adr	x0, reset_vect_table
	msr	vbar_el1, x0
	isb

	set_sctlr_el1
	isb

#ifdef CFG_WITH_PAGER
	/*
	 * Move init code into correct location and move hashes to a
	 * temporary safe location until the heap is initialized.
	 *
	 * The binary is built as:
	 * [Pager code, rodata and data] : In correct location
	 * [Init code and rodata] : Should be copied to __init_start
	 * [struct boot_embdata + data] : Should be saved before
	 * initializing pager, first uint32_t tells the length of the data
	 */
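	/*
	 * Illustrative C sketch of the copy below (an assumption for
	 * documentation only, not part of the build):
	 *
	 *   size_t init_len = __init_end - __init_start;
	 *   size_t len = init_len + *(uint32_t *)(__data_end + init_len);
	 *
	 *   cached_mem_end = __init_start + len;
	 *   memmove(__init_start, __data_end, len);
	 */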
	adr	x0, __init_start	/* dst */
	adr	x1, __data_end		/* src */
	adr	x2, __init_end
	sub	x2, x2, x0		/* init len */
	ldr	w4, [x1, x2]		/* length of hashes etc */
	add	x2, x2, x4		/* length of init and hashes etc */
	/* Copy backwards (as memmove) in case we're overlapping */
	add	x0, x0, x2		/* __init_start + len */
	add	x1, x1, x2		/* __data_end + len */
	adr	x3, cached_mem_end
	str	x0, [x3]
	adr	x2, __init_start
copy_init:
	ldp	x3, x4, [x1, #-16]!
	stp	x3, x4, [x0, #-16]!
	cmp	x0, x2
	b.gt	copy_init
#else
	/*
	 * The binary is built as:
	 * [Core, rodata and data] : In correct location
	 * [struct boot_embdata + data] : Should be moved to __end, first
	 * uint32_t tells the length of the struct + data
	 */
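	/*
	 * Roughly, in C terms (an assumption for documentation only, not
	 * part of the build):
	 *
	 *   cached_mem_end = __end + total_len;
	 *   memmove(__end, __data_end, total_len);
	 */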
	adr_l	x0, __end		/* dst */
	adr_l	x1, __data_end		/* src */
	ldr	w2, [x1]		/* struct boot_embdata::total_len */
	/* Copy backwards (as memmove) in case we're overlapping */
	add	x0, x0, x2
	add	x1, x1, x2
	adr	x3, cached_mem_end
	str	x0, [x3]
	adr_l	x2, __end

copy_init:
	ldp	x3, x4, [x1, #-16]!
	stp	x3, x4, [x0, #-16]!
	cmp	x0, x2
	b.gt	copy_init
#endif

	/*
	 * Clear .bss, this code obviously depends on the linker keeping
	 * the start/end of .bss at least 8-byte aligned.
	 */
	adr_l	x0, __bss_start
	adr_l	x1, __bss_end
clear_bss:
	str	xzr, [x0], #8
	cmp	x0, x1
	b.lt	clear_bss

#ifdef CFG_VIRTUALIZATION
	/*
	 * Clear .nex_bss, this code obviously depends on the linker keeping
	 * the start/end of .nex_bss at least 8-byte aligned.
	 */
	adr	x0, __nex_bss_start
	adr	x1, __nex_bss_end
clear_nex_bss:
	str	xzr, [x0], #8
	cmp	x0, x1
	b.lt	clear_nex_bss
#endif

	/* Setup SP_EL0 and SP_EL1, SP will be set to SP_EL0 */
	set_sp

	bl	thread_init_thread_core_local

	/* Enable aborts now that we can receive exceptions */
	msr	daifclr, #DAIFBIT_ABT

	/*
	 * Invalidate dcache for all memory used during initialization to
	 * avoid nasty surprises when the cache is turned on. We must not
	 * invalidate memory not used by OP-TEE since we may invalidate
	 * entries used by, for instance, ARM Trusted Firmware.
	 */
	adr_l	x0, __text_start
	ldr	x1, cached_mem_end
	sub	x1, x1, x0
	bl	dcache_cleaninv_range

	/* Enable Console */
	bl	console_init

#ifdef CFG_CORE_ASLR
	mov	x0, x20
	bl	get_aslr_seed
#else
	mov	x0, #0
#endif

	adr	x1, boot_mmu_config
	bl	core_init_mmu_map

#ifdef CFG_CORE_ASLR
	/*
	 * Process the relocation information again, updating for the new
	 * offset. We're doing this now before the MMU is enabled as some
	 * of the memory will become write protected.
	 */
	ldr	x0, boot_mmu_config + CORE_MMU_CONFIG_LOAD_OFFSET
	/*
	 * Update the cached_mem_end address with the load offset since it
	 * was calculated before relocation.
	 */
	adr	x5, cached_mem_end
	ldr	x6, [x5]
	add	x6, x6, x0
	str	x6, [x5]
	bl	relocate
#endif

	bl	__get_core_pos
	bl	enable_mmu
#ifdef CFG_CORE_ASLR
	/*
	 * Reinitialize console, since register_serial_console() has
	 * previously registered a PA and with ASLR the VA is different
	 * from the PA.
	 */
	bl	console_init
#endif

#ifdef CFG_VIRTUALIZATION
	/*
	 * Initialize partition tables for each partition to
	 * default_partition which has been relocated now to a different VA
	 */
	bl	core_mmu_set_default_prtn_tbl
#endif

	mov	x0, x19		/* pageable part address */
	mov	x1, #-1
	bl	boot_init_primary_early
#ifndef CFG_VIRTUALIZATION
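	/*
	 * Switch to the boot thread's stack (the first entry in threads[])
	 * and clear the core local flags while boot_init_primary_late()
	 * runs, the temporary stack and THREAD_CLF_TMP are restored below.
	 */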
	mov	x21, sp
	adr_l	x0, threads
	ldr	x0, [x0, #THREAD_CTX_STACK_VA_END]
	mov	sp, x0
	bl	thread_get_core_local
	mov	x22, x0
	str	wzr, [x22, #THREAD_CORE_LOCAL_FLAGS]
#endif
	mov	x0, x20		/* DT address */
	bl	boot_init_primary_late
#ifndef CFG_VIRTUALIZATION
	mov	x0, #THREAD_CLF_TMP
	str	w0, [x22, #THREAD_CORE_LOCAL_FLAGS]
	mov	sp, x21
#endif

	/*
	 * In case we've touched memory that secondary CPUs will use before
	 * they have turned on their D-cache, clean and invalidate the
	 * D-cache before exiting to normal world.
	 */
	adr_l	x0, __text_start
	ldr	x1, cached_mem_end
	sub	x1, x1, x0
	bl	dcache_cleaninv_range

	/*
	 * Clear the current thread id now to allow the thread to be reused
	 * on the next entry. Matches thread_init_boot_thread() in boot.c.
	 */
#ifndef CFG_VIRTUALIZATION
	bl	thread_clr_boot_thread
#endif

#ifdef CFG_CORE_FFA
	adr	x0, cpu_on_handler
	/*
	 * Compensate for the load offset since cpu_on_handler() is
	 * called with MMU off.
	 */
	ldr	x1, boot_mmu_config + CORE_MMU_CONFIG_LOAD_OFFSET
	sub	x0, x0, x1
	bl	thread_spmc_register_secondary_ep
	b	thread_ffa_msg_wait
#else
	/*
	 * Pass the vector address returned from main_init
	 * Compensate for the load offset since cpu_on_handler() is
	 * called with MMU off.
	 */
	ldr	x0, boot_mmu_config + CORE_MMU_CONFIG_LOAD_OFFSET
	adr	x1, thread_vector_table
	sub	x1, x1, x0
	mov	x0, #TEESMC_OPTEED_RETURN_ENTRY_DONE
	smc	#0
	/* SMC should not return */
	panic_at_smc_return
#endif
END_FUNC _start
DECLARE_KEEP_INIT _start

	.section .identity_map.data
	.balign	8
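/*
 * End address of the memory touched while initializing, set in _start
 * above and used as the upper limit for the dcache_cleaninv_range()
 * calls there. Adjusted with the load offset when CFG_CORE_ASLR is
 * enabled.
 */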
LOCAL_DATA cached_mem_end , :
	.skip	8
END_DATA cached_mem_end

#ifdef CFG_CORE_ASLR
LOCAL_FUNC relocate , :
	/* x0 holds load offset */
#ifdef CFG_WITH_PAGER
	adr_l	x6, __init_end
#else
	adr_l	x6, __end
#endif
	ldp	w2, w3, [x6, #BOOT_EMBDATA_RELOC_OFFSET]

	mov_imm	x1, TEE_RAM_START
	add	x2, x2, x6	/* start of relocations */
	add	x3, x3, x2	/* end of relocations */

	/*
	 * Relocations are not formatted as Rela64, instead they are in a
	 * compressed format created by get_reloc_bin() in
	 * scripts/gen_tee_bin.py
	 *
	 * All the R_AARCH64_RELATIVE relocations are translated into a
	 * list of 32-bit offsets from TEE_RAM_START. Each offset points
	 * out a 64-bit value that is increased with the load offset.
	 */
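	/*
	 * Illustrative C sketch of the loop below, ignoring the pager
	 * limit (an assumption for documentation only, not part of the
	 * build):
	 *
	 *   for (uint32_t *p = start; p != end; p++)
	 *	*(uint64_t *)(TEE_RAM_START + *p) += load_offset;
	 */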

#ifdef CFG_WITH_PAGER
	/*
	 * With pager enabled we can only relocate the pager and init
	 * parts, the rest has to be done when a page is populated.
	 */
	sub	x6, x6, x1
#endif

	b	2f
	/* Loop over the relocation addresses and process all entries */
1:	ldr	w4, [x2], #4
#ifdef CFG_WITH_PAGER
	/* Skip too large addresses */
	cmp	x4, x6
	b.ge	2f
#endif
	add	x4, x4, x1
	ldr	x5, [x4]
	add	x5, x5, x0
	str	x5, [x4]

2:	cmp	x2, x3
	b.ne	1b

	ret
END_FUNC relocate
#endif


/*
 * void enable_mmu(unsigned long core_pos);
 *
 * This function depends on being mapped within the identity map, where
 * the physical and virtual addresses are the same. After the MMU has
 * been enabled the instruction pointer will be updated to execute at
 * the new offset instead. Stack pointers and the return address are
 * updated.
 */
LOCAL_FUNC enable_mmu , : , .identity_map
	adr	x1, boot_mmu_config
	load_xregs x1, 0, 2, 6
	/*
	 * x0 = core_pos
	 * x2 = tcr_el1
	 * x3 = mair_el1
	 * x4 = ttbr0_el1_base
	 * x5 = ttbr0_core_offset
	 * x6 = load_offset
	 */
	msr	tcr_el1, x2
	msr	mair_el1, x3

	/*
	 * ttbr0_el1 = ttbr0_el1_base + ttbr0_core_offset * core_pos
	 */
	madd	x1, x5, x0, x4
	msr	ttbr0_el1, x1
	msr	ttbr1_el1, xzr
	isb

	/* Invalidate TLB */
	tlbi	vmalle1

	/*
	 * Make sure translation table writes have drained into memory and
	 * the TLB invalidation is complete.
	 */
	dsb	sy
	isb

	/* Enable the MMU */
	mrs	x1, sctlr_el1
	orr	x1, x1, #SCTLR_M
	msr	sctlr_el1, x1
	isb

	/* Update vbar */
	mrs	x1, vbar_el1
	add	x1, x1, x6
	msr	vbar_el1, x1
	isb

	/* Invalidate instruction cache and branch predictor */
	ic	iallu
	isb

	/* Enable I and D cache */
	mrs	x1, sctlr_el1
	orr	x1, x1, #SCTLR_I
	orr	x1, x1, #SCTLR_C
	msr	sctlr_el1, x1
	isb

	/* Adjust stack pointers and return address */
	msr	spsel, #1
	add	sp, sp, x6
	msr	spsel, #0
	add	sp, sp, x6
	add	x30, x30, x6

	ret
END_FUNC enable_mmu

	.section .identity_map.data
	.balign	8
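/*
 * Filled in by core_init_mmu_map() in _start above and read by
 * enable_mmu(). The load offset stored here is also used for the ASLR
 * compensation in _start and in relocate().
 */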
DATA boot_mmu_config , : /* struct core_mmu_config */
	.skip	CORE_MMU_CONFIG_SIZE
END_DATA boot_mmu_config

FUNC cpu_on_handler , :
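	/*
	 * Preserve the arguments and the return address across the
	 * function calls below.
	 */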
	mov	x19, x0
	mov	x20, x1
	mov	x21, x30

	adr	x0, reset_vect_table
	msr	vbar_el1, x0
	isb

	set_sctlr_el1
	isb

	/* Enable aborts now that we can receive exceptions */
	msr	daifclr, #DAIFBIT_ABT

	bl	__get_core_pos
	bl	enable_mmu

	/* Setup SP_EL0 and SP_EL1, SP will be set to SP_EL0 */
	set_sp

	mov	x0, x19
	mov	x1, x20
#ifdef CFG_CORE_FFA
	bl	boot_cpu_on_handler
	b	thread_ffa_msg_wait
#else
	mov	x30, x21
	b	boot_cpu_on_handler
#endif
END_FUNC cpu_on_handler
DECLARE_KEEP_PAGER cpu_on_handler

LOCAL_FUNC unhandled_cpu , :
	wfi
	b	unhandled_cpu
END_FUNC unhandled_cpu

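/*
 * Link-time offset from stack_tmp_rel to stack_tmp, minus
 * STACK_TMP_GUARD. set_sp adds the runtime address of stack_tmp_rel to
 * this value, which yields stack_tmp - STACK_TMP_GUARD regardless of
 * the load address.
 */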
LOCAL_DATA stack_tmp_rel , :
	.word	stack_tmp - stack_tmp_rel - STACK_TMP_GUARD
END_DATA stack_tmp_rel

	/*
	 * This macro verifies that a given vector doesn't exceed the
	 * architectural limit of 32 instructions. It is meant to be placed
	 * immediately after the last instruction in the vector and takes
	 * the vector entry as its parameter.
	 */
	.macro check_vector_size since
	  .if (. - \since) > (32 * 4)
	    .error "Vector exceeds 32 instructions"
	  .endif
	.endm

	.section .identity_map, "ax", %progbits
	.align	11
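/*
 * Vector table used during early boot, installed from _start and
 * cpu_on_handler() above. Each entry simply branches to itself, parking
 * the core, since no exceptions are expected this early during boot.
 */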
LOCAL_FUNC reset_vect_table , :, .identity_map, , nobti
	/* -----------------------------------------------------
	 * Current EL with SP0 : 0x0 - 0x180
	 * -----------------------------------------------------
	 */
SynchronousExceptionSP0:
	b	SynchronousExceptionSP0
	check_vector_size SynchronousExceptionSP0

	.align	7
IrqSP0:
	b	IrqSP0
	check_vector_size IrqSP0

	.align	7
FiqSP0:
	b	FiqSP0
	check_vector_size FiqSP0

	.align	7
SErrorSP0:
	b	SErrorSP0
	check_vector_size SErrorSP0

	/* -----------------------------------------------------
	 * Current EL with SPx: 0x200 - 0x380
	 * -----------------------------------------------------
	 */
	.align	7
SynchronousExceptionSPx:
	b	SynchronousExceptionSPx
	check_vector_size SynchronousExceptionSPx

	.align	7
IrqSPx:
	b	IrqSPx
	check_vector_size IrqSPx

	.align	7
FiqSPx:
	b	FiqSPx
	check_vector_size FiqSPx

	.align	7
SErrorSPx:
	b	SErrorSPx
	check_vector_size SErrorSPx

	/* -----------------------------------------------------
	 * Lower EL using AArch64 : 0x400 - 0x580
	 * -----------------------------------------------------
	 */
	.align	7
SynchronousExceptionA64:
	b	SynchronousExceptionA64
	check_vector_size SynchronousExceptionA64

	.align	7
IrqA64:
	b	IrqA64
	check_vector_size IrqA64

	.align	7
FiqA64:
	b	FiqA64
	check_vector_size FiqA64

	.align	7
SErrorA64:
	b	SErrorA64
	check_vector_size SErrorA64

	/* -----------------------------------------------------
	 * Lower EL using AArch32 : 0x600 - 0x780
	 * -----------------------------------------------------
	 */
	.align	7
SynchronousExceptionA32:
	b	SynchronousExceptionA32
	check_vector_size SynchronousExceptionA32

	.align	7
IrqA32:
	b	IrqA32
	check_vector_size IrqA32

	.align	7
FiqA32:
	b	FiqA32
	check_vector_size FiqA32

	.align	7
SErrorA32:
	b	SErrorA32
	check_vector_size SErrorA32

END_FUNC reset_vect_table

BTI(emit_aarch64_feature_1_and     GNU_PROPERTY_AARCH64_FEATURE_1_BTI)
