/* SPDX-License-Identifier: BSD-2-Clause */
/*
 * Copyright (c) 2015, Linaro Limited
 * Copyright (c) 2021, Arm Limited
 */

#include <platform_config.h>

#include <arm64_macros.S>
#include <arm.h>
#include <asm.S>
#include <generated/asm-defines.h>
#include <keep.h>
#include <sm/optee_smc.h>
#include <sm/teesmc_opteed.h>
#include <sm/teesmc_opteed_macros.h>

#include "thread_private.h"

	/*
	 * Set up SP_EL0 and SP_EL1, SP will be set to SP_EL0.
	 * SP_EL0 is assigned stack_tmp_export + cpu_id * stack_tmp_stride
	 * SP_EL1 is assigned thread_core_local[cpu_id]
	 */
	.macro set_sp
		bl	__get_core_pos
		cmp	x0, #CFG_TEE_CORE_NB_CORE
		/* Unsupported CPU, park it before it breaks something */
		bge	unhandled_cpu
		adr_l	x1, stack_tmp_stride
		ldr	w1, [x1]
		mul	x1, x0, x1
		adr_l	x0, stack_tmp_export
		ldr	x0, [x0]
		msr	spsel, #0
		add	sp, x1, x0
		bl	thread_get_core_local
		msr	spsel, #1
		mov	sp, x0
		msr	spsel, #0
	.endm

	.macro set_sctlr_el1
		mrs	x0, sctlr_el1
		orr	x0, x0, #SCTLR_I
		orr	x0, x0, #SCTLR_SA
		orr	x0, x0, #SCTLR_SPAN
#if defined(CFG_CORE_RWDATA_NOEXEC)
		orr	x0, x0, #SCTLR_WXN
#endif
#if defined(CFG_SCTLR_ALIGNMENT_CHECK)
		orr	x0, x0, #SCTLR_A
#else
		bic	x0, x0, #SCTLR_A
#endif
		msr	sctlr_el1, x0
	.endm

FUNC _start , :
#if defined(CFG_CORE_SEL1_SPMC)
	/*
	 * With OP-TEE as SPMC at S-EL1, the SPMD (SPD_spmd) in TF-A passes
	 * the DTB in x0 and the pageable part in x1; the rest of the
	 * registers are unused.
	 */
	mov	x19, x1		/* Save pageable part */
	mov	x20, x0		/* Save DT address */
#else
	mov	x19, x0		/* Save pageable part address */
#if defined(CFG_DT_ADDR)
	ldr	x20, =CFG_DT_ADDR
#else
	mov	x20, x2		/* Save DT address */
#endif
#endif

	adr	x0, reset_vect_table
	msr	vbar_el1, x0
	isb

	set_sctlr_el1
	isb

#ifdef CFG_WITH_PAGER
	/*
	 * Move init code into its correct location and move the hashes to
	 * a temporary safe location until the heap is initialized.
	 *
	 * The binary is built as:
	 * [Pager code, rodata and data] : In correct location
	 * [Init code and rodata] : Should be copied to __init_start
	 * [struct boot_embdata + data] : Should be saved before
	 * initializing pager, first uint32_t tells the length of the data
	 */
	adr	x0, __init_start	/* dst */
	adr	x1, __data_end		/* src */
	adr	x2, __init_end
	sub	x2, x2, x0		/* init len */
	ldr	w4, [x1, x2]		/* length of hashes etc */
	add	x2, x2, x4		/* length of init and hashes etc */
	/* Copy backwards (as memmove) in case we're overlapping */
	add	x0, x0, x2		/* __init_start + len */
	add	x1, x1, x2		/* __data_end + len */
	adr	x3, cached_mem_end
	str	x0, [x3]
	adr	x2, __init_start
copy_init:
	ldp	x3, x4, [x1, #-16]!
	stp	x3, x4, [x0, #-16]!
	cmp	x0, x2
	b.gt	copy_init
#else
	/*
	 * The binary is built as:
	 * [Core, rodata and data] : In correct location
	 * [struct boot_embdata + data] : Should be moved to __end, first
	 * uint32_t tells the length of the struct + data
	 */
	adr_l	x0, __end	/* dst */
	adr_l	x1, __data_end	/* src */
	ldr	w2, [x1]	/* struct boot_embdata::total_len */
	/* Copy backwards (as memmove) in case we're overlapping */
	add	x0, x0, x2
	add	x1, x1, x2
	adr	x3, cached_mem_end
	str	x0, [x3]
	adr_l	x2, __end

copy_init:
	ldp	x3, x4, [x1, #-16]!
	stp	x3, x4, [x0, #-16]!
	cmp	x0, x2
	b.gt	copy_init
#endif
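
	/*
	 * For reference, the embedded data copied above (and consumed by
	 * relocate() below) begins with struct boot_embdata. A minimal
	 * sketch of the layout implied by this file only: the first
	 * uint32_t holds the total length and BOOT_EMBDATA_RELOC_OFFSET
	 * selects an offset/length pair describing the relocation data.
	 * The authoritative definition lives in the C headers and may
	 * contain additional fields:
	 *
	 *	struct boot_embdata {
	 *		uint32_t total_len;	// struct + trailing data
	 *		...
	 *		uint32_t reloc_offset;	// relative to this struct
	 *		uint32_t reloc_len;
	 *	};
	 */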

	/*
	 * Clear .bss, this code obviously depends on the linker keeping
	 * the start/end of .bss at least 8 byte aligned.
	 */
	adr_l	x0, __bss_start
	adr_l	x1, __bss_end
clear_bss:
	str	xzr, [x0], #8
	cmp	x0, x1
	b.lt	clear_bss

#ifdef CFG_VIRTUALIZATION
	/*
	 * Clear .nex_bss, this code obviously depends on the linker keeping
	 * the start/end of .nex_bss at least 8 byte aligned.
	 */
	adr	x0, __nex_bss_start
	adr	x1, __nex_bss_end
clear_nex_bss:
	str	xzr, [x0], #8
	cmp	x0, x1
	b.lt	clear_nex_bss
#endif

	/* Set up SP_EL0 and SP_EL1, SP will be set to SP_EL0 */
	set_sp

	bl	thread_init_thread_core_local

	/* Enable aborts now that we can receive exceptions */
	msr	daifclr, #DAIFBIT_ABT

	/*
	 * Invalidate dcache for all memory used during initialization to
	 * avoid nasty surprises when the cache is turned on. We must not
	 * invalidate memory not used by OP-TEE since we may invalidate
	 * entries used by for instance ARM Trusted Firmware.
	 */
	adr_l	x0, __text_start
	ldr	x1, cached_mem_end
	sub	x1, x1, x0
	bl	dcache_cleaninv_range

	/* Enable console */
	bl	console_init

#ifdef CFG_CORE_ASLR
	mov	x0, x20
	bl	get_aslr_seed
#else
	mov	x0, #0
#endif

	adr	x1, boot_mmu_config
	bl	core_init_mmu_map
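
	/*
	 * A rough C view of the call above, inferred from the register
	 * usage here (x0 = ASLR seed, or 0 without CFG_CORE_ASLR, and
	 * x1 = &boot_mmu_config); see the C sources for the authoritative
	 * prototype:
	 *
	 *	void core_init_mmu_map(unsigned long seed,
	 *			       struct core_mmu_config *cfg);
	 *
	 * It is expected to fill in boot_mmu_config, which enable_mmu()
	 * below consumes.
	 */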

#ifdef CFG_CORE_ASLR
	/*
	 * Process the relocation information again, updating for the new
	 * offset. We're doing this now, before the MMU is enabled, as some
	 * of the memory will become write protected.
	 */
	ldr	x0, boot_mmu_config + CORE_MMU_CONFIG_LOAD_OFFSET
	/*
	 * Update the cached_mem_end address with the load offset since it
	 * was calculated before relocation.
	 */
	adr	x5, cached_mem_end
	ldr	x6, [x5]
	add	x6, x6, x0
	str	x6, [x5]
	bl	relocate
#endif

	bl	__get_core_pos
	bl	enable_mmu
#ifdef CFG_CORE_ASLR
	/*
	 * Reinitialize the console, since register_serial_console() has
	 * previously registered a PA and with ASLR the VA is different
	 * from the PA.
	 */
	bl	console_init
#endif

#ifdef CFG_VIRTUALIZATION
	/*
	 * Initialize the partition tables for each partition to
	 * default_partition, which has now been relocated to a different VA
	 */
	bl	core_mmu_set_default_prtn_tbl
#endif

	mov	x0, x19		/* pageable part address */
	mov	x1, #-1
	bl	boot_init_primary_early
#ifndef CFG_VIRTUALIZATION
	/*
	 * Run boot_init_primary_late() on the boot thread's stack with the
	 * core local flags cleared, then set THREAD_CLF_TMP again and
	 * switch back to the temporary stack.
	 */
	mov	x21, sp
	adr_l	x0, threads
	ldr	x0, [x0, #THREAD_CTX_STACK_VA_END]
	mov	sp, x0
	bl	thread_get_core_local
	mov	x22, x0
	str	wzr, [x22, #THREAD_CORE_LOCAL_FLAGS]
#endif
	mov	x0, x20		/* DT address */
	bl	boot_init_primary_late
#ifndef CFG_VIRTUALIZATION
	mov	x0, #THREAD_CLF_TMP
	str	w0, [x22, #THREAD_CORE_LOCAL_FLAGS]
	mov	sp, x21
#endif

	/*
	 * In case we've touched memory that secondary CPUs will use before
	 * they have turned on their D-cache, clean and invalidate the
	 * D-cache before exiting to normal world.
	 */
	adr_l	x0, __text_start
	ldr	x1, cached_mem_end
	sub	x1, x1, x0
	bl	dcache_cleaninv_range

	/*
	 * Clear the current thread id now to allow the thread to be reused
	 * on next entry. Matches thread_init_boot_thread() in boot.c.
	 */
#ifndef CFG_VIRTUALIZATION
	bl	thread_clr_boot_thread
#endif

#ifdef CFG_CORE_FFA
	adr	x0, cpu_on_handler
	/*
	 * Compensate for the load offset since cpu_on_handler() is
	 * called with MMU off.
	 */
	ldr	x1, boot_mmu_config + CORE_MMU_CONFIG_LOAD_OFFSET
	sub	x0, x0, x1
	bl	ffa_secondary_cpu_ep_register
	b	thread_ffa_msg_wait
#else
	/*
	 * Pass the vector address returned from main_init.
	 * Compensate for the load offset since the address handed to the
	 * monitor must be valid with the MMU off.
	 */
	ldr	x0, boot_mmu_config + CORE_MMU_CONFIG_LOAD_OFFSET
	adr	x1, thread_vector_table
	sub	x1, x1, x0
	mov	x0, #TEESMC_OPTEED_RETURN_ENTRY_DONE
	smc	#0
	/* SMC should not return */
	panic_at_smc_return
#endif
END_FUNC _start
DECLARE_KEEP_INIT _start

	.balign	8
LOCAL_DATA cached_mem_end , :
	.skip	8
END_DATA cached_mem_end

#ifdef CFG_CORE_ASLR
LOCAL_FUNC relocate , :
	/* x0 holds load offset */
#ifdef CFG_WITH_PAGER
	adr_l	x6, __init_end
#else
	adr_l	x6, __end
#endif
	ldp	w2, w3, [x6, #BOOT_EMBDATA_RELOC_OFFSET]

	mov_imm	x1, TEE_RAM_START
	add	x2, x2, x6	/* start of relocations */
	add	x3, x3, x2	/* end of relocations */

	/*
	 * Relocations are not formatted as Rela64, instead they are in a
	 * compressed format created by get_reloc_bin() in
	 * scripts/gen_tee_bin.py.
	 *
	 * All the R_AARCH64_RELATIVE relocations are translated into a
	 * list of 32-bit offsets from TEE_RAM_START. Each offset points at
	 * a 64-bit value which is increased with the load offset.
	 */

#ifdef CFG_WITH_PAGER
	/*
	 * With pager enabled we can only relocate the pager and init
	 * parts, the rest has to be done when a page is populated.
	 */
	sub	x6, x6, x1
#endif

	b	2f
	/* Loop over the relocation addresses and process all entries */
1:	ldr	w4, [x2], #4
#ifdef CFG_WITH_PAGER
	/* Skip too large addresses */
	cmp	x4, x6
	b.ge	2f
#endif
	add	x4, x4, x1
	ldr	x5, [x4]
	add	x5, x5, x0
	str	x5, [x4]

2:	cmp	x2, x3
	b.ne	1b

	ret
END_FUNC relocate
#endif
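
	/*
	 * For reference, the relocation loop above corresponds roughly to
	 * the following C (illustrative sketch only; entry, end and
	 * load_offset mirror x2, x3 and x0 above; with CFG_WITH_PAGER,
	 * offsets beyond the init end are skipped):
	 *
	 *	uint32_t *entry = start_of_relocs;
	 *
	 *	while (entry < end_of_relocs) {
	 *		uint64_t *va = (uint64_t *)(TEE_RAM_START + *entry++);
	 *
	 *		*va += load_offset;
	 *	}
	 */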

/*
 * void enable_mmu(unsigned long core_pos);
 *
 * This function depends on being mapped within the identity map, where
 * the physical and virtual addresses are the same. After the MMU has
 * been enabled the instruction pointer continues executing at the new
 * offset instead. Stack pointers and the return address are updated.
 */
LOCAL_FUNC enable_mmu , : , .identity_map
	adr	x1, boot_mmu_config
	load_xregs x1, 0, 2, 6
	/*
	 * x0 = core_pos
	 * x2 = tcr_el1
	 * x3 = mair_el1
	 * x4 = ttbr0_el1_base
	 * x5 = ttbr0_core_offset
	 * x6 = load_offset
	 */
	msr	tcr_el1, x2
	msr	mair_el1, x3

	/*
	 * ttbr0_el1 = ttbr0_el1_base + ttbr0_core_offset * core_pos
	 */
	madd	x1, x5, x0, x4
	msr	ttbr0_el1, x1
	msr	ttbr1_el1, xzr
	isb

	/* Invalidate TLB */
	tlbi	vmalle1

	/*
	 * Make sure translation table writes have drained into memory and
	 * the TLB invalidation is complete.
	 */
	dsb	sy
	isb

	/* Enable the MMU */
	mrs	x1, sctlr_el1
	orr	x1, x1, #SCTLR_M
	msr	sctlr_el1, x1
	isb

	/* Update vbar */
	mrs	x1, vbar_el1
	add	x1, x1, x6
	msr	vbar_el1, x1
	isb

	/* Invalidate instruction cache and branch predictor */
	ic	iallu
	isb

	/* Enable I and D cache */
	mrs	x1, sctlr_el1
	orr	x1, x1, #SCTLR_I
	orr	x1, x1, #SCTLR_C
	msr	sctlr_el1, x1
	isb

	/* Adjust stack pointers and return address */
	msr	spsel, #1
	add	sp, sp, x6
	msr	spsel, #0
	add	sp, sp, x6
	add	x30, x30, x6

	ret
END_FUNC enable_mmu

	.balign	8
DATA boot_mmu_config , :	/* struct core_mmu_config */
	.skip	CORE_MMU_CONFIG_SIZE
END_DATA boot_mmu_config
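
	/*
	 * A rough C view of boot_mmu_config above, as suggested by the
	 * register list in enable_mmu() (load_xregs reads five consecutive
	 * 64-bit values); the authoritative definition of
	 * struct core_mmu_config lives in the C headers:
	 *
	 *	struct core_mmu_config {
	 *		uint64_t tcr_el1;
	 *		uint64_t mair_el1;
	 *		uint64_t ttbr0_el1_base;
	 *		uint64_t ttbr0_core_offset;
	 *		uint64_t load_offset;
	 *	};
	 */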

FUNC cpu_on_handler , :
	mov	x19, x0
	mov	x20, x1
	mov	x21, x30

	adr	x0, reset_vect_table
	msr	vbar_el1, x0
	isb

	set_sctlr_el1
	isb

	/* Enable aborts now that we can receive exceptions */
	msr	daifclr, #DAIFBIT_ABT

	bl	__get_core_pos
	bl	enable_mmu

	/* Set up SP_EL0 and SP_EL1, SP will be set to SP_EL0 */
	set_sp

	mov	x0, x19
	mov	x1, x20
#ifdef CFG_CORE_FFA
	bl	boot_cpu_on_handler
	b	thread_ffa_msg_wait
#else
	mov	x30, x21
	b	boot_cpu_on_handler
#endif
END_FUNC cpu_on_handler
DECLARE_KEEP_PAGER cpu_on_handler

LOCAL_FUNC unhandled_cpu , :
	wfi
	b	unhandled_cpu
END_FUNC unhandled_cpu

	/*
	 * This macro verifies that a given vector doesn't exceed the
	 * architectural limit of 32 instructions. It is meant to be placed
	 * immediately after the last instruction in the vector and takes
	 * the vector entry point as its parameter.
	 */
	.macro check_vector_size since
	  .if (. - \since) > (32 * 4)
	    .error "Vector exceeds 32 instructions"
	  .endif
	.endm

	.section .identity_map, "ax", %progbits
	.align	11
LOCAL_FUNC reset_vect_table , :, .identity_map, , nobti
	/* -----------------------------------------------------
	 * Current EL with SP0 : 0x0 - 0x180
	 * -----------------------------------------------------
	 */
SynchronousExceptionSP0:
	b	SynchronousExceptionSP0
	check_vector_size SynchronousExceptionSP0

	.align	7
IrqSP0:
	b	IrqSP0
	check_vector_size IrqSP0

	.align	7
FiqSP0:
	b	FiqSP0
	check_vector_size FiqSP0

	.align	7
SErrorSP0:
	b	SErrorSP0
	check_vector_size SErrorSP0

	/* -----------------------------------------------------
	 * Current EL with SPx : 0x200 - 0x380
	 * -----------------------------------------------------
	 */
	.align	7
SynchronousExceptionSPx:
	b	SynchronousExceptionSPx
	check_vector_size SynchronousExceptionSPx

	.align	7
IrqSPx:
	b	IrqSPx
	check_vector_size IrqSPx

	.align	7
FiqSPx:
	b	FiqSPx
	check_vector_size FiqSPx

	.align	7
SErrorSPx:
	b	SErrorSPx
	check_vector_size SErrorSPx

	/* -----------------------------------------------------
	 * Lower EL using AArch64 : 0x400 - 0x580
	 * -----------------------------------------------------
	 */
	.align	7
SynchronousExceptionA64:
	b	SynchronousExceptionA64
	check_vector_size SynchronousExceptionA64

	.align	7
IrqA64:
	b	IrqA64
	check_vector_size IrqA64

	.align	7
FiqA64:
	b	FiqA64
	check_vector_size FiqA64

	.align	7
SErrorA64:
	b	SErrorA64
	check_vector_size SErrorA64

	/* -----------------------------------------------------
	 * Lower EL using AArch32 : 0x600 - 0x780
	 * -----------------------------------------------------
	 */
	.align	7
SynchronousExceptionA32:
	b	SynchronousExceptionA32
	check_vector_size SynchronousExceptionA32

	.align	7
IrqA32:
	b	IrqA32
	check_vector_size IrqA32

	.align	7
FiqA32:
	b	FiqA32
	check_vector_size FiqA32

	.align	7
SErrorA32:
	b	SErrorA32
	check_vector_size SErrorA32

END_FUNC reset_vect_table

BTI(emit_aarch64_feature_1_and GNU_PROPERTY_AARCH64_FEATURE_1_BTI)