/* SPDX-License-Identifier: BSD-2-Clause */
/*
 * Copyright (c) 2015-2022, Linaro Limited
 * Copyright (c) 2021-2023, Arm Limited
 */

#include <platform_config.h>

#include <arm64_macros.S>
#include <arm.h>
#include <asm.S>
#include <generated/asm-defines.h>
#include <keep.h>
#include <kernel/thread_private.h>
#include <mm/core_mmu.h>
#include <sm/optee_smc.h>
#include <sm/teesmc_opteed.h>
#include <sm/teesmc_opteed_macros.h>

        /*
         * Setup SP_EL0 and SP_EL1, SP will be set to SP_EL0.
         * SP_EL0 is assigned:
         *   stack_tmp + (cpu_id + 1) * stack_tmp_stride - STACK_TMP_GUARD
         * SP_EL1 is assigned thread_core_local[cpu_id]
         */
        .macro set_sp
                bl __get_core_pos
                cmp x0, #CFG_TEE_CORE_NB_CORE
                /* Unsupported CPU, park it before it breaks something */
                bge unhandled_cpu
                add x0, x0, #1
                adr_l x1, stack_tmp_stride
                ldr w1, [x1]
                mul x1, x0, x1

                /* x0 = stack_tmp - STACK_TMP_GUARD */
                adr_l x2, stack_tmp_rel
                ldr w0, [x2]
                add x0, x0, x2

                msr spsel, #0
                add sp, x1, x0
                bl thread_get_core_local
                msr spsel, #1
                mov sp, x0
                msr spsel, #0
        .endm
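
        /*
         * Illustrative C sketch (not code that exists elsewhere) of what
         * set_sp computes, using the same symbol names as above:
         *
         *   vaddr_t sp_el0 = (vaddr_t)stack_tmp +
         *                    (cpu_id + 1) * stack_tmp_stride -
         *                    STACK_TMP_GUARD;
         *   vaddr_t sp_el1 = (vaddr_t)thread_get_core_local();
         *
         * The stack_tmp base is found PC-relative via the stack_tmp_rel
         * word defined further down in this file, so this works before
         * relocation and before the MMU is enabled.
         */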

        .macro read_feat_mte reg
                mrs \reg, id_aa64pfr1_el1
                ubfx \reg, \reg, #ID_AA64PFR1_EL1_MTE_SHIFT, #4
        .endm

        .macro set_sctlr_el1
                mrs x0, sctlr_el1
                orr x0, x0, #SCTLR_I
                orr x0, x0, #SCTLR_SA
                orr x0, x0, #SCTLR_SPAN
#if defined(CFG_CORE_RWDATA_NOEXEC)
                orr x0, x0, #SCTLR_WXN
#endif
#if defined(CFG_SCTLR_ALIGNMENT_CHECK)
                orr x0, x0, #SCTLR_A
#else
                bic x0, x0, #SCTLR_A
#endif
#ifdef CFG_MEMTAG
                read_feat_mte x1
                cmp w1, #1
                b.ls 111f
                orr x0, x0, #(SCTLR_ATA | SCTLR_ATA0)
                bic x0, x0, #SCTLR_TCF_MASK
                bic x0, x0, #SCTLR_TCF0_MASK
111:
#endif
#if defined(CFG_TA_PAUTH) && defined(CFG_TA_BTI)
                orr x0, x0, #SCTLR_BT0
#endif
#if defined(CFG_CORE_PAUTH) && defined(CFG_CORE_BTI)
                orr x0, x0, #SCTLR_BT1
#endif
                msr sctlr_el1, x0
        .endm

        .macro init_memtag_per_cpu
                read_feat_mte x0
                cmp w0, #1
                b.ls 11f

#ifdef CFG_TEE_CORE_DEBUG
                /*
                 * This together with GCR_EL1.RRND = 0 will make the tags
                 * acquired with the irg instruction deterministic.
                 */
                mov_imm x0, 0xcafe00
                msr rgsr_el1, x0
                /* Avoid tag = 0x0 and 0xf */
                mov x0, #0
#else
                /*
                 * Still avoid tag = 0x0 and 0xf as we use that tag for
                 * everything which isn't explicitly tagged. Setting
                 * GCR_EL1.RRND = 1 to allow an implementation specific
                 * method of generating the tags.
                 */
                mov x0, #GCR_EL1_RRND
#endif
                orr x0, x0, #1
                orr x0, x0, #(1 << 15)
                msr gcr_el1, x0

                /*
                 * Enable the tag checks on the current CPU.
                 *
                 * Depends on boot_init_memtag() having cleared tags for
                 * TEE core memory. Well, not really, addresses with the
                 * tag value 0b0000 will use unchecked access due to
                 * TCR_TCMA0.
                 */
                mrs x0, tcr_el1
                orr x0, x0, #TCR_TBI0
                orr x0, x0, #TCR_TCMA0
                msr tcr_el1, x0

                mrs x0, sctlr_el1
                orr x0, x0, #SCTLR_TCF_SYNC
                orr x0, x0, #SCTLR_TCF0_SYNC
                msr sctlr_el1, x0

                isb
11:
        .endm
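
        /*
         * Sketch of the GCR_EL1 value built in init_memtag_per_cpu above,
         * assuming the Exclude field occupies bits [15:0] as described in
         * the Arm ARM:
         *
         *   gcr_el1 = rrnd_bit_or_zero | BIT(0) | BIT(15);
         *
         * i.e. tag values 0x0 and 0xf are excluded from what the irg
         * instruction may generate, matching the comments above.
         * (rrnd_bit_or_zero is just a placeholder for the #ifdef'ed value.)
         */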

        .macro init_pauth_per_cpu
                msr spsel, #1
                ldp x0, x1, [sp, #THREAD_CORE_LOCAL_KEYS]
                msr spsel, #0
                write_apiakeyhi x0
                write_apiakeylo x1
                mrs x0, sctlr_el1
                orr x0, x0, #SCTLR_ENIA
                msr sctlr_el1, x0
                isb
        .endm

FUNC _start , :
        /*
         * If CFG_CORE_FFA is enabled, then x0 if non-NULL holds the TOS FW
         * config [1] address, else x0 if non-NULL holds the pagable part
         * address.
         *
         * [1] A TF-A concept: TOS_FW_CONFIG - Trusted OS Firmware
         * configuration file. Used by Trusted OS (BL32), that is, OP-TEE
         * here.
         */
        mov x19, x0
#if defined(CFG_DT_ADDR)
        ldr x20, =CFG_DT_ADDR
#else
        mov x20, x2 /* Save DT address */
#endif

        adr x0, reset_vect_table
        msr vbar_el1, x0
        isb

        set_sctlr_el1
        isb

#ifdef CFG_WITH_PAGER
        /*
         * Move init code into correct location and move hashes to a
         * temporary safe location until the heap is initialized.
         *
         * The binary is built as:
         * [Pager code, rodata and data] : In correct location
         * [Init code and rodata] : Should be copied to __init_start
         * [struct boot_embdata + data] : Should be saved before
         * initializing pager, first uint32_t tells the length of the data
         */
        adr x0, __init_start /* dst */
        adr x1, __data_end /* src */
        adr x2, __init_end
        sub x2, x2, x0 /* init len */
        ldr w4, [x1, x2] /* length of hashes etc */
        add x2, x2, x4 /* length of init and hashes etc */
        /* Copy backwards (as memmove) in case we're overlapping */
        add x0, x0, x2 /* __init_start + len */
        add x1, x1, x2 /* __data_end + len */
        adr x3, cached_mem_end
        str x0, [x3]
        adr x2, __init_start
copy_init:
        ldp x3, x4, [x1, #-16]!
        stp x3, x4, [x0, #-16]!
        cmp x0, x2
        b.gt copy_init
#else
        /*
         * The binary is built as:
         * [Core, rodata and data] : In correct location
         * [struct boot_embdata + data] : Should be moved to __end, first
         * uint32_t tells the length of the struct + data
         */
        adr_l x0, __end /* dst */
        adr_l x1, __data_end /* src */
        ldr w2, [x1] /* struct boot_embdata::total_len */
        /* Copy backwards (as memmove) in case we're overlapping */
        add x0, x0, x2
        add x1, x1, x2
        adr x3, cached_mem_end
        str x0, [x3]
        adr_l x2, __end

copy_init:
        ldp x3, x4, [x1, #-16]!
        stp x3, x4, [x0, #-16]!
        cmp x0, x2
        b.gt copy_init
#endif
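
        /*
         * Illustrative C view of the copy loops above (hypothetical names;
         * the assembly moves 16 bytes per iteration with ldp/stp):
         *
         *   uint64_t *d = (uint64_t *)(dst + len);
         *   uint64_t *s = (uint64_t *)(src + len);
         *
         *   while ((vaddr_t)d > (vaddr_t)dst)
         *           *--d = *--s;
         *
         * Copying downwards from the end is what makes the overlap safe
         * here, since the destination is at or above the source.
         */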

        /*
         * Clear .bss, this code obviously depends on the linker keeping
         * start/end of .bss at least 8 byte aligned.
         */
        adr_l x0, __bss_start
        adr_l x1, __bss_end
clear_bss:
        str xzr, [x0], #8
        cmp x0, x1
        b.lt clear_bss

#ifdef CFG_NS_VIRTUALIZATION
        /*
         * Clear .nex_bss, this code obviously depends on the linker keeping
         * start/end of .nex_bss at least 8 byte aligned.
         */
        adr x0, __nex_bss_start
        adr x1, __nex_bss_end
clear_nex_bss:
        str xzr, [x0], #8
        cmp x0, x1
        b.lt clear_nex_bss
#endif

        /* Setup SP_EL0 and SP_EL1, SP will be set to SP_EL0 */
        set_sp

        bl thread_init_thread_core_local

        /* Enable aborts now that we can receive exceptions */
        msr daifclr, #DAIFBIT_ABT

        /*
         * Invalidate dcache for all memory used during initialization to
         * avoid nasty surprises when the cache is turned on. We must not
         * invalidate memory not used by OP-TEE since we may invalidate
         * entries used by, for instance, ARM Trusted Firmware.
         */
        adr_l x0, __text_start
        ldr x1, cached_mem_end
        sub x1, x1, x0
        bl dcache_cleaninv_range

        /* Enable Console */
        bl console_init

#ifdef CFG_MEMTAG
        /*
         * If FEAT_MTE2 is available, initialize the memtag callbacks.
         * Tags for OP-TEE core memory are then cleared to make it safe to
         * enable MEMTAG below.
         */
        bl boot_init_memtag
#endif

#ifdef CFG_CORE_ASLR
        mov x0, x20
        bl get_aslr_seed
#else
        mov x0, #0
#endif

        adr x1, boot_mmu_config
        bl core_init_mmu_map

#ifdef CFG_CORE_ASLR
        /*
         * Process relocation information again, updating for the virtual
         * map offset. We're doing this now, before the MMU is enabled, as
         * some of the memory will become write protected.
         */
        ldr x0, boot_mmu_config + CORE_MMU_CONFIG_MAP_OFFSET
        /*
         * Update the cached_mem_end address with the load offset since it
         * was calculated before relocation.
         */
        adr x5, cached_mem_end
        ldr x6, [x5]
        add x6, x6, x0
        str x6, [x5]
        bl relocate
#endif

        bl __get_core_pos
        bl enable_mmu
#ifdef CFG_CORE_ASLR
        /*
         * Reinitialize console, since register_serial_console() has
         * previously registered a PA and with ASLR the VA is different
         * from the PA.
         */
        bl console_init
#endif

#ifdef CFG_NS_VIRTUALIZATION
        /*
         * Initialize partition tables for each partition to
         * default_partition which has now been relocated to a different VA
         */
        bl core_mmu_set_default_prtn_tbl
#endif

#ifdef CFG_CORE_SEL1_SPMC
        mov x0, xzr /* pager not used */
#else
        mov x0, x19 /* pagable part address */
#endif
        mov x1, #-1
        bl boot_init_primary_early

#ifdef CFG_MEMTAG
        init_memtag_per_cpu
#endif

#ifndef CFG_NS_VIRTUALIZATION
        mov x21, sp
        adr_l x0, threads
        ldr x0, [x0, #THREAD_CTX_STACK_VA_END]
        mov sp, x0
        bl thread_get_core_local
        mov x22, x0
        str wzr, [x22, #THREAD_CORE_LOCAL_FLAGS]
#endif
        mov x0, x20 /* DT address also known as HW_CONFIG */
#ifdef CFG_CORE_SEL1_SPMC
        mov x1, x19 /* TOS_FW_CONFIG DT address */
#else
        mov x1, xzr /* unused */
#endif
        bl boot_init_primary_late
#ifdef CFG_CORE_PAUTH
        init_pauth_per_cpu
#endif

#ifndef CFG_NS_VIRTUALIZATION
        mov x0, #THREAD_CLF_TMP
        str w0, [x22, #THREAD_CORE_LOCAL_FLAGS]
        mov sp, x21
#endif

#ifdef _CFG_CORE_STACK_PROTECTOR
        /* Update stack canary value */
        bl plat_get_random_stack_canary
        adr_l x5, __stack_chk_guard
        str x0, [x5]
#endif

        /*
         * In case we've touched memory that secondary CPUs will use before
         * they have turned on their D-cache, clean and invalidate the
         * D-cache before exiting to normal world.
         */
        adr_l x0, __text_start
        ldr x1, cached_mem_end
        sub x1, x1, x0
        bl dcache_cleaninv_range

        /*
         * Clear current thread id now to allow the thread to be reused on
         * next entry. Matches the thread_init_boot_thread() call in
         * boot.c.
         */
#ifndef CFG_NS_VIRTUALIZATION
        bl thread_clr_boot_thread
#endif

#ifdef CFG_CORE_FFA
        adr x0, cpu_on_handler
        /*
         * Compensate for the virtual map offset since cpu_on_handler() is
         * called with MMU off.
         */
        ldr x1, boot_mmu_config + CORE_MMU_CONFIG_MAP_OFFSET
        sub x0, x0, x1
        bl thread_spmc_register_secondary_ep
        b thread_ffa_msg_wait
#else
        /*
         * Pass the vector address returned from main_init. Compensate for
         * the virtual map offset since cpu_on_handler() is called with
         * MMU off.
         */
        ldr x0, boot_mmu_config + CORE_MMU_CONFIG_MAP_OFFSET
        adr x1, thread_vector_table
        sub x1, x1, x0
        mov x0, #TEESMC_OPTEED_RETURN_ENTRY_DONE
        smc #0
        /* SMC should not return */
        panic_at_smc_return
#endif
END_FUNC _start
DECLARE_KEEP_INIT _start

        .section .identity_map.data
        .balign 8
LOCAL_DATA cached_mem_end , :
        .skip 8
END_DATA cached_mem_end

#ifdef CFG_CORE_ASLR
LOCAL_FUNC relocate , :
        /* x0 holds load offset */
#ifdef CFG_WITH_PAGER
        adr_l x6, __init_end
#else
        adr_l x6, __end
#endif
        ldp w2, w3, [x6, #BOOT_EMBDATA_RELOC_OFFSET]

        mov_imm x1, TEE_LOAD_ADDR
        add x2, x2, x6 /* start of relocations */
        add x3, x3, x2 /* end of relocations */

        /*
         * Relocations are not formatted as Rela64, instead they are in a
         * compressed format created by get_reloc_bin() in
         * scripts/gen_tee_bin.py.
         *
         * All the R_AARCH64_RELATIVE relocations are translated into a
         * list of 32-bit offsets from TEE_LOAD_ADDR. Each offset points
         * out a 64-bit value which is increased with the load offset.
         */

#ifdef CFG_WITH_PAGER
        /*
         * With pager enabled we can only relocate the pager and init
         * parts, the rest has to be done when a page is populated.
         */
        sub x6, x6, x1
#endif

        b 2f
        /* Loop over the relocation addresses and process all entries */
1:      ldr w4, [x2], #4
#ifdef CFG_WITH_PAGER
        /* Skip too large addresses */
        cmp x4, x6
        b.ge 2f
#endif
        add x4, x4, x1
        ldr x5, [x4]
        add x5, x5, x0
        str x5, [x4]

2:      cmp x2, x3
        b.ne 1b

        ret
END_FUNC relocate
#endif
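
        /*
         * Illustrative C sketch of the relocation loop in relocate() above
         * (hypothetical variable names; the binary format is produced by
         * get_reloc_bin() in scripts/gen_tee_bin.py):
         *
         *   uint32_t *reloc = (uint32_t *)(embdata + reloc_offset);
         *   uint32_t *end = (uint32_t *)((uint8_t *)reloc + reloc_len);
         *
         *   for (; reloc < end; reloc++) {
         *           uint64_t *ptr = (uint64_t *)(TEE_LOAD_ADDR + *reloc);
         *           *ptr += load_offset;
         *   }
         *
         * With the pager, entries that point beyond __init_end are skipped
         * and handled later when the corresponding page is populated.
         */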

/*
 * void enable_mmu(unsigned long core_pos);
 *
 * This function depends on being mapped within the identity map, where
 * the physical address and the virtual address are the same. After the
 * MMU has been enabled the instruction pointer will be updated to execute
 * from the new offset instead. Stack pointers and the return address are
 * updated.
 */
LOCAL_FUNC enable_mmu , : , .identity_map
        adr x1, boot_mmu_config
        load_xregs x1, 0, 2, 6
        /*
         * x0 = core_pos
         * x2 = tcr_el1
         * x3 = mair_el1
         * x4 = ttbr0_el1_base
         * x5 = ttbr0_core_offset
         * x6 = load_offset
         */
        msr tcr_el1, x2
        msr mair_el1, x3

        /*
         * ttbr0_el1 = ttbr0_el1_base + ttbr0_core_offset * core_pos
         */
        madd x1, x5, x0, x4
        msr ttbr0_el1, x1
        msr ttbr1_el1, xzr
        isb

        /* Invalidate TLB */
        tlbi vmalle1

        /*
         * Make sure translation table writes have drained into memory and
         * the TLB invalidation is complete.
         */
        dsb sy
        isb

        /* Enable the MMU */
        mrs x1, sctlr_el1
        orr x1, x1, #SCTLR_M
        msr sctlr_el1, x1
        isb

        /* Update vbar */
        mrs x1, vbar_el1
        add x1, x1, x6
        msr vbar_el1, x1
        isb

        /* Invalidate instruction cache and branch predictor */
        ic iallu
        isb

        /* Enable I and D cache */
        mrs x1, sctlr_el1
        orr x1, x1, #SCTLR_I
        orr x1, x1, #SCTLR_C
        msr sctlr_el1, x1
        isb

        /* Adjust stack pointers and return address */
        msr spsel, #1
        add sp, sp, x6
        msr spsel, #0
        add sp, sp, x6
        add x30, x30, x6

        ret
END_FUNC enable_mmu

        .section .identity_map.data
        .balign 8
DATA boot_mmu_config , : /* struct core_mmu_config */
        .skip CORE_MMU_CONFIG_SIZE
END_DATA boot_mmu_config
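
        /*
         * Rough C view of what enable_mmu() does with boot_mmu_config
         * (field names mirror the register comments above and are meant
         * as illustration only):
         *
         *   ttbr0_el1 = cfg.ttbr0_el1_base +
         *               cfg.ttbr0_core_offset * core_pos;
         *
         * Once SCTLR_EL1.M is set, VBAR_EL1, SP_EL0, SP_EL1 and the return
         * address are each increased by the load offset so that execution
         * continues in the new virtual map.
         */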

FUNC cpu_on_handler , :
        mov x19, x0
        mov x20, x1
        mov x21, x30

        adr x0, reset_vect_table
        msr vbar_el1, x0
        isb

        set_sctlr_el1
        isb

        /* Enable aborts now that we can receive exceptions */
        msr daifclr, #DAIFBIT_ABT

        bl __get_core_pos
        bl enable_mmu

        /* Setup SP_EL0 and SP_EL1, SP will be set to SP_EL0 */
        set_sp

#ifdef CFG_MEMTAG
        init_memtag_per_cpu
#endif
#ifdef CFG_CORE_PAUTH
        init_pauth_per_cpu
#endif

        mov x0, x19
        mov x1, x20
#ifdef CFG_CORE_FFA
        bl boot_cpu_on_handler
        b thread_ffa_msg_wait
#else
        mov x30, x21
        b boot_cpu_on_handler
#endif
END_FUNC cpu_on_handler
DECLARE_KEEP_PAGER cpu_on_handler

LOCAL_FUNC unhandled_cpu , :
        wfi
        b unhandled_cpu
END_FUNC unhandled_cpu

LOCAL_DATA stack_tmp_rel , :
        .word stack_tmp - stack_tmp_rel - STACK_TMP_GUARD
END_DATA stack_tmp_rel

        /*
         * This macro verifies that a given vector doesn't exceed the
         * architectural limit of 32 instructions. It is meant to be placed
         * immediately after the last instruction in the vector and takes
         * the vector entry as its parameter.
         */
        .macro check_vector_size since
        .if (. - \since) > (32 * 4)
        .error "Vector exceeds 32 instructions"
        .endif
        .endm

        .section .identity_map, "ax", %progbits
        .align 11
LOCAL_FUNC reset_vect_table , :, .identity_map, , nobti
        /* -----------------------------------------------------
         * Current EL with SP0 : 0x0 - 0x180
         * -----------------------------------------------------
         */
SynchronousExceptionSP0:
        b SynchronousExceptionSP0
        check_vector_size SynchronousExceptionSP0

        .align 7
IrqSP0:
        b IrqSP0
        check_vector_size IrqSP0

        .align 7
FiqSP0:
        b FiqSP0
        check_vector_size FiqSP0

        .align 7
SErrorSP0:
        b SErrorSP0
        check_vector_size SErrorSP0

        /* -----------------------------------------------------
         * Current EL with SPx : 0x200 - 0x380
         * -----------------------------------------------------
         */
        .align 7
SynchronousExceptionSPx:
        b SynchronousExceptionSPx
        check_vector_size SynchronousExceptionSPx

        .align 7
IrqSPx:
        b IrqSPx
        check_vector_size IrqSPx

        .align 7
FiqSPx:
        b FiqSPx
        check_vector_size FiqSPx

        .align 7
SErrorSPx:
        b SErrorSPx
        check_vector_size SErrorSPx

        /* -----------------------------------------------------
         * Lower EL using AArch64 : 0x400 - 0x580
         * -----------------------------------------------------
         */
        .align 7
SynchronousExceptionA64:
        b SynchronousExceptionA64
        check_vector_size SynchronousExceptionA64

        .align 7
IrqA64:
        b IrqA64
        check_vector_size IrqA64

        .align 7
FiqA64:
        b FiqA64
        check_vector_size FiqA64

        .align 7
SErrorA64:
        b SErrorA64
        check_vector_size SErrorA64

        /* -----------------------------------------------------
         * Lower EL using AArch32 : 0x600 - 0x780
         * -----------------------------------------------------
         */
        .align 7
SynchronousExceptionA32:
        b SynchronousExceptionA32
        check_vector_size SynchronousExceptionA32

        .align 7
IrqA32:
        b IrqA32
        check_vector_size IrqA32

        .align 7
FiqA32:
        b FiqA32
        check_vector_size FiqA32

        .align 7
SErrorA32:
        b SErrorA32
        check_vector_size SErrorA32

END_FUNC reset_vect_table

BTI(emit_aarch64_feature_1_and GNU_PROPERTY_AARCH64_FEATURE_1_BTI)