/* SPDX-License-Identifier: BSD-2-Clause */
/*
 * Copyright (c) 2015, Linaro Limited
 * Copyright (c) 2021, Arm Limited
 */

#include <platform_config.h>

#include <arm64_macros.S>
#include <arm.h>
#include <asm.S>
#include <generated/asm-defines.h>
#include <keep.h>
#include <sm/optee_smc.h>
#include <sm/teesmc_opteed.h>
#include <sm/teesmc_opteed_macros.h>

	/*
	 * Set up SP_EL0 and SP_EL1, SP will be set to SP_EL0.
	 * SP_EL0 is assigned stack_tmp_export + cpu_id * stack_tmp_stride
	 * SP_EL1 is assigned thread_core_local[cpu_id]
	 */
	.macro set_sp
		bl	__get_core_pos
		cmp	x0, #CFG_TEE_CORE_NB_CORE
		/* Unsupported CPU, park it before it breaks something */
		b.ge	unhandled_cpu
		adr	x1, stack_tmp_stride
		ldr	w1, [x1]
		mul	x1, x0, x1
		adr_l	x0, stack_tmp_export
		ldr	x0, [x0]
		msr	spsel, #0
		add	sp, x1, x0
		bl	thread_get_core_local
		msr	spsel, #1
		mov	sp, x0
		msr	spsel, #0
	.endm

	.macro set_sctlr_el1
		mrs	x0, sctlr_el1
		orr	x0, x0, #SCTLR_I
		orr	x0, x0, #SCTLR_SA
		orr	x0, x0, #SCTLR_SPAN
#if defined(CFG_CORE_RWDATA_NOEXEC)
		orr	x0, x0, #SCTLR_WXN
#endif
#if defined(CFG_SCTLR_ALIGNMENT_CHECK)
		orr	x0, x0, #SCTLR_A
#else
		bic	x0, x0, #SCTLR_A
#endif
		msr	sctlr_el1, x0
	.endm

FUNC _start , :
#if defined(CFG_CORE_SEL1_SPMC)
	/*
	 * With OP-TEE as SPMC at S-EL1 the SPMD (SPD_spmd) in TF-A passes
	 * the DTB in x0, the pageable part in x1 and the rest of the
	 * registers are unused.
	 */
	mov	x19, x1		/* Save pageable part */
	mov	x20, x0		/* Save DT address */
#else
	mov	x19, x0		/* Save pageable part address */
#if defined(CFG_DT_ADDR)
	ldr	x20, =CFG_DT_ADDR
#else
	mov	x20, x2		/* Save DT address */
#endif
#endif

	adr	x0, reset_vect_table
	msr	vbar_el1, x0
	isb

	set_sctlr_el1
	isb

#ifdef CFG_WITH_PAGER
	/*
	 * Move init code into its correct location and move the hashes
	 * to a temporary safe location until the heap is initialized.
	 *
	 * The binary is built as:
	 * [Pager code, rodata and data] : In correct location
	 * [Init code and rodata] : Should be copied to __init_start
	 * [struct boot_embdata + data] : Should be saved before
	 * initializing pager, first uint32_t tells the length of the data
	 */
	adr	x0, __init_start	/* dst */
	adr	x1, __data_end		/* src */
	adr	x2, __init_end
	sub	x2, x2, x0		/* init len */
	ldr	w4, [x1, x2]		/* length of hashes etc */
	add	x2, x2, x4		/* length of init and hashes etc */
	/* Copy backwards (as memmove) in case we're overlapping */
	add	x0, x0, x2		/* __init_start + len */
	add	x1, x1, x2		/* __data_end + len */
	adr	x3, cached_mem_end
	str	x0, [x3]
	adr	x2, __init_start
copy_init:
	ldp	x3, x4, [x1, #-16]!
	stp	x3, x4, [x0, #-16]!
	cmp	x0, x2
	b.gt	copy_init
#else
	/*
	 * The binary is built as:
	 * [Core, rodata and data] : In correct location
	 * [struct boot_embdata + data] : Should be moved to __end, first
	 * uint32_t tells the length of the struct + data
	 */
	adr_l	x0, __end	/* dst */
	adr_l	x1, __data_end	/* src */
	ldr	w2, [x1]	/* struct boot_embdata::total_len */
	/* Copy backwards (as memmove) in case we're overlapping */
	add	x0, x0, x2
	add	x1, x1, x2
	adr	x3, cached_mem_end
	str	x0, [x3]
	adr_l	x2, __end

copy_init:
	ldp	x3, x4, [x1, #-16]!
	stp	x3, x4, [x0, #-16]!
	cmp	x0, x2
	b.gt	copy_init
#endif
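
	/*
	 * For reference, the embedded data copied above starts with a
	 * small header. A rough sketch of its layout as used by this
	 * file, with assumed field names (the C definition of struct
	 * boot_embdata is authoritative and may contain more fields):
	 *
	 *	struct boot_embdata {
	 *		uint32_t total_len;	first uint32_t, the length
	 *					used by the copy above
	 *		...
	 *		uint32_t reloc_offset;	at BOOT_EMBDATA_RELOC_OFFSET,
	 *		uint32_t reloc_len;	both consumed by relocate()
	 *	};
	 */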

	/*
	 * Clear .bss, this code obviously depends on the linker keeping
	 * start/end of .bss at least 8 byte aligned.
	 */
	adr_l	x0, __bss_start
	adr_l	x1, __bss_end
clear_bss:
	str	xzr, [x0], #8
	cmp	x0, x1
	b.lt	clear_bss

#ifdef CFG_VIRTUALIZATION
	/*
	 * Clear .nex_bss, this code obviously depends on the linker keeping
	 * start/end of .nex_bss at least 8 byte aligned.
	 */
	adr	x0, __nex_bss_start
	adr	x1, __nex_bss_end
clear_nex_bss:
	str	xzr, [x0], #8
	cmp	x0, x1
	b.lt	clear_nex_bss
#endif

	/* Set up SP_EL0 and SP_EL1, SP will be set to SP_EL0 */
	set_sp

	bl	thread_init_thread_core_local

	/* Enable aborts now that we can receive exceptions */
	msr	daifclr, #DAIFBIT_ABT

	/*
	 * Invalidate dcache for all memory used during initialization to
	 * avoid nasty surprises when the cache is turned on. We must not
	 * invalidate memory not used by OP-TEE since we may invalidate
	 * entries used by, for instance, ARM Trusted Firmware.
	 */
	adr_l	x0, __text_start
	ldr	x1, cached_mem_end
	sub	x1, x1, x0
	bl	dcache_cleaninv_range

	/* Enable console */
	bl	console_init

#ifdef CFG_CORE_ASLR
	mov	x0, x20
	bl	get_aslr_seed
#else
	mov	x0, #0
#endif

	adr	x1, boot_mmu_config
	bl	core_init_mmu_map

#ifdef CFG_CORE_ASLR
	/*
	 * Process the relocation information again, updating for the new
	 * offset. We're doing this now, before the MMU is enabled, as
	 * some of the memory will become write protected.
	 */
	ldr	x0, boot_mmu_config + CORE_MMU_CONFIG_LOAD_OFFSET
	/*
	 * Update the cached_mem_end address with the load offset since
	 * it was calculated before relocation.
	 */
	adr	x5, cached_mem_end
	ldr	x6, [x5]
	add	x6, x6, x0
	str	x6, [x5]
	bl	relocate
#endif

	bl	__get_core_pos
	bl	enable_mmu
#ifdef CFG_CORE_ASLR
	/*
	 * Reinitialize the console, since register_serial_console() has
	 * previously registered a PA and with ASLR the VA is different
	 * from the PA.
	 */
	bl	console_init
#endif

#ifdef CFG_VIRTUALIZATION
	/*
	 * Initialize the partition tables for each partition to
	 * default_partition, which by now has been relocated to a
	 * different VA.
	 */
	bl	core_mmu_set_default_prtn_tbl
#endif

	mov	x0, x19		/* pageable part address */
	mov	x1, #-1
	bl	boot_init_primary_early
#ifndef CFG_VIRTUALIZATION
	mov	x21, sp
	adr_l	x0, threads
	ldr	x0, [x0, #THREAD_CTX_STACK_VA_END]
	mov	sp, x0
#endif
	mov	x0, x20		/* DT address */
	bl	boot_init_primary_late
#ifndef CFG_VIRTUALIZATION
	mov	sp, x21
#endif

	/*
	 * In case we've touched memory that secondary CPUs will use before
	 * they have turned on their D-cache, clean and invalidate the
	 * D-cache before exiting to normal world.
	 */
	adr_l	x0, __text_start
	ldr	x1, cached_mem_end
	sub	x1, x1, x0
	bl	dcache_cleaninv_range

	/*
	 * Clear the current thread id now to allow the thread to be
	 * reused on the next entry. Matches thread_init_boot_thread()
	 * in boot.c.
	 */
#ifndef CFG_VIRTUALIZATION
	bl	thread_clr_boot_thread
#endif

#ifdef CFG_CORE_FFA
	adr	x0, cpu_on_handler
	/*
	 * Compensate for the load offset since cpu_on_handler() is
	 * called with MMU off.
	 */
	ldr	x1, boot_mmu_config + CORE_MMU_CONFIG_LOAD_OFFSET
	sub	x0, x0, x1
	bl	ffa_secondary_cpu_ep_register
	b	thread_ffa_msg_wait
#else
	/*
	 * Pass the vector address returned from main_init.
	 * Compensate for the load offset since cpu_on_handler() is
	 * called with MMU off.
	 */
	ldr	x0, boot_mmu_config + CORE_MMU_CONFIG_LOAD_OFFSET
	adr	x1, thread_vector_table
	sub	x1, x1, x0
	mov	x0, #TEESMC_OPTEED_RETURN_ENTRY_DONE
	smc	#0
	b	.	/* SMC should not return */
#endif
END_FUNC _start
DECLARE_KEEP_INIT _start

	.balign	8
LOCAL_DATA cached_mem_end , :
	.skip	8
END_DATA cached_mem_end
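
	/*
	 * The relocation loop in relocate() below (CFG_CORE_ASLR only) is
	 * roughly equivalent to the following C sketch. This is
	 * illustrative only: the helper names are made up and the
	 * CFG_WITH_PAGER cut-off is omitted.
	 *
	 *	void relocate(uint64_t load_offset)
	 *	{
	 *		uint32_t *rel = start_of_relocations();
	 *		uint32_t *rel_end = end_of_relocations();
	 *
	 *		for (; rel != rel_end; rel++)
	 *			*(uint64_t *)(TEE_RAM_START + *rel) +=
	 *				load_offset;
	 *	}
	 */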

#ifdef CFG_CORE_ASLR
LOCAL_FUNC relocate , :
	/* x0 holds load offset */
#ifdef CFG_WITH_PAGER
	adr_l	x6, __init_end
#else
	adr_l	x6, __end
#endif
	ldp	w2, w3, [x6, #BOOT_EMBDATA_RELOC_OFFSET]

	mov_imm	x1, TEE_RAM_START
	add	x2, x2, x6	/* start of relocations */
	add	x3, x3, x2	/* end of relocations */

	/*
	 * Relocations are not formatted as Rela64, instead they are in a
	 * compressed format created by get_reloc_bin() in
	 * scripts/gen_tee_bin.py
	 *
	 * All the R_AARCH64_RELATIVE relocations are translated into a
	 * list of 32-bit offsets from TEE_RAM_START. Each offset points
	 * at a 64-bit value which is increased with the load offset.
	 */

#ifdef CFG_WITH_PAGER
	/*
	 * With pager enabled we can only relocate the pager and init
	 * parts, the rest has to be done when a page is populated.
	 */
	sub	x6, x6, x1
#endif

	b	2f
	/* Loop over the relocation addresses and process all entries */
1:	ldr	w4, [x2], #4
#ifdef CFG_WITH_PAGER
	/* Skip too large addresses */
	cmp	x4, x6
	b.ge	2f
#endif
	add	x4, x4, x1
	ldr	x5, [x4]
	add	x5, x5, x0
	str	x5, [x4]

2:	cmp	x2, x3
	b.ne	1b

	ret
END_FUNC relocate
#endif
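
	/*
	 * Note on boot_mmu_config: as used by enable_mmu() below it
	 * starts with five 64-bit fields, in order tcr_el1, mair_el1,
	 * ttbr0_el1_base, ttbr0_core_offset and load_offset. This layout
	 * is inferred from this file only; the C definition of
	 * struct core_mmu_config and CORE_MMU_CONFIG_SIZE are
	 * authoritative and may include additional fields.
	 */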

/*
 * void enable_mmu(unsigned long core_pos);
 *
 * This function depends on being mapped within the identity map, where
 * the physical address and the virtual address are the same. After the
 * MMU has been enabled the instruction pointer is updated to execute at
 * the new offset instead. Stack pointers and the return address are
 * updated.
 */
LOCAL_FUNC enable_mmu , : , .identity_map
	adr	x1, boot_mmu_config
	load_xregs x1, 0, 2, 6
	/*
	 * x0 = core_pos
	 * x2 = tcr_el1
	 * x3 = mair_el1
	 * x4 = ttbr0_el1_base
	 * x5 = ttbr0_core_offset
	 * x6 = load_offset
	 */
	msr	tcr_el1, x2
	msr	mair_el1, x3

	/*
	 * ttbr0_el1 = ttbr0_el1_base + ttbr0_core_offset * core_pos
	 */
	madd	x1, x5, x0, x4
	msr	ttbr0_el1, x1
	msr	ttbr1_el1, xzr
	isb

	/* Invalidate TLB */
	tlbi	vmalle1

	/*
	 * Make sure translation table writes have drained into memory and
	 * the TLB invalidation is complete.
	 */
	dsb	sy
	isb

	/* Enable the MMU */
	mrs	x1, sctlr_el1
	orr	x1, x1, #SCTLR_M
	msr	sctlr_el1, x1
	isb

	/* Update vbar */
	mrs	x1, vbar_el1
	add	x1, x1, x6
	msr	vbar_el1, x1
	isb

	/* Invalidate instruction cache and branch predictor */
	ic	iallu
	isb

	/* Enable I and D cache */
	mrs	x1, sctlr_el1
	orr	x1, x1, #SCTLR_I
	orr	x1, x1, #SCTLR_C
	msr	sctlr_el1, x1
	isb

	/* Adjust stack pointers and return address */
	msr	spsel, #1
	add	sp, sp, x6
	msr	spsel, #0
	add	sp, sp, x6
	add	x30, x30, x6

	ret
END_FUNC enable_mmu

	.balign	8
DATA boot_mmu_config , : /* struct core_mmu_config */
	.skip	CORE_MMU_CONFIG_SIZE
END_DATA boot_mmu_config

FUNC cpu_on_handler , :
	mov	x19, x0
	mov	x20, x1
	mov	x21, x30

	adr	x0, reset_vect_table
	msr	vbar_el1, x0
	isb

	set_sctlr_el1
	isb

	/* Enable aborts now that we can receive exceptions */
	msr	daifclr, #DAIFBIT_ABT

	bl	__get_core_pos
	bl	enable_mmu

	/* Set up SP_EL0 and SP_EL1, SP will be set to SP_EL0 */
	set_sp

	mov	x0, x19
	mov	x1, x20
#ifdef CFG_CORE_FFA
	bl	boot_cpu_on_handler
	b	thread_ffa_msg_wait
#else
	mov	x30, x21
	b	boot_cpu_on_handler
#endif
END_FUNC cpu_on_handler
DECLARE_KEEP_PAGER cpu_on_handler

LOCAL_FUNC unhandled_cpu , :
	wfi
	b	unhandled_cpu
END_FUNC unhandled_cpu

	/*
	 * This macro verifies that a given vector doesn't exceed the
	 * architectural limit of 32 instructions. It is meant to be placed
	 * immediately after the last instruction in the vector and takes
	 * the vector entry as its parameter.
	 */
	.macro check_vector_size since
	  .if (. - \since) > (32 * 4)
	    .error "Vector exceeds 32 instructions"
	  .endif
	.endm
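
	/*
	 * For example, the "check_vector_size SynchronousExceptionSP0"
	 * below expands to an assembly-time check that the vector entry
	 * is no longer than 32 * 4 = 128 bytes:
	 *
	 *	.if (. - SynchronousExceptionSP0) > (32 * 4)
	 *	  .error "Vector exceeds 32 instructions"
	 *	.endif
	 */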

	.section .identity_map, "ax", %progbits
	.align	11
LOCAL_FUNC reset_vect_table , :, .identity_map
	/* -----------------------------------------------------
	 * Current EL with SP0 : 0x0 - 0x180
	 * -----------------------------------------------------
	 */
SynchronousExceptionSP0:
	b	SynchronousExceptionSP0
	check_vector_size SynchronousExceptionSP0

	.align	7
IrqSP0:
	b	IrqSP0
	check_vector_size IrqSP0

	.align	7
FiqSP0:
	b	FiqSP0
	check_vector_size FiqSP0

	.align	7
SErrorSP0:
	b	SErrorSP0
	check_vector_size SErrorSP0

	/* -----------------------------------------------------
	 * Current EL with SPx : 0x200 - 0x380
	 * -----------------------------------------------------
	 */
	.align	7
SynchronousExceptionSPx:
	b	SynchronousExceptionSPx
	check_vector_size SynchronousExceptionSPx

	.align	7
IrqSPx:
	b	IrqSPx
	check_vector_size IrqSPx

	.align	7
FiqSPx:
	b	FiqSPx
	check_vector_size FiqSPx

	.align	7
SErrorSPx:
	b	SErrorSPx
	check_vector_size SErrorSPx

	/* -----------------------------------------------------
	 * Lower EL using AArch64 : 0x400 - 0x580
	 * -----------------------------------------------------
	 */
	.align	7
SynchronousExceptionA64:
	b	SynchronousExceptionA64
	check_vector_size SynchronousExceptionA64

	.align	7
IrqA64:
	b	IrqA64
	check_vector_size IrqA64

	.align	7
FiqA64:
	b	FiqA64
	check_vector_size FiqA64

	.align	7
SErrorA64:
	b	SErrorA64
	check_vector_size SErrorA64

	/* -----------------------------------------------------
	 * Lower EL using AArch32 : 0x600 - 0x780
	 * -----------------------------------------------------
	 */
	.align	7
SynchronousExceptionA32:
	b	SynchronousExceptionA32
	check_vector_size SynchronousExceptionA32

	.align	7
IrqA32:
	b	IrqA32
	check_vector_size IrqA32

	.align	7
FiqA32:
	b	FiqA32
	check_vector_size FiqA32

	.align	7
SErrorA32:
	b	SErrorA32
	check_vector_size SErrorA32

END_FUNC reset_vect_table