/* SPDX-License-Identifier: BSD-2-Clause */
/*
 * Copyright (c) 2015, Linaro Limited
 * Copyright (c) 2021, Arm Limited
 */

#include <platform_config.h>

#include <arm64_macros.S>
#include <arm.h>
#include <asm.S>
#include <generated/asm-defines.h>
#include <keep.h>
#include <sm/optee_smc.h>
#include <sm/teesmc_opteed.h>
#include <sm/teesmc_opteed_macros.h>

	/*
	 * Set up SP_EL0 and SP_EL1, SP will be set to SP_EL0.
	 * SP_EL0 is assigned stack_tmp_export + cpu_id * stack_tmp_stride
	 * SP_EL1 is assigned thread_core_local[cpu_id]
	 */
	.macro set_sp
		bl	__get_core_pos
		cmp	x0, #CFG_TEE_CORE_NB_CORE
		/* Unsupported CPU, park it before it breaks something */
		b.ge	unhandled_cpu
		adr	x1, stack_tmp_stride
		ldr	w1, [x1]
		mul	x1, x0, x1
		adrp	x0, stack_tmp_export
		add	x0, x0, :lo12:stack_tmp_export
		ldr	x0, [x0]
		msr	spsel, #0
		add	sp, x1, x0
		bl	thread_get_core_local
		msr	spsel, #1
		mov	sp, x0
		msr	spsel, #0
	.endm
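
	/*
	 * Illustrative only, not assembled: in rough C terms the macro
	 * above computes
	 *
	 *   sp_el0 = stack_tmp_export + __get_core_pos() * stack_tmp_stride;
	 *   sp_el1 = (vaddr_t)thread_get_core_local();
	 *
	 * where stack_tmp_export stands for the value loaded from that
	 * symbol.
	 */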

	.macro set_sctlr_el1
		mrs	x0, sctlr_el1
		orr	x0, x0, #SCTLR_I
		orr	x0, x0, #SCTLR_SA
		orr	x0, x0, #SCTLR_SPAN
#if defined(CFG_CORE_RWDATA_NOEXEC)
		orr	x0, x0, #SCTLR_WXN
#endif
#if defined(CFG_SCTLR_ALIGNMENT_CHECK)
		orr	x0, x0, #SCTLR_A
#else
		bic	x0, x0, #SCTLR_A
#endif
		msr	sctlr_el1, x0
	.endm

FUNC _start , :
#if defined(CFG_CORE_SEL1_SPMC)
	/*
	 * With OP-TEE as SPMC at S-EL1 the SPMD (SPD_spmd) in TF-A passes
	 * the DTB in x0, the pageable part in x1 and the rest of the
	 * registers are unused
	 */
	mov	x19, x1		/* Save pageable part */
	mov	x20, x0		/* Save DT address */
#else
	mov	x19, x0		/* Save pageable part address */
#if defined(CFG_DT_ADDR)
	ldr	x20, =CFG_DT_ADDR
#else
	mov	x20, x2		/* Save DT address */
#endif
#endif

	adr	x0, reset_vect_table
	msr	vbar_el1, x0
	isb

	set_sctlr_el1
	isb

#ifdef CFG_WITH_PAGER
	/*
	 * Move init code into correct location and move hashes to a
	 * temporary safe location until the heap is initialized.
	 *
	 * The binary is built as:
	 * [Pager code, rodata and data] : In correct location
	 * [Init code and rodata] : Should be copied to __init_start
	 * [struct boot_embdata + data] : Should be saved before
	 * initializing pager, first uint32_t tells the length of the data
	 */
	adr	x0, __init_start	/* dst */
	adr	x1, __data_end		/* src */
	adr	x2, __init_end
	sub	x2, x2, x0		/* init len */
	ldr	w4, [x1, x2]		/* length of hashes etc */
	add	x2, x2, x4		/* length of init and hashes etc */
	/* Copy backwards (as memmove) in case we're overlapping */
	add	x0, x0, x2		/* __init_start + len */
	add	x1, x1, x2		/* __data_end + len */
	adr	x3, cached_mem_end
	str	x0, [x3]
	adr	x2, __init_start
copy_init:
	ldp	x3, x4, [x1, #-16]!
	stp	x3, x4, [x0, #-16]!
	cmp	x0, x2
	b.gt	copy_init
#else
	/*
	 * The binary is built as:
	 * [Core, rodata and data] : In correct location
	 * [struct boot_embdata + data] : Should be moved to __end, first
	 * uint32_t tells the length of the struct + data
	 */
	adr_l	x0, __end	/* dst */
	adr_l	x1, __data_end	/* src */
	ldr	w2, [x1]	/* struct boot_embdata::total_len */
	/* Copy backwards (as memmove) in case we're overlapping */
	add	x0, x0, x2
	add	x1, x1, x2
	adr	x3, cached_mem_end
	str	x0, [x3]
	adr_l	x2, __end

copy_init:
	ldp	x3, x4, [x1, #-16]!
	stp	x3, x4, [x0, #-16]!
	cmp	x0, x2
	b.gt	copy_init
#endif
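
	/*
	 * Illustrative only, not assembled: for the non-pager case the
	 * copy loop above behaves roughly like
	 *
	 *   len = ((struct boot_embdata *)__data_end)->total_len;
	 *   cached_mem_end = (vaddr_t)__end + len;
	 *   memmove(__end, __data_end, len);
	 *
	 * except that it copies backwards, 16 bytes at a time, which is
	 * what makes the overlapping source and destination safe.
	 */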

	/*
	 * Clear .bss, this code obviously depends on the linker keeping
	 * start/end of .bss at least 8 byte aligned.
	 */
	adr_l	x0, __bss_start
	adr_l	x1, __bss_end
clear_bss:
	str	xzr, [x0], #8
	cmp	x0, x1
	b.lt	clear_bss

#ifdef CFG_VIRTUALIZATION
	/*
	 * Clear .nex_bss, this code obviously depends on the linker keeping
	 * start/end of .nex_bss at least 8 byte aligned.
	 */
	adr	x0, __nex_bss_start
	adr	x1, __nex_bss_end
clear_nex_bss:
	str	xzr, [x0], #8
	cmp	x0, x1
	b.lt	clear_nex_bss
#endif

	/* Setup SP_EL0 and SP_EL1, SP will be set to SP_EL0 */
	set_sp

	bl	thread_init_thread_core_local

	/* Enable aborts now that we can receive exceptions */
	msr	daifclr, #DAIFBIT_ABT

	/*
	 * Invalidate dcache for all memory used during initialization to
	 * avoid nasty surprises when the cache is turned on. We must not
	 * invalidate memory not used by OP-TEE since we may invalidate
	 * entries used by, for instance, ARM Trusted Firmware.
	 */
	adr_l	x0, __text_start
	ldr	x1, cached_mem_end
	sub	x1, x1, x0
	bl	dcache_cleaninv_range

	/* Enable console */
	bl	console_init

#ifdef CFG_CORE_ASLR
	mov	x0, x20
	bl	get_aslr_seed
#else
	mov	x0, #0
#endif

	adr	x1, boot_mmu_config
	bl	core_init_mmu_map

#ifdef CFG_CORE_ASLR
	/*
	 * Process relocation information again, updating for the new
	 * offset. We're doing this now, before the MMU is enabled, as
	 * some of the memory will become write protected.
	 */
	ldr	x0, boot_mmu_config + CORE_MMU_CONFIG_LOAD_OFFSET
	/*
	 * Update the cached_mem_end address with the load offset since it
	 * was calculated before relocation.
	 */
	adr	x5, cached_mem_end
	ldr	x6, [x5]
	add	x6, x6, x0
	str	x6, [x5]
	bl	relocate
#endif

	bl	__get_core_pos
	bl	enable_mmu
#ifdef CFG_CORE_ASLR
	/*
	 * Reinitialize console, since register_serial_console() has
	 * previously registered a PA and with ASLR the VA is different
	 * from the PA.
	 */
	bl	console_init
#endif

#ifdef CFG_VIRTUALIZATION
	/*
	 * Initialize partition tables for each partition to
	 * default_partition, which has now been relocated to a different VA
	 */
	bl	core_mmu_set_default_prtn_tbl
#endif

	mov	x0, x19		/* pageable part address */
	mov	x1, #-1
	bl	boot_init_primary_early
#ifndef CFG_VIRTUALIZATION
	mov	x21, sp
	adr_l	x0, threads
	ldr	x0, [x0, #THREAD_CTX_STACK_VA_END]
	mov	sp, x0
#endif
	mov	x0, x20		/* DT address */
	bl	boot_init_primary_late
#ifndef CFG_VIRTUALIZATION
	mov	sp, x21
#endif

	/*
	 * In case we've touched memory that secondary CPUs will use before
	 * they have turned on their D-cache, clean and invalidate the
	 * D-cache before exiting to normal world.
	 */
	adr_l	x0, __text_start
	ldr	x1, cached_mem_end
	sub	x1, x1, x0
	bl	dcache_cleaninv_range

	/*
	 * Clear current thread id now to allow the thread to be reused on
	 * next entry. Matches the call to thread_init_boot_thread() in
	 * boot.c.
	 */
#ifndef CFG_VIRTUALIZATION
	bl	thread_clr_boot_thread
#endif

#ifdef CFG_CORE_FFA
	adr	x0, cpu_on_handler
	/*
	 * Compensate for the load offset since cpu_on_handler() is
	 * called with the MMU off.
	 */
	ldr	x1, boot_mmu_config + CORE_MMU_CONFIG_LOAD_OFFSET
	sub	x0, x0, x1
	bl	ffa_secondary_cpu_ep_register
	b	thread_ffa_msg_wait
#else
	/*
	 * Pass the address of thread_vector_table.
	 * Compensate for the load offset since cpu_on_handler() is
	 * called with the MMU off.
	 */
	ldr	x0, boot_mmu_config + CORE_MMU_CONFIG_LOAD_OFFSET
	adr	x1, thread_vector_table
	sub	x1, x1, x0
	mov	x0, #TEESMC_OPTEED_RETURN_ENTRY_DONE
	smc	#0
	b	.		/* SMC should not return */
#endif
END_FUNC _start
DECLARE_KEEP_INIT _start

	.balign	8
LOCAL_DATA cached_mem_end , :
	.skip	8
END_DATA cached_mem_end

#ifdef CFG_CORE_ASLR
LOCAL_FUNC relocate , :
	/* x0 holds load offset */
#ifdef CFG_WITH_PAGER
	adr_l	x6, __init_end
#else
	adr_l	x6, __end
#endif
	ldp	w2, w3, [x6, #BOOT_EMBDATA_RELOC_OFFSET]

	mov_imm	x1, TEE_RAM_START
	add	x2, x2, x6	/* start of relocations */
	add	x3, x3, x2	/* end of relocations */

	/*
	 * Relocations are not formatted as Rela64, instead they are in a
	 * compressed format created by get_reloc_bin() in
	 * scripts/gen_tee_bin.py.
	 *
	 * All the R_AARCH64_RELATIVE relocations are translated into a
	 * list of 32-bit offsets from TEE_RAM_START. Each offset points
	 * out a 64-bit value which is increased with the load offset.
	 */

#ifdef CFG_WITH_PAGER
	/*
	 * With pager enabled we can only relocate the pager and init
	 * parts, the rest has to be done when a page is populated.
	 */
	sub	x6, x6, x1
#endif

	b	2f
	/* Loop over the relocation addresses and process all entries */
1:	ldr	w4, [x2], #4
#ifdef CFG_WITH_PAGER
	/* Skip too large addresses */
	cmp	x4, x6
	b.ge	2f
#endif
	add	x4, x4, x1
	ldr	x5, [x4]
	add	x5, x5, x0
	str	x5, [x4]

2:	cmp	x2, x3
	b.ne	1b

	ret
END_FUNC relocate
#endif
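
/*
 * Illustrative only, not assembled, and with illustrative local names:
 * assuming the two 32-bit values at BOOT_EMBDATA_RELOC_OFFSET are the
 * offset and size of the relocation array relative to struct boot_embdata,
 * relocate() above does roughly
 *
 *   uint32_t *rel = (uint32_t *)((uint8_t *)embdata + reloc_offset);
 *   uint32_t *rel_end = (uint32_t *)((uint8_t *)rel + reloc_size);
 *
 *   for (; rel < rel_end; rel++)
 *	*(uint64_t *)(TEE_RAM_START + *rel) += load_offset;
 *
 * with the pager configuration additionally skipping entries beyond the
 * pager and init parts.
 */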

/*
 * void enable_mmu(unsigned long core_pos);
 *
 * This function depends on being mapped within the identity map, where
 * the physical and virtual addresses are the same. After the MMU has been
 * enabled the instruction pointer will be updated to execute at the new
 * offset instead. Stack pointers and the return address are updated.
 */
LOCAL_FUNC enable_mmu , : , .identity_map
	adr	x1, boot_mmu_config
	load_xregs x1, 0, 2, 6
	/*
	 * x0 = core_pos
	 * x2 = tcr_el1
	 * x3 = mair_el1
	 * x4 = ttbr0_el1_base
	 * x5 = ttbr0_core_offset
	 * x6 = load_offset
	 */
	msr	tcr_el1, x2
	msr	mair_el1, x3

	/*
	 * ttbr0_el1 = ttbr0_el1_base + ttbr0_core_offset * core_pos
	 */
	madd	x1, x5, x0, x4
	msr	ttbr0_el1, x1
	msr	ttbr1_el1, xzr
	isb

	/* Invalidate TLB */
	tlbi	vmalle1

	/*
	 * Make sure translation table writes have drained into memory and
	 * the TLB invalidation is complete.
	 */
	dsb	sy
	isb

	/* Enable the MMU */
	mrs	x1, sctlr_el1
	orr	x1, x1, #SCTLR_M
	msr	sctlr_el1, x1
	isb

	/* Update vbar */
	mrs	x1, vbar_el1
	add	x1, x1, x6
	msr	vbar_el1, x1
	isb

	/* Invalidate instruction cache and branch predictor */
	ic	iallu
	isb

	/* Enable I and D cache */
	mrs	x1, sctlr_el1
	orr	x1, x1, #SCTLR_I
	orr	x1, x1, #SCTLR_C
	msr	sctlr_el1, x1
	isb

	/* Adjust stack pointers and return address */
	msr	spsel, #1
	add	sp, sp, x6
	msr	spsel, #0
	add	sp, sp, x6
	add	x30, x30, x6

	ret
END_FUNC enable_mmu

	.balign	8
DATA boot_mmu_config , : /* struct core_mmu_config */
	.skip	CORE_MMU_CONFIG_SIZE
END_DATA boot_mmu_config
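
/*
 * Note: boot_mmu_config is filled in by core_init_mmu_map(), called from
 * _start above, and read back by enable_mmu() with load_xregs, so its
 * layout is assumed to follow struct core_mmu_config as exposed through
 * the generated CORE_MMU_CONFIG_* constants.
 */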

FUNC cpu_on_handler , :
	mov	x19, x0
	mov	x20, x1
	mov	x21, x30

	adr	x0, reset_vect_table
	msr	vbar_el1, x0
	isb

	set_sctlr_el1
	isb

	/* Enable aborts now that we can receive exceptions */
	msr	daifclr, #DAIFBIT_ABT

	bl	__get_core_pos
	bl	enable_mmu

	/* Setup SP_EL0 and SP_EL1, SP will be set to SP_EL0 */
	set_sp

	mov	x0, x19
	mov	x1, x20
#ifdef CFG_CORE_FFA
	bl	boot_cpu_on_handler
	b	thread_ffa_msg_wait
#else
	mov	x30, x21
	b	boot_cpu_on_handler
#endif
END_FUNC cpu_on_handler
DECLARE_KEEP_PAGER cpu_on_handler

LOCAL_FUNC unhandled_cpu , :
	wfi
	b	unhandled_cpu
END_FUNC unhandled_cpu

	/*
	 * This macro verifies that a given vector doesn't exceed the
	 * architectural limit of 32 instructions. It is meant to be placed
	 * immediately after the last instruction in the vector and takes
	 * the vector entry point as the parameter.
	 */
	.macro check_vector_size since
	  .if (. - \since) > (32 * 4)
	    .error "Vector exceeds 32 instructions"
	  .endif
	.endm

	.section .identity_map, "ax", %progbits
	.align	11
LOCAL_FUNC reset_vect_table , :, .identity_map
	/* -----------------------------------------------------
	 * Current EL with SP0 : 0x0 - 0x180
	 * -----------------------------------------------------
	 */
SynchronousExceptionSP0:
	b	SynchronousExceptionSP0
	check_vector_size SynchronousExceptionSP0

	.align	7
IrqSP0:
	b	IrqSP0
	check_vector_size IrqSP0

	.align	7
FiqSP0:
	b	FiqSP0
	check_vector_size FiqSP0

	.align	7
SErrorSP0:
	b	SErrorSP0
	check_vector_size SErrorSP0

	/* -----------------------------------------------------
	 * Current EL with SPx : 0x200 - 0x380
	 * -----------------------------------------------------
	 */
	.align	7
SynchronousExceptionSPx:
	b	SynchronousExceptionSPx
	check_vector_size SynchronousExceptionSPx

	.align	7
IrqSPx:
	b	IrqSPx
	check_vector_size IrqSPx

	.align	7
FiqSPx:
	b	FiqSPx
	check_vector_size FiqSPx

	.align	7
SErrorSPx:
	b	SErrorSPx
	check_vector_size SErrorSPx

	/* -----------------------------------------------------
	 * Lower EL using AArch64 : 0x400 - 0x580
	 * -----------------------------------------------------
	 */
	.align	7
SynchronousExceptionA64:
	b	SynchronousExceptionA64
	check_vector_size SynchronousExceptionA64

	.align	7
IrqA64:
	b	IrqA64
	check_vector_size IrqA64

	.align	7
FiqA64:
	b	FiqA64
	check_vector_size FiqA64

	.align	7
SErrorA64:
	b	SErrorA64
	check_vector_size SErrorA64

	/* -----------------------------------------------------
	 * Lower EL using AArch32 : 0x600 - 0x780
	 * -----------------------------------------------------
	 */
	.align	7
SynchronousExceptionA32:
	b	SynchronousExceptionA32
	check_vector_size SynchronousExceptionA32

	.align	7
IrqA32:
	b	IrqA32
	check_vector_size IrqA32

	.align	7
FiqA32:
	b	FiqA32
	check_vector_size FiqA32

	.align	7
SErrorA32:
	b	SErrorA32
	check_vector_size SErrorA32

END_FUNC reset_vect_table