/* SPDX-License-Identifier: BSD-2-Clause */
/*
 * Copyright (c) 2015, Linaro Limited
 */

#include <platform_config.h>

#include <arm64_macros.S>
#include <arm.h>
#include <asm.S>
#include <generated/asm-defines.h>
#include <keep.h>
#include <sm/optee_smc.h>
#include <sm/teesmc_opteed.h>
#include <sm/teesmc_opteed_macros.h>

	/*
	 * Set up SP_EL0 and SP_EL1, SP will be set to SP_EL0.
	 * SP_EL0 is assigned stack_tmp_export + cpu_id * stack_tmp_stride
	 * SP_EL1 is assigned thread_core_local[cpu_id]
	 */
	.macro set_sp
		bl	__get_core_pos
		cmp	x0, #CFG_TEE_CORE_NB_CORE
		/* Unsupported CPU, park it before it breaks something */
		bge	unhandled_cpu
		adr	x1, stack_tmp_stride
		ldr	w1, [x1]
		mul	x1, x0, x1
		adrp	x0, stack_tmp_export
		add	x0, x0, :lo12:stack_tmp_export
		ldr	x0, [x0]
		msr	spsel, #0
		add	sp, x1, x0
		bl	thread_get_core_local
		msr	spsel, #1
		mov	sp, x0
		msr	spsel, #0
	.endm

	.macro set_sctlr_el1
		mrs	x0, sctlr_el1
		orr	x0, x0, #SCTLR_I
		orr	x0, x0, #SCTLR_SA
		orr	x0, x0, #SCTLR_SPAN
#if defined(CFG_CORE_RWDATA_NOEXEC)
		orr	x0, x0, #SCTLR_WXN
#endif
#if defined(CFG_SCTLR_ALIGNMENT_CHECK)
		orr	x0, x0, #SCTLR_A
#else
		bic	x0, x0, #SCTLR_A
#endif
		msr	sctlr_el1, x0
	.endm

FUNC _start , :
	mov	x19, x0		/* Save pagable part address */
#if defined(CFG_DT_ADDR)
	ldr	x20, =CFG_DT_ADDR
#else
	mov	x20, x2		/* Save DT address */
#endif

	adr	x0, reset_vect_table
	msr	vbar_el1, x0
	isb

	set_sctlr_el1
	isb

#ifdef CFG_WITH_PAGER
	/*
	 * Move the init code into its correct location and move the hashes
	 * to a temporary safe location until the heap is initialized.
	 *
	 * The binary is built as:
	 * [Pager code, rodata and data] : In correct location
	 * [Init code and rodata] : Should be copied to __init_start
	 * [struct boot_embdata + data] : Should be saved before
	 * initializing pager, first uint32_t tells the length of the data
	 */
	adr	x0, __init_start	/* dst */
	adr	x1, __data_end		/* src */
	adr	x2, __init_end
	sub	x2, x2, x0		/* init len */
	ldr	w4, [x1, x2]		/* length of hashes etc */
	add	x2, x2, x4		/* length of init and hashes etc */
	/* Copy backwards (as memmove) in case we're overlapping */
	add	x0, x0, x2		/* __init_start + len */
	add	x1, x1, x2		/* __data_end + len */
	adr	x3, cached_mem_end
	str	x0, [x3]
	adr	x2, __init_start
copy_init:
	ldp	x3, x4, [x1, #-16]!
	stp	x3, x4, [x0, #-16]!
	cmp	x0, x2
	b.gt	copy_init
#else
	/*
	 * The binary is built as:
	 * [Core, rodata and data] : In correct location
	 * [struct boot_embdata + data] : Should be moved to __end, first
	 * uint32_t tells the length of the struct + data
	 */
	adr_l	x0, __end	/* dst */
	adr_l	x1, __data_end	/* src */
	ldr	w2, [x1]	/* struct boot_embdata::total_len */
	/* Copy backwards (as memmove) in case we're overlapping */
	add	x0, x0, x2
	add	x1, x1, x2
	adr	x3, cached_mem_end
	str	x0, [x3]
	adr_l	x2, __end

copy_init:
	ldp	x3, x4, [x1, #-16]!
	stp	x3, x4, [x0, #-16]!
	cmp	x0, x2
	b.gt	copy_init
#endif
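
	/*
	 * The copy loops above behave like memmove(): both pointers are
	 * pre-decremented and the data is copied 16 bytes at a time,
	 * backwards, which is safe since the destination overlaps and lies
	 * above the source. A rough C sketch of the non-pager case (a
	 * sketch only, symbol names as used above):
	 *
	 *	size_t len = ((struct boot_embdata *)__data_end)->total_len;
	 *
	 *	memmove(__end, __data_end, len);
	 *	cached_mem_end = (vaddr_t)__end + len;
	 */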

	/*
	 * Clear .bss, this code obviously depends on the linker keeping
	 * the start/end of .bss at least 8 byte aligned.
	 */
	adr_l	x0, __bss_start
	adr_l	x1, __bss_end
clear_bss:
	str	xzr, [x0], #8
	cmp	x0, x1
	b.lt	clear_bss

#ifdef CFG_VIRTUALIZATION
	/*
	 * Clear .nex_bss, this code obviously depends on the linker keeping
	 * the start/end of .nex_bss at least 8 byte aligned.
	 */
	adr	x0, __nex_bss_start
	adr	x1, __nex_bss_end
clear_nex_bss:
	str	xzr, [x0], #8
	cmp	x0, x1
	b.lt	clear_nex_bss
#endif

	/* Setup SP_EL0 and SP_EL1, SP will be set to SP_EL0 */
	set_sp

	/* curr_thread needs to be -1 until threads are properly initialized */
	bl	thread_clr_thread_core_local

	/* Enable aborts now that we can receive exceptions */
	msr	daifclr, #DAIFBIT_ABT

	/*
	 * Clean and invalidate the dcache for all memory used during
	 * initialization to avoid nasty surprises when the cache is turned
	 * on. We must not invalidate memory not used by OP-TEE since we
	 * may invalidate entries used by for instance ARM Trusted Firmware.
	 */
	adr_l	x0, __text_start
	ldr	x1, cached_mem_end
	sub	x1, x1, x0
	bl	dcache_cleaninv_range

	/* Enable console */
	bl	console_init

#ifdef CFG_CORE_ASLR
	mov	x0, x20
	bl	get_aslr_seed
#else
	mov	x0, #0
#endif

	adr	x1, boot_mmu_config
	bl	core_init_mmu_map

#ifdef CFG_CORE_ASLR
	/*
	 * Process the relocation information again, updating for the new
	 * offset. We're doing this now, before the MMU is enabled, as some
	 * of the memory will become write protected.
	 */
	ldr	x0, boot_mmu_config + CORE_MMU_CONFIG_LOAD_OFFSET
	/*
	 * Update the cached_mem_end address with the load offset since it
	 * was calculated before relocation.
	 */
	adr	x5, cached_mem_end
	ldr	x6, [x5]
	add	x6, x6, x0
	str	x6, [x5]
	bl	relocate
#endif

	bl	__get_core_pos
	bl	enable_mmu
#ifdef CFG_CORE_ASLR
	/*
	 * Reinitialize the console, since register_serial_console() has
	 * previously registered a PA and with ASLR the VA is different
	 * from the PA.
	 */
	bl	console_init
#endif

	mov	x0, x19		/* pagable part address */
	mov	x1, #-1
	mov	x2, x20		/* DT address */
	bl	boot_init_primary

	/*
	 * In case we've touched memory that secondary CPUs will use before
	 * they have turned on their D-cache, clean and invalidate the
	 * D-cache before exiting to normal world.
	 */
	adr_l	x0, __text_start
	ldr	x1, cached_mem_end
	sub	x1, x1, x0
	bl	dcache_cleaninv_range

	/*
	 * Clear the current thread id now to allow the thread to be reused
	 * on next entry. Matches thread_init_boot_thread() in boot.c.
	 */
#ifndef CFG_VIRTUALIZATION
	bl	thread_clr_boot_thread
#endif

#ifdef CFG_CORE_FFA
	adr	x0, cpu_on_handler
	/*
	 * Compensate for the load offset since cpu_on_handler() is
	 * called with the MMU off.
	 */
	ldr	x1, boot_mmu_config + CORE_MMU_CONFIG_LOAD_OFFSET
	sub	x0, x0, x1
	bl	ffa_secondary_cpu_boot_req
	b	thread_ffa_msg_wait
#else
	/*
	 * Pass the vector address (thread_vector_table) to the monitor.
	 * Compensate for the load offset since the vector table is
	 * referenced with the MMU off.
	 */
	ldr	x0, boot_mmu_config + CORE_MMU_CONFIG_LOAD_OFFSET
	adr	x1, thread_vector_table
	sub	x1, x1, x0
	mov	x0, #TEESMC_OPTEED_RETURN_ENTRY_DONE
	smc	#0
	b	.		/* SMC should not return */
#endif
END_FUNC _start
DECLARE_KEEP_INIT _start

	.balign 8
LOCAL_DATA cached_mem_end , :
	.skip 8
END_DATA cached_mem_end
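
	/*
	 * cached_mem_end above records the end of the memory OP-TEE has
	 * written during early boot; the dcache_cleaninv_range() calls in
	 * _start operate on the range [__text_start, cached_mem_end). With
	 * CFG_CORE_ASLR the stored value is adjusted by the load offset
	 * before relocation, since it was calculated before the new
	 * mapping was in place.
	 */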

#ifdef CFG_CORE_ASLR
LOCAL_FUNC relocate , :
	/* x0 holds load offset */
#ifdef CFG_WITH_PAGER
	adr_l	x6, __init_end
#else
	adr_l	x6, __end
#endif
	ldp	w2, w3, [x6, #BOOT_EMBDATA_RELOC_OFFSET]

	mov_imm	x1, TEE_RAM_START
	add	x2, x2, x6	/* start of relocations */
	add	x3, x3, x2	/* end of relocations */

	/*
	 * The relocations are not formatted as Rela64, instead they are in
	 * a compressed format created by get_reloc_bin() in
	 * scripts/gen_tee_bin.py.
	 *
	 * All the R_AARCH64_RELATIVE relocations are translated into a
	 * list of 32-bit offsets from TEE_RAM_START. Each such address
	 * holds a 64-bit value which is increased with the load offset.
	 */

#ifdef CFG_WITH_PAGER
	/*
	 * With the pager enabled we can only relocate the pager and init
	 * parts, the rest has to be done when a page is populated.
	 */
	sub	x6, x6, x1
#endif

	b	2f
	/* Loop over the relocation addresses and process all entries */
1:	ldr	w4, [x2], #4
#ifdef CFG_WITH_PAGER
	/* Skip too large addresses */
	cmp	x4, x6
	b.ge	2f
#endif
	add	x4, x4, x1
	ldr	x5, [x4]
	add	x5, x5, x0
	str	x5, [x4]

2:	cmp	x2, x3
	b.ne	1b

	ret
END_FUNC relocate
#endif
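
	/*
	 * The relocation loop in relocate() above is, roughly, the
	 * following C (a sketch only: the field names reloc_offset and
	 * reloc_len are assumed from BOOT_EMBDATA_RELOC_OFFSET, the
	 * authoritative definition is struct boot_embdata in the C
	 * headers):
	 *
	 *	struct boot_embdata *ed = (void *)__end; // __init_end with pager
	 *	uint32_t *rel = (void *)((vaddr_t)ed + ed->reloc_offset);
	 *	uint32_t *end = (void *)((vaddr_t)rel + ed->reloc_len);
	 *
	 *	for (; rel < end; rel++)
	 *		*(uint64_t *)(TEE_RAM_START + *rel) += load_offset;
	 *
	 * With CFG_WITH_PAGER, offsets beyond the init part are skipped
	 * here and handled later when the corresponding page is populated.
	 */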

/*
 * void enable_mmu(unsigned long core_pos);
 *
 * This function depends on being mapped inside the identity map, where
 * the physical and virtual addresses are the same. Once the MMU has been
 * enabled, execution continues at the new (virtual) offset. The stack
 * pointers and the return address are updated accordingly.
 */
LOCAL_FUNC enable_mmu , : , .identity_map
	adr	x1, boot_mmu_config
	load_xregs x1, 0, 2, 6
	/*
	 * x0 = core_pos
	 * x2 = tcr_el1
	 * x3 = mair_el1
	 * x4 = ttbr0_el1_base
	 * x5 = ttbr0_core_offset
	 * x6 = load_offset
	 */
	msr	tcr_el1, x2
	msr	mair_el1, x3

	/*
	 * ttbr0_el1 = ttbr0_el1_base + ttbr0_core_offset * core_pos
	 */
	madd	x1, x5, x0, x4
	msr	ttbr0_el1, x1
	msr	ttbr1_el1, xzr
	isb

	/* Invalidate TLB */
	tlbi	vmalle1

	/*
	 * Make sure translation table writes have drained into memory and
	 * the TLB invalidation is complete.
	 */
	dsb	sy
	isb

	/* Enable the MMU */
	mrs	x1, sctlr_el1
	orr	x1, x1, #SCTLR_M
	msr	sctlr_el1, x1
	isb

	/* Update vbar */
	mrs	x1, vbar_el1
	add	x1, x1, x6
	msr	vbar_el1, x1
	isb

	/* Invalidate instruction cache and branch predictor */
	ic	iallu
	isb

	/* Enable I and D cache */
	mrs	x1, sctlr_el1
	orr	x1, x1, #SCTLR_I
	orr	x1, x1, #SCTLR_C
	msr	sctlr_el1, x1
	isb

	/* Adjust stack pointers and return address */
	msr	spsel, #1
	add	sp, sp, x6
	msr	spsel, #0
	add	sp, sp, x6
	add	x30, x30, x6

	ret
END_FUNC enable_mmu

	.balign 8
DATA boot_mmu_config , :	/* struct core_mmu_config */
	.skip	CORE_MMU_CONFIG_SIZE
END_DATA boot_mmu_config
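
	/*
	 * Rough shape of boot_mmu_config as consumed by enable_mmu() above
	 * (a sketch only: the field order is inferred from the register
	 * comments in enable_mmu(), the authoritative definition is
	 * struct core_mmu_config in the C headers):
	 *
	 *	struct core_mmu_config {
	 *		uint64_t tcr_el1;
	 *		uint64_t mair_el1;
	 *		uint64_t ttbr0_el1_base;
	 *		uint64_t ttbr0_core_offset;
	 *		uint64_t load_offset;
	 *	};
	 *
	 * CORE_MMU_CONFIG_LOAD_OFFSET used in _start selects the
	 * load_offset member.
	 */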

FUNC cpu_on_handler , :
	mov	x19, x0
	mov	x20, x1
	mov	x21, x30

	adr	x0, reset_vect_table
	msr	vbar_el1, x0
	isb

	set_sctlr_el1
	isb

	/* Enable aborts now that we can receive exceptions */
	msr	daifclr, #DAIFBIT_ABT

	bl	__get_core_pos
	bl	enable_mmu

	/* Setup SP_EL0 and SP_EL1, SP will be set to SP_EL0 */
	set_sp

	mov	x0, x19
	mov	x1, x20
#ifdef CFG_CORE_FFA
	bl	boot_cpu_on_handler
	b	thread_ffa_msg_wait
#else
	mov	x30, x21
	b	boot_cpu_on_handler
#endif
END_FUNC cpu_on_handler
DECLARE_KEEP_PAGER cpu_on_handler

LOCAL_FUNC unhandled_cpu , :
	wfi
	b	unhandled_cpu
END_FUNC unhandled_cpu

	/*
	 * This macro verifies that a given vector doesn't exceed the
	 * architectural limit of 32 instructions. It is meant to be placed
	 * immediately after the last instruction in the vector and takes
	 * the vector entry label as its parameter.
	 */
	.macro check_vector_size since
	  .if (. - \since) > (32 * 4)
	    .error "Vector exceeds 32 instructions"
	  .endif
	.endm

	.section .identity_map, "ax", %progbits
	.align	11
LOCAL_FUNC reset_vect_table , :, .identity_map
	/* -----------------------------------------------------
	 * Current EL with SP0 : 0x0 - 0x180
	 * -----------------------------------------------------
	 */
SynchronousExceptionSP0:
	b	SynchronousExceptionSP0
	check_vector_size SynchronousExceptionSP0

	.align	7
IrqSP0:
	b	IrqSP0
	check_vector_size IrqSP0

	.align	7
FiqSP0:
	b	FiqSP0
	check_vector_size FiqSP0

	.align	7
SErrorSP0:
	b	SErrorSP0
	check_vector_size SErrorSP0

	/* -----------------------------------------------------
	 * Current EL with SPx : 0x200 - 0x380
	 * -----------------------------------------------------
	 */
	.align	7
SynchronousExceptionSPx:
	b	SynchronousExceptionSPx
	check_vector_size SynchronousExceptionSPx

	.align	7
IrqSPx:
	b	IrqSPx
	check_vector_size IrqSPx

	.align	7
FiqSPx:
	b	FiqSPx
	check_vector_size FiqSPx

	.align	7
SErrorSPx:
	b	SErrorSPx
	check_vector_size SErrorSPx

	/* -----------------------------------------------------
	 * Lower EL using AArch64 : 0x400 - 0x580
	 * -----------------------------------------------------
	 */
	.align	7
SynchronousExceptionA64:
	b	SynchronousExceptionA64
	check_vector_size SynchronousExceptionA64

	.align	7
IrqA64:
	b	IrqA64
	check_vector_size IrqA64

	.align	7
FiqA64:
	b	FiqA64
	check_vector_size FiqA64

	.align	7
SErrorA64:
	b	SErrorA64
	check_vector_size SErrorA64

	/* -----------------------------------------------------
	 * Lower EL using AArch32 : 0x600 - 0x780
	 * -----------------------------------------------------
	 */
	.align	7
SynchronousExceptionA32:
	b	SynchronousExceptionA32
	check_vector_size SynchronousExceptionA32

	.align	7
IrqA32:
	b	IrqA32
	check_vector_size IrqA32

	.align	7
FiqA32:
	b	FiqA32
	check_vector_size FiqA32

	.align	7
SErrorA32:
	b	SErrorA32
	check_vector_size SErrorA32

END_FUNC reset_vect_table
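
	/*
	 * Layout note on the table above: VBAR_EL1 requires the vector
	 * table to be 2 KiB aligned (hence .align 11) and each of the 16
	 * entries occupies 0x80 bytes, i.e. at most 32 AArch64
	 * instructions, which is exactly what check_vector_size asserts.
	 */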