/* SPDX-License-Identifier: BSD-2-Clause */
/*
 * Copyright (c) 2015, Linaro Limited
 */

#include <platform_config.h>

#include <arm64_macros.S>
#include <arm.h>
#include <asm.S>
#include <generated/asm-defines.h>
#include <keep.h>
#include <sm/optee_smc.h>
#include <sm/teesmc_opteed.h>
#include <sm/teesmc_opteed_macros.h>

	/*
	 * Set up SP_EL0 and SP_EL1, SP will be set to SP_EL0.
	 * SP_EL0 is assigned stack_tmp_export + cpu_id * stack_tmp_stride
	 * SP_EL1 is assigned thread_core_local[cpu_id]
	 */
	.macro set_sp
		bl	__get_core_pos
		cmp	x0, #CFG_TEE_CORE_NB_CORE
		/* Unsupported CPU, park it before it breaks something */
		b.ge	unhandled_cpu
		adr	x1, stack_tmp_stride
		ldr	w1, [x1]
		mul	x1, x0, x1
		adrp	x0, stack_tmp_export
		add	x0, x0, :lo12:stack_tmp_export
		ldr	x0, [x0]
		msr	spsel, #0
		add	sp, x1, x0
		bl	thread_get_core_local
		msr	spsel, #1
		mov	sp, x0
		msr	spsel, #0
	.endm

	.macro set_sctlr_el1
		mrs	x0, sctlr_el1
		orr	x0, x0, #SCTLR_I
		orr	x0, x0, #SCTLR_SA
		orr	x0, x0, #SCTLR_SPAN
#if defined(CFG_CORE_RWDATA_NOEXEC)
		orr	x0, x0, #SCTLR_WXN
#endif
#if defined(CFG_SCTLR_ALIGNMENT_CHECK)
		orr	x0, x0, #SCTLR_A
#else
		bic	x0, x0, #SCTLR_A
#endif
		msr	sctlr_el1, x0
	.endm

FUNC _start , :
	mov	x19, x0		/* Save pageable part address */
#if defined(CFG_DT_ADDR)
	ldr	x20, =CFG_DT_ADDR
#else
	mov	x20, x2		/* Save DT address */
#endif

	adr	x0, reset_vect_table
	msr	vbar_el1, x0
	isb

	set_sctlr_el1
	isb

#ifdef CFG_WITH_PAGER
	/*
	 * Move the init code into its correct location and move the hashes
	 * to a temporary safe location until the heap is initialized.
	 *
	 * The binary is built as:
	 * [Pager code, rodata and data] : In correct location
	 * [Init code and rodata] : Should be copied to __init_start
	 * [struct boot_embdata + data] : Should be saved before
	 * initializing pager, first uint32_t tells the length of the data
	 */
	adr	x0, __init_start	/* dst */
	adr	x1, __data_end		/* src */
	adr	x2, __init_end
	sub	x2, x2, x0		/* init len */
	ldr	w4, [x1, x2]		/* length of hashes etc */
	add	x2, x2, x4		/* length of init and hashes etc */
	/* Copy backwards (as memmove) in case we're overlapping */
	add	x0, x0, x2		/* __init_start + len */
	add	x1, x1, x2		/* __data_end + len */
	adr	x3, cached_mem_end
	str	x0, [x3]
	adr	x2, __init_start
copy_init:
	ldp	x3, x4, [x1, #-16]!
	stp	x3, x4, [x0, #-16]!
	cmp	x0, x2
	b.gt	copy_init
#else
	/*
	 * The binary is built as:
	 * [Core, rodata and data] : In correct location
	 * [struct boot_embdata + data] : Should be moved to __end, first
	 * uint32_t tells the length of the struct + data
	 */
	adr_l	x0, __end		/* dst */
	adr_l	x1, __data_end		/* src */
	ldr	w2, [x1]		/* struct boot_embdata::total_len */
	/* Copy backwards (as memmove) in case we're overlapping */
	add	x0, x0, x2
	add	x1, x1, x2
	adr	x3, cached_mem_end
	str	x0, [x3]
	adr_l	x2, __end

copy_init:
	ldp	x3, x4, [x1, #-16]!
	stp	x3, x4, [x0, #-16]!
	cmp	x0, x2
	b.gt	copy_init
#endif
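
	/*
	 * For reference, a rough C sketch of the copy above (illustration
	 * only, shown for the non-pager layout; cached_mem_end is the
	 * data word defined further down):
	 *
	 *	uint32_t len = ((struct boot_embdata *)__data_end)->total_len;
	 *	memmove(__end, __data_end, len);
	 *	cached_mem_end = (vaddr_t)__end + len;
	 */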

	/*
	 * Clear .bss, this code obviously depends on the linker keeping
	 * the start/end of .bss at least 8 byte aligned.
	 */
	adr_l	x0, __bss_start
	adr_l	x1, __bss_end
clear_bss:
	str	xzr, [x0], #8
	cmp	x0, x1
	b.lt	clear_bss

#ifdef CFG_VIRTUALIZATION
	/*
	 * Clear .nex_bss, this code obviously depends on the linker keeping
	 * the start/end of .nex_bss at least 8 byte aligned.
	 */
	adr	x0, __nex_bss_start
	adr	x1, __nex_bss_end
clear_nex_bss:
	str	xzr, [x0], #8
	cmp	x0, x1
	b.lt	clear_nex_bss
#endif

	/* Set up SP_EL0 and SP_EL1, SP will be set to SP_EL0 */
	set_sp

	/* curr_thread needs to be -1 until threads are properly initialized */
	bl	thread_clr_thread_core_local

	/* Enable aborts now that we can receive exceptions */
	msr	daifclr, #DAIFBIT_ABT

	/*
	 * Invalidate dcache for all memory used during initialization to
	 * avoid nasty surprises when the cache is turned on. We must not
	 * invalidate memory not used by OP-TEE since we may invalidate
	 * entries used by, for instance, ARM Trusted Firmware.
	 */
	adr_l	x0, __text_start
	ldr	x1, cached_mem_end
	sub	x1, x1, x0
	bl	dcache_cleaninv_range

	/* Enable console */
	bl	console_init

#ifdef CFG_CORE_ASLR
	mov	x0, x20
	bl	get_aslr_seed
#else
	mov	x0, #0
#endif

	adr	x1, boot_mmu_config
	bl	core_init_mmu_map

#ifdef CFG_CORE_ASLR
	/*
	 * Process the relocation information again, updating for the new
	 * offset. We're doing this now, before the MMU is enabled, as some
	 * of the memory will become write protected.
	 */
	ldr	x0, boot_mmu_config + CORE_MMU_CONFIG_LOAD_OFFSET
	/*
	 * Update the cached_mem_end address with the load offset since it
	 * was calculated before relocation.
	 */
	adr	x5, cached_mem_end
	ldr	x6, [x5]
	add	x6, x6, x0
	str	x6, [x5]
	bl	relocate
#endif

	bl	__get_core_pos
	bl	enable_mmu
#ifdef CFG_CORE_ASLR
	/*
	 * Reinitialize the console, since register_serial_console() has
	 * previously registered a PA and with ASLR the VA is different
	 * from the PA.
	 */
	bl	console_init
#endif

	mov	x0, x19		/* pageable part address */
	mov	x1, #-1
	mov	x2, x20		/* DT address */
	bl	boot_init_primary

	/*
	 * In case we've touched memory that secondary CPUs will use before
	 * they have turned on their D-cache, clean and invalidate the
	 * D-cache before exiting to normal world.
	 */
	adr_l	x0, __text_start
	ldr	x1, cached_mem_end
	sub	x1, x1, x0
	bl	dcache_cleaninv_range

	/*
	 * Clear the current thread id now to allow the thread to be reused
	 * on the next entry. Matches thread_init_boot_thread() in boot.c.
	 */
#ifndef CFG_VIRTUALIZATION
	bl	thread_clr_boot_thread
#endif

	/*
	 * Pass the vector address to the secure monitor.
	 * Compensate for the load offset since cpu_on_handler() is
	 * called with the MMU off.
	 */
	ldr	x0, boot_mmu_config + CORE_MMU_CONFIG_LOAD_OFFSET
	adr	x1, thread_vector_table
	sub	x1, x1, x0
	mov	x0, #TEESMC_OPTEED_RETURN_ENTRY_DONE
	smc	#0
	b	.	/* SMC should not return */
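
	/*
	 * At the SMC above x0 holds TEESMC_OPTEED_RETURN_ENTRY_DONE and
	 * x1 the address of thread_vector_table with the load offset
	 * subtracted. The dispatcher in the secure monitor is expected to
	 * record x1 and use it as the entry point for later calls into
	 * OP-TEE.
	 */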
END_FUNC _start
DECLARE_KEEP_INIT _start

	.balign	8
LOCAL_DATA cached_mem_end , :
	.skip	8
END_DATA cached_mem_end

#ifdef CFG_CORE_ASLR
LOCAL_FUNC relocate , :
	/* x0 holds load offset */
#ifdef CFG_WITH_PAGER
	adr_l	x6, __init_end
#else
	adr_l	x6, __end
#endif
	ldp	w2, w3, [x6, #BOOT_EMBDATA_RELOC_OFFSET]

	mov_imm	x1, TEE_RAM_START
	add	x2, x2, x6	/* start of relocations */
	add	x3, x3, x2	/* end of relocations */

	/*
	 * The relocations are not formatted as Rela64, instead they are in
	 * a compressed format created by get_reloc_bin() in
	 * scripts/gen_tee_bin.py.
	 *
	 * All the R_AARCH64_RELATIVE relocations are translated into a
	 * list of 32-bit offsets from TEE_RAM_START. Each offset points
	 * out a 64-bit value which is increased with the load offset.
	 */

#ifdef CFG_WITH_PAGER
	/*
	 * With the pager enabled we can only relocate the pager and init
	 * parts, the rest has to be done when a page is populated.
	 */
	sub	x6, x6, x1
#endif

	b	2f
	/* Loop over the relocation addresses and process all entries */
1:	ldr	w4, [x2], #4
#ifdef CFG_WITH_PAGER
	/* Skip too large addresses */
	cmp	x4, x6
	b.ge	2f
#endif
	add	x4, x4, x1
	ldr	x5, [x4]
	add	x5, x5, x0
	str	x5, [x4]

2:	cmp	x2, x3
	b.ne	1b

	ret
END_FUNC relocate
#endif
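
	/*
	 * Rough C sketch of the relocation loop above (illustration only,
	 * pager handling omitted; embdata, reloc_offset, reloc_len and
	 * load_offset name the values held in x6, w2, w3 and x0):
	 *
	 *	uint32_t *rel = (uint32_t *)(embdata + reloc_offset);
	 *	uint32_t *rel_end = (uint32_t *)((uint8_t *)rel + reloc_len);
	 *	for (; rel != rel_end; rel++)
	 *		*(uint64_t *)(TEE_RAM_START + *rel) += load_offset;
	 */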

/*
 * void enable_mmu(unsigned long core_pos);
 *
 * This function depends on being mapped in the identity map, where the
 * physical and virtual addresses are the same. After the MMU has been
 * enabled the instruction pointer will be updated to execute at the new
 * offset instead. Stack pointers and the return address are updated.
 */
LOCAL_FUNC enable_mmu , : , .identity_map
	adr	x1, boot_mmu_config
	load_xregs x1, 0, 2, 6
	/*
	 * x0 = core_pos
	 * x2 = tcr_el1
	 * x3 = mair_el1
	 * x4 = ttbr0_el1_base
	 * x5 = ttbr0_core_offset
	 * x6 = load_offset
	 */
	msr	tcr_el1, x2
	msr	mair_el1, x3

	/*
	 * ttbr0_el1 = ttbr0_el1_base + ttbr0_core_offset * core_pos
	 */
	madd	x1, x5, x0, x4
	msr	ttbr0_el1, x1
	msr	ttbr1_el1, xzr
	isb

	/* Invalidate TLB */
	tlbi	vmalle1

	/*
	 * Make sure translation table writes have drained into memory and
	 * the TLB invalidation is complete.
	 */
	dsb	sy
	isb

	/* Enable the MMU */
	mrs	x1, sctlr_el1
	orr	x1, x1, #SCTLR_M
	msr	sctlr_el1, x1
	isb

	/* Update vbar */
	mrs	x1, vbar_el1
	add	x1, x1, x6
	msr	vbar_el1, x1
	isb

	/* Invalidate instruction cache and branch predictor */
	ic	iallu
	isb

	/* Enable I and D cache */
	mrs	x1, sctlr_el1
	orr	x1, x1, #SCTLR_I
	orr	x1, x1, #SCTLR_C
	msr	sctlr_el1, x1
	isb

	/* Adjust stack pointers and return address */
	msr	spsel, #1
	add	sp, sp, x6
	msr	spsel, #0
	add	sp, sp, x6
	add	x30, x30, x6

	ret
END_FUNC enable_mmu

	.balign	8
DATA boot_mmu_config , : /* struct core_mmu_config */
	.skip	CORE_MMU_CONFIG_SIZE
END_DATA boot_mmu_config

FUNC cpu_on_handler , :
	mov	x19, x0
	mov	x20, x1
	mov	x21, x30

	adr	x0, reset_vect_table
	msr	vbar_el1, x0
	isb

	set_sctlr_el1
	isb

	/* Enable aborts now that we can receive exceptions */
	msr	daifclr, #DAIFBIT_ABT

	bl	__get_core_pos
	bl	enable_mmu

	/* Set up SP_EL0 and SP_EL1, SP will be set to SP_EL0 */
	set_sp

	mov	x0, x19
	mov	x1, x20
	mov	x30, x21
	b	boot_cpu_on_handler
END_FUNC cpu_on_handler
DECLARE_KEEP_PAGER cpu_on_handler

LOCAL_FUNC unhandled_cpu , :
	wfi
	b	unhandled_cpu
END_FUNC unhandled_cpu

	/*
	 * This macro verifies that a given vector doesn't exceed the
	 * architectural limit of 32 instructions. It is meant to be placed
	 * immediately after the last instruction in the vector and takes
	 * the vector entry as the parameter.
	 */
	.macro check_vector_size since
	  .if (. - \since) > (32 * 4)
	    .error "Vector exceeds 32 instructions"
	  .endif
	.endm
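
	/*
	 * For reference: each vector entry below starts at a ".align 7"
	 * boundary, that is, 2^7 = 128 bytes, which is room for exactly
	 * 32 four-byte instructions. check_vector_size turns any spill
	 * into the next entry into a build error.
	 */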

	.section .identity_map, "ax", %progbits
	.align	11
LOCAL_FUNC reset_vect_table , :, .identity_map
	/* -----------------------------------------------------
	 * Current EL with SP0 : 0x0 - 0x180
	 * -----------------------------------------------------
	 */
SynchronousExceptionSP0:
	b	SynchronousExceptionSP0
	check_vector_size SynchronousExceptionSP0

	.align	7
IrqSP0:
	b	IrqSP0
	check_vector_size IrqSP0

	.align	7
FiqSP0:
	b	FiqSP0
	check_vector_size FiqSP0

	.align	7
SErrorSP0:
	b	SErrorSP0
	check_vector_size SErrorSP0

	/* -----------------------------------------------------
	 * Current EL with SPx : 0x200 - 0x380
	 * -----------------------------------------------------
	 */
	.align	7
SynchronousExceptionSPx:
	b	SynchronousExceptionSPx
	check_vector_size SynchronousExceptionSPx

	.align	7
IrqSPx:
	b	IrqSPx
	check_vector_size IrqSPx

	.align	7
FiqSPx:
	b	FiqSPx
	check_vector_size FiqSPx

	.align	7
SErrorSPx:
	b	SErrorSPx
	check_vector_size SErrorSPx

	/* -----------------------------------------------------
	 * Lower EL using AArch64 : 0x400 - 0x580
	 * -----------------------------------------------------
	 */
	.align	7
SynchronousExceptionA64:
	b	SynchronousExceptionA64
	check_vector_size SynchronousExceptionA64

	.align	7
IrqA64:
	b	IrqA64
	check_vector_size IrqA64

	.align	7
FiqA64:
	b	FiqA64
	check_vector_size FiqA64

	.align	7
SErrorA64:
	b	SErrorA64
	check_vector_size SErrorA64

	/* -----------------------------------------------------
	 * Lower EL using AArch32 : 0x600 - 0x780
	 * -----------------------------------------------------
	 */
	.align	7
SynchronousExceptionA32:
	b	SynchronousExceptionA32
	check_vector_size SynchronousExceptionA32

	.align	7
IrqA32:
	b	IrqA32
	check_vector_size IrqA32

	.align	7
FiqA32:
	b	FiqA32
	check_vector_size FiqA32

	.align	7
SErrorA32:
	b	SErrorA32
	check_vector_size SErrorA32

END_FUNC reset_vect_table