/*
 * Copyright (c) 2013-2018, ARM Limited and Contributors. All rights reserved.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */

#include <arch.h>
#include <asm_macros.S>
#include <assert_macros.S>
#include <lib/xlat_tables/xlat_tables_defs.h>

#if !ERROR_DEPRECATED
        .globl  get_afflvl_shift
        .globl  mpidr_mask_lower_afflvls
        .globl  eret
#endif /* ERROR_DEPRECATED */
        .globl  smc

        .globl  zero_normalmem
        .globl  zeromem
        .globl  memcpy16

        .globl  disable_mmu_el1
        .globl  disable_mmu_el3
        .globl  disable_mmu_icache_el1
        .globl  disable_mmu_icache_el3

        .globl  fixup_gdt_reloc

#if SUPPORT_VFP
        .globl  enable_vfp
#endif

#if !ERROR_DEPRECATED
func get_afflvl_shift
        cmp     x0, #3
        cinc    x0, x0, eq
        mov     x1, #MPIDR_AFFLVL_SHIFT
        lsl     x0, x0, x1
        ret
endfunc get_afflvl_shift

func mpidr_mask_lower_afflvls
        cmp     x1, #3
        cinc    x1, x1, eq
        mov     x2, #MPIDR_AFFLVL_SHIFT
        lsl     x2, x1, x2
        lsr     x0, x0, x2
        lsl     x0, x0, x2
        ret
endfunc mpidr_mask_lower_afflvls


func eret
        eret
endfunc eret
#endif /* ERROR_DEPRECATED */

func smc
        smc     #0
endfunc smc

/* -----------------------------------------------------------------------
 * void zero_normalmem(void *mem, unsigned int length);
 *
 * Initialise a region in normal memory to 0. This function complies with the
 * AAPCS and can be called from C code.
 *
 * NOTE: MMU must be enabled when using this function as it can only operate
 *       on normal memory. It is intended to be mainly used from C code when
 *       the MMU is usually enabled.
 * -----------------------------------------------------------------------
 */
.equ    zero_normalmem, zeromem_dczva

/* -----------------------------------------------------------------------
 * void zeromem(void *mem, unsigned int length);
 *
 * Initialise a region of device memory to 0. This function complies with the
 * AAPCS and can be called from C code.
 *
 * NOTE: When data caches and the MMU are enabled, zero_normalmem can usually
 *       be used instead for faster zeroing.
 *
 * -----------------------------------------------------------------------
 */
func zeromem
        /* x2 is the address past the last zeroed address */
        add     x2, x0, x1
        /*
         * Use the fallback path, which does not use the DC ZVA instruction
         * and therefore does not require the MMU to be enabled.
         */
        b       .Lzeromem_dczva_fallback_entry
endfunc zeromem

/* -----------------------------------------------------------------------
 * void zeromem_dczva(void *mem, unsigned int length);
 *
 * Fill a region of normal memory of size "length" in bytes with null bytes.
 * The MMU must be enabled and the memory must be of Normal type. This is
 * because the function internally uses the DC ZVA instruction, which
 * generates an Alignment fault if used on any type of Device memory (see
 * section D3.4.9 of the ARMv8 ARM, issue k). When the MMU is disabled, all
 * memory behaves like Device-nGnRnE memory (see section D4.2.8), hence the
 * requirement for the MMU to be enabled.
 * NOTE: The code assumes that the block size, as defined in the DCZID_EL0
 *       register, is at least 16 bytes.
 *
 * -----------------------------------------------------------------------
 */
func zeromem_dczva

        /*
         * The function consists of a series of loops that zero memory one
         * byte at a time, 16 bytes at a time, or using the DC ZVA instruction
         * to zero aligned blocks, whose size is assumed to be at least
         * 16 bytes.
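         * (As an illustrative, hedged data point that is not stated in this
         * file: many implementations report DCZID_EL0.BS = 4, i.e. a 64-byte
         * zeroing block, so on such cores the DC ZVA loop below clears
         * 64 bytes per iteration.)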
         * In the case where the DC ZVA instruction cannot be used, or if the
         * first 16-byte loop would overflow, there is a fallback path that
         * does not use DC ZVA.
         * Note: The fallback path is also used by the zeromem function, which
         *       branches to it directly.
         *
         *        +---------+  zeromem_dczva
         *        |  entry  |
         *        +----+----+
         *             |
         *             v
         *        +---------+
         *        | checks  |>o-------+ (If any check fails, fallback)
         *        +----+----+         |
         *             |              |---------------+
         *             v              | Fallback path |
         *      +------+------+       |---------------+
         *      | 1 byte loop |       |
         *      +------+------+ .Lzeromem_dczva_initial_1byte_aligned_end
         *             |              |
         *             v              |
         *     +-------+-------+      |
         *     | 16 bytes loop |      |
         *     +-------+-------+      |
         *             |              |
         *             v              |
         *      +------+------+ .Lzeromem_dczva_blocksize_aligned
         *      | DC ZVA loop |       |
         *      +------+------+       |
         * +--------+  |              |
         * |        |  |              |
         * |        v  v              |
         * |   +-------+-------+ .Lzeromem_dczva_final_16bytes_aligned
         * |   | 16 bytes loop |      |
         * |   +-------+-------+      |
         * |           |              |
         * |           v              |
         * |    +------+------+ .Lzeromem_dczva_final_1byte_aligned
         * |    | 1 byte loop |       |
         * |    +-------------+       |
         * |           |              |
         * |           v              |
         * |       +---+--+           |
         * |       | exit |           |
         * |       +------+           |
         * |                          |
         * |           +--------------+   +------------------+ zeromem
         * |           | +----------------| zeromem function |
         * |           | |                +------------------+
         * |           v v
         * |    +-------------+ .Lzeromem_dczva_fallback_entry
         * |    | 1 byte loop |
         * |    +------+------+
         * |           |
         * +-----------+
         */

        /*
         * Readable names for the registers.
         *
         * Registers x0, x1 and x2 are also set by zeromem, which branches
         * into the fallback path directly, so cursor, length and stop_address
         * should not be retargeted to other registers.
         */
        cursor       .req x0 /* Start address and then current address */
        length       .req x1 /* Length in bytes of the region to zero out */
        /* Reusing x1 as length is never used after block_mask is set */
        block_mask   .req x1 /* Bitmask of the block size read in DCZID_EL0 */
        stop_address .req x2 /* Address past the last zeroed byte */
        block_size   .req x3 /* Size of a block in bytes as read in DCZID_EL0 */
        tmp1         .req x4
        tmp2         .req x5

#if ENABLE_ASSERTIONS
        /*
         * Check the M bit (MMU enabled) of the current SCTLR_EL(1|3)
         * register value and panic if the MMU is disabled.
         */
#if defined(IMAGE_BL1) || defined(IMAGE_BL31) || (defined(IMAGE_BL2) && BL2_AT_EL3)
        mrs     tmp1, sctlr_el3
#else
        mrs     tmp1, sctlr_el1
#endif

        tst     tmp1, #SCTLR_M_BIT
        ASM_ASSERT(ne)
#endif /* ENABLE_ASSERTIONS */

        /* stop_address is the address past the last to zero */
        add     stop_address, cursor, length

        /*
         * Read dczid_el0: its 4 lowest bits encode log2(<block size in
         * words>) (see the encoding of the dczid_el0 register).
         */
        mrs     block_size, dczid_el0

        /*
         * Select the 4 lowest bits and convert the extracted log2(<block size
         * in words>) to <block size in bytes>.
         */
        ubfx    block_size, block_size, #0, #4
        mov     tmp2, #(1 << 2)
        lsl     block_size, tmp2, block_size

#if ENABLE_ASSERTIONS
        /*
         * Assumes the block size is at least 16 bytes to avoid manual
         * realignment of the cursor at the end of the DC ZVA loop.
         */
        cmp     block_size, #16
        ASM_ASSERT(hs)
#endif
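
        /*
         * For reference, a hedged C equivalent of the block size decode
         * above (the helper name is illustrative only and this snippet is
         * not part of the build):
         *
         *     uint64_t bs = read_dczid_el0() & 0xfUL;   // log2(block in words)
         *     uint64_t block_size = 4UL << bs;          // bytes per DC ZVA
         *     uint64_t block_mask = block_size - 1UL;   // computed further down
         *
         * e.g. BS = 4 gives a 64-byte block and a mask of 0x3f.
         */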

        /*
         * It is not worth doing all the setup for a region smaller than a
         * block; this check also protects against zeroing a whole block when
         * the area to zero is smaller than one. Also, as the block size is
         * assumed to be at least 16 bytes, it protects the initial aligning
         * loops from trying to zero 16 bytes when length is less than 16.
         */
        cmp     length, block_size
        b.lo    .Lzeromem_dczva_fallback_entry

        /*
         * Calculate the bitmask of the block alignment. It will never
         * underflow as the block size is between 4 bytes and 2kB.
         * block_mask = block_size - 1
         */
        sub     block_mask, block_size, #1

        /*
         * The length alias should not be used after this point unless it is
         * defined as a register other than block_mask's.
         */
        .unreq  length

        /*
         * If the start address is already aligned to the zero block size, go
         * straight to the cache zeroing loop. This is safe because at this
         * point, the length cannot be smaller than a block size.
         */
        tst     cursor, block_mask
        b.eq    .Lzeromem_dczva_blocksize_aligned

        /*
         * Calculate the first block-size-aligned address. It is assumed that
         * the zero block size is at least 16 bytes. This address is the last
         * address of this initial loop.
         */
        orr     tmp1, cursor, block_mask
        add     tmp1, tmp1, #1

        /*
         * If the addition overflows, skip the cache zeroing loops. This is
         * quite unlikely however.
         */
        cbz     tmp1, .Lzeromem_dczva_fallback_entry

        /*
         * If the first block-size-aligned address is past the last address,
         * fall back to the simpler code.
         */
        cmp     tmp1, stop_address
        b.hi    .Lzeromem_dczva_fallback_entry

        /*
         * If the start address is already aligned to 16 bytes, skip this
         * loop. It is safe to do this because tmp1 (the stop address of the
         * initial 16-byte loop) will never be greater than the final stop
         * address.
         */
        tst     cursor, #0xf
        b.eq    .Lzeromem_dczva_initial_1byte_aligned_end

        /* Calculate the next address aligned to 16 bytes */
        orr     tmp2, cursor, #0xf
        add     tmp2, tmp2, #1
        /* If it overflows, fall back to the simple path (unlikely) */
        cbz     tmp2, .Lzeromem_dczva_fallback_entry
        /*
         * The next aligned address cannot be after the stop address because
         * the length cannot be smaller than 16 at this point.
         */

        /* First loop: zero byte per byte */
1:
        strb    wzr, [cursor], #1
        cmp     cursor, tmp2
        b.ne    1b
.Lzeromem_dczva_initial_1byte_aligned_end:

        /*
         * Second loop: we need to zero 16 bytes at a time from cursor to tmp1
         * before being able to use the code that deals with block-size-aligned
         * addresses.
         */
        cmp     cursor, tmp1
        b.hs    2f
1:
        stp     xzr, xzr, [cursor], #16
        cmp     cursor, tmp1
        b.lo    1b
2:

        /*
         * Third loop: zero a block at a time using the DC ZVA cache block
         * zeroing instruction.
         */
.Lzeromem_dczva_blocksize_aligned:
        /*
         * Calculate the last block-size-aligned address. If the result equals
         * the start address, the loop will exit immediately.
         */
        bic     tmp1, stop_address, block_mask

        cmp     cursor, tmp1
        b.hs    2f
1:
        /* Zero the block containing the cursor */
        dc      zva, cursor
        /* Increment the cursor by the size of a block */
        add     cursor, cursor, block_size
        cmp     cursor, tmp1
        b.lo    1b
2:
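
        /*
         * Worked illustration of the alignment arithmetic above, with made-up
         * values that do not come from this file: for block_size = 64
         * (block_mask = 0x3f), a cursor of 0x1010 is rounded up to the next
         * block boundary as (0x1010 | 0x3f) + 1 = 0x1040, while a
         * stop_address of 0x20f0 is rounded down with BIC to 0x20c0, which
         * bounds the DC ZVA loop above.
         */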

        /*
         * Fourth loop: zero 16 bytes at a time and then byte per byte the
         * remaining area.
         */
.Lzeromem_dczva_final_16bytes_aligned:
        /*
         * Calculate the last 16-byte-aligned address. It is assumed that the
         * block size will never be smaller than 16 bytes, so that the current
         * cursor is aligned to at least a 16-byte boundary.
         */
        bic     tmp1, stop_address, #15

        cmp     cursor, tmp1
        b.hs    2f
1:
        stp     xzr, xzr, [cursor], #16
        cmp     cursor, tmp1
        b.lo    1b
2:

        /* Fifth and final loop: zero byte per byte */
.Lzeromem_dczva_final_1byte_aligned:
        cmp     cursor, stop_address
        b.eq    2f
1:
        strb    wzr, [cursor], #1
        cmp     cursor, stop_address
        b.ne    1b
2:
        ret

        /* Fallback for unaligned start addresses */
.Lzeromem_dczva_fallback_entry:
        /*
         * If the start address is already aligned to 16 bytes, skip this
         * loop.
         */
        tst     cursor, #0xf
        b.eq    .Lzeromem_dczva_final_16bytes_aligned

        /* Calculate the next address aligned to 16 bytes */
        orr     tmp1, cursor, #15
        add     tmp1, tmp1, #1
        /* If it overflows, fall back to byte per byte zeroing */
        cbz     tmp1, .Lzeromem_dczva_final_1byte_aligned
        /* If the next aligned address is after the stop address, fall back */
        cmp     tmp1, stop_address
        b.hs    .Lzeromem_dczva_final_1byte_aligned

        /* Fallback entry loop: zero byte per byte */
1:
        strb    wzr, [cursor], #1
        cmp     cursor, tmp1
        b.ne    1b

        b       .Lzeromem_dczva_final_16bytes_aligned

        .unreq  cursor
        /*
         * length is already unreq'ed to reuse the register for another
         * variable.
         */
        .unreq  stop_address
        .unreq  block_size
        .unreq  block_mask
        .unreq  tmp1
        .unreq  tmp2
endfunc zeromem_dczva

/* --------------------------------------------------------------------------
 * void memcpy16(void *dest, const void *src, unsigned int length)
 *
 * Copy "length" bytes from memory area "src" to memory area "dest".
 * The memory areas must not overlap.
 * Destination and source addresses must be 16-byte aligned.
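 *
 * For reference only, a hedged C-equivalent sketch of this routine (not part
 * of the build; it assumes <stdint.h> and that the alignment and non-overlap
 * requirements above hold):
 *
 *     void memcpy16(void *dest, const void *src, unsigned int length)
 *     {
 *         uint64_t *d = dest;                       // 16-byte aligned
 *         const uint64_t *s = src;                  // 16-byte aligned
 *         while (length >= 16U) {                   // mirrors m_loop16
 *             *d++ = *s++;
 *             *d++ = *s++;
 *             length -= 16U;
 *         }
 *         unsigned char *db = (unsigned char *)d;
 *         const unsigned char *sb = (const unsigned char *)s;
 *         while (length-- != 0U)                    // mirrors m_loop1
 *             *db++ = *sb++;
 *     }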
 * --------------------------------------------------------------------------
 */
func memcpy16
#if ENABLE_ASSERTIONS
        orr     x3, x0, x1
        tst     x3, #0xf
        ASM_ASSERT(eq)
#endif
/* copy 16 bytes at a time */
m_loop16:
        cmp     x2, #16
        b.lo    m_loop1
        ldp     x3, x4, [x1], #16
        stp     x3, x4, [x0], #16
        sub     x2, x2, #16
        b       m_loop16
/* copy byte per byte */
m_loop1:
        cbz     x2, m_end
        ldrb    w3, [x1], #1
        strb    w3, [x0], #1
        subs    x2, x2, #1
        b.ne    m_loop1
m_end:
        ret
endfunc memcpy16

/* ---------------------------------------------------------------------------
 * Disable the MMU at EL3
 * ---------------------------------------------------------------------------
 */

func disable_mmu_el3
        mov     x1, #(SCTLR_M_BIT | SCTLR_C_BIT)
do_disable_mmu_el3:
        mrs     x0, sctlr_el3
        bic     x0, x0, x1
        msr     sctlr_el3, x0
        isb     /* ensure MMU is off */
        dsb     sy
        ret
endfunc disable_mmu_el3


func disable_mmu_icache_el3
        mov     x1, #(SCTLR_M_BIT | SCTLR_C_BIT | SCTLR_I_BIT)
        b       do_disable_mmu_el3
endfunc disable_mmu_icache_el3

/* ---------------------------------------------------------------------------
 * Disable the MMU at EL1
 * ---------------------------------------------------------------------------
 */

func disable_mmu_el1
        mov     x1, #(SCTLR_M_BIT | SCTLR_C_BIT)
do_disable_mmu_el1:
        mrs     x0, sctlr_el1
        bic     x0, x0, x1
        msr     sctlr_el1, x0
        isb     /* ensure MMU is off */
        dsb     sy
        ret
endfunc disable_mmu_el1


func disable_mmu_icache_el1
        mov     x1, #(SCTLR_M_BIT | SCTLR_C_BIT | SCTLR_I_BIT)
        b       do_disable_mmu_el1
endfunc disable_mmu_icache_el1

/* ---------------------------------------------------------------------------
 * Enable the use of VFP at EL3
 * ---------------------------------------------------------------------------
 */
#if SUPPORT_VFP
func enable_vfp
        mrs     x0, cpacr_el1
        orr     x0, x0, #CPACR_VFP_BITS
        msr     cpacr_el1, x0
        mrs     x0, cptr_el3
        mov     x1, #AARCH64_CPTR_TFP
        bic     x0, x0, x1
        msr     cptr_el3, x0
        isb
        ret
endfunc enable_vfp
#endif

/* ---------------------------------------------------------------------------
 * Helper to fix up the Global Offset Table (GOT) and dynamic relocations
 * (.rela.dyn) at runtime.
 *
 * This function is meant to be used when the firmware is compiled with -fpie
 * and linked with -pie options. We rely on the linker script exporting
 * appropriate markers for the start and end of the sections. For the GOT, we
 * expect __GOT_START__ and __GOT_END__. Similarly for .rela.dyn, we expect
 * __RELA_START__ and __RELA_END__.
 *
 * The function takes the limits of the memory to apply fixups to as
 * arguments (which are usually the limits of the relocatable BL image):
 * x0 - the start of the fixup region
 * x1 - the limit of the fixup region
 * These addresses have to be page (4KB) aligned.
 * ---------------------------------------------------------------------------
 */
func fixup_gdt_reloc
        mov     x6, x0
        mov     x7, x1

        /* Test if the limits are 4K aligned */
#if ENABLE_ASSERTIONS
        orr     x0, x0, x1
        tst     x0, #(PAGE_SIZE - 1)
        ASM_ASSERT(eq)
#endif
        /*
         * Calculate the offset based on the return address in x30.
         * Assume that this function is called within a page of the start of
         * the fixup region.
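         *
         * Hedged numeric illustration (the addresses are made up): if the
         * image was linked to run from x6 = 0x04000000 but x30 currently
         * lies in the first page at 0x88000123, then x2 below becomes
         * 0x88000000 and Diff(S) = 0x88000000 - 0x04000000 = 0x84000000 is
         * the amount added to every pointer that needs fixing up.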
         */
        and     x2, x30, #~(PAGE_SIZE - 1)
        sub     x0, x2, x6      /* Diff(S) = Current Address - Compiled Address */

        adrp    x1, __GOT_START__
        add     x1, x1, :lo12:__GOT_START__
        adrp    x2, __GOT_END__
        add     x2, x2, :lo12:__GOT_END__

        /*
         * The GOT is an array of 64-bit addresses which must be fixed up as
         * new_addr = old_addr + Diff(S).
         * new_addr is the address the binary is currently executing from and
         * old_addr is the address at compile time.
         */
1:
        ldr     x3, [x1]
        /* Skip adding the offset if the address is < the lower limit */
        cmp     x3, x6
        b.lo    2f
        /* Skip adding the offset if the address is >= the upper limit */
        cmp     x3, x7
        b.ge    2f
        add     x3, x3, x0
        str     x3, [x1]
2:
        add     x1, x1, #8
        cmp     x1, x2
        b.lo    1b

        /*
         * Start of the dynamic relocations. Use adrp/add to get the
         * addresses of __RELA_START__ and __RELA_END__.
         */
        adrp    x1, __RELA_START__
        add     x1, x1, :lo12:__RELA_START__
        adrp    x2, __RELA_END__
        add     x2, x2, :lo12:__RELA_END__
        /*
         * According to the ELF-64 specification, the RELA data structure is
         * as follows:
         *      typedef struct {
         *              Elf64_Addr r_offset;
         *              Elf64_Xword r_info;
         *              Elf64_Sxword r_addend;
         *      } Elf64_Rela;
         *
         * r_offset is the address of the reference.
         * r_info is the symbol index and the type of relocation (in this
         * case 0x403, which corresponds to R_AARCH64_RELATIVE).
         * r_addend is the constant part of the expression.
         *
         * The size of the Elf64_Rela structure is 24 bytes.
         */
1:
        /* Assert that the relocation type is R_AARCH64_RELATIVE */
#if ENABLE_ASSERTIONS
        ldr     x3, [x1, #8]
        cmp     x3, #0x403
        ASM_ASSERT(eq)
#endif
        ldr     x3, [x1]        /* r_offset */
        add     x3, x0, x3
        ldr     x4, [x1, #16]   /* r_addend */

        /* Skip adding the offset if r_addend is < the lower limit */
        cmp     x4, x6
        b.lo    2f
        /* Skip adding the offset if r_addend is >= the upper limit */
        cmp     x4, x7
        b.ge    2f

        add     x4, x0, x4      /* Diff(S) + r_addend */
        str     x4, [x3]

2:      add     x1, x1, #24
        cmp     x1, x2
        b.lo    1b

        ret
endfunc fixup_gdt_reloc
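
/* ---------------------------------------------------------------------------
 * Reference-only C sketch of the fixup performed by fixup_gdt_reloc above.
 * This is a hedged illustration: the function, type and parameter names are
 * made up, it is not part of this build, and it glosses over the signedness
 * of the limit comparisons used by the assembly.
 *
 *     #include <stdint.h>
 *
 *     typedef struct {
 *         uint64_t r_offset;
 *         uint64_t r_info;      // expected to be 0x403 (R_AARCH64_RELATIVE)
 *         int64_t  r_addend;
 *     } rela_entry_t;
 *
 *     static void fixup_sketch(uint64_t *got, uint64_t *got_end,
 *                              rela_entry_t *rela, rela_entry_t *rela_end,
 *                              uint64_t lower, uint64_t upper, uint64_t diff)
 *     {
 *         // GOT: add diff to every entry that points inside [lower, upper).
 *         for (; got < got_end; got++) {
 *             if (*got >= lower && *got < upper)
 *                 *got += diff;
 *         }
 *         // R_AARCH64_RELATIVE: store diff + r_addend at the relocated
 *         // r_offset, again only for addends inside [lower, upper).
 *         for (; rela < rela_end; rela++) {
 *             uint64_t addend = (uint64_t)rela->r_addend;
 *             if (addend >= lower && addend < upper)
 *                 *(uint64_t *)(diff + rela->r_offset) = diff + addend;
 *         }
 *     }
 * ---------------------------------------------------------------------------
 */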