/*
 * Copyright (c) 2013-2025, Arm Limited and Contributors. All rights reserved.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */

#include <arch.h>
#include <asm_macros.S>
#include <assert_macros.S>
#include <common/bl_common.h>
#include <lib/xlat_tables/xlat_tables_defs.h>

        .globl  smc

        .globl  zero_normalmem
        .globl  zeromem
        .globl  memcpy16

        .globl  disable_mmu_el1
        .globl  disable_mmu_el3
        .globl  disable_mmu_icache_el1
        .globl  disable_mmu_icache_el3
        .globl  fixup_gdt_reloc

func smc
        smc     #0
endfunc smc

/* -----------------------------------------------------------------------
 * void zero_normalmem(void *mem, unsigned int length);
 *
 * Initialise a region in normal memory to 0. This function complies with the
 * AAPCS and can be called from C code.
 *
 * NOTE: MMU must be enabled when using this function as it can only operate on
 *       normal memory. It is intended to be mainly used from C code when MMU
 *       is usually enabled.
 * -----------------------------------------------------------------------
 */
.equ    zero_normalmem, zeromem_dczva

/* -----------------------------------------------------------------------
 * void zeromem(void *mem, unsigned int length);
 *
 * Initialise a region of device memory to 0. This function complies with the
 * AAPCS and can be called from C code.
 *
 * NOTE: When data caches and MMU are enabled, zero_normalmem can usually be
 *       used instead for faster zeroing.
 *
 * -----------------------------------------------------------------------
 */
func zeromem
        /* x2 is the address past the last zeroed address */
        add     x2, x0, x1
        /*
         * Use the fallback path that does not use the DC ZVA instruction and
         * therefore does not need the MMU to be enabled.
         */
        b       .Lzeromem_dczva_fallback_entry
endfunc zeromem
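/*
 * Illustrative C-level usage of the two zeroing entry points above (a minimal
 * sketch, not part of this file's interface; scratch_buf and the wipe_*
 * functions are hypothetical). Both entry points follow the AAPCS, so from C:
 *
 *      extern void zero_normalmem(void *mem, unsigned int length);
 *      extern void zeromem(void *mem, unsigned int length);
 *
 *      // Hypothetical buffer, used only for illustration.
 *      static unsigned char scratch_buf[256];
 *
 *      void wipe_scratch(void)
 *      {
 *              // MMU and data cache enabled: DC ZVA based variant.
 *              zero_normalmem(scratch_buf, sizeof(scratch_buf));
 *      }
 *
 *      void wipe_scratch_early(void)
 *      {
 *              // MMU still disabled: plain store variant.
 *              zeromem(scratch_buf, sizeof(scratch_buf));
 *      }
 */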
/* -----------------------------------------------------------------------
 * void zeromem_dczva(void *mem, unsigned int length);
 *
 * Fill a region of normal memory of size "length" in bytes with null bytes.
 * The MMU must be enabled and the memory must be of normal type. This is
 * because this function internally uses the DC ZVA instruction, which
 * generates an Alignment fault if used on any type of Device memory (see
 * section D3.4.9 of the ARMv8 ARM, issue k). When the MMU is disabled, all
 * memory behaves like Device-nGnRnE memory (see section D4.2.8), hence the
 * requirement on the MMU being enabled.
 * NOTE: The code assumes that the block size as defined in the DCZID_EL0
 *       register is at least 16 bytes.
 * -----------------------------------------------------------------------
 */
func zeromem_dczva

        /*
         * The function consists of a series of loops that zero memory one byte
         * at a time, 16 bytes at a time or using the DC ZVA instruction to
         * zero aligned blocks of bytes, which are assumed to be larger than
         * 16 bytes. In the case where the DC ZVA instruction cannot be used or
         * if the first 16 bytes loop would overflow, there is a fallback path
         * that does not use DC ZVA.
         * Note: The fallback path is also used by the zeromem function that
         *       branches to it directly.
         *
         *      +---------+   zeromem_dczva
         *      |  entry  |
         *      +----+----+
         *           |
         *           v
         *      +---------+
         *      | checks  |>o-------+ (If any check fails, fallback)
         *      +----+----+         |
         *           |              |---------------+
         *           v              | Fallback path |
         *      +----+--------+     |---------------+
         *      | 1 byte loop |     |
         *      +----+--------+     .Lzeromem_dczva_initial_1byte_aligned_end
         *           |              |
         *           v              |
         *      +----+----------+   |
         *      | 16 bytes loop |   |
         *      +----+----------+   |
         *           |              |
         *           v              |
         *      +----+--------+     .Lzeromem_dczva_blocksize_aligned
         *      | DC ZVA loop |     |
         *      +----+--------+     |
         * +--------+ |             |
         * |          |             |
         * |          v             |
         * |    +-----+---------+   .Lzeromem_dczva_final_16bytes_aligned
         * |    | 16 bytes loop |   |
         * |    +-----+---------+   |
         * |          |             |
         * |          v             |
         * |    +-----+-------+     .Lzeromem_dczva_final_1byte_aligned
         * |    | 1 byte loop |     |
         * |    +-----+-------+     |
         * |          |             |
         * |          v             |
         * |       +--+---+         |
         * |       | exit |         |
         * |       +------+         |
         * |                        |
         * |          +-------------+   +------------------+  zeromem
         * |          |                 | zeromem function |
         * |          |      +----------+------------------+
         * |          v      v
         * |    +-----+------+----+    .Lzeromem_dczva_fallback_entry
         * |    |   1 byte loop   |
         * |    +--------+--------+
         * |             |
         * +-------------+
         */

        /*
         * Readable names for the registers.
         *
         * Registers x0, x1 and x2 are also set by zeromem, which branches
         * into the fallback path directly, so cursor, length and stop_address
         * should not be retargeted to other registers.
         */
        cursor       .req x0 /* Start address and then current address */
        length       .req x1 /* Length in bytes of the region to zero out */
        /* Reusing x1 as length is never used after block_mask is set */
        block_mask   .req x1 /* Bitmask of the block size read in DCZID_EL0 */
        stop_address .req x2 /* Address past the last zeroed byte */
        block_size   .req x3 /* Size of a block in bytes as read in DCZID_EL0 */
        tmp1         .req x4
        tmp2         .req x5

#if ENABLE_ASSERTIONS
        /*
         * Check the M bit (MMU enabled) of the current SCTLR_EL(1|3)
         * register value and panic if the MMU is disabled.
         */
#if defined(IMAGE_AT_EL3)
        mrs     tmp1, sctlr_el3
#else
        mrs     tmp1, sctlr_el1
#endif

        tst     tmp1, #SCTLR_M_BIT
        ASM_ASSERT(ne)
#endif /* ENABLE_ASSERTIONS */

        /* stop_address is the address past the last byte to zero */
        add     stop_address, cursor, length

        /*
         * Read dczid_el0: its 4 lowest bits encode log2(<block size in
         * words>) (see the encoding of the dczid_el0 register).
         */
        mrs     block_size, dczid_el0

        /*
         * Select the 4 lowest bits and convert the extracted
         * log2(<block size in words>) to <block size in bytes>.
         */
        ubfx    block_size, block_size, #0, #4
        mov     tmp2, #(1 << 2)
        lsl     block_size, tmp2, block_size

#if ENABLE_ASSERTIONS
        /*
         * Assumes the block size is at least 16 bytes to avoid manual
         * realignment of the cursor at the end of the DC ZVA loop.
         */
        cmp     block_size, #16
        ASM_ASSERT(hs)
#endif
        /*
         * It is not worth doing all the setup for a region smaller than a
         * block; this also protects against zeroing a whole block when the
         * area to zero is smaller than one. As the block size is assumed to
         * be at least 16 bytes, it also protects the initial aligning loops
         * from trying to zero 16 bytes when length is less than 16.
         */
        cmp     length, block_size
        b.lo    .Lzeromem_dczva_fallback_entry
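        /*
         * Worked example of the decode above (illustrative, not normative):
         * on an implementation reporting DCZID_EL0.BS = 4 (a common value),
         * block_size = 4 << 4 = 64 bytes, so the DC ZVA loop below zeroes one
         * 64-byte block per iteration and block_mask will be 0x3f.
         */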
        /*
         * Calculate the bitmask of the block alignment. It will never
         * underflow as the block size is between 4 bytes and 2kB.
         * block_mask = block_size - 1
         */
        sub     block_mask, block_size, #1

        /*
         * The length alias should not be used after this point unless it is
         * defined as a register other than block_mask's.
         */
        .unreq  length

        /*
         * If the start address is already aligned to the zero block size, go
         * straight to the cache zeroing loop. This is safe because at this
         * point, the length cannot be smaller than a block size.
         */
        tst     cursor, block_mask
        b.eq    .Lzeromem_dczva_blocksize_aligned

        /*
         * Calculate the first block-size-aligned address. It is assumed that
         * the zero block size is at least 16 bytes. This address is the last
         * address of this initial loop.
         */
        orr     tmp1, cursor, block_mask
        add     tmp1, tmp1, #1

        /*
         * If the addition overflows, skip the cache zeroing loops. This is
         * quite unlikely however.
         */
        cbz     tmp1, .Lzeromem_dczva_fallback_entry

        /*
         * If the first block-size-aligned address is past the last address,
         * fall back to the simpler code.
         */
        cmp     tmp1, stop_address
        b.hi    .Lzeromem_dczva_fallback_entry

        /*
         * If the start address is already aligned to 16 bytes, skip this loop.
         * It is safe to do this because tmp1 (the stop address of the initial
         * 16 bytes loop) will never be greater than the final stop address.
         */
        tst     cursor, #0xf
        b.eq    .Lzeromem_dczva_initial_1byte_aligned_end

        /* Calculate the next address aligned to 16 bytes */
        orr     tmp2, cursor, #0xf
        add     tmp2, tmp2, #1
        /* If it overflows, fall back to the simple path (unlikely) */
        cbz     tmp2, .Lzeromem_dczva_fallback_entry
        /*
         * The next aligned address cannot be after the stop address because
         * the length cannot be smaller than 16 at this point.
         */

        /* First loop: zero byte per byte */
1:
        strb    wzr, [cursor], #1
        cmp     cursor, tmp2
        b.ne    1b
.Lzeromem_dczva_initial_1byte_aligned_end:

        /*
         * Second loop: zero 16 bytes at a time from cursor to tmp1 before
         * being able to use the code that deals with block-size-aligned
         * addresses.
         */
        cmp     cursor, tmp1
        b.hs    2f
1:
        stp     xzr, xzr, [cursor], #16
        cmp     cursor, tmp1
        b.lo    1b
2:

        /*
         * Third loop: zero a block at a time using the DC ZVA cache block
         * zeroing instruction.
         */
.Lzeromem_dczva_blocksize_aligned:
        /*
         * Calculate the last block-size-aligned address. If the result equals
         * the start address, the loop will exit immediately.
         */
        bic     tmp1, stop_address, block_mask

        cmp     cursor, tmp1
        b.hs    2f
1:
        /* Zero the block containing the cursor */
        dc      zva, cursor
        /* Increment the cursor by the size of a block */
        add     cursor, cursor, block_size
        cmp     cursor, tmp1
        b.lo    1b
2:
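        /*
         * At this point (on the DC ZVA fall-through path) the cursor equals
         * the last block-size-aligned address not greater than stop_address,
         * so fewer than block_size bytes remain; the loops below finish them
         * 16 bytes, then one byte, at a time.
         */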
        /*
         * Fourth loop: zero 16 bytes at a time and then byte per byte the
         * remaining area.
         */
.Lzeromem_dczva_final_16bytes_aligned:
        /*
         * Calculate the last 16-byte-aligned address. It is assumed that the
         * block size will never be smaller than 16 bytes so that the current
         * cursor is aligned to at least a 16-byte boundary.
         */
        bic     tmp1, stop_address, #15

        cmp     cursor, tmp1
        b.hs    2f
1:
        stp     xzr, xzr, [cursor], #16
        cmp     cursor, tmp1
        b.lo    1b
2:

        /* Fifth and final loop: zero byte per byte */
.Lzeromem_dczva_final_1byte_aligned:
        cmp     cursor, stop_address
        b.eq    2f
1:
        strb    wzr, [cursor], #1
        cmp     cursor, stop_address
        b.ne    1b
2:
        ret

        /* Fallback for unaligned start addresses */
.Lzeromem_dczva_fallback_entry:
        /*
         * If the start address is already aligned to 16 bytes, skip this loop.
         */
        tst     cursor, #0xf
        b.eq    .Lzeromem_dczva_final_16bytes_aligned

        /* Calculate the next address aligned to 16 bytes */
        orr     tmp1, cursor, #15
        add     tmp1, tmp1, #1
        /* If it overflows, fall back to byte per byte zeroing */
        cbz     tmp1, .Lzeromem_dczva_final_1byte_aligned
        /* If the next aligned address is after the stop address, fall back */
        cmp     tmp1, stop_address
        b.hs    .Lzeromem_dczva_final_1byte_aligned

        /* Fallback entry loop: zero byte per byte */
1:
        strb    wzr, [cursor], #1
        cmp     cursor, tmp1
        b.ne    1b

        b       .Lzeromem_dczva_final_16bytes_aligned

        .unreq  cursor
        /*
         * length is already unreq'ed to reuse the register for another
         * variable.
         */
        .unreq  stop_address
        .unreq  block_size
        .unreq  block_mask
        .unreq  tmp1
        .unreq  tmp2
endfunc zeromem_dczva

/* --------------------------------------------------------------------------
 * void memcpy16(void *dest, const void *src, unsigned int length)
 *
 * Copy length bytes from memory area src to memory area dest.
 * The memory areas should not overlap.
 * Destination and source addresses must be 16-byte aligned.
 * --------------------------------------------------------------------------
 */
func memcpy16
#if ENABLE_ASSERTIONS
        orr     x3, x0, x1
        tst     x3, #0xf
        ASM_ASSERT(eq)
#endif
/* copy 16 bytes at a time */
m_loop16:
        cmp     x2, #16
        b.lo    m_loop1
        ldp     x3, x4, [x1], #16
        stp     x3, x4, [x0], #16
        sub     x2, x2, #16
        b       m_loop16
/* copy byte per byte */
m_loop1:
        cbz     x2, m_end
        ldrb    w3, [x1], #1
        strb    w3, [x0], #1
        subs    x2, x2, #1
        b.ne    m_loop1
m_end:
        ret
endfunc memcpy16

/* ---------------------------------------------------------------------------
 * Disable the MMU at EL3
 * ---------------------------------------------------------------------------
 */

func disable_mmu_el3
        mov     x1, #(SCTLR_M_BIT | SCTLR_C_BIT)
do_disable_mmu_el3:
        mrs     x0, sctlr_el3
        bic     x0, x0, x1
        msr     sctlr_el3, x0
        isb     /* ensure MMU is off */
        dsb     sy
        ret
endfunc disable_mmu_el3

func disable_mmu_icache_el3
        mov     x1, #(SCTLR_M_BIT | SCTLR_C_BIT | SCTLR_I_BIT)
        b       do_disable_mmu_el3
endfunc disable_mmu_icache_el3

/* ---------------------------------------------------------------------------
 * Disable the MMU at EL1
 * ---------------------------------------------------------------------------
 */

func disable_mmu_el1
        mov     x1, #(SCTLR_M_BIT | SCTLR_C_BIT)
do_disable_mmu_el1:
        mrs     x0, sctlr_el1
        bic     x0, x0, x1
        msr     sctlr_el1, x0
        isb     /* ensure MMU is off */
        dsb     sy
        ret
endfunc disable_mmu_el1

func disable_mmu_icache_el1
        mov     x1, #(SCTLR_M_BIT | SCTLR_C_BIT | SCTLR_I_BIT)
        b       do_disable_mmu_el1
endfunc disable_mmu_icache_el1
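/*
 * Note for callers (informational): the four disable_mmu* entry points above
 * only modify x0 and x1 besides SCTLR_ELx. In each, the ISB makes the
 * SCTLR_ELx update take effect for subsequent instructions and the DSB waits
 * for outstanding memory accesses to complete before returning.
 */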
/* ---------------------------------------------------------------------------
 * Helper to fix up the Global Offset Table (GOT) and dynamic relocations
 * (.rela.dyn) at runtime.
 *
 * This function is meant to be used when the firmware is compiled with -fpie
 * and linked with -pie options. We rely on the linker script exporting
 * appropriate markers for the start and end of the sections. For the GOT, we
 * expect __GOT_START__ and __GOT_END__. Similarly for .rela.dyn, we expect
 * __RELA_START__ and __RELA_END__.
 *
 * The function takes the limits of the memory to apply fixups to as
 * arguments (which are usually the limits of the relocatable BL image).
 * x0 - the start of the fixup region
 * x1 - the limit of the fixup region
 * These addresses have to be 4KB page aligned.
 * ---------------------------------------------------------------------------
 */

/* Relocation codes */
#define R_AARCH64_NONE          0
#define R_AARCH64_RELATIVE      1027

func fixup_gdt_reloc
        mov     x6, x0
        mov     x7, x1

#if ENABLE_ASSERTIONS
        /* Test whether the limits are 4KB aligned */
        orr     x0, x0, x1
        tst     x0, #(PAGE_SIZE_MASK)
        ASM_ASSERT(eq)
#endif
        /*
         * Calculate the offset based on the return address in x30.
         * Assume that this function is called within a page at the start of
         * the fixup region.
         */
        and     x2, x30, #~(PAGE_SIZE_MASK)
        subs    x0, x2, x6      /* Diff(S) = Current Address - Compiled Address */
        b.eq    3f              /* Diff(S) = 0. No relocation needed */

        adrp    x1, __GOT_START__
        add     x1, x1, :lo12:__GOT_START__
        adrp    x2, __GOT_END__
        add     x2, x2, :lo12:__GOT_END__

        /*
         * The GOT is an array of 64-bit addresses which must be fixed up as
         * new_addr = old_addr + Diff(S).
         * new_addr is the address the binary is currently executing from
         * and old_addr is the address at compile time.
         */
1:      ldr     x3, [x1]

        /* Skip adding the offset if the address is < the lower limit */
        cmp     x3, x6
        b.lo    2f

        /* Skip adding the offset if the address is > the upper limit */
        cmp     x3, x7
        b.hi    2f
        add     x3, x3, x0
        str     x3, [x1]

2:      add     x1, x1, #8
        cmp     x1, x2
        b.lo    1b

        /* Start the dynamic relocations. Use adrp/add to get RELA_START and END. */
3:      adrp    x1, __RELA_START__
        add     x1, x1, :lo12:__RELA_START__
        adrp    x2, __RELA_END__
        add     x2, x2, :lo12:__RELA_END__

        /*
         * According to the ELF-64 specification, the RELA data structure is
         * as follows:
         *      typedef struct {
         *              Elf64_Addr r_offset;
         *              Elf64_Xword r_info;
         *              Elf64_Sxword r_addend;
         *      } Elf64_Rela;
         *
         * r_offset is the address of the reference.
         * r_info is the symbol index and type of relocation (in this case
         * code 1027, which corresponds to R_AARCH64_RELATIVE).
         * r_addend is the constant part of the expression.
         *
         * The size of the Elf64_Rela structure is 24 bytes.
         */

        /* Skip R_AARCH64_NONE entries with code 0 */
1:      ldr     x3, [x1, #8]
        cbz     x3, 2f

#if ENABLE_ASSERTIONS
        /* Assert that the relocation type is R_AARCH64_RELATIVE */
        cmp     x3, #R_AARCH64_RELATIVE
        ASM_ASSERT(eq)
#endif
        ldr     x3, [x1]        /* r_offset */
        add     x3, x0, x3
        ldr     x4, [x1, #16]   /* r_addend */

        /* Skip adding the offset if r_addend is < the lower limit */
        cmp     x4, x6
        b.lo    2f

        /* Skip adding the offset if r_addend is > the upper limit */
        cmp     x4, x7
        b.hi    2f

        add     x4, x0, x4      /* Diff(S) + r_addend */
        str     x4, [x3]

2:      add     x1, x1, #24
        cmp     x1, x2
        b.lo    1b
        ret
endfunc fixup_gdt_reloc
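/*
 * Worked example of the R_AARCH64_RELATIVE fixup above (illustrative values
 * only): if the image was linked at 0x4001000 but currently executes from
 * 0x6001000, then Diff(S) = 0x2000000. For an Elf64_Rela entry with
 * r_offset = 0x4002000 and r_addend = 0x4003000 (the addend lying within the
 * image limits), the loop stores r_addend + Diff(S) = 0x6003000 at address
 * r_offset + Diff(S) = 0x6002000.
 */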