/*
 * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * Redistributions of source code must retain the above copyright notice, this
 * list of conditions and the following disclaimer.
 *
 * Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * Neither the name of ARM nor the names of its contributors may be used
 * to endorse or promote products derived from this software without specific
 * prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <arch.h>
#include <asm_macros.S>
#include <assert_macros.S>

	.globl	get_afflvl_shift
	.globl	mpidr_mask_lower_afflvls
	.globl	eret
	.globl	smc

	.globl	zero_normalmem
	.globl	zeromem
	.globl	zeromem16
	.globl	memcpy16

	.globl	disable_mmu_el3
	.globl	disable_mmu_icache_el3

#if SUPPORT_VFP
	.globl	enable_vfp
#endif

func get_afflvl_shift
	cmp	x0, #3
	cinc	x0, x0, eq
	mov	x1, #MPIDR_AFFLVL_SHIFT
	lsl	x0, x0, x1
	ret
endfunc get_afflvl_shift

func mpidr_mask_lower_afflvls
	cmp	x1, #3
	cinc	x1, x1, eq
	mov	x2, #MPIDR_AFFLVL_SHIFT
	lsl	x2, x1, x2
	lsr	x0, x0, x2
	lsl	x0, x0, x2
	ret
endfunc mpidr_mask_lower_afflvls


func eret
	eret
endfunc eret


func smc
	smc	#0
endfunc smc

/* -----------------------------------------------------------------------
 * void zeromem16(void *mem, unsigned int length);
 *
 * Initialise a memory region to 0.
 * The memory address must be 16-byte aligned.
 * NOTE: This function is deprecated and zeromem should be used instead.
 * -----------------------------------------------------------------------
 */
.equ	zeromem16, zeromem

/* -----------------------------------------------------------------------
 * void zero_normalmem(void *mem, unsigned int length);
 *
 * Initialise a region in normal memory to 0. This function complies with the
 * AAPCS and can be called from C code.
 *
 * NOTE: The MMU must be enabled when using this function, as it can only
 *       operate on normal memory. It is intended to be used mainly from C
 *       code, where the MMU is usually already enabled.
 * -----------------------------------------------------------------------
 */
.equ	zero_normalmem, zeromem_dczva

/* -----------------------------------------------------------------------
 * void zeromem(void *mem, unsigned int length);
 *
 * Initialise a region of device memory to 0. This function complies with the
 * AAPCS and can be called from C code.
 *
 * NOTE: When data caches and the MMU are enabled, zero_normalmem can usually
 *       be used instead for faster zeroing.
 *
 * -----------------------------------------------------------------------
 */
func zeromem
	/* x2 is the address past the last zeroed address */
	add	x2, x0, x1
	/*
	 * Use the fallback path, which does not use the DC ZVA instruction
	 * and therefore does not need the MMU to be enabled.
	 */
	b	.Lzeromem_dczva_fallback_entry
endfunc zeromem
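
/* -----------------------------------------------------------------------
 * Illustrative (non-normative) usage sketch from C, based on the prototypes
 * documented above. The buffer and helper names are hypothetical; the only
 * point shown is choosing zero_normalmem once the MMU and data caches are
 * on, and zeromem before that point (or for device memory):
 *
 *	extern void zeromem(void *mem, unsigned int length);
 *	extern void zero_normalmem(void *mem, unsigned int length);
 *
 *	static unsigned char scratch[256];	// hypothetical buffer
 *
 *	void clear_scratch(int mmu_enabled)	// hypothetical helper
 *	{
 *		if (mmu_enabled)
 *			zero_normalmem(scratch, sizeof(scratch));
 *		else
 *			zeromem(scratch, sizeof(scratch));
 *	}
 * -----------------------------------------------------------------------
 */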

/* -----------------------------------------------------------------------
 * void zeromem_dczva(void *mem, unsigned int length);
 *
 * Fill a region of normal memory of size "length" in bytes with null bytes.
 * The MMU must be enabled and the memory must be of normal type. This is
 * because this function internally uses the DC ZVA instruction, which
 * generates an Alignment fault if used on any type of Device memory (see
 * section D3.4.9 of the ARMv8 ARM, issue k). When the MMU is disabled, all
 * memory behaves like Device-nGnRnE memory (see section D4.2.8), hence the
 * requirement on the MMU being enabled.
 * NOTE: The code assumes that the block size as defined in the DCZID_EL0
 *       register is at least 16 bytes.
 *
 * -----------------------------------------------------------------------
 */
func zeromem_dczva

	/*
	 * The function consists of a series of loops that zero memory one byte
	 * at a time, 16 bytes at a time, or using the DC ZVA instruction to
	 * zero aligned blocks of bytes, whose size is assumed to be at least
	 * 16. In the case where the DC ZVA instruction cannot be used, or if
	 * the first 16 bytes loop would overflow, there is a fallback path
	 * that does not use DC ZVA.
	 * Note: The fallback path is also used by the zeromem function, which
	 *       branches to it directly.
	 *
	 *              +---------+   zeromem_dczva
	 *              |  entry  |
	 *              +----+----+
	 *                   |
	 *                   v
	 *              +---------+
	 *              | checks  |>o-------+ (If any check fails, fallback)
	 *              +----+----+         |
	 *                   |              |---------------+
	 *                   v              | Fallback path |
	 *            +------+------+       |---------------+
	 *            | 1 byte loop |       |
	 *            +------+------+ .Lzeromem_dczva_initial_1byte_aligned_end
	 *                   |              |
	 *                   v              |
	 *          +-------+-------+       |
	 *          | 16 bytes loop |       |
	 *          +-------+-------+       |
	 *                   |              |
	 *                   v              |
	 *            +------+------+ .Lzeromem_dczva_blocksize_aligned
	 *            | DC ZVA loop |       |
	 *            +------+------+       |
	 *  +--------+       |              |
	 *  |        |       |              |
	 *  |        v       v              |
	 *  |  +-------+-------+ .Lzeromem_dczva_final_16bytes_aligned
	 *  |  | 16 bytes loop |            |
	 *  |  +-------+-------+            |
	 *  |          |                    |
	 *  |          v                    |
	 *  |   +------+------+ .Lzeromem_dczva_final_1byte_aligned
	 *  |   | 1 byte loop |             |
	 *  |   +-------------+             |
	 *  |          |                    |
	 *  |          v                    |
	 *  |      +---+--+                 |
	 *  |      | exit |                 |
	 *  |      +------+                 |
	 *  |                               |
	 *  |         +---------------------+    +------------------+ zeromem
	 *  |         |  +-----------------------| zeromem function |
	 *  |         |  |                       +------------------+
	 *  |         v  v
	 *  |   +-------------+ .Lzeromem_dczva_fallback_entry
	 *  |   | 1 byte loop |
	 *  |   +------+------+
	 *  |          |
	 *  +----------+
	 */
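
	/*
	 * Rough, illustrative C sketch of the strategy implemented below. This
	 * is not a line-for-line translation of the assembly: dc_zva() stands
	 * in for the DC ZVA instruction, memset() is only used for exposition,
	 * and the fallback path simply skips the DC ZVA loop and its alignment
	 * steps.
	 *
	 *	#include <string.h>
	 *
	 *	extern void dc_zva(unsigned char *p);	// stand-in for DC ZVA
	 *
	 *	static void zeromem_dczva_sketch(unsigned char *p,
	 *					 unsigned char *end,
	 *					 unsigned long block)
	 *	{
	 *		// head: align to 16 bytes, then to the block size
	 *		while (((unsigned long)p & 15UL) != 0UL && p < end)
	 *			*p++ = 0;
	 *		while (((unsigned long)p & (block - 1UL)) != 0UL &&
	 *		       p + 16 <= end) {
	 *			memset(p, 0, 16);
	 *			p += 16;
	 *		}
	 *		// bulk: one whole block per DC ZVA
	 *		while (p + block <= end) {
	 *			dc_zva(p);
	 *			p += block;
	 *		}
	 *		// tail: 16 bytes at a time, then byte by byte
	 *		while (p + 16 <= end) {
	 *			memset(p, 0, 16);
	 *			p += 16;
	 *		}
	 *		while (p < end)
	 *			*p++ = 0;
	 *	}
	 */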

	/*
	 * Readable names for registers
	 *
	 * Registers x0, x1 and x2 are also set by zeromem, which
	 * branches into the fallback path directly, so cursor, length and
	 * stop_address should not be retargeted to other registers.
	 */
	cursor       .req x0 /* Start address and then current address */
	length       .req x1 /* Length in bytes of the region to zero out */
	/* Reusing x1 as length is never used after block_mask is set */
	block_mask   .req x1 /* Bitmask of the block size read in DCZID_EL0 */
	stop_address .req x2 /* Address past the last zeroed byte */
	block_size   .req x3 /* Size of a block in bytes as read in DCZID_EL0 */
	tmp1         .req x4
	tmp2         .req x5

#if ASM_ASSERTION
	/*
	 * Check the M bit (MMU enabled) of the current SCTLR_EL(1|3)
	 * register value and panic if the MMU is disabled.
	 */
#if defined(IMAGE_BL1) || defined(IMAGE_BL31)
	mrs	tmp1, sctlr_el3
#else
	mrs	tmp1, sctlr_el1
#endif

	tst	tmp1, #SCTLR_M_BIT
	ASM_ASSERT(ne)
#endif /* ASM_ASSERTION */

	/* stop_address is the address past the last to zero */
	add	stop_address, cursor, length

	/*
	 * Read dczid_el0, whose 4 lowest bits hold log2(<block size in
	 * words>) (see the encoding of the dczid_el0 register).
	 */
	mrs	block_size, dczid_el0

	/*
	 * Select the 4 lowest bits and convert the extracted log2(<block size
	 * in words>) to <block size in bytes>.
	 */
	ubfx	block_size, block_size, #0, #4
	mov	tmp2, #(1 << 2)
	lsl	block_size, tmp2, block_size

#if ASM_ASSERTION
	/*
	 * The code assumes the block size is at least 16 bytes to avoid
	 * manual realignment of the cursor at the end of the DC ZVA loop.
	 */
	cmp	block_size, #16
	ASM_ASSERT(hs)
#endif
	/*
	 * It is not worth doing all the setup for a region smaller than a
	 * block; this check also protects against zeroing a whole block when
	 * the area to zero is smaller than one. In addition, as the block
	 * size is assumed to be at least 16 bytes, it protects the initial
	 * aligning loops from trying to zero 16 bytes when length is less
	 * than 16.
	 */
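	/*
	 * Worked example (illustrative values): a CPU reporting
	 * DCZID_EL0.BS = 4, i.e. 16 words per block, yields
	 * block_size = 4 << 4 = 64 bytes, so each DC ZVA below zeroes
	 * 64 bytes and block_mask will be 0x3f.
	 */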
	cmp	length, block_size
	b.lo	.Lzeromem_dczva_fallback_entry

	/*
	 * Calculate the bitmask of the block alignment. It will never
	 * underflow as the block size is between 4 bytes and 2kB.
	 * block_mask = block_size - 1
	 */
	sub	block_mask, block_size, #1

	/*
	 * The length alias should not be used after this point unless it is
	 * defined as a register other than block_mask's.
	 */
	.unreq	length

	/*
	 * If the start address is already aligned to the zero block size, go
	 * straight to the cache zeroing loop. This is safe because at this
	 * point, the length cannot be smaller than a block size.
	 */
	tst	cursor, block_mask
	b.eq	.Lzeromem_dczva_blocksize_aligned

	/*
	 * Calculate the first block-size-aligned address. It is assumed that
	 * the zero block size is at least 16 bytes. This address is the last
	 * address of this initial loop.
	 */
	orr	tmp1, cursor, block_mask
	add	tmp1, tmp1, #1

	/*
	 * If the addition overflows, skip the cache zeroing loops. This is
	 * quite unlikely however.
	 */
	cbz	tmp1, .Lzeromem_dczva_fallback_entry

	/*
	 * If the first block-size-aligned address is past the last address,
	 * fall back to the simpler code.
	 */
	cmp	tmp1, stop_address
	b.hi	.Lzeromem_dczva_fallback_entry
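	/*
	 * Illustrative example of the round-up above (hypothetical addresses,
	 * 64-byte block): cursor = 0x1234 gives
	 * tmp1 = (0x1234 | 0x3f) + 1 = 0x1280, the next 64-byte boundary.
	 * The already-aligned case never reaches this code, as it was handled
	 * by the earlier branch to .Lzeromem_dczva_blocksize_aligned.
	 */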

	/*
	 * If the start address is already aligned to 16 bytes, skip this loop.
	 * It is safe to do this because tmp1 (the stop address of the initial
	 * 16 bytes loop) will never be greater than the final stop address.
	 */
	tst	cursor, #0xf
	b.eq	.Lzeromem_dczva_initial_1byte_aligned_end

	/* Calculate the next address aligned to 16 bytes */
	orr	tmp2, cursor, #0xf
	add	tmp2, tmp2, #1
	/* If it overflows, fall back to the simple path (unlikely) */
	cbz	tmp2, .Lzeromem_dczva_fallback_entry
	/*
	 * The next aligned address cannot be after the stop address because
	 * the length cannot be smaller than 16 at this point.
	 */

	/* First loop: zero byte by byte */
1:
	strb	wzr, [cursor], #1
	cmp	cursor, tmp2
	b.ne	1b
.Lzeromem_dczva_initial_1byte_aligned_end:

	/*
	 * Second loop: we need to zero 16 bytes at a time from cursor to tmp1
	 * before being able to use the code that deals with block-size-aligned
	 * addresses.
	 */
	cmp	cursor, tmp1
	b.hs	2f
1:
	stp	xzr, xzr, [cursor], #16
	cmp	cursor, tmp1
	b.lo	1b
2:

	/*
	 * Third loop: zero a block at a time using the DC ZVA cache block
	 * zeroing instruction.
	 */
.Lzeromem_dczva_blocksize_aligned:
	/*
	 * Calculate the last block-size-aligned address. If the result equals
	 * the start address, the loop will exit immediately.
	 */
	bic	tmp1, stop_address, block_mask

	cmp	cursor, tmp1
	b.hs	2f
1:
	/* Zero the block containing the cursor */
	dc	zva, cursor
	/* Increment the cursor by the size of a block */
	add	cursor, cursor, block_size
	cmp	cursor, tmp1
	b.lo	1b
2:

	/*
	 * Fourth loop: zero 16 bytes at a time and then zero the remaining
	 * area byte by byte.
	 */
.Lzeromem_dczva_final_16bytes_aligned:
	/*
	 * Calculate the last 16-byte aligned address. It is assumed that the
	 * block size will never be smaller than 16 bytes, so that the current
	 * cursor is aligned to at least a 16-byte boundary.
	 */
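	/*
	 * Illustrative example (hypothetical addresses): with
	 * stop_address = 0x2039, the mask below gives tmp1 = 0x2030, so the
	 * 16-byte loop stops at 0x2030 and the final byte loop clears the
	 * remaining 9 bytes up to, but not including, 0x2039.
	 */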
	bic	tmp1, stop_address, #15

	cmp	cursor, tmp1
	b.hs	2f
1:
	stp	xzr, xzr, [cursor], #16
	cmp	cursor, tmp1
	b.lo	1b
2:

	/* Fifth and final loop: zero byte by byte */
.Lzeromem_dczva_final_1byte_aligned:
	cmp	cursor, stop_address
	b.eq	2f
1:
	strb	wzr, [cursor], #1
	cmp	cursor, stop_address
	b.ne	1b
2:
	ret

	/* Fallback for unaligned start addresses */
.Lzeromem_dczva_fallback_entry:
	/*
	 * If the start address is already aligned to 16 bytes, skip this loop.
	 */
	tst	cursor, #0xf
	b.eq	.Lzeromem_dczva_final_16bytes_aligned

	/* Calculate the next address aligned to 16 bytes */
	orr	tmp1, cursor, #15
	add	tmp1, tmp1, #1
	/* If it overflows, fall back to byte-by-byte zeroing */
	cbz	tmp1, .Lzeromem_dczva_final_1byte_aligned
	/* If the next aligned address is after the stop address, fall back */
	cmp	tmp1, stop_address
	b.hs	.Lzeromem_dczva_final_1byte_aligned

	/* Fallback entry loop: zero byte by byte */
1:
	strb	wzr, [cursor], #1
	cmp	cursor, tmp1
	b.ne	1b

	b	.Lzeromem_dczva_final_16bytes_aligned

	.unreq	cursor
	/*
	 * length is already unreq'ed to reuse the register for another
	 * variable.
	 */
	.unreq	stop_address
	.unreq	block_size
	.unreq	block_mask
	.unreq	tmp1
	.unreq	tmp2
endfunc zeromem_dczva

/* --------------------------------------------------------------------------
 * void memcpy16(void *dest, const void *src, unsigned int length)
 *
 * Copy length bytes from memory area src to memory area dest.
 * The memory areas should not overlap.
 * Destination and source addresses must be 16-byte aligned.
 * --------------------------------------------------------------------------
 */
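/* --------------------------------------------------------------------------
 * Illustrative (non-normative) C usage sketch of memcpy16, based on the
 * prototype above. Both buffers are hypothetical and are 16-byte aligned as
 * required by the assertion at the top of the function:
 *
 *	extern void memcpy16(void *dest, const void *src, unsigned int length);
 *
 *	static unsigned char src_buf[64] __attribute__((aligned(16)));
 *	static unsigned char dst_buf[64] __attribute__((aligned(16)));
 *
 *	void copy_example(void)		// hypothetical helper
 *	{
 *		memcpy16(dst_buf, src_buf, sizeof(src_buf));
 *	}
 *
 * Note that length itself need not be a multiple of 16: full 16-byte chunks
 * are copied first and any remainder is copied byte by byte.
 * --------------------------------------------------------------------------
 */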
func memcpy16
#if ASM_ASSERTION
	orr	x3, x0, x1
	tst	x3, #0xf
	ASM_ASSERT(eq)
#endif
/* copy 16 bytes at a time */
m_loop16:
	cmp	x2, #16
	b.lo	m_loop1
	ldp	x3, x4, [x1], #16
	stp	x3, x4, [x0], #16
	sub	x2, x2, #16
	b	m_loop16
/* copy byte by byte */
m_loop1:
	cbz	x2, m_end
	ldrb	w3, [x1], #1
	strb	w3, [x0], #1
	subs	x2, x2, #1
	b.ne	m_loop1
m_end:
	ret
endfunc memcpy16

/* ---------------------------------------------------------------------------
 * Disable the MMU at EL3
 * ---------------------------------------------------------------------------
 */

func disable_mmu_el3
	mov	x1, #(SCTLR_M_BIT | SCTLR_C_BIT)
do_disable_mmu:
	mrs	x0, sctlr_el3
	bic	x0, x0, x1
	msr	sctlr_el3, x0
	isb				// ensure MMU is off
	dsb	sy
	ret
endfunc disable_mmu_el3


func disable_mmu_icache_el3
	mov	x1, #(SCTLR_M_BIT | SCTLR_C_BIT | SCTLR_I_BIT)
	b	do_disable_mmu
endfunc disable_mmu_icache_el3

/* ---------------------------------------------------------------------------
 * Enable the use of VFP at EL3
 * ---------------------------------------------------------------------------
 */
#if SUPPORT_VFP
func enable_vfp
	mrs	x0, cpacr_el1
	orr	x0, x0, #CPACR_VFP_BITS
	msr	cpacr_el1, x0
	mrs	x0, cptr_el3
	mov	x1, #AARCH64_CPTR_TFP
	bic	x0, x0, x1
	msr	cptr_el3, x0
	isb
	ret
endfunc enable_vfp
#endif