/*
 * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * Redistributions of source code must retain the above copyright notice, this
 * list of conditions and the following disclaimer.
 *
 * Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * Neither the name of ARM nor the names of its contributors may be used
 * to endorse or promote products derived from this software without specific
 * prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <arch.h>
#include <asm_macros.S>
#include <assert_macros.S>

	.globl	get_afflvl_shift
	.globl	mpidr_mask_lower_afflvls
	.globl	eret
	.globl	smc

	.globl	zero_normalmem
	.globl	zeromem
	.globl	zeromem16
	.globl	memcpy16

	.globl	disable_mmu_el3
	.globl	disable_mmu_icache_el3

#if SUPPORT_VFP
	.globl	enable_vfp
#endif

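/* -----------------------------------------------------------------------
 * Clarifying note on get_afflvl_shift (summary derived from the code
 * below; the assembly remains the authoritative definition):
 *
 * Expects an affinity level (0 to 3) in x0 and returns in x0 the bit
 * offset of that level's affinity field within an MPIDR value. Level 3 is
 * first bumped to 4 because Aff3 occupies MPIDR[39:32] rather than
 * [31:24]. Assuming MPIDR_AFFLVL_SHIFT is 3 (8-bit affinity fields), the
 * result is 0, 8, 16 or 32 for levels 0, 1, 2 and 3 respectively.
 * -----------------------------------------------------------------------
 */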
func get_afflvl_shift
	cmp	x0, #3
	cinc	x0, x0, eq
	mov	x1, #MPIDR_AFFLVL_SHIFT
	lsl	x0, x0, x1
	ret
endfunc get_afflvl_shift

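/* -----------------------------------------------------------------------
 * Clarifying note on mpidr_mask_lower_afflvls (summary derived from the
 * code below; the assembly remains the authoritative definition):
 *
 * Expects an MPIDR value in x0 and an affinity level in x1, and returns
 * in x0 the MPIDR with every affinity field below the given level
 * cleared. It computes the level's bit offset exactly as get_afflvl_shift
 * does, then shifts x0 right and back left by that amount.
 * -----------------------------------------------------------------------
 */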
func mpidr_mask_lower_afflvls
	cmp	x1, #3
	cinc	x1, x1, eq
	mov	x2, #MPIDR_AFFLVL_SHIFT
	lsl	x2, x1, x2
	lsr	x0, x0, x2
	lsl	x0, x0, x2
	ret
endfunc mpidr_mask_lower_afflvls


func eret
	eret
endfunc eret


func smc
	smc	#0
endfunc smc

/* -----------------------------------------------------------------------
 * void zeromem16(void *mem, unsigned int length);
 *
 * Initialise a memory region to 0.
 * The memory address must be 16-byte aligned.
 * NOTE: This function is deprecated and zeromem should be used instead.
 * -----------------------------------------------------------------------
 */
.equ	zeromem16, zeromem

/* -----------------------------------------------------------------------
 * void zero_normalmem(void *mem, unsigned int length);
 *
 * Initialise a region in normal memory to 0. This function complies with the
 * AAPCS and can be called from C code.
 *
 * NOTE: The MMU must be enabled when using this function, as it can only
 *       operate on normal memory. It is mainly intended for use from C code,
 *       where the MMU is usually enabled.
 * -----------------------------------------------------------------------
 */
.equ	zero_normalmem, zeromem_dczva
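/*
 * Note on the two .equ directives above: they define assembly-level
 * aliases, so the deprecated zeromem16 resolves to zeromem and
 * zero_normalmem resolves to zeromem_dczva without any extra code. An
 * illustrative (hypothetical) C-side call, assuming the usual extern
 * declaration and caller-provided bss_base/bss_size values, would be:
 *
 *	zero_normalmem(bss_base, bss_size);
 */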

/* -----------------------------------------------------------------------
 * void zeromem(void *mem, unsigned int length);
 *
 * Initialise a region of device memory to 0. This function complies with the
 * AAPCS and can be called from C code.
 *
 * NOTE: When data caches and MMU are enabled, zero_normalmem can usually be
 *       used instead for faster zeroing.
 *
 * -----------------------------------------------------------------------
 */
func zeromem
	/* x2 is the address past the last zeroed byte */
	add	x2, x0, x1
	/*
	 * Use the fallback path, which does not use the DC ZVA instruction
	 * and therefore does not require the MMU to be enabled.
	 */
	b	.Lzeromem_dczva_fallback_entry
endfunc zeromem
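/*
 * Rough, non-authoritative C outline of zeromem above (the fallback path
 * it branches to lives inside zeromem_dczva below): zero byte by byte
 * until 16-byte aligned, then 16 bytes at a time, then byte by byte for
 * the tail, without ever issuing DC ZVA.
 *
 *	stop = mem + length;
 *	while (mem < stop && !aligned_16(mem)) *mem++ = 0;
 *	while (stop - mem >= 16) { store 16 zero bytes; mem += 16; }
 *	while (mem < stop) *mem++ = 0;
 */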

/* -----------------------------------------------------------------------
 * void zeromem_dczva(void *mem, unsigned int length);
 *
 * Fill a region of normal memory of size "length" in bytes with null bytes.
 * The MMU must be enabled and the memory must be of normal type. This is
 * because this function internally uses the DC ZVA instruction, which
 * generates an Alignment fault if used on any type of Device memory (see
 * section D3.4.9 of the ARMv8 ARM, issue k). When the MMU is disabled, all
 * memory behaves like Device-nGnRnE memory (see section D4.2.8), hence the
 * requirement on the MMU being enabled.
 * NOTE: The code assumes that the block size as defined in the DCZID_EL0
 *       register is at least 16 bytes.
 *
 * -----------------------------------------------------------------------
 */
func zeromem_dczva

	/*
	 * The function consists of a series of loops that zero memory one
	 * byte at a time, 16 bytes at a time or, using the DC ZVA
	 * instruction, one aligned block of bytes at a time, the block size
	 * being assumed to be at least 16 bytes. In the case where the
	 * DC ZVA instruction cannot be used, or if the first 16-byte loop
	 * would overflow, there is a fallback path that does not use DC ZVA.
	 * Note: The fallback path is also used by the zeromem function that
	 *       branches to it directly.
	 *
	 *              +---------+   zeromem_dczva
	 *              |  entry  |
	 *              +----+----+
	 *                   |
	 *                   v
	 *              +---------+
	 *              | checks  |>o-------+ (If any check fails, fallback)
	 *              +----+----+         |
	 *                   |              |---------------+
	 *                   v              | Fallback path |
	 *            +------+------+       |---------------+
	 *            | 1 byte loop |       |
	 *            +------+------+ .Lzeromem_dczva_initial_1byte_aligned_end
	 *                   |              |
	 *                   v              |
	 *           +-------+-------+      |
	 *           | 16 bytes loop |      |
	 *           +-------+-------+      |
	 *                   |              |
	 *                   v              |
	 *            +------+------+ .Lzeromem_dczva_blocksize_aligned
	 *            | DC ZVA loop |       |
	 *            +------+------+       |
	 *       +--------+  |              |
	 *       |        |  |              |
	 *       |        v  v              |
	 *       |   +-------+-------+ .Lzeromem_dczva_final_16bytes_aligned
	 *       |   | 16 bytes loop |      |
	 *       |   +-------+-------+      |
	 *       |           |              |
	 *       |           v              |
	 *       |    +------+------+ .Lzeromem_dczva_final_1byte_aligned
	 *       |    | 1 byte loop |       |
	 *       |    +-------------+       |
	 *       |           |              |
	 *       |           v              |
	 *       |       +---+--+           |
	 *       |       | exit |           |
	 *       |       +------+           |
	 *       |                          |
	 *       |           +--------------+    +------------------+ zeromem
	 *       |           |  +----------------| zeromem function |
	 *       |           |  |                +------------------+
	 *       |           v  v
	 *       |    +-------------+ .Lzeromem_dczva_fallback_entry
	 *       |    | 1 byte loop |
	 *       |    +------+------+
	 *       |           |
	 *       +-----------+
	 */

	/*
	 * Readable names for registers
	 *
	 * Registers x0, x1 and x2 are also set by zeromem which
	 * branches into the fallback path directly, so cursor, length and
	 * stop_address should not be retargeted to other registers.
	 */
	cursor       .req x0 /* Start address and then current address */
	length       .req x1 /* Length in bytes of the region to zero out */
	/* Reusing x1 as length is never used after block_mask is set */
	block_mask   .req x1 /* Bitmask of the block size read in DCZID_EL0 */
	stop_address .req x2 /* Address past the last zeroed byte */
	block_size   .req x3 /* Size of a block in bytes as read in DCZID_EL0 */
	tmp1         .req x4
	tmp2         .req x5

#if ASM_ASSERTION
	/*
	 * Check for M bit (MMU enabled) of the current SCTLR_EL(1|3)
	 * register value and panic if the MMU is disabled.
	 */
#if defined(IMAGE_BL1) || defined(IMAGE_BL31)
	mrs	tmp1, sctlr_el3
#else
	mrs	tmp1, sctlr_el1
#endif

	tst	tmp1, #SCTLR_M_BIT
	ASM_ASSERT(ne)
#endif /* ASM_ASSERTION */

	/* stop_address is the address past the last byte to zero */
	add	stop_address, cursor, length

	/*
	 * Read dczid_el0: its 4 lowest bits hold log2(<block size in words>)
	 * (see the encoding of the dczid_el0 register).
	 */
	mrs	block_size, dczid_el0

	/*
	 * Select the 4 lowest bits and convert the extracted log2(<block size
	 * in words>) to <block size in bytes>
	 */
	ubfx	block_size, block_size, #0, #4
	mov	tmp2, #(1 << 2)
	lsl	block_size, tmp2, block_size
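	/*
	 * Worked example (illustrative): with DCZID_EL0.BS = 4, a common
	 * value, block_size = (1 << 2) << 4 = 64 bytes.
	 */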

#if ASM_ASSERTION
	/*
	 * Assumes block size is at least 16 bytes to avoid manual realignment
	 * of the cursor at the end of the DCZVA loop.
	 */
	cmp	block_size, #16
	ASM_ASSERT(hs)
#endif
	/*
	 * It is not worth doing all the setup for a region smaller than a
	 * block, and this check also prevents zeroing a whole block when the
	 * area to zero is smaller than one. In addition, as the block size is
	 * assumed to be at least 16 bytes, it protects the initial aligning
	 * loops from trying to zero 16 bytes when the length is less than 16.
	 */
	cmp	length, block_size
	b.lo	.Lzeromem_dczva_fallback_entry

	/*
	 * Calculate the bitmask of the block alignment. It will never
	 * underflow as the block size is between 4 bytes and 2kB.
	 * block_mask = block_size - 1
	 */
	sub	block_mask, block_size, #1
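	/*
	 * Illustrative example: for a 64-byte block, block_mask = 0x3f, so
	 * "address & block_mask" is zero exactly when the address is
	 * block-aligned.
	 */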

	/*
	 * length alias should not be used after this point unless it is
	 * defined as a register other than block_mask's.
	 */
	 .unreq length

	/*
	 * If the start address is already aligned to the zero block size, go
	 * straight to the cache zeroing loop. This is safe because at this
	 * point, the length cannot be smaller than the block size.
	 */
	tst	cursor, block_mask
	b.eq	.Lzeromem_dczva_blocksize_aligned

	/*
	 * Calculate the first block-size-aligned address. It is assumed that
	 * the zero block size is at least 16 bytes. This address is the last
	 * address of this initial loop.
	 */
	orr	tmp1, cursor, block_mask
	add	tmp1, tmp1, #1

	/*
	 * If the addition overflows, skip the cache zeroing loops. This is
	 * quite unlikely however.
	 */
	cbz	tmp1, .Lzeromem_dczva_fallback_entry

	/*
	 * If the first block-size-aligned address is past the last address,
	 * fall back to the simpler code.
	 */
	cmp	tmp1, stop_address
	b.hi	.Lzeromem_dczva_fallback_entry

	/*
	 * If the start address is already aligned to 16 bytes, skip this loop.
	 * It is safe to do this because tmp1 (the stop address of the initial
	 * 16 bytes loop) will never be greater than the final stop address.
	 */
	tst	cursor, #0xf
	b.eq	.Lzeromem_dczva_initial_1byte_aligned_end

	/* Calculate the next address aligned to 16 bytes */
	orr	tmp2, cursor, #0xf
	add	tmp2, tmp2, #1
	/* If it overflows, fall back to the simple path (unlikely) */
	cbz	tmp2, .Lzeromem_dczva_fallback_entry
	/*
	 * The next aligned address cannot be after the stop address because
	 * the length cannot be smaller than 16 at this point.
	 */

	/* First loop: zero byte per byte */
1:
	strb	wzr, [cursor], #1
	cmp	cursor, tmp2
	b.ne	1b
.Lzeromem_dczva_initial_1byte_aligned_end:

	/*
	 * Second loop: we need to zero 16 bytes at a time from cursor to tmp1
	 * before being able to use the code that deals with block-size-aligned
	 * addresses.
	 */
	cmp	cursor, tmp1
	b.hs	2f
1:
	stp	xzr, xzr, [cursor], #16
	cmp	cursor, tmp1
	b.lo	1b
2:

	/*
	 * Third loop: zero a block at a time using the DC ZVA cache block
	 * zeroing instruction.
	 */
.Lzeromem_dczva_blocksize_aligned:
	/*
	 * Calculate the last block-size-aligned address. If it is equal to
	 * the start address, the loop will exit immediately.
	 */
	bic	tmp1, stop_address, block_mask

	cmp	cursor, tmp1
	b.hs	2f
1:
	/* Zero the block containing the cursor */
	dc	zva, cursor
	/* Increment the cursor by the size of a block */
	add	cursor, cursor, block_size
	cmp	cursor, tmp1
	b.lo	1b
2:

	/*
	 * Fourth loop: zero 16 bytes at a time and then byte per byte the
	 * remaining area
	 */
.Lzeromem_dczva_final_16bytes_aligned:
	/*
	 * Calculate the last 16-byte aligned address. It is assumed that the
	 * block size will never be smaller than 16 bytes, so that the current
	 * cursor is aligned to at least a 16-byte boundary.
	 */
	bic	tmp1, stop_address, #15

	cmp	cursor, tmp1
	b.hs	2f
1:
	stp	xzr, xzr, [cursor], #16
	cmp	cursor, tmp1
	b.lo	1b
2:

	/* Fifth and final loop: zero byte per byte */
.Lzeromem_dczva_final_1byte_aligned:
	cmp	cursor, stop_address
	b.eq	2f
1:
	strb	wzr, [cursor], #1
	cmp	cursor, stop_address
	b.ne	1b
2:
	ret

	/* Fallback for unaligned start addresses */
.Lzeromem_dczva_fallback_entry:
	/*
	 * If the start address is already aligned to 16 bytes, skip this loop.
	 */
	tst	cursor, #0xf
	b.eq	.Lzeromem_dczva_final_16bytes_aligned

	/* Calculate the next address aligned to 16 bytes */
	orr	tmp1, cursor, #15
	add	tmp1, tmp1, #1
	/* If it overflows, fall back to byte-per-byte zeroing */
	cbz	tmp1, .Lzeromem_dczva_final_1byte_aligned
	/* If the next aligned address is after the stop address, fall back */
	cmp	tmp1, stop_address
	b.hs	.Lzeromem_dczva_final_1byte_aligned

	/* Fallback entry loop: zero byte per byte */
1:
	strb	wzr, [cursor], #1
	cmp	cursor, tmp1
	b.ne	1b

	b	.Lzeromem_dczva_final_16bytes_aligned

	.unreq	cursor
	/*
	 * length is already unreq'ed to reuse the register for another
	 * variable.
	 */
	.unreq	stop_address
	.unreq	block_size
	.unreq	block_mask
	.unreq	tmp1
	.unreq	tmp2
endfunc zeromem_dczva
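/*
 * Rough, non-authoritative C-level outline of zeromem_dczva (see the flow
 * diagram above for the exact branch structure):
 *
 *	stop = mem + length;
 *	if (length < block_size, or computing the first block-aligned
 *	    address overflows or lands past stop)
 *		take the fallback path (no DC ZVA);
 *	while (mem not 16-byte aligned)   *mem++ = 0;       // 1 byte loop
 *	while (mem not block aligned)     zero 16 bytes;    // 16-byte loop
 *	while (stop - mem >= block_size)  DC ZVA; mem += block_size;
 *	while (stop - mem >= 16)          zero 16 bytes;    // 16-byte loop
 *	while (mem < stop)                *mem++ = 0;       // 1 byte loop
 */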

/* --------------------------------------------------------------------------
 * void memcpy16(void *dest, const void *src, unsigned int length)
 *
 * Copy length bytes from memory area src to memory area dest.
 * The memory areas should not overlap.
 * Destination and source addresses must be 16-byte aligned.
 * --------------------------------------------------------------------------
 */
func memcpy16
#if ASM_ASSERTION
	orr	x3, x0, x1
	tst	x3, #0xf
	ASM_ASSERT(eq)
#endif
/* copy 16 bytes at a time */
m_loop16:
	cmp	x2, #16
	b.lo	m_loop1
	ldp	x3, x4, [x1], #16
	stp	x3, x4, [x0], #16
	sub	x2, x2, #16
	b	m_loop16
/* copy byte per byte */
m_loop1:
	cbz	x2, m_end
	ldrb	w3, [x1], #1
	strb	w3, [x0], #1
	subs	x2, x2, #1
	b.ne	m_loop1
m_end:
	ret
endfunc memcpy16
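/*
 * Rough, non-authoritative C equivalent of memcpy16 above, assuming both
 * pointers are 16-byte aligned and the regions do not overlap:
 *
 *	while (length >= 16) { copy 16 bytes; dest += 16; src += 16; length -= 16; }
 *	while (length > 0)   { *dest++ = *src++; length--; }   // byte per byte
 */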

/* ---------------------------------------------------------------------------
 * Disable the MMU at EL3
 * ---------------------------------------------------------------------------
 */

func disable_mmu_el3
	mov	x1, #(SCTLR_M_BIT | SCTLR_C_BIT)
do_disable_mmu:
	mrs	x0, sctlr_el3
	bic	x0, x0, x1
	msr	sctlr_el3, x0
	isb				// ensure MMU is off
	dsb	sy
	ret
endfunc disable_mmu_el3


func disable_mmu_icache_el3
	mov	x1, #(SCTLR_M_BIT | SCTLR_C_BIT | SCTLR_I_BIT)
	b	do_disable_mmu
endfunc disable_mmu_icache_el3
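/*
 * Clarifying note (derived from the code above): both functions take no
 * arguments. disable_mmu_el3 clears the MMU and data cache enable bits
 * (SCTLR_EL3.M and SCTLR_EL3.C); disable_mmu_icache_el3 additionally
 * clears the instruction cache enable bit (SCTLR_EL3.I) before branching
 * to the shared do_disable_mmu sequence.
 */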

/* ---------------------------------------------------------------------------
 * Enable the use of VFP at EL3
 * ---------------------------------------------------------------------------
 */
#if SUPPORT_VFP
func enable_vfp
	mrs	x0, cpacr_el1
	orr	x0, x0, #CPACR_VFP_BITS
	msr	cpacr_el1, x0
	mrs	x0, cptr_el3
	mov	x1, #AARCH64_CPTR_TFP
	bic	x0, x0, x1
	msr	cptr_el3, x0
	isb
	ret
endfunc enable_vfp
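/*
 * Clarifying note (derived from the code above): enable_vfp takes no
 * arguments. It sets the FP access enable bits in CPACR_EL1
 * (CPACR_VFP_BITS) and clears the FP trap bit in CPTR_EL3
 * (AARCH64_CPTR_TFP) so that FP/SIMD instructions are not trapped to EL3.
 */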
#endif
