xref: /rk3399_ARM-atf/lib/aarch64/misc_helpers.S (revision 82cb2c1ad9897473743f08437d0a3995bed561b9)
/*
 * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */

#include <arch.h>
#include <asm_macros.S>
#include <assert_macros.S>

	.globl	get_afflvl_shift
	.globl	mpidr_mask_lower_afflvls
	.globl	eret
	.globl	smc

	.globl	zero_normalmem
	.globl	zeromem
	.globl	zeromem16
	.globl	memcpy16

	.globl	disable_mmu_el3
	.globl	disable_mmu_icache_el3

#if SUPPORT_VFP
	.globl	enable_vfp
#endif

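/* -----------------------------------------------------------------------
 * Compute the bit shift of the MPIDR affinity field corresponding to the
 * affinity level passed in x0 and return it in x0. Affinity level 3 is
 * incremented to 4 before shifting because Aff3 occupies MPIDR bits
 * [39:32], one byte above the Aff0-Aff2 fields in bits [23:0].
 * Clobbers x1.
 * -----------------------------------------------------------------------
 */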
func get_afflvl_shift
	cmp	x0, #3
	cinc	x0, x0, eq
	mov	x1, #MPIDR_AFFLVL_SHIFT
	lsl	x0, x0, x1
	ret
endfunc get_afflvl_shift

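/* -----------------------------------------------------------------------
 * Clear all affinity fields of the MPIDR value in x0 that are below the
 * affinity level passed in x1 and return the result in x0. The field
 * shift is derived in the same way as in get_afflvl_shift above.
 * Clobbers x1 and x2.
 * -----------------------------------------------------------------------
 */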
func mpidr_mask_lower_afflvls
	cmp	x1, #3
	cinc	x1, x1, eq
	mov	x2, #MPIDR_AFFLVL_SHIFT
	lsl	x2, x1, x2
	lsr	x0, x0, x2
	lsl	x0, x0, x2
	ret
endfunc mpidr_mask_lower_afflvls


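/* -----------------------------------------------------------------------
 * Perform an exception return using the currently programmed
 * ELR_ELx/SPSR_ELx values.
 * -----------------------------------------------------------------------
 */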
func eret
	eret
endfunc eret


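/* -----------------------------------------------------------------------
 * Issue an SMC with immediate #0, entering the EL3 secure monitor with
 * whatever arguments the caller has already placed in x0-x7.
 * -----------------------------------------------------------------------
 */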
func smc
	smc	#0
endfunc smc

/* -----------------------------------------------------------------------
 * void zeromem16(void *mem, unsigned int length);
 *
 * Initialise a memory region to 0.
 * The memory address must be 16-byte aligned.
 * NOTE: This function is deprecated and zeromem should be used instead.
 * -----------------------------------------------------------------------
 */
.equ	zeromem16, zeromem

/* -----------------------------------------------------------------------
 * void zero_normalmem(void *mem, unsigned int length);
 *
 * Initialise a region of normal memory to 0. This function complies with
 * the AAPCS and can be called from C code.
 *
 * NOTE: The MMU must be enabled when using this function, as it can only
 *       operate on normal memory. It is mainly intended to be called from
 *       C code, where the MMU is usually enabled.
 * -----------------------------------------------------------------------
 */
.equ	zero_normalmem, zeromem_dczva

/* -----------------------------------------------------------------------
 * void zeromem(void *mem, unsigned int length);
 *
 * Initialise a region of device memory to 0. This function complies with
 * the AAPCS and can be called from C code.
 *
 * NOTE: When the data caches and MMU are enabled, zero_normalmem can
 *       usually be used instead for faster zeroing.
 * -----------------------------------------------------------------------
 */
func zeromem
	/* x2 is the address past the last byte to zero */
	add	x2, x0, x1
	/*
	 * Use the fallback path, which does not rely on the DC ZVA
	 * instruction and therefore does not require the MMU to be enabled.
	 */
	b	.Lzeromem_dczva_fallback_entry
endfunc zeromem

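/*
 * Illustrative usage sketch from C (the extern declarations and buffers
 * below are assumptions made for the example, not taken from a header):
 *
 *   extern void zeromem(void *mem, unsigned int length);
 *   extern void zero_normalmem(void *mem, unsigned int length);
 *
 *   static char early_buf[64];
 *   zeromem(early_buf, sizeof(early_buf));            // safe with MMU off
 *
 *   static char runtime_buf[256];
 *   zero_normalmem(runtime_buf, sizeof(runtime_buf)); // MMU and caches on
 */
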
/* -----------------------------------------------------------------------
 * void zeromem_dczva(void *mem, unsigned int length);
 *
 * Fill a region of normal memory of size "length" bytes with null bytes.
 * The MMU must be enabled and the memory must be of Normal type. This is
 * because the function internally uses the DC ZVA instruction, which
 * generates an Alignment fault if used on any type of Device memory (see
 * section D3.4.9 of the ARMv8 ARM, issue k). When the MMU is disabled,
 * all memory behaves like Device-nGnRnE memory (see section D4.2.8),
 * hence the requirement for the MMU to be enabled.
 *
 * NOTE: The code assumes that the block size reported in the DCZID_EL0
 *       register is at least 16 bytes.
 * -----------------------------------------------------------------------
 */
func zeromem_dczva

	/*
	 * The function consists of a series of loops that zero memory one
	 * byte at a time, 16 bytes at a time, or a whole block at a time
	 * using the DC ZVA instruction, where the block size is assumed to
	 * be at least 16 bytes. If the DC ZVA instruction cannot be used, or
	 * if the initial alignment would overflow or run past the end of the
	 * region, a fallback path that does not use DC ZVA is taken instead.
	 * Note: The fallback path is also used by the zeromem function,
	 *       which branches to it directly.
	 *
	 *              +---------+   zeromem_dczva
	 *              |  entry  |
	 *              +----+----+
	 *                   |
	 *                   v
	 *              +---------+
	 *              | checks  |>o-------+ (If any check fails, fallback)
	 *              +----+----+         |
	 *                   |              |---------------+
	 *                   v              | Fallback path |
	 *            +------+------+       |---------------+
	 *            | 1 byte loop |       |
	 *            +------+------+ .Lzeromem_dczva_initial_1byte_aligned_end
	 *                   |              |
	 *                   v              |
	 *           +-------+-------+      |
	 *           | 16 bytes loop |      |
	 *           +-------+-------+      |
	 *                   |              |
	 *                   v              |
	 *            +------+------+ .Lzeromem_dczva_blocksize_aligned
	 *            | DC ZVA loop |       |
	 *            +------+------+       |
	 *       +--------+  |              |
	 *       |        |  |              |
	 *       |        v  v              |
	 *       |   +-------+-------+ .Lzeromem_dczva_final_16bytes_aligned
	 *       |   | 16 bytes loop |      |
	 *       |   +-------+-------+      |
	 *       |           |              |
	 *       |           v              |
	 *       |    +------+------+ .Lzeromem_dczva_final_1byte_aligned
	 *       |    | 1 byte loop |       |
	 *       |    +-------------+       |
	 *       |           |              |
	 *       |           v              |
	 *       |       +---+--+           |
	 *       |       | exit |           |
	 *       |       +------+           |
	 *       |                          |
	 *       |           +--------------+    +------------------+ zeromem
	 *       |           |  +----------------| zeromem function |
	 *       |           |  |                +------------------+
	 *       |           v  v
	 *       |    +-------------+ .Lzeromem_dczva_fallback_entry
	 *       |    | 1 byte loop |
	 *       |    +------+------+
	 *       |           |
	 *       +-----------+
	 */
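
	/*
	 * Rough pseudo-C sketch of the flow above (illustrative only; edge
	 * cases such as address overflow are omitted, "store16" stands for
	 * the stp xzr, xzr store and "dc_zva" for the DC ZVA instruction):
	 *
	 *   stop = mem + length;
	 *   if (length < block_size)
	 *           goto fallback;
	 *   while (mem & 0xf)                        // initial 1 byte loop
	 *           *mem++ = 0;
	 *   while (mem & block_mask)                 // initial 16 bytes loop
	 *           store16(mem), mem += 16;
	 *   while (mem < (stop & ~block_mask))       // DC ZVA loop
	 *           dc_zva(mem), mem += block_size;
	 *   while (mem < (stop & ~0xfUL))            // final 16 bytes loop
	 *           store16(mem), mem += 16;
	 *   while (mem < stop)                       // final 1 byte loop
	 *           *mem++ = 0;
	 */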

	/*
	 * Readable names for registers
	 *
	 * Registers x0, x1 and x2 are also set by zeromem, which branches
	 * into the fallback path directly, so cursor, length and
	 * stop_address must not be retargeted to other registers.
	 */
	cursor       .req x0 /* Start address and then current address */
	length       .req x1 /* Length in bytes of the region to zero out */
	/* x1 is reused because length is never used once block_mask is set */
	block_mask   .req x1 /* Bitmask of the block size read in DCZID_EL0 */
	stop_address .req x2 /* Address past the last zeroed byte */
	block_size   .req x3 /* Size of a block in bytes as read in DCZID_EL0 */
	tmp1         .req x4
	tmp2         .req x5

#if ENABLE_ASSERTIONS
	/*
	 * Check the M bit (MMU enabled) of the current SCTLR_EL(1|3)
	 * value and panic if the MMU is disabled.
	 */
#if defined(IMAGE_BL1) || defined(IMAGE_BL31)
	mrs	tmp1, sctlr_el3
#else
	mrs	tmp1, sctlr_el1
#endif

	tst	tmp1, #SCTLR_M_BIT
	ASM_ASSERT(ne)
#endif /* ENABLE_ASSERTIONS */

	/* stop_address is the address past the last byte to zero */
	add	stop_address, cursor, length

	/*
	 * Read dczid_el0; its BS field (bits [3:0]) encodes
	 * log2(<block size in words>), i.e. log2(<block size in bytes>) - 2.
	 */
	mrs	block_size, dczid_el0

	/*
	 * Select the 4 lowest bits and convert the extracted log2(<block size
	 * in words>) to <block size in bytes>.
	 */
	ubfx	block_size, block_size, #0, #4
	mov	tmp2, #(1 << 2)
	lsl	block_size, tmp2, block_size
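	/*
	 * Worked example: a core reporting DCZID_EL0.BS = 4 has a zero block
	 * of 2^4 = 16 words, so block_size = 4 << 4 = 64 bytes.
	 */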

#if ENABLE_ASSERTIONS
	/*
	 * Assumes the block size is at least 16 bytes to avoid manual
	 * realignment of the cursor at the end of the DC ZVA loop.
	 */
	cmp	block_size, #16
	ASM_ASSERT(hs)
#endif
	/*
	 * It is not worth doing all the setup for a region smaller than a
	 * block; this check also protects against zeroing a whole block when
	 * the area to zero is smaller than one. In addition, as the block
	 * size is assumed to be at least 16 bytes, it stops the initial
	 * aligning loops from trying to zero 16 bytes when length is less
	 * than 16.
	 */
	cmp	length, block_size
	b.lo	.Lzeromem_dczva_fallback_entry

	/*
	 * Calculate the bitmask of the block alignment. It will never
	 * underflow as the block size is between 4 bytes and 2kB.
	 * block_mask = block_size - 1
	 */
	sub	block_mask, block_size, #1

	/*
	 * The length alias must not be used after this point unless it is
	 * redefined as a register other than block_mask's.
	 */
	.unreq	length

	/*
	 * If the start address is already aligned to the zero block size, go
	 * straight to the cache zeroing loop. This is safe because at this
	 * point, the length cannot be smaller than a block size.
	 */
	tst	cursor, block_mask
	b.eq	.Lzeromem_dczva_blocksize_aligned

	/*
	 * Calculate the first block-size-aligned address. It is assumed that
	 * the zero block size is at least 16 bytes. This address is the last
	 * address of this initial loop.
	 */
	orr	tmp1, cursor, block_mask
	add	tmp1, tmp1, #1

	/*
	 * If the addition overflows, skip the cache zeroing loops. This is,
	 * however, quite unlikely.
	 */
	cbz	tmp1, .Lzeromem_dczva_fallback_entry

	/*
	 * If the first block-size-aligned address is past the last address,
	 * fall back to the simpler code.
	 */
	cmp	tmp1, stop_address
	b.hi	.Lzeromem_dczva_fallback_entry

	/*
	 * If the start address is already aligned to 16 bytes, skip this loop.
	 * It is safe to do this because tmp1 (the stop address of the initial
	 * 16 bytes loop) will never be greater than the final stop address.
	 */
	tst	cursor, #0xf
	b.eq	.Lzeromem_dczva_initial_1byte_aligned_end

	/* Calculate the next address aligned to 16 bytes */
	orr	tmp2, cursor, #0xf
	add	tmp2, tmp2, #1
	/* If it overflows, fall back to the simple path (unlikely) */
	cbz	tmp2, .Lzeromem_dczva_fallback_entry
	/*
	 * The next aligned address cannot be after the stop address because
	 * the length cannot be smaller than 16 at this point.
	 */

	/* First loop: zero byte per byte */
1:
	strb	wzr, [cursor], #1
	cmp	cursor, tmp2
	b.ne	1b
.Lzeromem_dczva_initial_1byte_aligned_end:

	/*
	 * Second loop: we need to zero 16 bytes at a time from cursor to tmp1
	 * before being able to use the code that deals with block-size-aligned
	 * addresses.
	 */
	cmp	cursor, tmp1
	b.hs	2f
1:
	stp	xzr, xzr, [cursor], #16
	cmp	cursor, tmp1
	b.lo	1b
2:

	/*
	 * Third loop: zero a block at a time using the DC ZVA cache block
	 * zeroing instruction.
	 */
.Lzeromem_dczva_blocksize_aligned:
	/*
	 * Calculate the last block-size-aligned address. If the cursor has
	 * already reached it, the loop exits immediately.
	 */
	bic	tmp1, stop_address, block_mask

	cmp	cursor, tmp1
	b.hs	2f
1:
	/* Zero the block containing the cursor */
	dc	zva, cursor
	/* Increment the cursor by the size of a block */
	add	cursor, cursor, block_size
	cmp	cursor, tmp1
	b.lo	1b
2:

	/*
	 * Fourth loop: zero 16 bytes at a time, then zero the remaining area
	 * byte per byte.
	 */
.Lzeromem_dczva_final_16bytes_aligned:
	/*
	 * Calculate the last 16-byte-aligned address. It is assumed that the
	 * block size is never smaller than 16 bytes, so that the current
	 * cursor is aligned to at least a 16-byte boundary.
	 */
	bic	tmp1, stop_address, #15

	cmp	cursor, tmp1
	b.hs	2f
1:
	stp	xzr, xzr, [cursor], #16
	cmp	cursor, tmp1
	b.lo	1b
2:

	/* Fifth and final loop: zero byte per byte */
.Lzeromem_dczva_final_1byte_aligned:
	cmp	cursor, stop_address
	b.eq	2f
1:
	strb	wzr, [cursor], #1
	cmp	cursor, stop_address
	b.ne	1b
2:
	ret

	/* Fallback for unaligned start addresses */
.Lzeromem_dczva_fallback_entry:
	/*
	 * If the start address is already aligned to 16 bytes, skip this loop.
	 */
	tst	cursor, #0xf
	b.eq	.Lzeromem_dczva_final_16bytes_aligned

	/* Calculate the next address aligned to 16 bytes */
	orr	tmp1, cursor, #15
	add	tmp1, tmp1, #1
	/* If it overflows, fall back to byte-per-byte zeroing */
	cbz	tmp1, .Lzeromem_dczva_final_1byte_aligned
	/* If the next aligned address is after the stop address, fall back */
	cmp	tmp1, stop_address
	b.hs	.Lzeromem_dczva_final_1byte_aligned

	/* Fallback entry loop: zero byte per byte */
1:
	strb	wzr, [cursor], #1
	cmp	cursor, tmp1
	b.ne	1b

	b	.Lzeromem_dczva_final_16bytes_aligned

	.unreq	cursor
	/*
	 * length is already unreq'ed to reuse the register for another
	 * variable.
	 */
	.unreq	stop_address
	.unreq	block_size
	.unreq	block_mask
	.unreq	tmp1
	.unreq	tmp2
endfunc zeromem_dczva

/* --------------------------------------------------------------------------
 * void memcpy16(void *dest, const void *src, unsigned int length)
 *
 * Copy length bytes from memory area src to memory area dest.
 * The memory areas should not overlap.
 * Destination and source addresses must be 16-byte aligned.
 * --------------------------------------------------------------------------
 */
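/*
 * Rough C equivalent of the loops below (illustrative sketch only; the
 * assembly moves 16 bytes per iteration with a single ldp/stp pair):
 *
 *   void memcpy16(void *dest, const void *src, unsigned int length)
 *   {
 *           uint64_t *d = dest;
 *           const uint64_t *s = src;
 *
 *           while (length >= 16) {                  // m_loop16
 *                   *d++ = *s++;
 *                   *d++ = *s++;
 *                   length -= 16;
 *           }
 *
 *           unsigned char *db = (unsigned char *)d;
 *           const unsigned char *sb = (const unsigned char *)s;
 *
 *           while (length--)                        // m_loop1
 *                   *db++ = *sb++;
 *   }
 */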
func memcpy16
#if ENABLE_ASSERTIONS
	orr	x3, x0, x1
	tst	x3, #0xf
	ASM_ASSERT(eq)
#endif
/* copy 16 bytes at a time */
m_loop16:
	cmp	x2, #16
	b.lo	m_loop1
	ldp	x3, x4, [x1], #16
	stp	x3, x4, [x0], #16
	sub	x2, x2, #16
	b	m_loop16
/* copy byte per byte */
m_loop1:
	cbz	x2, m_end
	ldrb	w3, [x1], #1
	strb	w3, [x0], #1
	subs	x2, x2, #1
	b.ne	m_loop1
m_end:
	ret
endfunc memcpy16

/* ---------------------------------------------------------------------------
 * Disable the MMU at EL3
 * ---------------------------------------------------------------------------
 */

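/*
 * Both routines below take no arguments and only clobber x0 and x1:
 * disable_mmu_el3 clears the M (MMU) and C (data cache) bits of SCTLR_EL3,
 * while disable_mmu_icache_el3 additionally clears the I (instruction
 * cache) bit.
 */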
func disable_mmu_el3
	mov	x1, #(SCTLR_M_BIT | SCTLR_C_BIT)
do_disable_mmu:
	mrs	x0, sctlr_el3
	bic	x0, x0, x1
	msr	sctlr_el3, x0
	isb				// ensure MMU is off
	dsb	sy
	ret
endfunc disable_mmu_el3


func disable_mmu_icache_el3
	mov	x1, #(SCTLR_M_BIT | SCTLR_C_BIT | SCTLR_I_BIT)
	b	do_disable_mmu
endfunc disable_mmu_icache_el3

/* ---------------------------------------------------------------------------
 * Enable the use of VFP at EL3
 * ---------------------------------------------------------------------------
 */
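/*
 * enable_vfp sets CPACR_VFP_BITS in CPACR_EL1 and clears AARCH64_CPTR_TFP
 * in CPTR_EL3 so that FP/SIMD instructions are no longer trapped, then
 * issues an ISB to make the change effective. Clobbers x0 and x1.
 */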
#if SUPPORT_VFP
func enable_vfp
	mrs	x0, cpacr_el1
	orr	x0, x0, #CPACR_VFP_BITS
	msr	cpacr_el1, x0
	mrs	x0, cptr_el3
	mov	x1, #AARCH64_CPTR_TFP
	bic	x0, x0, x1
	msr	cptr_el3, x0
	isb
	ret
endfunc enable_vfp
#endif
