xref: /rk3399_ARM-atf/lib/aarch64/misc_helpers.S (revision c948f77136c42a92d0bb660543a3600c36dcf7f1)
1/*
2 * Copyright (c) 2013-2018, ARM Limited and Contributors. All rights reserved.
3 *
4 * SPDX-License-Identifier: BSD-3-Clause
5 */
6
7#include <arch.h>
8#include <asm_macros.S>
9#include <assert_macros.S>
10#include <lib/xlat_tables/xlat_tables_defs.h>
11
12#if !ERROR_DEPRECATED
13	.globl	get_afflvl_shift
14	.globl	mpidr_mask_lower_afflvls
15	.globl	eret
16#endif /* ERROR_DEPRECATED */
17	.globl	smc
18
19	.globl	zero_normalmem
20	.globl	zeromem
21	.globl	zeromem16
22	.globl	memcpy16
23
24	.globl	disable_mmu_el1
25	.globl	disable_mmu_el3
26	.globl	disable_mmu_icache_el1
27	.globl	disable_mmu_icache_el3
28
29	.globl	fixup_gdt_reloc
30
31#if SUPPORT_VFP
32	.globl	enable_vfp
33#endif
34
35#if !ERROR_DEPRECATED
36func get_afflvl_shift
37	cmp	x0, #3
38	cinc	x0, x0, eq
39	mov	x1, #MPIDR_AFFLVL_SHIFT
40	lsl	x0, x0, x1
41	ret
42endfunc get_afflvl_shift
43
44func mpidr_mask_lower_afflvls
45	cmp	x1, #3
46	cinc	x1, x1, eq
47	mov	x2, #MPIDR_AFFLVL_SHIFT
48	lsl	x2, x1, x2
49	lsr	x0, x0, x2
50	lsl	x0, x0, x2
51	ret
52endfunc mpidr_mask_lower_afflvls
53
54
55func eret
56	eret
57endfunc eret
58#endif /* ERROR_DEPRECATED */
59
60func smc
61	smc	#0
62endfunc smc
63
64/* -----------------------------------------------------------------------
65 * void zero_normalmem(void *mem, unsigned int length);
66 *
67 * Initialise a region in normal memory to 0. This functions complies with the
68 * AAPCS and can be called from C code.
69 *
70 * NOTE: MMU must be enabled when using this function as it can only operate on
71 *       normal memory. It is intended to be mainly used from C code when MMU
72 *       is usually enabled.
73 * -----------------------------------------------------------------------
74 */
75.equ	zero_normalmem, zeromem_dczva
76
77/* -----------------------------------------------------------------------
78 * void zeromem(void *mem, unsigned int length);
79 *
80 * Initialise a region of device memory to 0. This functions complies with the
81 * AAPCS and can be called from C code.
82 *
83 * NOTE: When data caches and MMU are enabled, zero_normalmem can usually be
84 *       used instead for faster zeroing.
85 *
86 * -----------------------------------------------------------------------
87 */
88func zeromem
89	/* x2 is the address past the last zeroed address */
90	add	x2, x0, x1
91	/*
92	 * Uses the fallback path that does not use DC ZVA instruction and
93	 * therefore does not need enabled MMU
94	 */
95	b	.Lzeromem_dczva_fallback_entry
96endfunc zeromem
97
98/* -----------------------------------------------------------------------
99 * void zeromem_dczva(void *mem, unsigned int length);
100 *
101 * Fill a region of normal memory of size "length" in bytes with null bytes.
102 * MMU must be enabled and the memory be of
103 * normal type. This is because this function internally uses the DC ZVA
104 * instruction, which generates an Alignment fault if used on any type of
105 * Device memory (see section D3.4.9 of the ARMv8 ARM, issue k). When the MMU
106 * is disabled, all memory behaves like Device-nGnRnE memory (see section
107 * D4.2.8), hence the requirement on the MMU being enabled.
108 * NOTE: The code assumes that the block size as defined in DCZID_EL0
109 *       register is at least 16 bytes.
110 *
111 * -----------------------------------------------------------------------
112 */
113func zeromem_dczva
114
115	/*
116	 * The function consists of a series of loops that zero memory one byte
117	 * at a time, 16 bytes at a time or using the DC ZVA instruction to
118	 * zero aligned block of bytes, which is assumed to be more than 16.
119	 * In the case where the DC ZVA instruction cannot be used or if the
120	 * first 16 bytes loop would overflow, there is fallback path that does
121	 * not use DC ZVA.
122	 * Note: The fallback path is also used by the zeromem function that
123	 *       branches to it directly.
124	 *
125	 *              +---------+   zeromem_dczva
126	 *              |  entry  |
127	 *              +----+----+
128	 *                   |
129	 *                   v
130	 *              +---------+
131	 *              | checks  |>o-------+ (If any check fails, fallback)
132	 *              +----+----+         |
133	 *                   |              |---------------+
134	 *                   v              | Fallback path |
135	 *            +------+------+       |---------------+
136	 *            | 1 byte loop |       |
137	 *            +------+------+ .Lzeromem_dczva_initial_1byte_aligned_end
138	 *                   |              |
139	 *                   v              |
140	 *           +-------+-------+      |
141	 *           | 16 bytes loop |      |
142	 *           +-------+-------+      |
143	 *                   |              |
144	 *                   v              |
145	 *            +------+------+ .Lzeromem_dczva_blocksize_aligned
146	 *            | DC ZVA loop |       |
147	 *            +------+------+       |
148	 *       +--------+  |              |
149	 *       |        |  |              |
150	 *       |        v  v              |
151	 *       |   +-------+-------+ .Lzeromem_dczva_final_16bytes_aligned
152	 *       |   | 16 bytes loop |      |
153	 *       |   +-------+-------+      |
154	 *       |           |              |
155	 *       |           v              |
156	 *       |    +------+------+ .Lzeromem_dczva_final_1byte_aligned
157	 *       |    | 1 byte loop |       |
158	 *       |    +-------------+       |
159	 *       |           |              |
160	 *       |           v              |
161	 *       |       +---+--+           |
162	 *       |       | exit |           |
163	 *       |       +------+           |
164	 *       |			    |
165	 *       |           +--------------+    +------------------+ zeromem
166	 *       |           |  +----------------| zeromem function |
167	 *       |           |  |                +------------------+
168	 *       |           v  v
169	 *       |    +-------------+ .Lzeromem_dczva_fallback_entry
170	 *       |    | 1 byte loop |
171	 *       |    +------+------+
172	 *       |           |
173	 *       +-----------+
174	 */
175
176	/*
177	 * Readable names for registers
178	 *
179	 * Registers x0, x1 and x2 are also set by zeromem which
180	 * branches into the fallback path directly, so cursor, length and
181	 * stop_address should not be retargeted to other registers.
182	 */
183	cursor       .req x0 /* Start address and then current address */
184	length       .req x1 /* Length in bytes of the region to zero out */
185	/* Reusing x1 as length is never used after block_mask is set */
186	block_mask   .req x1 /* Bitmask of the block size read in DCZID_EL0 */
187	stop_address .req x2 /* Address past the last zeroed byte */
188	block_size   .req x3 /* Size of a block in bytes as read in DCZID_EL0 */
189	tmp1         .req x4
190	tmp2         .req x5
191
192#if ENABLE_ASSERTIONS
193	/*
194	 * Check for M bit (MMU enabled) of the current SCTLR_EL(1|3)
195	 * register value and panic if the MMU is disabled.
196	 */
197#if defined(IMAGE_BL1) || defined(IMAGE_BL31) || (defined(IMAGE_BL2) && BL2_AT_EL3)
198	mrs	tmp1, sctlr_el3
199#else
200	mrs	tmp1, sctlr_el1
201#endif
202
203	tst	tmp1, #SCTLR_M_BIT
204	ASM_ASSERT(ne)
205#endif /* ENABLE_ASSERTIONS */
206
207	/* stop_address is the address past the last to zero */
208	add	stop_address, cursor, length
209
210	/*
211	 * Get block_size = (log2(<block size>) >> 2) (see encoding of
212	 * dczid_el0 reg)
213	 */
214	mrs	block_size, dczid_el0
215
216	/*
217	 * Select the 4 lowest bits and convert the extracted log2(<block size
218	 * in words>) to <block size in bytes>
219	 */
220	ubfx	block_size, block_size, #0, #4
221	mov	tmp2, #(1 << 2)
222	lsl	block_size, tmp2, block_size
223
224#if ENABLE_ASSERTIONS
225	/*
226	 * Assumes block size is at least 16 bytes to avoid manual realignment
227	 * of the cursor at the end of the DCZVA loop.
228	 */
229	cmp	block_size, #16
230	ASM_ASSERT(hs)
231#endif
232	/*
233	 * Not worth doing all the setup for a region less than a block and
234	 * protects against zeroing a whole block when the area to zero is
235	 * smaller than that. Also, as it is assumed that the block size is at
236	 * least 16 bytes, this also protects the initial aligning loops from
237	 * trying to zero 16 bytes when length is less than 16.
238	 */
239	cmp	length, block_size
240	b.lo	.Lzeromem_dczva_fallback_entry
241
242	/*
243	 * Calculate the bitmask of the block alignment. It will never
244	 * underflow as the block size is between 4 bytes and 2kB.
245	 * block_mask = block_size - 1
246	 */
247	sub	block_mask, block_size, #1
248
249	/*
250	 * length alias should not be used after this point unless it is
251	 * defined as a register other than block_mask's.
252	 */
253	 .unreq length
254
255	/*
256	 * If the start address is already aligned to zero block size, go
257	 * straight to the cache zeroing loop. This is safe because at this
258	 * point, the length cannot be smaller than a block size.
259	 */
260	tst	cursor, block_mask
261	b.eq	.Lzeromem_dczva_blocksize_aligned
262
263	/*
264	 * Calculate the first block-size-aligned address. It is assumed that
265	 * the zero block size is at least 16 bytes. This address is the last
266	 * address of this initial loop.
267	 */
268	orr	tmp1, cursor, block_mask
269	add	tmp1, tmp1, #1
270
271	/*
272	 * If the addition overflows, skip the cache zeroing loops. This is
273	 * quite unlikely however.
274	 */
275	cbz	tmp1, .Lzeromem_dczva_fallback_entry
276
277	/*
278	 * If the first block-size-aligned address is past the last address,
279	 * fallback to the simpler code.
280	 */
281	cmp	tmp1, stop_address
282	b.hi	.Lzeromem_dczva_fallback_entry
283
284	/*
285	 * If the start address is already aligned to 16 bytes, skip this loop.
286	 * It is safe to do this because tmp1 (the stop address of the initial
287	 * 16 bytes loop) will never be greater than the final stop address.
288	 */
289	tst	cursor, #0xf
290	b.eq	.Lzeromem_dczva_initial_1byte_aligned_end
291
292	/* Calculate the next address aligned to 16 bytes */
293	orr	tmp2, cursor, #0xf
294	add	tmp2, tmp2, #1
295	/* If it overflows, fallback to the simple path (unlikely) */
296	cbz	tmp2, .Lzeromem_dczva_fallback_entry
297	/*
298	 * Next aligned address cannot be after the stop address because the
299	 * length cannot be smaller than 16 at this point.
300	 */
301
302	/* First loop: zero byte per byte */
3031:
304	strb	wzr, [cursor], #1
305	cmp	cursor, tmp2
306	b.ne	1b
307.Lzeromem_dczva_initial_1byte_aligned_end:
308
309	/*
310	 * Second loop: we need to zero 16 bytes at a time from cursor to tmp1
311	 * before being able to use the code that deals with block-size-aligned
312	 * addresses.
313	 */
314	cmp	cursor, tmp1
315	b.hs	2f
3161:
317	stp	xzr, xzr, [cursor], #16
318	cmp	cursor, tmp1
319	b.lo	1b
3202:
321
322	/*
323	 * Third loop: zero a block at a time using DC ZVA cache block zeroing
324	 * instruction.
325	 */
326.Lzeromem_dczva_blocksize_aligned:
327	/*
328	 * Calculate the last block-size-aligned address. If the result equals
329	 * to the start address, the loop will exit immediately.
330	 */
331	bic	tmp1, stop_address, block_mask
332
333	cmp	cursor, tmp1
334	b.hs	2f
3351:
336	/* Zero the block containing the cursor */
337	dc	zva, cursor
338	/* Increment the cursor by the size of a block */
339	add	cursor, cursor, block_size
340	cmp	cursor, tmp1
341	b.lo	1b
3422:
343
344	/*
345	 * Fourth loop: zero 16 bytes at a time and then byte per byte the
346	 * remaining area
347	 */
348.Lzeromem_dczva_final_16bytes_aligned:
349	/*
350	 * Calculate the last 16 bytes aligned address. It is assumed that the
351	 * block size will never be smaller than 16 bytes so that the current
352	 * cursor is aligned to at least 16 bytes boundary.
353	 */
354	bic	tmp1, stop_address, #15
355
356	cmp	cursor, tmp1
357	b.hs	2f
3581:
359	stp	xzr, xzr, [cursor], #16
360	cmp	cursor, tmp1
361	b.lo	1b
3622:
363
364	/* Fifth and final loop: zero byte per byte */
365.Lzeromem_dczva_final_1byte_aligned:
366	cmp	cursor, stop_address
367	b.eq	2f
3681:
369	strb	wzr, [cursor], #1
370	cmp	cursor, stop_address
371	b.ne	1b
3722:
373	ret
374
375	/* Fallback for unaligned start addresses */
376.Lzeromem_dczva_fallback_entry:
377	/*
378	 * If the start address is already aligned to 16 bytes, skip this loop.
379	 */
380	tst	cursor, #0xf
381	b.eq	.Lzeromem_dczva_final_16bytes_aligned
382
383	/* Calculate the next address aligned to 16 bytes */
384	orr	tmp1, cursor, #15
385	add	tmp1, tmp1, #1
386	/* If it overflows, fallback to byte per byte zeroing */
387	cbz	tmp1, .Lzeromem_dczva_final_1byte_aligned
388	/* If the next aligned address is after the stop address, fall back */
389	cmp	tmp1, stop_address
390	b.hs	.Lzeromem_dczva_final_1byte_aligned
391
392	/* Fallback entry loop: zero byte per byte */
3931:
394	strb	wzr, [cursor], #1
395	cmp	cursor, tmp1
396	b.ne	1b
397
398	b	.Lzeromem_dczva_final_16bytes_aligned
399
400	.unreq	cursor
401	/*
402	 * length is already unreq'ed to reuse the register for another
403	 * variable.
404	 */
405	.unreq	stop_address
406	.unreq	block_size
407	.unreq	block_mask
408	.unreq	tmp1
409	.unreq	tmp2
410endfunc zeromem_dczva
411
412/* --------------------------------------------------------------------------
413 * void memcpy16(void *dest, const void *src, unsigned int length)
414 *
415 * Copy length bytes from memory area src to memory area dest.
416 * The memory areas should not overlap.
417 * Destination and source addresses must be 16-byte aligned.
418 * --------------------------------------------------------------------------
419 */
420func memcpy16
421#if ENABLE_ASSERTIONS
422	orr	x3, x0, x1
423	tst	x3, #0xf
424	ASM_ASSERT(eq)
425#endif
426/* copy 16 bytes at a time */
427m_loop16:
428	cmp	x2, #16
429	b.lo	m_loop1
430	ldp	x3, x4, [x1], #16
431	stp	x3, x4, [x0], #16
432	sub	x2, x2, #16
433	b	m_loop16
434/* copy byte per byte */
435m_loop1:
436	cbz	x2, m_end
437	ldrb	w3, [x1], #1
438	strb	w3, [x0], #1
439	subs	x2, x2, #1
440	b.ne	m_loop1
441m_end:
442	ret
443endfunc memcpy16
444
445/* ---------------------------------------------------------------------------
446 * Disable the MMU at EL3
447 * ---------------------------------------------------------------------------
448 */
449
450func disable_mmu_el3
451	mov	x1, #(SCTLR_M_BIT | SCTLR_C_BIT)
452do_disable_mmu_el3:
453	mrs	x0, sctlr_el3
454	bic	x0, x0, x1
455	msr	sctlr_el3, x0
456	isb	/* ensure MMU is off */
457	dsb	sy
458	ret
459endfunc disable_mmu_el3
460
461
462func disable_mmu_icache_el3
463	mov	x1, #(SCTLR_M_BIT | SCTLR_C_BIT | SCTLR_I_BIT)
464	b	do_disable_mmu_el3
465endfunc disable_mmu_icache_el3
466
467/* ---------------------------------------------------------------------------
468 * Disable the MMU at EL1
469 * ---------------------------------------------------------------------------
470 */
471
472func disable_mmu_el1
473	mov	x1, #(SCTLR_M_BIT | SCTLR_C_BIT)
474do_disable_mmu_el1:
475	mrs	x0, sctlr_el1
476	bic	x0, x0, x1
477	msr	sctlr_el1, x0
478	isb	/* ensure MMU is off */
479	dsb	sy
480	ret
481endfunc disable_mmu_el1
482
483
484func disable_mmu_icache_el1
485	mov	x1, #(SCTLR_M_BIT | SCTLR_C_BIT | SCTLR_I_BIT)
486	b	do_disable_mmu_el1
487endfunc disable_mmu_icache_el1
488
489/* ---------------------------------------------------------------------------
490 * Enable the use of VFP at EL3
491 * ---------------------------------------------------------------------------
492 */
493#if SUPPORT_VFP
494func enable_vfp
495	mrs	x0, cpacr_el1
496	orr	x0, x0, #CPACR_VFP_BITS
497	msr	cpacr_el1, x0
498	mrs	x0, cptr_el3
499	mov	x1, #AARCH64_CPTR_TFP
500	bic	x0, x0, x1
501	msr	cptr_el3, x0
502	isb
503	ret
504endfunc enable_vfp
505#endif
506
507/* ---------------------------------------------------------------------------
508 * Helper to fixup Global Descriptor table (GDT) and dynamic relocations
509 * (.rela.dyn) at runtime.
510 *
511 * This function is meant to be used when the firmware is compiled with -fpie
512 * and linked with -pie options. We rely on the linker script exporting
513 * appropriate markers for start and end of the section. For GOT, we
514 * expect __GOT_START__ and __GOT_END__. Similarly for .rela.dyn, we expect
515 * __RELA_START__ and __RELA_END__.
516 *
517 * The function takes the limits of the memory to apply fixups to as
518 * arguments (which is usually the limits of the relocable BL image).
519 *   x0 -  the start of the fixup region
520 *   x1 -  the limit of the fixup region
521 * These addresses have to be page (4KB aligned).
522 * ---------------------------------------------------------------------------
523 */
524func fixup_gdt_reloc
525	mov	x6, x0
526	mov	x7, x1
527
528	/* Test if the limits are 4K aligned */
529#if ENABLE_ASSERTIONS
530	orr	x0, x0, x1
531	tst	x0, #(PAGE_SIZE - 1)
532	ASM_ASSERT(eq)
533#endif
534	/*
535	 * Calculate the offset based on return address in x30.
536	 * Assume that this funtion is called within a page of the start of
537	 * of fixup region.
538	 */
539	and	x2, x30, #~(PAGE_SIZE - 1)
540	sub	x0, x2, x6	/* Diff(S) = Current Address - Compiled Address */
541
542	adrp	x1, __GOT_START__
543	add	x1, x1, :lo12:__GOT_START__
544	adrp	x2, __GOT_END__
545	add	x2, x2, :lo12:__GOT_END__
546
547	/*
548	 * GOT is an array of 64_bit addresses which must be fixed up as
549	 * new_addr = old_addr + Diff(S).
550	 * The new_addr is the address currently the binary is executing from
551	 * and old_addr is the address at compile time.
552	 */
5531:
554	ldr	x3, [x1]
555	/* Skip adding offset if address is < lower limit */
556	cmp	x3, x6
557	b.lo	2f
558	/* Skip adding offset if address is >= upper limit */
559	cmp	x3, x7
560	b.ge	2f
561	add	x3, x3, x0
562	str	x3, [x1]
5632:
564	add	x1, x1, #8
565	cmp	x1, x2
566	b.lo	1b
567
568	/* Starting dynamic relocations. Use adrp/adr to get RELA_START and END */
569	adrp	x1, __RELA_START__
570	add	x1, x1, :lo12:__RELA_START__
571	adrp	x2, __RELA_END__
572	add	x2, x2, :lo12:__RELA_END__
573	/*
574	 * According to ELF-64 specification, the RELA data structure is as
575	 * follows:
576	 *	typedef struct
577	 * 	{
578	 *		Elf64_Addr r_offset;
579	 *		Elf64_Xword r_info;
580	 *		Elf64_Sxword r_addend;
581	 *	} Elf64_Rela;
582	 *
583	 * r_offset is address of reference
584	 * r_info is symbol index and type of relocation (in this case
585	 * 0x403 which corresponds to R_AARCH64_RELATIV).
586	 * r_addend is constant part of expression.
587	 *
588	 * Size of Elf64_Rela structure is 24 bytes.
589	 */
5901:
591	/* Assert that the relocation type is R_AARCH64_RELATIV */
592#if ENABLE_ASSERTIONS
593	ldr	x3, [x1, #8]
594	cmp	x3, #0x403
595	ASM_ASSERT(eq)
596#endif
597	ldr	x3, [x1]	/* r_offset */
598	add	x3, x0, x3
599	ldr	x4, [x1, #16]	/* r_addend */
600
601	/* Skip adding offset if r_addend is < lower limit */
602	cmp	x4, x6
603	b.lo	2f
604	/* Skip adding offset if r_addend entry is >= upper limit */
605	cmp	x4, x7
606	b.ge	2f
607
608	add	x4, x0, x4	/* Diff(S) + r_addend */
609	str	x4, [x3]
610
6112:	add	x1, x1, #24
612	cmp	x1, x2
613	b.lo	1b
614
615	ret
616endfunc fixup_gdt_reloc
617