xref: /optee_os/lib/libutee/tcb.c (revision 31b31015b854f6688ce8ea71aed055a1e7f493e1)
1*31b31015Sliushiwei // SPDX-License-Identifier: BSD-2-Clause
2*31b31015Sliushiwei /*
3*31b31015Sliushiwei  * Copyright (c) 2020, Huawei Technologies Co., Ltd
4*31b31015Sliushiwei  */
5*31b31015Sliushiwei /*
6*31b31015Sliushiwei  * Support for Thread-Local Storage (TLS) ABIs for ARMv7/Aarch32 and Aarch64.
7*31b31015Sliushiwei  *
 * TAs are currently single-threaded, so the only benefit of implementing these
 * ABIs is to support toolchains that need them even when the target program is
 * single-threaded. One example is the g++ compiler from GCC toolchains that
 * target a "POSIX threads" Linux runtime, which OP-TEE has been using for quite
 * some time (arm-linux-gnueabihf-* and aarch64-linux-gnu-*). This allows
 * building C++ TAs without having to build a specific toolchain with
 * --disable-threads.
14*31b31015Sliushiwei  *
15*31b31015Sliushiwei  * This implementation is based on [1].
16*31b31015Sliushiwei  *
17*31b31015Sliushiwei  *  - "TLS data structures variant 1" (section 3): the AArch64 compiler uses the
18*31b31015Sliushiwei  *    TPIDR_EL0 to access TLS data directly. This assumes a specific layout for
19*31b31015Sliushiwei  *    the TCB, and (for shared objects) the use of R_AARCH64_TLS_TPREL
20*31b31015Sliushiwei  *    relocations.
21*31b31015Sliushiwei  *  - The "General Dynamic access model" (section 4.1): the ARMv7/Aarch32
22*31b31015Sliushiwei  *    compiler inserts calls to the __tls_get_addr() function which has to be
23*31b31015Sliushiwei  *    implemented by the runtime library. The function takes a module ID and an
24*31b31015Sliushiwei  *    offset parameter, which are provided thanks to R_ARM_TLS_DTPMOD32 and
25*31b31015Sliushiwei  *    R_ARM_TLS_DTPOFF32 relocations.
26*31b31015Sliushiwei  *
27*31b31015Sliushiwei  * In addition, dl_iterate_phdr() is implemented here, because it is used by the
28*31b31015Sliushiwei  * g++ Aarch64 exception handling and it does use the TCB to provide TLS
29*31b31015Sliushiwei  * information to the caller.
30*31b31015Sliushiwei  *
31*31b31015Sliushiwei  * [1] "ELF Handling For Thread-Local Storage"
32*31b31015Sliushiwei  *     https://www.akkadia.org/drepper/tls.pdf
33*31b31015Sliushiwei  */
34*31b31015Sliushiwei 
35*31b31015Sliushiwei #include <arm64_user_sysreg.h>
36*31b31015Sliushiwei #include <assert.h>
37*31b31015Sliushiwei #include <link.h>
38*31b31015Sliushiwei #include <stdlib.h>
39*31b31015Sliushiwei #include <string.h>
40*31b31015Sliushiwei #include <sys/queue.h>
41*31b31015Sliushiwei #include "user_ta_header.h"
42*31b31015Sliushiwei 
/*
 * DTV - Dynamic Thread Vector
 *
 * Array of pointers to the TLS data of each module, referenced from the TCB.
 * Every module that has a TLS segment owns one entry (and therefore some space
 * in the tcb_head::tls buffer). The array index is the "module ID".
 *
 *   dtv[0].size     number of elements in the vector
 *   dtv[1].tls      TLS block of the main executable (may be NULL)
 *   dtv[2 ...].tls  TLS blocks of the shared libraries
 */
union dtv {
	unsigned long size;
	uint8_t *tls;
};

/* Bytes needed for a DTV: the dtv[0] slot followed by @entries_size bytes */
#define DTV_SIZE(entries_size) (sizeof(union dtv) + (entries_size))
58*31b31015Sliushiwei 
/* Thread Control Block */
struct tcb_head {
	/* The "TLS variant 1" ABI reserves the first two words of the TCB */
	union dtv *dtv;
	unsigned long reserved;
	/*
	 * Everything past the header is TLS storage: one block per ELF module
	 * that has a PT_TLS segment. A block is a copy of the module's .tdata
	 * section followed by zero-initialized space for its .tbss.
	 */
	uint8_t tls[];
};

/*
 * TAs are single-threaded, hence a single TCB is sufficient. This has to be
 * revisited if multi-threading is ever introduced.
 */
static struct tcb_head *_tcb;
/* Number of bytes of _tcb->tls currently holding initialized TLS blocks */
static size_t _tls_size;

/* Allocation size of a TCB carrying @tls_bytes bytes of TLS blocks */
#define TCB_SIZE(tls_bytes) (sizeof(struct tcb_head) + (tls_bytes))
80*31b31015Sliushiwei 
81*31b31015Sliushiwei /*
82*31b31015Sliushiwei  * Initialize or update the TCB.
83*31b31015Sliushiwei  * Called on application initialization and when additional shared objects are
84*31b31015Sliushiwei  * loaded via dlopen().
85*31b31015Sliushiwei  */
86*31b31015Sliushiwei void __utee_tcb_init(void)
87*31b31015Sliushiwei {
88*31b31015Sliushiwei 	struct dl_phdr_info *dlpi = NULL;
89*31b31015Sliushiwei 	const Elf_Phdr *phdr = NULL;
90*31b31015Sliushiwei 	size_t total_size = 0;
91*31b31015Sliushiwei 	size_t size = 0;
92*31b31015Sliushiwei 	size_t i = 0;
93*31b31015Sliushiwei 	size_t j = 0;
94*31b31015Sliushiwei 
95*31b31015Sliushiwei 	/* Compute the size needed for all the TLS blocks */
96*31b31015Sliushiwei 	for (i = 0; i < __elf_phdr_info.count; i++) {
97*31b31015Sliushiwei 		dlpi = __elf_phdr_info.dlpi + i;
98*31b31015Sliushiwei 		for (j = 0; j < dlpi->dlpi_phnum; j++) {
99*31b31015Sliushiwei 			phdr = dlpi->dlpi_phdr + j;
100*31b31015Sliushiwei 			if (phdr->p_type == PT_TLS) {
101*31b31015Sliushiwei 				total_size += phdr->p_memsz;
102*31b31015Sliushiwei 				break;
103*31b31015Sliushiwei 			}
104*31b31015Sliushiwei 		}
105*31b31015Sliushiwei 	}
106*31b31015Sliushiwei 
107*31b31015Sliushiwei 	/* ELF modules currently cannot be unmapped */
108*31b31015Sliushiwei 	assert(total_size >= _tls_size);
109*31b31015Sliushiwei 
110*31b31015Sliushiwei 	if (total_size == _tls_size)
111*31b31015Sliushiwei 		return;
112*31b31015Sliushiwei 
113*31b31015Sliushiwei 	/* (Re-)allocate the TCB */
114*31b31015Sliushiwei 	_tcb = realloc(_tcb, TCB_SIZE(total_size));
115*31b31015Sliushiwei 	if (!_tcb) {
116*31b31015Sliushiwei 		EMSG("TCB allocation failed (%zu bytes)", TCB_SIZE(total_size));
117*31b31015Sliushiwei 		abort();
118*31b31015Sliushiwei 	}
119*31b31015Sliushiwei 
120*31b31015Sliushiwei 	/* (Re-)allocate the DTV. + 1 since dtv[0] holds the size */
121*31b31015Sliushiwei 	size = DTV_SIZE((__elf_phdr_info.count + 1) * sizeof(union dtv));
122*31b31015Sliushiwei 	_tcb->dtv = realloc(_tcb->dtv, size);
123*31b31015Sliushiwei 	if (!_tcb->dtv) {
124*31b31015Sliushiwei 		EMSG("DTV allocation failed (%zu bytes)", size);
125*31b31015Sliushiwei 		abort();
126*31b31015Sliushiwei 	}
127*31b31015Sliushiwei 
128*31b31015Sliushiwei 	/* Copy TLS data to the TCB */
129*31b31015Sliushiwei 	size = 0;
130*31b31015Sliushiwei 	for (i = 0; i < __elf_phdr_info.count; i++) {
131*31b31015Sliushiwei 		dlpi = __elf_phdr_info.dlpi + i;
132*31b31015Sliushiwei 		for (j = 0; j < dlpi->dlpi_phnum; j++) {
133*31b31015Sliushiwei 			phdr = dlpi->dlpi_phdr + j;
134*31b31015Sliushiwei 			if (phdr->p_type != PT_TLS)
135*31b31015Sliushiwei 				continue;
136*31b31015Sliushiwei 			if (size + phdr->p_memsz <= _tls_size) {
137*31b31015Sliushiwei 				/* Already copied */
138*31b31015Sliushiwei 				break;
139*31b31015Sliushiwei 			}
140*31b31015Sliushiwei 			_tcb->dtv[i + 1].tls = _tcb->tls + size;
141*31b31015Sliushiwei 			/* Copy .tdata */
142*31b31015Sliushiwei 			memcpy(_tcb->tls + size,
143*31b31015Sliushiwei 			       (void *)(dlpi->dlpi_addr + phdr->p_vaddr),
144*31b31015Sliushiwei 			       phdr->p_filesz);
145*31b31015Sliushiwei 			/* Initialize .tbss */
146*31b31015Sliushiwei 			memset(_tcb->tls + size + phdr->p_filesz, 0,
147*31b31015Sliushiwei 			       phdr->p_memsz - phdr->p_filesz);
148*31b31015Sliushiwei 			size += phdr->p_memsz;
149*31b31015Sliushiwei 		}
150*31b31015Sliushiwei 	}
151*31b31015Sliushiwei 	_tcb->dtv[0].size = i;
152*31b31015Sliushiwei 
153*31b31015Sliushiwei 	_tls_size = total_size;
154*31b31015Sliushiwei #ifdef ARM64
155*31b31015Sliushiwei 	/*
156*31b31015Sliushiwei 	 * Aarch64 ABI requirement: the thread pointer shall point to the
157*31b31015Sliushiwei 	 * thread's TCB. ARMv7 and Aarch32 access the TCB via _tls_get_addr().
158*31b31015Sliushiwei 	 */
159*31b31015Sliushiwei 	write_tpidr_el0((vaddr_t)_tcb);
160*31b31015Sliushiwei #endif
161*31b31015Sliushiwei }
162*31b31015Sliushiwei 
163*31b31015Sliushiwei struct tls_index {
164*31b31015Sliushiwei 	unsigned long module;
165*31b31015Sliushiwei 	unsigned long offset;
166*31b31015Sliushiwei };
167*31b31015Sliushiwei 
168*31b31015Sliushiwei void *__tls_get_addr(struct tls_index *ti);
169*31b31015Sliushiwei 
170*31b31015Sliushiwei void *__tls_get_addr(struct tls_index *ti)
171*31b31015Sliushiwei {
172*31b31015Sliushiwei 	return _tcb->dtv[ti->module].tls + ti->offset;
173*31b31015Sliushiwei }
174*31b31015Sliushiwei 
175*31b31015Sliushiwei int dl_iterate_phdr(int (*callback)(struct dl_phdr_info *, size_t, void *),
176*31b31015Sliushiwei 		    void *data)
177*31b31015Sliushiwei {
178*31b31015Sliushiwei 	struct dl_phdr_info *dlpi = NULL;
179*31b31015Sliushiwei 	size_t id = 0;
180*31b31015Sliushiwei 	size_t i = 0;
181*31b31015Sliushiwei 	int st = 0;
182*31b31015Sliushiwei 
183*31b31015Sliushiwei 	/*
184*31b31015Sliushiwei 	 * dlpi_tls_data is thread-specific so if we were to support
185*31b31015Sliushiwei 	 * multi-threading, we would need one copy of struct dl_phdr_info per
186*31b31015Sliushiwei 	 * thread. Could be a pre-allocated area, or could be allocated on the
187*31b31015Sliushiwei 	 * heap. Doing the latter here so that it would at least work if/when we
188*31b31015Sliushiwei 	 * add thread support. Further optimization can always come later.
189*31b31015Sliushiwei 	 */
190*31b31015Sliushiwei 	dlpi = calloc(1, sizeof(*dlpi));
191*31b31015Sliushiwei 	if (!dlpi) {
192*31b31015Sliushiwei 		EMSG("dl_phdr_info allocation failed");
193*31b31015Sliushiwei 		abort();
194*31b31015Sliushiwei 	}
195*31b31015Sliushiwei 
196*31b31015Sliushiwei 	for (i = 0; i < __elf_phdr_info.count; i++) {
197*31b31015Sliushiwei 		memcpy(dlpi, __elf_phdr_info.dlpi + i, sizeof(*dlpi));
198*31b31015Sliushiwei 		dlpi->dlpi_tls_data = NULL;
199*31b31015Sliushiwei 		id = dlpi->dlpi_tls_modid;
200*31b31015Sliushiwei 		if (id)
201*31b31015Sliushiwei 			dlpi->dlpi_tls_data = _tcb->dtv[id].tls;
202*31b31015Sliushiwei 		st = callback(dlpi, sizeof(*dlpi), data);
203*31b31015Sliushiwei 	}
204*31b31015Sliushiwei 
205*31b31015Sliushiwei 	free(dlpi);
206*31b31015Sliushiwei 	return st;
207*31b31015Sliushiwei }
208