1 // SPDX-License-Identifier: BSD-2-Clause 2 /* 3 * Copyright (c) 2020, Huawei Technologies Co., Ltd 4 */ 5 /* 6 * Support for Thread-Local Storage (TLS) ABIs for ARMv7/Aarch32 and Aarch64. 7 * 8 * TAs are currently single-threaded, so the only benefit of implementing these 9 * ABIs is to support toolchains that need them even when the target program is 10 * single-threaded. Such as, the g++ compiler from the GCC toolchain targeting a 11 * "Posix thread" Linux runtime, which OP-TEE has been using for quite some time 12 * (arm-linux-gnueabihf-* and aarch64-linux-gnu-*). This allows building C++ TAs 13 * without having to build a specific toolchain with --disable-threads. 14 * 15 * This implementation is based on [1]. 16 * 17 * - "TLS data structures variant 1" (section 3): the AArch64 compiler uses the 18 * TPIDR_EL0 to access TLS data directly. This assumes a specific layout for 19 * the TCB, and (for shared objects) the use of R_AARCH64_TLS_TPREL 20 * relocations. 21 * - The "General Dynamic access model" (section 4.1): the ARMv7/Aarch32 22 * compiler inserts calls to the __tls_get_addr() function which has to be 23 * implemented by the runtime library. The function takes a module ID and an 24 * offset parameter, which are provided thanks to R_ARM_TLS_DTPMOD32 and 25 * R_ARM_TLS_DTPOFF32 relocations. 26 * 27 * In addition, dl_iterate_phdr() is implemented here, because it is used by the 28 * g++ Aarch64 exception handling and it does use the TCB to provide TLS 29 * information to the caller. 30 * 31 * [1] "ELF Handling For Thread-Local Storage" 32 * https://www.akkadia.org/drepper/tls.pdf 33 */ 34 35 #include <arm64_user_sysreg.h> 36 #include <assert.h> 37 #include <link.h> 38 #include <stdlib.h> 39 #include <string.h> 40 #include <sys/queue.h> 41 #include "user_ta_header.h" 42 43 /* DTV - Dynamic Thread Vector 44 * 45 * Maintains an array of pointers to TLS data for each module in the TCB. Each 46 * module that has a TLS segment has an entry (and consequently, some space in 47 * the tcb_head::tls buffer). The index is the "module ID". 48 * dtv[0].size is the number of elements in the vector 49 * dtv[1].tls points to TLS for the main executable (may be NULL) 50 * tls[2 .. (size-1)] are for shared libraries 51 */ 52 union dtv { 53 unsigned long size; 54 uint8_t *tls; 55 }; 56 57 #define DTV_SIZE(size) (sizeof(union dtv) + (size)) 58 59 /* Thread Control Block */ 60 struct tcb_head { 61 /* Two words are reserved as per the "TLS variant 1" ABI */ 62 union dtv *dtv; 63 unsigned long reserved; 64 /* 65 * The rest of the structure contains the TLS blocks for each ELF module 66 * having a PT_TLS segment. Each block is a copy of the .tdata section 67 * plus some zero-initialized space for .tbss. 68 */ 69 uint8_t tls[]; 70 }; 71 72 /* 73 * Since TAs are single threaded, only one TCB is needed. This would need to 74 * change if multi-threading is introduced. 75 */ 76 static struct tcb_head *_tcb; 77 static size_t _tls_size; 78 79 #define TCB_SIZE(tls_size) (sizeof(*_tcb) + (tls_size)) 80 81 /* 82 * Initialize or update the TCB. 83 * Called on application initialization and when additional shared objects are 84 * loaded via dlopen(). 85 */ 86 void __utee_tcb_init(void) 87 { 88 struct dl_phdr_info *dlpi = NULL; 89 const Elf_Phdr *phdr = NULL; 90 size_t total_size = 0; 91 size_t size = 0; 92 size_t i = 0; 93 size_t j = 0; 94 95 /* Compute the size needed for all the TLS blocks */ 96 for (i = 0; i < __elf_phdr_info.count; i++) { 97 dlpi = __elf_phdr_info.dlpi + i; 98 for (j = 0; j < dlpi->dlpi_phnum; j++) { 99 phdr = dlpi->dlpi_phdr + j; 100 if (phdr->p_type == PT_TLS) { 101 total_size += phdr->p_memsz; 102 break; 103 } 104 } 105 } 106 107 /* ELF modules currently cannot be unmapped */ 108 assert(total_size >= _tls_size); 109 110 if (total_size == _tls_size) 111 return; 112 113 /* (Re-)allocate the TCB */ 114 _tcb = realloc(_tcb, TCB_SIZE(total_size)); 115 if (!_tcb) { 116 EMSG("TCB allocation failed (%zu bytes)", TCB_SIZE(total_size)); 117 abort(); 118 } 119 120 /* (Re-)allocate the DTV. + 1 since dtv[0] holds the size */ 121 size = DTV_SIZE((__elf_phdr_info.count + 1) * sizeof(union dtv)); 122 _tcb->dtv = realloc(_tcb->dtv, size); 123 if (!_tcb->dtv) { 124 EMSG("DTV allocation failed (%zu bytes)", size); 125 abort(); 126 } 127 128 /* Copy TLS data to the TCB */ 129 size = 0; 130 for (i = 0; i < __elf_phdr_info.count; i++) { 131 dlpi = __elf_phdr_info.dlpi + i; 132 for (j = 0; j < dlpi->dlpi_phnum; j++) { 133 phdr = dlpi->dlpi_phdr + j; 134 if (phdr->p_type != PT_TLS) 135 continue; 136 if (size + phdr->p_memsz <= _tls_size) { 137 /* Already copied */ 138 break; 139 } 140 _tcb->dtv[i + 1].tls = _tcb->tls + size; 141 /* Copy .tdata */ 142 memcpy(_tcb->tls + size, 143 (void *)(dlpi->dlpi_addr + phdr->p_vaddr), 144 phdr->p_filesz); 145 /* Initialize .tbss */ 146 memset(_tcb->tls + size + phdr->p_filesz, 0, 147 phdr->p_memsz - phdr->p_filesz); 148 size += phdr->p_memsz; 149 } 150 } 151 _tcb->dtv[0].size = i; 152 153 _tls_size = total_size; 154 #ifdef ARM64 155 /* 156 * Aarch64 ABI requirement: the thread pointer shall point to the 157 * thread's TCB. ARMv7 and Aarch32 access the TCB via _tls_get_addr(). 158 */ 159 write_tpidr_el0((vaddr_t)_tcb); 160 #endif 161 } 162 163 struct tls_index { 164 unsigned long module; 165 unsigned long offset; 166 }; 167 168 void *__tls_get_addr(struct tls_index *ti); 169 170 void *__tls_get_addr(struct tls_index *ti) 171 { 172 return _tcb->dtv[ti->module].tls + ti->offset; 173 } 174 175 int dl_iterate_phdr(int (*callback)(struct dl_phdr_info *, size_t, void *), 176 void *data) 177 { 178 struct dl_phdr_info *dlpi = NULL; 179 size_t id = 0; 180 size_t i = 0; 181 int st = 0; 182 183 /* 184 * dlpi_tls_data is thread-specific so if we were to support 185 * multi-threading, we would need one copy of struct dl_phdr_info per 186 * thread. Could be a pre-allocated area, or could be allocated on the 187 * heap. Doing the latter here so that it would at least work if/when we 188 * add thread support. Further optimization can always come later. 189 */ 190 dlpi = calloc(1, sizeof(*dlpi)); 191 if (!dlpi) { 192 EMSG("dl_phdr_info allocation failed"); 193 abort(); 194 } 195 196 for (i = 0; i < __elf_phdr_info.count; i++) { 197 memcpy(dlpi, __elf_phdr_info.dlpi + i, sizeof(*dlpi)); 198 dlpi->dlpi_tls_data = NULL; 199 id = dlpi->dlpi_tls_modid; 200 if (id) 201 dlpi->dlpi_tls_data = _tcb->dtv[id].tls; 202 st = callback(dlpi, sizeof(*dlpi), data); 203 } 204 205 free(dlpi); 206 return st; 207 } 208