// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright 2020, Gustavo Luiz Duarte, IBM Corp.
 *
 * This test starts a transaction and triggers a signal, forcing a pagefault to
 * happen when the kernel signal handling code touches the user signal stack.
 *
 * In order to avoid pre-faulting the signal stack memory and to force the
 * pagefault to happen precisely in the kernel signal handling code, the
 * pagefault handling is done in userspace using the userfaultfd facility.
 *
 * Further pagefaults are triggered by crafting the signal handler's ucontext
 * to point to additional memory regions managed by the userfaultfd, so using
 * the same mechanism used to avoid pre-faulting the signal stack memory.
 *
 * On failure (bug is present) the kernel crashes or never returns control back
 * to userspace. If the bug is not present, the test completes almost
 * immediately.
 */
19*4882a593Smuzhiyun
20*4882a593Smuzhiyun #include <stdio.h>
21*4882a593Smuzhiyun #include <stdlib.h>
22*4882a593Smuzhiyun #include <string.h>
23*4882a593Smuzhiyun #include <linux/userfaultfd.h>
24*4882a593Smuzhiyun #include <poll.h>
25*4882a593Smuzhiyun #include <unistd.h>
26*4882a593Smuzhiyun #include <sys/ioctl.h>
27*4882a593Smuzhiyun #include <sys/syscall.h>
28*4882a593Smuzhiyun #include <fcntl.h>
29*4882a593Smuzhiyun #include <sys/mman.h>
30*4882a593Smuzhiyun #include <pthread.h>
31*4882a593Smuzhiyun #include <signal.h>
32*4882a593Smuzhiyun #include <errno.h>
33*4882a593Smuzhiyun
34*4882a593Smuzhiyun #include "tm.h"
35*4882a593Smuzhiyun
36*4882a593Smuzhiyun
#define UF_MEM_SIZE 655360	/* 10 x 64k pages */

/* Memory region whose missing-page faults are handled by userfaultfd */
static char *uf_mem;
/* Bump-allocator cursor into uf_mem; advanced (page-aligned) by get_uf_mem() */
static size_t uf_mem_offset = 0;

/*
 * Data that will be copied into the faulting pages (instead of zero-filled
 * pages). This is used to make the test more reliable and avoid segfaulting
 * when we return from the signal handler. Since we are making the signal
 * handler's ucontext point to newly allocated memory, when that memory is
 * paged-in it will contain the expected content.
 */
static char backing_mem[UF_MEM_SIZE];

/* System page size, initialized in setup_uf_mem() */
static size_t pagesize;
53*4882a593Smuzhiyun
54*4882a593Smuzhiyun /*
55*4882a593Smuzhiyun * Return a chunk of at least 'size' bytes of memory that will be handled by
56*4882a593Smuzhiyun * userfaultfd. If 'backing_data' is not NULL, its content will be save to
57*4882a593Smuzhiyun * 'backing_mem' and then copied into the faulting pages when the page fault
58*4882a593Smuzhiyun * is handled.
59*4882a593Smuzhiyun */
get_uf_mem(size_t size,void * backing_data)60*4882a593Smuzhiyun void *get_uf_mem(size_t size, void *backing_data)
61*4882a593Smuzhiyun {
62*4882a593Smuzhiyun void *ret;
63*4882a593Smuzhiyun
64*4882a593Smuzhiyun if (uf_mem_offset + size > UF_MEM_SIZE) {
65*4882a593Smuzhiyun fprintf(stderr, "Requesting more uf_mem than expected!\n");
66*4882a593Smuzhiyun exit(EXIT_FAILURE);
67*4882a593Smuzhiyun }
68*4882a593Smuzhiyun
69*4882a593Smuzhiyun ret = &uf_mem[uf_mem_offset];
70*4882a593Smuzhiyun
71*4882a593Smuzhiyun /* Save the data that will be copied into the faulting page */
72*4882a593Smuzhiyun if (backing_data != NULL)
73*4882a593Smuzhiyun memcpy(&backing_mem[uf_mem_offset], backing_data, size);
74*4882a593Smuzhiyun
75*4882a593Smuzhiyun /* Reserve the requested amount of uf_mem */
76*4882a593Smuzhiyun uf_mem_offset += size;
77*4882a593Smuzhiyun /* Keep uf_mem_offset aligned to the page size (round up) */
78*4882a593Smuzhiyun uf_mem_offset = (uf_mem_offset + pagesize - 1) & ~(pagesize - 1);
79*4882a593Smuzhiyun
80*4882a593Smuzhiyun return ret;
81*4882a593Smuzhiyun }
82*4882a593Smuzhiyun
fault_handler_thread(void * arg)83*4882a593Smuzhiyun void *fault_handler_thread(void *arg)
84*4882a593Smuzhiyun {
85*4882a593Smuzhiyun struct uffd_msg msg; /* Data read from userfaultfd */
86*4882a593Smuzhiyun long uffd; /* userfaultfd file descriptor */
87*4882a593Smuzhiyun struct uffdio_copy uffdio_copy;
88*4882a593Smuzhiyun struct pollfd pollfd;
89*4882a593Smuzhiyun ssize_t nread, offset;
90*4882a593Smuzhiyun
91*4882a593Smuzhiyun uffd = (long) arg;
92*4882a593Smuzhiyun
93*4882a593Smuzhiyun for (;;) {
94*4882a593Smuzhiyun pollfd.fd = uffd;
95*4882a593Smuzhiyun pollfd.events = POLLIN;
96*4882a593Smuzhiyun if (poll(&pollfd, 1, -1) == -1) {
97*4882a593Smuzhiyun perror("poll() failed");
98*4882a593Smuzhiyun exit(EXIT_FAILURE);
99*4882a593Smuzhiyun }
100*4882a593Smuzhiyun
101*4882a593Smuzhiyun nread = read(uffd, &msg, sizeof(msg));
102*4882a593Smuzhiyun if (nread == 0) {
103*4882a593Smuzhiyun fprintf(stderr, "read(): EOF on userfaultfd\n");
104*4882a593Smuzhiyun exit(EXIT_FAILURE);
105*4882a593Smuzhiyun }
106*4882a593Smuzhiyun
107*4882a593Smuzhiyun if (nread == -1) {
108*4882a593Smuzhiyun perror("read() failed");
109*4882a593Smuzhiyun exit(EXIT_FAILURE);
110*4882a593Smuzhiyun }
111*4882a593Smuzhiyun
112*4882a593Smuzhiyun /* We expect only one kind of event */
113*4882a593Smuzhiyun if (msg.event != UFFD_EVENT_PAGEFAULT) {
114*4882a593Smuzhiyun fprintf(stderr, "Unexpected event on userfaultfd\n");
115*4882a593Smuzhiyun exit(EXIT_FAILURE);
116*4882a593Smuzhiyun }
117*4882a593Smuzhiyun
118*4882a593Smuzhiyun /*
119*4882a593Smuzhiyun * We need to handle page faults in units of pages(!).
120*4882a593Smuzhiyun * So, round faulting address down to page boundary.
121*4882a593Smuzhiyun */
122*4882a593Smuzhiyun uffdio_copy.dst = msg.arg.pagefault.address & ~(pagesize-1);
123*4882a593Smuzhiyun
124*4882a593Smuzhiyun offset = (char *) uffdio_copy.dst - uf_mem;
125*4882a593Smuzhiyun uffdio_copy.src = (unsigned long) &backing_mem[offset];
126*4882a593Smuzhiyun
127*4882a593Smuzhiyun uffdio_copy.len = pagesize;
128*4882a593Smuzhiyun uffdio_copy.mode = 0;
129*4882a593Smuzhiyun uffdio_copy.copy = 0;
130*4882a593Smuzhiyun if (ioctl(uffd, UFFDIO_COPY, &uffdio_copy) == -1) {
131*4882a593Smuzhiyun perror("ioctl-UFFDIO_COPY failed");
132*4882a593Smuzhiyun exit(EXIT_FAILURE);
133*4882a593Smuzhiyun }
134*4882a593Smuzhiyun }
135*4882a593Smuzhiyun }
136*4882a593Smuzhiyun
setup_uf_mem(void)137*4882a593Smuzhiyun void setup_uf_mem(void)
138*4882a593Smuzhiyun {
139*4882a593Smuzhiyun long uffd; /* userfaultfd file descriptor */
140*4882a593Smuzhiyun pthread_t thr;
141*4882a593Smuzhiyun struct uffdio_api uffdio_api;
142*4882a593Smuzhiyun struct uffdio_register uffdio_register;
143*4882a593Smuzhiyun int ret;
144*4882a593Smuzhiyun
145*4882a593Smuzhiyun pagesize = sysconf(_SC_PAGE_SIZE);
146*4882a593Smuzhiyun
147*4882a593Smuzhiyun /* Create and enable userfaultfd object */
148*4882a593Smuzhiyun uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
149*4882a593Smuzhiyun if (uffd == -1) {
150*4882a593Smuzhiyun perror("userfaultfd() failed");
151*4882a593Smuzhiyun exit(EXIT_FAILURE);
152*4882a593Smuzhiyun }
153*4882a593Smuzhiyun uffdio_api.api = UFFD_API;
154*4882a593Smuzhiyun uffdio_api.features = 0;
155*4882a593Smuzhiyun if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1) {
156*4882a593Smuzhiyun perror("ioctl-UFFDIO_API failed");
157*4882a593Smuzhiyun exit(EXIT_FAILURE);
158*4882a593Smuzhiyun }
159*4882a593Smuzhiyun
160*4882a593Smuzhiyun /*
161*4882a593Smuzhiyun * Create a private anonymous mapping. The memory will be demand-zero
162*4882a593Smuzhiyun * paged, that is, not yet allocated. When we actually touch the memory
163*4882a593Smuzhiyun * the related page will be allocated via the userfaultfd mechanism.
164*4882a593Smuzhiyun */
165*4882a593Smuzhiyun uf_mem = mmap(NULL, UF_MEM_SIZE, PROT_READ | PROT_WRITE,
166*4882a593Smuzhiyun MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
167*4882a593Smuzhiyun if (uf_mem == MAP_FAILED) {
168*4882a593Smuzhiyun perror("mmap() failed");
169*4882a593Smuzhiyun exit(EXIT_FAILURE);
170*4882a593Smuzhiyun }
171*4882a593Smuzhiyun
172*4882a593Smuzhiyun /*
173*4882a593Smuzhiyun * Register the memory range of the mapping we've just mapped to be
174*4882a593Smuzhiyun * handled by the userfaultfd object. In 'mode' we request to track
175*4882a593Smuzhiyun * missing pages (i.e. pages that have not yet been faulted-in).
176*4882a593Smuzhiyun */
177*4882a593Smuzhiyun uffdio_register.range.start = (unsigned long) uf_mem;
178*4882a593Smuzhiyun uffdio_register.range.len = UF_MEM_SIZE;
179*4882a593Smuzhiyun uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
180*4882a593Smuzhiyun if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1) {
181*4882a593Smuzhiyun perror("ioctl-UFFDIO_REGISTER");
182*4882a593Smuzhiyun exit(EXIT_FAILURE);
183*4882a593Smuzhiyun }
184*4882a593Smuzhiyun
185*4882a593Smuzhiyun /* Create a thread that will process the userfaultfd events */
186*4882a593Smuzhiyun ret = pthread_create(&thr, NULL, fault_handler_thread, (void *) uffd);
187*4882a593Smuzhiyun if (ret != 0) {
188*4882a593Smuzhiyun fprintf(stderr, "pthread_create(): Error. Returned %d\n", ret);
189*4882a593Smuzhiyun exit(EXIT_FAILURE);
190*4882a593Smuzhiyun }
191*4882a593Smuzhiyun }
192*4882a593Smuzhiyun
193*4882a593Smuzhiyun /*
194*4882a593Smuzhiyun * Assumption: the signal was delivered while userspace was in transactional or
195*4882a593Smuzhiyun * suspended state, i.e. uc->uc_link != NULL.
196*4882a593Smuzhiyun */
signal_handler(int signo,siginfo_t * si,void * uc)197*4882a593Smuzhiyun void signal_handler(int signo, siginfo_t *si, void *uc)
198*4882a593Smuzhiyun {
199*4882a593Smuzhiyun ucontext_t *ucp = uc;
200*4882a593Smuzhiyun
201*4882a593Smuzhiyun /* Skip 'trap' after returning, otherwise we get a SIGTRAP again */
202*4882a593Smuzhiyun ucp->uc_link->uc_mcontext.regs->nip += 4;
203*4882a593Smuzhiyun
204*4882a593Smuzhiyun ucp->uc_mcontext.v_regs =
205*4882a593Smuzhiyun get_uf_mem(sizeof(elf_vrreg_t), ucp->uc_mcontext.v_regs);
206*4882a593Smuzhiyun
207*4882a593Smuzhiyun ucp->uc_link->uc_mcontext.v_regs =
208*4882a593Smuzhiyun get_uf_mem(sizeof(elf_vrreg_t), ucp->uc_link->uc_mcontext.v_regs);
209*4882a593Smuzhiyun
210*4882a593Smuzhiyun ucp->uc_link = get_uf_mem(sizeof(ucontext_t), ucp->uc_link);
211*4882a593Smuzhiyun }
212*4882a593Smuzhiyun
/*
 * Probe for userfaultfd support by calling the syscall with invalid flags
 * (-1): a kernel with the facility rejects the flags (e.g. EINVAL), while a
 * kernel without it fails with ENOSYS.
 */
bool have_userfaultfd(void)
{
	long probe;

	errno = 0;
	probe = syscall(__NR_userfaultfd, -1);

	if (probe == 0)
		return true;

	return errno != ENOSYS;
}
222*4882a593Smuzhiyun
tm_signal_pagefault(void)223*4882a593Smuzhiyun int tm_signal_pagefault(void)
224*4882a593Smuzhiyun {
225*4882a593Smuzhiyun struct sigaction sa;
226*4882a593Smuzhiyun stack_t ss;
227*4882a593Smuzhiyun
228*4882a593Smuzhiyun SKIP_IF(!have_htm());
229*4882a593Smuzhiyun SKIP_IF(!have_userfaultfd());
230*4882a593Smuzhiyun
231*4882a593Smuzhiyun setup_uf_mem();
232*4882a593Smuzhiyun
233*4882a593Smuzhiyun /*
234*4882a593Smuzhiyun * Set an alternative stack that will generate a page fault when the
235*4882a593Smuzhiyun * signal is raised. The page fault will be treated via userfaultfd,
236*4882a593Smuzhiyun * i.e. via fault_handler_thread.
237*4882a593Smuzhiyun */
238*4882a593Smuzhiyun ss.ss_sp = get_uf_mem(SIGSTKSZ, NULL);
239*4882a593Smuzhiyun ss.ss_size = SIGSTKSZ;
240*4882a593Smuzhiyun ss.ss_flags = 0;
241*4882a593Smuzhiyun if (sigaltstack(&ss, NULL) == -1) {
242*4882a593Smuzhiyun perror("sigaltstack() failed");
243*4882a593Smuzhiyun exit(EXIT_FAILURE);
244*4882a593Smuzhiyun }
245*4882a593Smuzhiyun
246*4882a593Smuzhiyun sa.sa_flags = SA_SIGINFO | SA_ONSTACK;
247*4882a593Smuzhiyun sa.sa_sigaction = signal_handler;
248*4882a593Smuzhiyun if (sigaction(SIGTRAP, &sa, NULL) == -1) {
249*4882a593Smuzhiyun perror("sigaction() failed");
250*4882a593Smuzhiyun exit(EXIT_FAILURE);
251*4882a593Smuzhiyun }
252*4882a593Smuzhiyun
253*4882a593Smuzhiyun /* Trigger a SIGTRAP in transactional state */
254*4882a593Smuzhiyun asm __volatile__(
255*4882a593Smuzhiyun "tbegin.;"
256*4882a593Smuzhiyun "beq 1f;"
257*4882a593Smuzhiyun "trap;"
258*4882a593Smuzhiyun "1: ;"
259*4882a593Smuzhiyun : : : "memory");
260*4882a593Smuzhiyun
261*4882a593Smuzhiyun /* Trigger a SIGTRAP in suspended state */
262*4882a593Smuzhiyun asm __volatile__(
263*4882a593Smuzhiyun "tbegin.;"
264*4882a593Smuzhiyun "beq 1f;"
265*4882a593Smuzhiyun "tsuspend.;"
266*4882a593Smuzhiyun "trap;"
267*4882a593Smuzhiyun "tresume.;"
268*4882a593Smuzhiyun "1: ;"
269*4882a593Smuzhiyun : : : "memory");
270*4882a593Smuzhiyun
271*4882a593Smuzhiyun return EXIT_SUCCESS;
272*4882a593Smuzhiyun }
273*4882a593Smuzhiyun
/* Entry point: run the test under the powerpc selftest harness. */
int main(int argc, char **argv)
{
	/*
	 * Depending on kernel config, the TM Bad Thing might not result in a
	 * crash, instead the kernel never returns control back to userspace, so
	 * set a tight timeout. If the test passes it completes almost
	 * immediately.
	 */
	test_harness_set_timeout(2);
	return test_harness(tm_signal_pagefault, "tm_signal_pagefault");
}
285