xref: /OK3568_Linux_fs/kernel/tools/testing/selftests/powerpc/tm/tm-signal-pagefault.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun  * Copyright 2020, Gustavo Luiz Duarte, IBM Corp.
4*4882a593Smuzhiyun  *
5*4882a593Smuzhiyun  * This test starts a transaction and triggers a signal, forcing a pagefault to
6*4882a593Smuzhiyun  * happen when the kernel signal handling code touches the user signal stack.
7*4882a593Smuzhiyun  *
8*4882a593Smuzhiyun  * In order to avoid pre-faulting the signal stack memory and to force the
9*4882a593Smuzhiyun  * pagefault to happen precisely in the kernel signal handling code, the
10*4882a593Smuzhiyun  * pagefault handling is done in userspace using the userfaultfd facility.
11*4882a593Smuzhiyun  *
12*4882a593Smuzhiyun  * Further pagefaults are triggered by crafting the signal handler's ucontext
13*4882a593Smuzhiyun  * to point to additional memory regions managed by the userfaultfd, so using
14*4882a593Smuzhiyun  * the same mechanism used to avoid pre-faulting the signal stack memory.
15*4882a593Smuzhiyun  *
16*4882a593Smuzhiyun  * On failure (bug is present) kernel crashes or never returns control back to
 * userspace. If the bug is not present, the test completes almost immediately.
18*4882a593Smuzhiyun  */
19*4882a593Smuzhiyun 
20*4882a593Smuzhiyun #include <stdio.h>
21*4882a593Smuzhiyun #include <stdlib.h>
22*4882a593Smuzhiyun #include <string.h>
23*4882a593Smuzhiyun #include <linux/userfaultfd.h>
24*4882a593Smuzhiyun #include <poll.h>
25*4882a593Smuzhiyun #include <unistd.h>
26*4882a593Smuzhiyun #include <sys/ioctl.h>
27*4882a593Smuzhiyun #include <sys/syscall.h>
28*4882a593Smuzhiyun #include <fcntl.h>
29*4882a593Smuzhiyun #include <sys/mman.h>
30*4882a593Smuzhiyun #include <pthread.h>
31*4882a593Smuzhiyun #include <signal.h>
32*4882a593Smuzhiyun #include <errno.h>
33*4882a593Smuzhiyun 
34*4882a593Smuzhiyun #include "tm.h"
35*4882a593Smuzhiyun 
36*4882a593Smuzhiyun 
37*4882a593Smuzhiyun #define UF_MEM_SIZE 655360	/* 10 x 64k pages */
38*4882a593Smuzhiyun 
39*4882a593Smuzhiyun /* Memory handled by userfaultfd */
40*4882a593Smuzhiyun static char *uf_mem;
41*4882a593Smuzhiyun static size_t uf_mem_offset = 0;
42*4882a593Smuzhiyun 
43*4882a593Smuzhiyun /*
44*4882a593Smuzhiyun  * Data that will be copied into the faulting pages (instead of zero-filled
45*4882a593Smuzhiyun  * pages). This is used to make the test more reliable and avoid segfaulting
46*4882a593Smuzhiyun  * when we return from the signal handler. Since we are making the signal
47*4882a593Smuzhiyun  * handler's ucontext point to newly allocated memory, when that memory is
48*4882a593Smuzhiyun  * paged-in it will contain the expected content.
49*4882a593Smuzhiyun  */
50*4882a593Smuzhiyun static char backing_mem[UF_MEM_SIZE];
51*4882a593Smuzhiyun 
52*4882a593Smuzhiyun static size_t pagesize;
53*4882a593Smuzhiyun 
54*4882a593Smuzhiyun /*
55*4882a593Smuzhiyun  * Return a chunk of at least 'size' bytes of memory that will be handled by
 * userfaultfd. If 'backing_data' is not NULL, its content will be saved to
57*4882a593Smuzhiyun  * 'backing_mem' and then copied into the faulting pages when the page fault
58*4882a593Smuzhiyun  * is handled.
59*4882a593Smuzhiyun  */
get_uf_mem(size_t size,void * backing_data)60*4882a593Smuzhiyun void *get_uf_mem(size_t size, void *backing_data)
61*4882a593Smuzhiyun {
62*4882a593Smuzhiyun 	void *ret;
63*4882a593Smuzhiyun 
64*4882a593Smuzhiyun 	if (uf_mem_offset + size > UF_MEM_SIZE) {
65*4882a593Smuzhiyun 		fprintf(stderr, "Requesting more uf_mem than expected!\n");
66*4882a593Smuzhiyun 		exit(EXIT_FAILURE);
67*4882a593Smuzhiyun 	}
68*4882a593Smuzhiyun 
69*4882a593Smuzhiyun 	ret = &uf_mem[uf_mem_offset];
70*4882a593Smuzhiyun 
71*4882a593Smuzhiyun 	/* Save the data that will be copied into the faulting page */
72*4882a593Smuzhiyun 	if (backing_data != NULL)
73*4882a593Smuzhiyun 		memcpy(&backing_mem[uf_mem_offset], backing_data, size);
74*4882a593Smuzhiyun 
75*4882a593Smuzhiyun 	/* Reserve the requested amount of uf_mem */
76*4882a593Smuzhiyun 	uf_mem_offset += size;
77*4882a593Smuzhiyun 	/* Keep uf_mem_offset aligned to the page size (round up) */
78*4882a593Smuzhiyun 	uf_mem_offset = (uf_mem_offset + pagesize - 1) & ~(pagesize - 1);
79*4882a593Smuzhiyun 
80*4882a593Smuzhiyun 	return ret;
81*4882a593Smuzhiyun }
82*4882a593Smuzhiyun 
fault_handler_thread(void * arg)83*4882a593Smuzhiyun void *fault_handler_thread(void *arg)
84*4882a593Smuzhiyun {
85*4882a593Smuzhiyun 	struct uffd_msg msg;	/* Data read from userfaultfd */
86*4882a593Smuzhiyun 	long uffd;		/* userfaultfd file descriptor */
87*4882a593Smuzhiyun 	struct uffdio_copy uffdio_copy;
88*4882a593Smuzhiyun 	struct pollfd pollfd;
89*4882a593Smuzhiyun 	ssize_t nread, offset;
90*4882a593Smuzhiyun 
91*4882a593Smuzhiyun 	uffd = (long) arg;
92*4882a593Smuzhiyun 
93*4882a593Smuzhiyun 	for (;;) {
94*4882a593Smuzhiyun 		pollfd.fd = uffd;
95*4882a593Smuzhiyun 		pollfd.events = POLLIN;
96*4882a593Smuzhiyun 		if (poll(&pollfd, 1, -1) == -1) {
97*4882a593Smuzhiyun 			perror("poll() failed");
98*4882a593Smuzhiyun 			exit(EXIT_FAILURE);
99*4882a593Smuzhiyun 		}
100*4882a593Smuzhiyun 
101*4882a593Smuzhiyun 		nread = read(uffd, &msg, sizeof(msg));
102*4882a593Smuzhiyun 		if (nread == 0) {
103*4882a593Smuzhiyun 			fprintf(stderr, "read(): EOF on userfaultfd\n");
104*4882a593Smuzhiyun 			exit(EXIT_FAILURE);
105*4882a593Smuzhiyun 		}
106*4882a593Smuzhiyun 
107*4882a593Smuzhiyun 		if (nread == -1) {
108*4882a593Smuzhiyun 			perror("read() failed");
109*4882a593Smuzhiyun 			exit(EXIT_FAILURE);
110*4882a593Smuzhiyun 		}
111*4882a593Smuzhiyun 
112*4882a593Smuzhiyun 		/* We expect only one kind of event */
113*4882a593Smuzhiyun 		if (msg.event != UFFD_EVENT_PAGEFAULT) {
114*4882a593Smuzhiyun 			fprintf(stderr, "Unexpected event on userfaultfd\n");
115*4882a593Smuzhiyun 			exit(EXIT_FAILURE);
116*4882a593Smuzhiyun 		}
117*4882a593Smuzhiyun 
118*4882a593Smuzhiyun 		/*
119*4882a593Smuzhiyun 		 * We need to handle page faults in units of pages(!).
120*4882a593Smuzhiyun 		 * So, round faulting address down to page boundary.
121*4882a593Smuzhiyun 		 */
122*4882a593Smuzhiyun 		uffdio_copy.dst = msg.arg.pagefault.address & ~(pagesize-1);
123*4882a593Smuzhiyun 
124*4882a593Smuzhiyun 		offset = (char *) uffdio_copy.dst - uf_mem;
125*4882a593Smuzhiyun 		uffdio_copy.src = (unsigned long) &backing_mem[offset];
126*4882a593Smuzhiyun 
127*4882a593Smuzhiyun 		uffdio_copy.len = pagesize;
128*4882a593Smuzhiyun 		uffdio_copy.mode = 0;
129*4882a593Smuzhiyun 		uffdio_copy.copy = 0;
130*4882a593Smuzhiyun 		if (ioctl(uffd, UFFDIO_COPY, &uffdio_copy) == -1) {
131*4882a593Smuzhiyun 			perror("ioctl-UFFDIO_COPY failed");
132*4882a593Smuzhiyun 			exit(EXIT_FAILURE);
133*4882a593Smuzhiyun 		}
134*4882a593Smuzhiyun 	}
135*4882a593Smuzhiyun }
136*4882a593Smuzhiyun 
setup_uf_mem(void)137*4882a593Smuzhiyun void setup_uf_mem(void)
138*4882a593Smuzhiyun {
139*4882a593Smuzhiyun 	long uffd;		/* userfaultfd file descriptor */
140*4882a593Smuzhiyun 	pthread_t thr;
141*4882a593Smuzhiyun 	struct uffdio_api uffdio_api;
142*4882a593Smuzhiyun 	struct uffdio_register uffdio_register;
143*4882a593Smuzhiyun 	int ret;
144*4882a593Smuzhiyun 
145*4882a593Smuzhiyun 	pagesize = sysconf(_SC_PAGE_SIZE);
146*4882a593Smuzhiyun 
147*4882a593Smuzhiyun 	/* Create and enable userfaultfd object */
148*4882a593Smuzhiyun 	uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
149*4882a593Smuzhiyun 	if (uffd == -1) {
150*4882a593Smuzhiyun 		perror("userfaultfd() failed");
151*4882a593Smuzhiyun 		exit(EXIT_FAILURE);
152*4882a593Smuzhiyun 	}
153*4882a593Smuzhiyun 	uffdio_api.api = UFFD_API;
154*4882a593Smuzhiyun 	uffdio_api.features = 0;
155*4882a593Smuzhiyun 	if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1) {
156*4882a593Smuzhiyun 		perror("ioctl-UFFDIO_API failed");
157*4882a593Smuzhiyun 		exit(EXIT_FAILURE);
158*4882a593Smuzhiyun 	}
159*4882a593Smuzhiyun 
160*4882a593Smuzhiyun 	/*
161*4882a593Smuzhiyun 	 * Create a private anonymous mapping. The memory will be demand-zero
162*4882a593Smuzhiyun 	 * paged, that is, not yet allocated. When we actually touch the memory
163*4882a593Smuzhiyun 	 * the related page will be allocated via the userfaultfd mechanism.
164*4882a593Smuzhiyun 	 */
165*4882a593Smuzhiyun 	uf_mem = mmap(NULL, UF_MEM_SIZE, PROT_READ | PROT_WRITE,
166*4882a593Smuzhiyun 		      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
167*4882a593Smuzhiyun 	if (uf_mem == MAP_FAILED) {
168*4882a593Smuzhiyun 		perror("mmap() failed");
169*4882a593Smuzhiyun 		exit(EXIT_FAILURE);
170*4882a593Smuzhiyun 	}
171*4882a593Smuzhiyun 
172*4882a593Smuzhiyun 	/*
173*4882a593Smuzhiyun 	 * Register the memory range of the mapping we've just mapped to be
174*4882a593Smuzhiyun 	 * handled by the userfaultfd object. In 'mode' we request to track
175*4882a593Smuzhiyun 	 * missing pages (i.e. pages that have not yet been faulted-in).
176*4882a593Smuzhiyun 	 */
177*4882a593Smuzhiyun 	uffdio_register.range.start = (unsigned long) uf_mem;
178*4882a593Smuzhiyun 	uffdio_register.range.len = UF_MEM_SIZE;
179*4882a593Smuzhiyun 	uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
180*4882a593Smuzhiyun 	if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1) {
181*4882a593Smuzhiyun 		perror("ioctl-UFFDIO_REGISTER");
182*4882a593Smuzhiyun 		exit(EXIT_FAILURE);
183*4882a593Smuzhiyun 	}
184*4882a593Smuzhiyun 
185*4882a593Smuzhiyun 	/* Create a thread that will process the userfaultfd events */
186*4882a593Smuzhiyun 	ret = pthread_create(&thr, NULL, fault_handler_thread, (void *) uffd);
187*4882a593Smuzhiyun 	if (ret != 0) {
188*4882a593Smuzhiyun 		fprintf(stderr, "pthread_create(): Error. Returned %d\n", ret);
189*4882a593Smuzhiyun 		exit(EXIT_FAILURE);
190*4882a593Smuzhiyun 	}
191*4882a593Smuzhiyun }
192*4882a593Smuzhiyun 
193*4882a593Smuzhiyun /*
194*4882a593Smuzhiyun  * Assumption: the signal was delivered while userspace was in transactional or
195*4882a593Smuzhiyun  * suspended state, i.e. uc->uc_link != NULL.
196*4882a593Smuzhiyun  */
/*
 * SIGTRAP handler, run on the userfaultfd-backed alternate stack.
 *
 * Redirects parts of the signal frame into fresh (never-faulted) uf_mem
 * chunks so that the kernel's sigreturn path page-faults while reading them.
 * get_uf_mem() saves the current content so the pages fault in with the
 * expected data and the handler can still return cleanly.
 *
 * NOTE: the v_regs pointers are redirected BEFORE uc_link itself is
 * replaced — the last line reads ucp->uc_link, so this order is required.
 */
void signal_handler(int signo, siginfo_t *si, void *uc)
{
	ucontext_t *ucp = uc;

	/* Skip 'trap' after returning, otherwise we get a SIGTRAP again */
	ucp->uc_link->uc_mcontext.regs->nip += 4;

	/* Move the checkpointed-state vector regs into fresh uf_mem */
	ucp->uc_mcontext.v_regs =
		get_uf_mem(sizeof(elf_vrreg_t), ucp->uc_mcontext.v_regs);

	/* Same for the transactional-state (uc_link) vector regs */
	ucp->uc_link->uc_mcontext.v_regs =
		get_uf_mem(sizeof(elf_vrreg_t), ucp->uc_link->uc_mcontext.v_regs);

	/* Finally move the whole transactional ucontext itself */
	ucp->uc_link = get_uf_mem(sizeof(ucontext_t), ucp->uc_link);
}
212*4882a593Smuzhiyun 
/*
 * Probe whether the kernel provides the userfaultfd syscall.
 * Calls it with an invalid flag: any outcome other than ENOSYS
 * (including success) means the syscall exists.
 */
bool have_userfaultfd(void)
{
	errno = 0;

	if (syscall(__NR_userfaultfd, -1) == 0)
		return true;

	return errno != ENOSYS;
}
222*4882a593Smuzhiyun 
tm_signal_pagefault(void)223*4882a593Smuzhiyun int tm_signal_pagefault(void)
224*4882a593Smuzhiyun {
225*4882a593Smuzhiyun 	struct sigaction sa;
226*4882a593Smuzhiyun 	stack_t ss;
227*4882a593Smuzhiyun 
228*4882a593Smuzhiyun 	SKIP_IF(!have_htm());
229*4882a593Smuzhiyun 	SKIP_IF(!have_userfaultfd());
230*4882a593Smuzhiyun 
231*4882a593Smuzhiyun 	setup_uf_mem();
232*4882a593Smuzhiyun 
233*4882a593Smuzhiyun 	/*
234*4882a593Smuzhiyun 	 * Set an alternative stack that will generate a page fault when the
235*4882a593Smuzhiyun 	 * signal is raised. The page fault will be treated via userfaultfd,
236*4882a593Smuzhiyun 	 * i.e. via fault_handler_thread.
237*4882a593Smuzhiyun 	 */
238*4882a593Smuzhiyun 	ss.ss_sp = get_uf_mem(SIGSTKSZ, NULL);
239*4882a593Smuzhiyun 	ss.ss_size = SIGSTKSZ;
240*4882a593Smuzhiyun 	ss.ss_flags = 0;
241*4882a593Smuzhiyun 	if (sigaltstack(&ss, NULL) == -1) {
242*4882a593Smuzhiyun 		perror("sigaltstack() failed");
243*4882a593Smuzhiyun 		exit(EXIT_FAILURE);
244*4882a593Smuzhiyun 	}
245*4882a593Smuzhiyun 
246*4882a593Smuzhiyun 	sa.sa_flags = SA_SIGINFO | SA_ONSTACK;
247*4882a593Smuzhiyun 	sa.sa_sigaction = signal_handler;
248*4882a593Smuzhiyun 	if (sigaction(SIGTRAP, &sa, NULL) == -1) {
249*4882a593Smuzhiyun 		perror("sigaction() failed");
250*4882a593Smuzhiyun 		exit(EXIT_FAILURE);
251*4882a593Smuzhiyun 	}
252*4882a593Smuzhiyun 
253*4882a593Smuzhiyun 	/* Trigger a SIGTRAP in transactional state */
254*4882a593Smuzhiyun 	asm __volatile__(
255*4882a593Smuzhiyun 			"tbegin.;"
256*4882a593Smuzhiyun 			"beq    1f;"
257*4882a593Smuzhiyun 			"trap;"
258*4882a593Smuzhiyun 			"1: ;"
259*4882a593Smuzhiyun 			: : : "memory");
260*4882a593Smuzhiyun 
261*4882a593Smuzhiyun 	/* Trigger a SIGTRAP in suspended state */
262*4882a593Smuzhiyun 	asm __volatile__(
263*4882a593Smuzhiyun 			"tbegin.;"
264*4882a593Smuzhiyun 			"beq    1f;"
265*4882a593Smuzhiyun 			"tsuspend.;"
266*4882a593Smuzhiyun 			"trap;"
267*4882a593Smuzhiyun 			"tresume.;"
268*4882a593Smuzhiyun 			"1: ;"
269*4882a593Smuzhiyun 			: : : "memory");
270*4882a593Smuzhiyun 
271*4882a593Smuzhiyun 	return EXIT_SUCCESS;
272*4882a593Smuzhiyun }
273*4882a593Smuzhiyun 
main(int argc,char ** argv)274*4882a593Smuzhiyun int main(int argc, char **argv)
275*4882a593Smuzhiyun {
276*4882a593Smuzhiyun 	/*
277*4882a593Smuzhiyun 	 * Depending on kernel config, the TM Bad Thing might not result in a
278*4882a593Smuzhiyun 	 * crash, instead the kernel never returns control back to userspace, so
279*4882a593Smuzhiyun 	 * set a tight timeout. If the test passes it completes almost
280*4882a593Smuzhiyun 	 * immediately.
281*4882a593Smuzhiyun 	 */
282*4882a593Smuzhiyun 	test_harness_set_timeout(2);
283*4882a593Smuzhiyun 	return test_harness(tm_signal_pagefault, "tm_signal_pagefault");
284*4882a593Smuzhiyun }
285