xref: /OK3568_Linux_fs/kernel/arch/um/os-Linux/skas/process.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun  * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de)
4*4882a593Smuzhiyun  * Copyright (C) 2002- 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
5*4882a593Smuzhiyun  */
6*4882a593Smuzhiyun 
7*4882a593Smuzhiyun #include <stdlib.h>
8*4882a593Smuzhiyun #include <stdbool.h>
9*4882a593Smuzhiyun #include <unistd.h>
10*4882a593Smuzhiyun #include <sched.h>
11*4882a593Smuzhiyun #include <errno.h>
12*4882a593Smuzhiyun #include <string.h>
13*4882a593Smuzhiyun #include <sys/mman.h>
14*4882a593Smuzhiyun #include <sys/wait.h>
15*4882a593Smuzhiyun #include <asm/unistd.h>
16*4882a593Smuzhiyun #include <as-layout.h>
17*4882a593Smuzhiyun #include <init.h>
18*4882a593Smuzhiyun #include <kern_util.h>
19*4882a593Smuzhiyun #include <mem.h>
20*4882a593Smuzhiyun #include <os.h>
21*4882a593Smuzhiyun #include <ptrace_user.h>
22*4882a593Smuzhiyun #include <registers.h>
23*4882a593Smuzhiyun #include <skas.h>
24*4882a593Smuzhiyun #include <sysdep/stub.h>
25*4882a593Smuzhiyun #include <linux/threads.h>
26*4882a593Smuzhiyun 
/*
 * Report whether @pid equals the process group ID of the calling process.
 * @fd and @data are part of the signature but are not used here.
 *
 * Returns non-zero when @pid matches getpgrp(), zero otherwise.
 */
int is_skas_winch(int pid, int fd, void *data)
{
	const int own_pgrp = getpgrp();

	return own_pgrp == pid;
}
31*4882a593Smuzhiyun 
ptrace_dump_regs(int pid)32*4882a593Smuzhiyun static int ptrace_dump_regs(int pid)
33*4882a593Smuzhiyun {
34*4882a593Smuzhiyun 	unsigned long regs[MAX_REG_NR];
35*4882a593Smuzhiyun 	int i;
36*4882a593Smuzhiyun 
37*4882a593Smuzhiyun 	if (ptrace(PTRACE_GETREGS, pid, 0, regs) < 0)
38*4882a593Smuzhiyun 		return -errno;
39*4882a593Smuzhiyun 
40*4882a593Smuzhiyun 	printk(UM_KERN_ERR "Stub registers -\n");
41*4882a593Smuzhiyun 	for (i = 0; i < ARRAY_SIZE(regs); i++)
42*4882a593Smuzhiyun 		printk(UM_KERN_ERR "\t%d - %lx\n", i, regs[i]);
43*4882a593Smuzhiyun 
44*4882a593Smuzhiyun 	return 0;
45*4882a593Smuzhiyun }
46*4882a593Smuzhiyun 
47*4882a593Smuzhiyun /*
48*4882a593Smuzhiyun  * Signals that are OK to receive in the stub - we'll just continue it.
49*4882a593Smuzhiyun  * SIGWINCH will happen when UML is inside a detached screen.
50*4882a593Smuzhiyun  */
51*4882a593Smuzhiyun #define STUB_SIG_MASK ((1 << SIGALRM) | (1 << SIGWINCH))
52*4882a593Smuzhiyun 
53*4882a593Smuzhiyun /* Signals that the stub will finish with - anything else is an error */
54*4882a593Smuzhiyun #define STUB_DONE_MASK (1 << SIGTRAP)
55*4882a593Smuzhiyun 
wait_stub_done(int pid)56*4882a593Smuzhiyun void wait_stub_done(int pid)
57*4882a593Smuzhiyun {
58*4882a593Smuzhiyun 	int n, status, err;
59*4882a593Smuzhiyun 
60*4882a593Smuzhiyun 	while (1) {
61*4882a593Smuzhiyun 		CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED | __WALL));
62*4882a593Smuzhiyun 		if ((n < 0) || !WIFSTOPPED(status))
63*4882a593Smuzhiyun 			goto bad_wait;
64*4882a593Smuzhiyun 
65*4882a593Smuzhiyun 		if (((1 << WSTOPSIG(status)) & STUB_SIG_MASK) == 0)
66*4882a593Smuzhiyun 			break;
67*4882a593Smuzhiyun 
68*4882a593Smuzhiyun 		err = ptrace(PTRACE_CONT, pid, 0, 0);
69*4882a593Smuzhiyun 		if (err) {
70*4882a593Smuzhiyun 			printk(UM_KERN_ERR "wait_stub_done : continue failed, "
71*4882a593Smuzhiyun 			       "errno = %d\n", errno);
72*4882a593Smuzhiyun 			fatal_sigsegv();
73*4882a593Smuzhiyun 		}
74*4882a593Smuzhiyun 	}
75*4882a593Smuzhiyun 
76*4882a593Smuzhiyun 	if (((1 << WSTOPSIG(status)) & STUB_DONE_MASK) != 0)
77*4882a593Smuzhiyun 		return;
78*4882a593Smuzhiyun 
79*4882a593Smuzhiyun bad_wait:
80*4882a593Smuzhiyun 	err = ptrace_dump_regs(pid);
81*4882a593Smuzhiyun 	if (err)
82*4882a593Smuzhiyun 		printk(UM_KERN_ERR "Failed to get registers from stub, "
83*4882a593Smuzhiyun 		       "errno = %d\n", -err);
84*4882a593Smuzhiyun 	printk(UM_KERN_ERR "wait_stub_done : failed to wait for SIGTRAP, "
85*4882a593Smuzhiyun 	       "pid = %d, n = %d, errno = %d, status = 0x%x\n", pid, n, errno,
86*4882a593Smuzhiyun 	       status);
87*4882a593Smuzhiyun 	fatal_sigsegv();
88*4882a593Smuzhiyun }
89*4882a593Smuzhiyun 
90*4882a593Smuzhiyun extern unsigned long current_stub_stack(void);
91*4882a593Smuzhiyun 
get_skas_faultinfo(int pid,struct faultinfo * fi,unsigned long * aux_fp_regs)92*4882a593Smuzhiyun static void get_skas_faultinfo(int pid, struct faultinfo *fi, unsigned long *aux_fp_regs)
93*4882a593Smuzhiyun {
94*4882a593Smuzhiyun 	int err;
95*4882a593Smuzhiyun 
96*4882a593Smuzhiyun 	err = get_fp_registers(pid, aux_fp_regs);
97*4882a593Smuzhiyun 	if (err < 0) {
98*4882a593Smuzhiyun 		printk(UM_KERN_ERR "save_fp_registers returned %d\n",
99*4882a593Smuzhiyun 		       err);
100*4882a593Smuzhiyun 		fatal_sigsegv();
101*4882a593Smuzhiyun 	}
102*4882a593Smuzhiyun 	err = ptrace(PTRACE_CONT, pid, 0, SIGSEGV);
103*4882a593Smuzhiyun 	if (err) {
104*4882a593Smuzhiyun 		printk(UM_KERN_ERR "Failed to continue stub, pid = %d, "
105*4882a593Smuzhiyun 		       "errno = %d\n", pid, errno);
106*4882a593Smuzhiyun 		fatal_sigsegv();
107*4882a593Smuzhiyun 	}
108*4882a593Smuzhiyun 	wait_stub_done(pid);
109*4882a593Smuzhiyun 
110*4882a593Smuzhiyun 	/*
111*4882a593Smuzhiyun 	 * faultinfo is prepared by the stub_segv_handler at start of
112*4882a593Smuzhiyun 	 * the stub stack page. We just have to copy it.
113*4882a593Smuzhiyun 	 */
114*4882a593Smuzhiyun 	memcpy(fi, (void *)current_stub_stack(), sizeof(*fi));
115*4882a593Smuzhiyun 
116*4882a593Smuzhiyun 	err = put_fp_registers(pid, aux_fp_regs);
117*4882a593Smuzhiyun 	if (err < 0) {
118*4882a593Smuzhiyun 		printk(UM_KERN_ERR "put_fp_registers returned %d\n",
119*4882a593Smuzhiyun 		       err);
120*4882a593Smuzhiyun 		fatal_sigsegv();
121*4882a593Smuzhiyun 	}
122*4882a593Smuzhiyun }
123*4882a593Smuzhiyun 
handle_segv(int pid,struct uml_pt_regs * regs,unsigned long * aux_fp_regs)124*4882a593Smuzhiyun static void handle_segv(int pid, struct uml_pt_regs *regs, unsigned long *aux_fp_regs)
125*4882a593Smuzhiyun {
126*4882a593Smuzhiyun 	get_skas_faultinfo(pid, &regs->faultinfo, aux_fp_regs);
127*4882a593Smuzhiyun 	segv(regs->faultinfo, 0, 1, NULL);
128*4882a593Smuzhiyun }
129*4882a593Smuzhiyun 
130*4882a593Smuzhiyun /*
131*4882a593Smuzhiyun  * To use the same value of using_sysemu as the caller, ask it that value
132*4882a593Smuzhiyun  * (in local_using_sysemu
133*4882a593Smuzhiyun  */
handle_trap(int pid,struct uml_pt_regs * regs,int local_using_sysemu)134*4882a593Smuzhiyun static void handle_trap(int pid, struct uml_pt_regs *regs,
135*4882a593Smuzhiyun 			int local_using_sysemu)
136*4882a593Smuzhiyun {
137*4882a593Smuzhiyun 	int err, status;
138*4882a593Smuzhiyun 
139*4882a593Smuzhiyun 	if ((UPT_IP(regs) >= STUB_START) && (UPT_IP(regs) < STUB_END))
140*4882a593Smuzhiyun 		fatal_sigsegv();
141*4882a593Smuzhiyun 
142*4882a593Smuzhiyun 	if (!local_using_sysemu)
143*4882a593Smuzhiyun 	{
144*4882a593Smuzhiyun 		err = ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_NR_OFFSET,
145*4882a593Smuzhiyun 			     __NR_getpid);
146*4882a593Smuzhiyun 		if (err < 0) {
147*4882a593Smuzhiyun 			printk(UM_KERN_ERR "handle_trap - nullifying syscall "
148*4882a593Smuzhiyun 			       "failed, errno = %d\n", errno);
149*4882a593Smuzhiyun 			fatal_sigsegv();
150*4882a593Smuzhiyun 		}
151*4882a593Smuzhiyun 
152*4882a593Smuzhiyun 		err = ptrace(PTRACE_SYSCALL, pid, 0, 0);
153*4882a593Smuzhiyun 		if (err < 0) {
154*4882a593Smuzhiyun 			printk(UM_KERN_ERR "handle_trap - continuing to end of "
155*4882a593Smuzhiyun 			       "syscall failed, errno = %d\n", errno);
156*4882a593Smuzhiyun 			fatal_sigsegv();
157*4882a593Smuzhiyun 		}
158*4882a593Smuzhiyun 
159*4882a593Smuzhiyun 		CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED | __WALL));
160*4882a593Smuzhiyun 		if ((err < 0) || !WIFSTOPPED(status) ||
161*4882a593Smuzhiyun 		    (WSTOPSIG(status) != SIGTRAP + 0x80)) {
162*4882a593Smuzhiyun 			err = ptrace_dump_regs(pid);
163*4882a593Smuzhiyun 			if (err)
164*4882a593Smuzhiyun 				printk(UM_KERN_ERR "Failed to get registers "
165*4882a593Smuzhiyun 				       "from process, errno = %d\n", -err);
166*4882a593Smuzhiyun 			printk(UM_KERN_ERR "handle_trap - failed to wait at "
167*4882a593Smuzhiyun 			       "end of syscall, errno = %d, status = %d\n",
168*4882a593Smuzhiyun 			       errno, status);
169*4882a593Smuzhiyun 			fatal_sigsegv();
170*4882a593Smuzhiyun 		}
171*4882a593Smuzhiyun 	}
172*4882a593Smuzhiyun 
173*4882a593Smuzhiyun 	handle_syscall(regs);
174*4882a593Smuzhiyun }
175*4882a593Smuzhiyun 
176*4882a593Smuzhiyun extern char __syscall_stub_start[];
177*4882a593Smuzhiyun 
178*4882a593Smuzhiyun /**
179*4882a593Smuzhiyun  * userspace_tramp() - userspace trampoline
180*4882a593Smuzhiyun  * @stack:	pointer to the new userspace stack page, can be NULL, if? FIXME:
181*4882a593Smuzhiyun  *
182*4882a593Smuzhiyun  * The userspace trampoline is used to setup a new userspace process in start_userspace() after it was clone()'ed.
183*4882a593Smuzhiyun  * This function will run on a temporary stack page.
184*4882a593Smuzhiyun  * It ptrace()'es itself, then
185*4882a593Smuzhiyun  * Two pages are mapped into the userspace address space:
186*4882a593Smuzhiyun  * - STUB_CODE (with EXEC), which contains the skas stub code
187*4882a593Smuzhiyun  * - STUB_DATA (with R/W), which contains a data page that is used to transfer certain data between the UML userspace process and the UML kernel.
188*4882a593Smuzhiyun  * Also for the userspace process a SIGSEGV handler is installed to catch pagefaults in the userspace process.
189*4882a593Smuzhiyun  * And last the process stops itself to give control to the UML kernel for this userspace process.
190*4882a593Smuzhiyun  *
191*4882a593Smuzhiyun  * Return: Always zero, otherwise the current userspace process is ended with non null exit() call
192*4882a593Smuzhiyun  */
userspace_tramp(void * stack)193*4882a593Smuzhiyun static int userspace_tramp(void *stack)
194*4882a593Smuzhiyun {
195*4882a593Smuzhiyun 	void *addr;
196*4882a593Smuzhiyun 	int fd;
197*4882a593Smuzhiyun 	unsigned long long offset;
198*4882a593Smuzhiyun 
199*4882a593Smuzhiyun 	ptrace(PTRACE_TRACEME, 0, 0, 0);
200*4882a593Smuzhiyun 
201*4882a593Smuzhiyun 	signal(SIGTERM, SIG_DFL);
202*4882a593Smuzhiyun 	signal(SIGWINCH, SIG_IGN);
203*4882a593Smuzhiyun 
204*4882a593Smuzhiyun 	/*
205*4882a593Smuzhiyun 	 * This has a pte, but it can't be mapped in with the usual
206*4882a593Smuzhiyun 	 * tlb_flush mechanism because this is part of that mechanism
207*4882a593Smuzhiyun 	 */
208*4882a593Smuzhiyun 	fd = phys_mapping(to_phys(__syscall_stub_start), &offset);
209*4882a593Smuzhiyun 	addr = mmap64((void *) STUB_CODE, UM_KERN_PAGE_SIZE,
210*4882a593Smuzhiyun 		      PROT_EXEC, MAP_FIXED | MAP_PRIVATE, fd, offset);
211*4882a593Smuzhiyun 	if (addr == MAP_FAILED) {
212*4882a593Smuzhiyun 		printk(UM_KERN_ERR "mapping mmap stub at 0x%lx failed, "
213*4882a593Smuzhiyun 		       "errno = %d\n", STUB_CODE, errno);
214*4882a593Smuzhiyun 		exit(1);
215*4882a593Smuzhiyun 	}
216*4882a593Smuzhiyun 
217*4882a593Smuzhiyun 	if (stack != NULL) {
218*4882a593Smuzhiyun 		fd = phys_mapping(to_phys(stack), &offset);
219*4882a593Smuzhiyun 		addr = mmap((void *) STUB_DATA,
220*4882a593Smuzhiyun 			    UM_KERN_PAGE_SIZE, PROT_READ | PROT_WRITE,
221*4882a593Smuzhiyun 			    MAP_FIXED | MAP_SHARED, fd, offset);
222*4882a593Smuzhiyun 		if (addr == MAP_FAILED) {
223*4882a593Smuzhiyun 			printk(UM_KERN_ERR "mapping segfault stack "
224*4882a593Smuzhiyun 			       "at 0x%lx failed, errno = %d\n",
225*4882a593Smuzhiyun 			       STUB_DATA, errno);
226*4882a593Smuzhiyun 			exit(1);
227*4882a593Smuzhiyun 		}
228*4882a593Smuzhiyun 	}
229*4882a593Smuzhiyun 	if (stack != NULL) {
230*4882a593Smuzhiyun 		struct sigaction sa;
231*4882a593Smuzhiyun 
232*4882a593Smuzhiyun 		unsigned long v = STUB_CODE +
233*4882a593Smuzhiyun 				  (unsigned long) stub_segv_handler -
234*4882a593Smuzhiyun 				  (unsigned long) __syscall_stub_start;
235*4882a593Smuzhiyun 
236*4882a593Smuzhiyun 		set_sigstack((void *) STUB_DATA, UM_KERN_PAGE_SIZE);
237*4882a593Smuzhiyun 		sigemptyset(&sa.sa_mask);
238*4882a593Smuzhiyun 		sa.sa_flags = SA_ONSTACK | SA_NODEFER | SA_SIGINFO;
239*4882a593Smuzhiyun 		sa.sa_sigaction = (void *) v;
240*4882a593Smuzhiyun 		sa.sa_restorer = NULL;
241*4882a593Smuzhiyun 		if (sigaction(SIGSEGV, &sa, NULL) < 0) {
242*4882a593Smuzhiyun 			printk(UM_KERN_ERR "userspace_tramp - setting SIGSEGV "
243*4882a593Smuzhiyun 			       "handler failed - errno = %d\n", errno);
244*4882a593Smuzhiyun 			exit(1);
245*4882a593Smuzhiyun 		}
246*4882a593Smuzhiyun 	}
247*4882a593Smuzhiyun 
248*4882a593Smuzhiyun 	kill(os_getpid(), SIGSTOP);
249*4882a593Smuzhiyun 	return 0;
250*4882a593Smuzhiyun }
251*4882a593Smuzhiyun 
/*
 * Pid of the userspace process that userspace() drives; sized NR_CPUS,
 * but only slot 0 is read or written in this file (see __switch_mm()).
 */
int userspace_pid[NR_CPUS];
/*
 * Kill flag copied from the mm_id by __switch_mm(); userspace() calls
 * fatal_sigsegv() when slot 0 is non-zero.
 */
int kill_userspace_mm[NR_CPUS];
254*4882a593Smuzhiyun 
255*4882a593Smuzhiyun /**
256*4882a593Smuzhiyun  * start_userspace() - prepare a new userspace process
257*4882a593Smuzhiyun  * @stub_stack:	pointer to the stub stack. Can be NULL, if? FIXME:
258*4882a593Smuzhiyun  *
259*4882a593Smuzhiyun  * Setups a new temporary stack page that is used while userspace_tramp() runs
260*4882a593Smuzhiyun  * Clones the kernel process into a new userspace process, with FDs only.
261*4882a593Smuzhiyun  *
262*4882a593Smuzhiyun  * Return: When positive: the process id of the new userspace process,
263*4882a593Smuzhiyun  *         when negative: an error number.
264*4882a593Smuzhiyun  * FIXME: can PIDs become negative?!
265*4882a593Smuzhiyun  */
start_userspace(unsigned long stub_stack)266*4882a593Smuzhiyun int start_userspace(unsigned long stub_stack)
267*4882a593Smuzhiyun {
268*4882a593Smuzhiyun 	void *stack;
269*4882a593Smuzhiyun 	unsigned long sp;
270*4882a593Smuzhiyun 	int pid, status, n, flags, err;
271*4882a593Smuzhiyun 
272*4882a593Smuzhiyun 	/* setup a temporary stack page */
273*4882a593Smuzhiyun 	stack = mmap(NULL, UM_KERN_PAGE_SIZE,
274*4882a593Smuzhiyun 		     PROT_READ | PROT_WRITE | PROT_EXEC,
275*4882a593Smuzhiyun 		     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
276*4882a593Smuzhiyun 	if (stack == MAP_FAILED) {
277*4882a593Smuzhiyun 		err = -errno;
278*4882a593Smuzhiyun 		printk(UM_KERN_ERR "start_userspace : mmap failed, "
279*4882a593Smuzhiyun 		       "errno = %d\n", errno);
280*4882a593Smuzhiyun 		return err;
281*4882a593Smuzhiyun 	}
282*4882a593Smuzhiyun 
283*4882a593Smuzhiyun 	/* set stack pointer to the end of the stack page, so it can grow downwards */
284*4882a593Smuzhiyun 	sp = (unsigned long) stack + UM_KERN_PAGE_SIZE - sizeof(void *);
285*4882a593Smuzhiyun 
286*4882a593Smuzhiyun 	flags = CLONE_FILES | SIGCHLD;
287*4882a593Smuzhiyun 
288*4882a593Smuzhiyun 	/* clone into new userspace process */
289*4882a593Smuzhiyun 	pid = clone(userspace_tramp, (void *) sp, flags, (void *) stub_stack);
290*4882a593Smuzhiyun 	if (pid < 0) {
291*4882a593Smuzhiyun 		err = -errno;
292*4882a593Smuzhiyun 		printk(UM_KERN_ERR "start_userspace : clone failed, "
293*4882a593Smuzhiyun 		       "errno = %d\n", errno);
294*4882a593Smuzhiyun 		return err;
295*4882a593Smuzhiyun 	}
296*4882a593Smuzhiyun 
297*4882a593Smuzhiyun 	do {
298*4882a593Smuzhiyun 		CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED | __WALL));
299*4882a593Smuzhiyun 		if (n < 0) {
300*4882a593Smuzhiyun 			err = -errno;
301*4882a593Smuzhiyun 			printk(UM_KERN_ERR "start_userspace : wait failed, "
302*4882a593Smuzhiyun 			       "errno = %d\n", errno);
303*4882a593Smuzhiyun 			goto out_kill;
304*4882a593Smuzhiyun 		}
305*4882a593Smuzhiyun 	} while (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGALRM));
306*4882a593Smuzhiyun 
307*4882a593Smuzhiyun 	if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) {
308*4882a593Smuzhiyun 		err = -EINVAL;
309*4882a593Smuzhiyun 		printk(UM_KERN_ERR "start_userspace : expected SIGSTOP, got "
310*4882a593Smuzhiyun 		       "status = %d\n", status);
311*4882a593Smuzhiyun 		goto out_kill;
312*4882a593Smuzhiyun 	}
313*4882a593Smuzhiyun 
314*4882a593Smuzhiyun 	if (ptrace(PTRACE_OLDSETOPTIONS, pid, NULL,
315*4882a593Smuzhiyun 		   (void *) PTRACE_O_TRACESYSGOOD) < 0) {
316*4882a593Smuzhiyun 		err = -errno;
317*4882a593Smuzhiyun 		printk(UM_KERN_ERR "start_userspace : PTRACE_OLDSETOPTIONS "
318*4882a593Smuzhiyun 		       "failed, errno = %d\n", errno);
319*4882a593Smuzhiyun 		goto out_kill;
320*4882a593Smuzhiyun 	}
321*4882a593Smuzhiyun 
322*4882a593Smuzhiyun 	if (munmap(stack, UM_KERN_PAGE_SIZE) < 0) {
323*4882a593Smuzhiyun 		err = -errno;
324*4882a593Smuzhiyun 		printk(UM_KERN_ERR "start_userspace : munmap failed, "
325*4882a593Smuzhiyun 		       "errno = %d\n", errno);
326*4882a593Smuzhiyun 		goto out_kill;
327*4882a593Smuzhiyun 	}
328*4882a593Smuzhiyun 
329*4882a593Smuzhiyun 	return pid;
330*4882a593Smuzhiyun 
331*4882a593Smuzhiyun  out_kill:
332*4882a593Smuzhiyun 	os_kill_ptraced_process(pid, 1);
333*4882a593Smuzhiyun 	return err;
334*4882a593Smuzhiyun }
335*4882a593Smuzhiyun 
/*
 * userspace() - run the current userspace process and service its stops
 * @regs:		register state that is loaded into the process before
 *			each resume and refreshed from it after each stop
 * @aux_fp_regs:	FP scratch buffer passed on to the SIGSEGV handling
 *
 * Each loop iteration writes @regs into the process selected by
 * userspace_pid[0], resumes it with the ptrace operation chosen by
 * SELECT_PTRACE_OPERATION(), waits for it, reads the registers back and,
 * if the process is stopped, dispatches on the stop signal. The loop has
 * no exit path of its own; unrecoverable errors end in fatal_sigsegv().
 */
void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs)
{
	int err, status, op, pid = userspace_pid[0];
	/* To prevent races if using_sysemu changes under us.*/
	int local_using_sysemu;
	siginfo_t si;

	/* Handle any immediate reschedules or signals */
	interrupt_end();

	while (1) {
		/* Flag copied from the mm_id by __switch_mm(). */
		if (kill_userspace_mm[0])
			fatal_sigsegv();

		/*
		 * This can legitimately fail if the process loads a
		 * bogus value into a segment register.  It will
		 * segfault and PTRACE_GETREGS will read that value
		 * out of the process.  However, PTRACE_SETREGS will
		 * fail.  In this case, there is nothing to do but
		 * just kill the process.
		 */
		if (ptrace(PTRACE_SETREGS, pid, 0, regs->gp)) {
			printk(UM_KERN_ERR "userspace - ptrace set regs "
			       "failed, errno = %d\n", errno);
			fatal_sigsegv();
		}

		if (put_fp_registers(pid, regs->fp)) {
			printk(UM_KERN_ERR "userspace - ptrace set fp regs "
			       "failed, errno = %d\n", errno);
			fatal_sigsegv();
		}

		/* Now we set local_using_sysemu to be used for one loop */
		local_using_sysemu = get_using_sysemu();

		op = SELECT_PTRACE_OPERATION(local_using_sysemu,
					     singlestepping(NULL));

		/* Resume the process until its next ptrace stop. */
		if (ptrace(op, pid, 0, 0)) {
			printk(UM_KERN_ERR "userspace - ptrace continue "
			       "failed, op = %d, errno = %d\n", op, errno);
			fatal_sigsegv();
		}

		CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED | __WALL));
		if (err < 0) {
			printk(UM_KERN_ERR "userspace - wait failed, "
			       "errno = %d\n", errno);
			fatal_sigsegv();
		}

		/* Pull the register state at the stop back into regs. */
		regs->is_user = 1;
		if (ptrace(PTRACE_GETREGS, pid, 0, regs->gp)) {
			printk(UM_KERN_ERR "userspace - PTRACE_GETREGS failed, "
			       "errno = %d\n", errno);
			fatal_sigsegv();
		}

		if (get_fp_registers(pid, regs->fp)) {
			printk(UM_KERN_ERR "userspace -  get_fp_registers failed, "
			       "errno = %d\n", errno);
			fatal_sigsegv();
		}

		UPT_SYSCALL_NR(regs) = -1; /* Assume: It's not a syscall */

		if (WIFSTOPPED(status)) {
			int sig = WSTOPSIG(status);

			/*
			 * NOTE(review): the return value is not checked, so si
			 * keeps whatever it held before if this call fails.
			 */
			ptrace(PTRACE_GETSIGINFO, pid, 0, (struct siginfo *)&si);

			switch (sig) {
			case SIGSEGV:
				if (PTRACE_FULL_FAULTINFO) {
					get_skas_faultinfo(pid,
							   &regs->faultinfo, aux_fp_regs);
					(*sig_info[SIGSEGV])(SIGSEGV, (struct siginfo *)&si,
							     regs);
				}
				else handle_segv(pid, regs, aux_fp_regs);
				break;
			/* Syscall stop, as marked by PTRACE_O_TRACESYSGOOD. */
			case SIGTRAP + 0x80:
			        handle_trap(pid, regs, local_using_sysemu);
				break;
			case SIGTRAP:
				relay_signal(SIGTRAP, (struct siginfo *)&si, regs);
				break;
			/* Nothing to dispatch; fall out to interrupt_end(). */
			case SIGALRM:
				break;
			/* Forwarded to the registered handler with signals blocked. */
			case SIGIO:
			case SIGILL:
			case SIGBUS:
			case SIGFPE:
			case SIGWINCH:
				block_signals_trace();
				(*sig_info[sig])(sig, (struct siginfo *)&si, regs);
				unblock_signals_trace();
				break;
			default:
				printk(UM_KERN_ERR "userspace - child stopped "
				       "with signal %d\n", sig);
				fatal_sigsegv();
			}
			/* Re-read the pid: the handlers above may have changed it. */
			pid = userspace_pid[0];
			interrupt_end();

			/* Avoid -ERESTARTSYS handling in host */
			if (PT_SYSCALL_NR_OFFSET != PT_SYSCALL_RET_OFFSET)
				PT_SYSCALL_NR(regs->gp) = -1;
		}
	}
}
450*4882a593Smuzhiyun 
451*4882a593Smuzhiyun static unsigned long thread_regs[MAX_REG_NR];
452*4882a593Smuzhiyun static unsigned long thread_fp_regs[FP_SIZE];
453*4882a593Smuzhiyun 
init_thread_regs(void)454*4882a593Smuzhiyun static int __init init_thread_regs(void)
455*4882a593Smuzhiyun {
456*4882a593Smuzhiyun 	get_safe_registers(thread_regs, thread_fp_regs);
457*4882a593Smuzhiyun 	/* Set parent's instruction pointer to start of clone-stub */
458*4882a593Smuzhiyun 	thread_regs[REGS_IP_INDEX] = STUB_CODE +
459*4882a593Smuzhiyun 				(unsigned long) stub_clone_handler -
460*4882a593Smuzhiyun 				(unsigned long) __syscall_stub_start;
461*4882a593Smuzhiyun 	thread_regs[REGS_SP_INDEX] = STUB_DATA + UM_KERN_PAGE_SIZE -
462*4882a593Smuzhiyun 		sizeof(void *);
463*4882a593Smuzhiyun #ifdef __SIGNAL_FRAMESIZE
464*4882a593Smuzhiyun 	thread_regs[REGS_SP_INDEX] -= __SIGNAL_FRAMESIZE;
465*4882a593Smuzhiyun #endif
466*4882a593Smuzhiyun 	return 0;
467*4882a593Smuzhiyun }
468*4882a593Smuzhiyun 
469*4882a593Smuzhiyun __initcall(init_thread_regs);
470*4882a593Smuzhiyun 
copy_context_skas0(unsigned long new_stack,int pid)471*4882a593Smuzhiyun int copy_context_skas0(unsigned long new_stack, int pid)
472*4882a593Smuzhiyun {
473*4882a593Smuzhiyun 	int err;
474*4882a593Smuzhiyun 	unsigned long current_stack = current_stub_stack();
475*4882a593Smuzhiyun 	struct stub_data *data = (struct stub_data *) current_stack;
476*4882a593Smuzhiyun 	struct stub_data *child_data = (struct stub_data *) new_stack;
477*4882a593Smuzhiyun 	unsigned long long new_offset;
478*4882a593Smuzhiyun 	int new_fd = phys_mapping(to_phys((void *)new_stack), &new_offset);
479*4882a593Smuzhiyun 
480*4882a593Smuzhiyun 	/*
481*4882a593Smuzhiyun 	 * prepare offset and fd of child's stack as argument for parent's
482*4882a593Smuzhiyun 	 * and child's mmap2 calls
483*4882a593Smuzhiyun 	 */
484*4882a593Smuzhiyun 	*data = ((struct stub_data) {
485*4882a593Smuzhiyun 			.offset	= MMAP_OFFSET(new_offset),
486*4882a593Smuzhiyun 			.fd     = new_fd
487*4882a593Smuzhiyun 	});
488*4882a593Smuzhiyun 
489*4882a593Smuzhiyun 	err = ptrace_setregs(pid, thread_regs);
490*4882a593Smuzhiyun 	if (err < 0) {
491*4882a593Smuzhiyun 		err = -errno;
492*4882a593Smuzhiyun 		printk(UM_KERN_ERR "copy_context_skas0 : PTRACE_SETREGS "
493*4882a593Smuzhiyun 		       "failed, pid = %d, errno = %d\n", pid, -err);
494*4882a593Smuzhiyun 		return err;
495*4882a593Smuzhiyun 	}
496*4882a593Smuzhiyun 
497*4882a593Smuzhiyun 	err = put_fp_registers(pid, thread_fp_regs);
498*4882a593Smuzhiyun 	if (err < 0) {
499*4882a593Smuzhiyun 		printk(UM_KERN_ERR "copy_context_skas0 : put_fp_registers "
500*4882a593Smuzhiyun 		       "failed, pid = %d, err = %d\n", pid, err);
501*4882a593Smuzhiyun 		return err;
502*4882a593Smuzhiyun 	}
503*4882a593Smuzhiyun 
504*4882a593Smuzhiyun 	/* set a well known return code for detection of child write failure */
505*4882a593Smuzhiyun 	child_data->err = 12345678;
506*4882a593Smuzhiyun 
507*4882a593Smuzhiyun 	/*
508*4882a593Smuzhiyun 	 * Wait, until parent has finished its work: read child's pid from
509*4882a593Smuzhiyun 	 * parent's stack, and check, if bad result.
510*4882a593Smuzhiyun 	 */
511*4882a593Smuzhiyun 	err = ptrace(PTRACE_CONT, pid, 0, 0);
512*4882a593Smuzhiyun 	if (err) {
513*4882a593Smuzhiyun 		err = -errno;
514*4882a593Smuzhiyun 		printk(UM_KERN_ERR "Failed to continue new process, pid = %d, "
515*4882a593Smuzhiyun 		       "errno = %d\n", pid, errno);
516*4882a593Smuzhiyun 		return err;
517*4882a593Smuzhiyun 	}
518*4882a593Smuzhiyun 
519*4882a593Smuzhiyun 	wait_stub_done(pid);
520*4882a593Smuzhiyun 
521*4882a593Smuzhiyun 	pid = data->err;
522*4882a593Smuzhiyun 	if (pid < 0) {
523*4882a593Smuzhiyun 		printk(UM_KERN_ERR "copy_context_skas0 - stub-parent reports "
524*4882a593Smuzhiyun 		       "error %d\n", -pid);
525*4882a593Smuzhiyun 		return pid;
526*4882a593Smuzhiyun 	}
527*4882a593Smuzhiyun 
528*4882a593Smuzhiyun 	/*
529*4882a593Smuzhiyun 	 * Wait, until child has finished too: read child's result from
530*4882a593Smuzhiyun 	 * child's stack and check it.
531*4882a593Smuzhiyun 	 */
532*4882a593Smuzhiyun 	wait_stub_done(pid);
533*4882a593Smuzhiyun 	if (child_data->err != STUB_DATA) {
534*4882a593Smuzhiyun 		printk(UM_KERN_ERR "copy_context_skas0 - stub-child reports "
535*4882a593Smuzhiyun 		       "error %ld\n", child_data->err);
536*4882a593Smuzhiyun 		err = child_data->err;
537*4882a593Smuzhiyun 		goto out_kill;
538*4882a593Smuzhiyun 	}
539*4882a593Smuzhiyun 
540*4882a593Smuzhiyun 	if (ptrace(PTRACE_OLDSETOPTIONS, pid, NULL,
541*4882a593Smuzhiyun 		   (void *)PTRACE_O_TRACESYSGOOD) < 0) {
542*4882a593Smuzhiyun 		err = -errno;
543*4882a593Smuzhiyun 		printk(UM_KERN_ERR "copy_context_skas0 : PTRACE_OLDSETOPTIONS "
544*4882a593Smuzhiyun 		       "failed, errno = %d\n", errno);
545*4882a593Smuzhiyun 		goto out_kill;
546*4882a593Smuzhiyun 	}
547*4882a593Smuzhiyun 
548*4882a593Smuzhiyun 	return pid;
549*4882a593Smuzhiyun 
550*4882a593Smuzhiyun  out_kill:
551*4882a593Smuzhiyun 	os_kill_ptraced_process(pid, 1);
552*4882a593Smuzhiyun 	return err;
553*4882a593Smuzhiyun }
554*4882a593Smuzhiyun 
new_thread(void * stack,jmp_buf * buf,void (* handler)(void))555*4882a593Smuzhiyun void new_thread(void *stack, jmp_buf *buf, void (*handler)(void))
556*4882a593Smuzhiyun {
557*4882a593Smuzhiyun 	(*buf)[0].JB_IP = (unsigned long) handler;
558*4882a593Smuzhiyun 	(*buf)[0].JB_SP = (unsigned long) stack + UM_THREAD_SIZE -
559*4882a593Smuzhiyun 		sizeof(void *);
560*4882a593Smuzhiyun }
561*4882a593Smuzhiyun 
/*
 * Values that setjmp(initial_jmpbuf) yields in start_idle_thread().
 * 0 is the direct return of setjmp(); the others are passed to
 * UML_LONGJMP() by initial_thread_cb_skas(), halt_skas() and reboot_skas().
 */
#define INIT_JMP_NEW_THREAD 0
#define INIT_JMP_CALLBACK 1
#define INIT_JMP_HALT 2
#define INIT_JMP_REBOOT 3
566*4882a593Smuzhiyun 
/*
 * Save the current context in @me and jump to the context stored in
 * @you. The function returns once something jumps back to @me.
 */
void switch_threads(jmp_buf *me, jmp_buf *you)
{
	if (UML_SETJMP(me) != 0) {
		/* Resumed through @me: nothing more to do. */
		return;
	}

	UML_LONGJMP(you, 1);
}
572*4882a593Smuzhiyun 
/* Context saved by start_idle_thread(); target of the INIT_JMP_* jumps. */
static jmp_buf initial_jmpbuf;

/*
 * Callback request handed from initial_thread_cb_skas() to
 * start_idle_thread(): the function to call, its argument and the
 * context to jump back to afterwards.
 */
/* XXX Make these percpu */
static void (*cb_proc)(void *arg);
static void *cb_arg;
static jmp_buf *cb_back;
579*4882a593Smuzhiyun 
/*
 * start_idle_thread() - save the initial context and start the first thread
 * @stack:	base of the UM_THREAD_SIZE stack for the new thread
 * @switch_buf:	jump buffer that is set up to enter uml_finishsetup on
 *		@stack and then jumped to
 *
 * The context saved in initial_jmpbuf is re-entered later with one of the
 * INIT_JMP_* values; the switch below is the dispatcher for those jumps.
 *
 * Return: 0 after an INIT_JMP_HALT jump, 1 after an INIT_JMP_REBOOT jump.
 */
int start_idle_thread(void *stack, jmp_buf *switch_buf)
{
	int n;

	set_handler(SIGWINCH);

	/*
	 * Can't use UML_SETJMP or UML_LONGJMP here because they save
	 * and restore signals, with the possible side-effect of
	 * trying to handle any signals which came when they were
	 * blocked, which can't be done on this stack.
	 * Signals must be blocked when jumping back here and restored
	 * after returning to the jumper.
	 */
	n = setjmp(initial_jmpbuf);
	switch (n) {
	/* Direct return from setjmp(): prepare the first thread's context. */
	case INIT_JMP_NEW_THREAD:
		(*switch_buf)[0].JB_IP = (unsigned long) uml_finishsetup;
		(*switch_buf)[0].JB_SP = (unsigned long) stack +
			UM_THREAD_SIZE - sizeof(void *);
		break;
	/* Requested by initial_thread_cb_skas(): run the callback, jump back. */
	case INIT_JMP_CALLBACK:
		(*cb_proc)(cb_arg);
		longjmp(*cb_back, 1);
		break;
	case INIT_JMP_HALT:
		kmalloc_ok = 0;
		return 0;
	case INIT_JMP_REBOOT:
		kmalloc_ok = 0;
		return 1;
	default:
		printk(UM_KERN_ERR "Bad sigsetjmp return in "
		       "start_idle_thread - %d\n", n);
		fatal_sigsegv();
	}
	/* Only the INIT_JMP_NEW_THREAD case reaches this point. */
	longjmp(*switch_buf, 1);

	/* unreachable */
	printk(UM_KERN_ERR "impossible long jump!");
	fatal_sigsegv();
	return 0;
}
623*4882a593Smuzhiyun 
initial_thread_cb_skas(void (* proc)(void *),void * arg)624*4882a593Smuzhiyun void initial_thread_cb_skas(void (*proc)(void *), void *arg)
625*4882a593Smuzhiyun {
626*4882a593Smuzhiyun 	jmp_buf here;
627*4882a593Smuzhiyun 
628*4882a593Smuzhiyun 	cb_proc = proc;
629*4882a593Smuzhiyun 	cb_arg = arg;
630*4882a593Smuzhiyun 	cb_back = &here;
631*4882a593Smuzhiyun 
632*4882a593Smuzhiyun 	block_signals_trace();
633*4882a593Smuzhiyun 	if (UML_SETJMP(&here) == 0)
634*4882a593Smuzhiyun 		UML_LONGJMP(&initial_jmpbuf, INIT_JMP_CALLBACK);
635*4882a593Smuzhiyun 	unblock_signals_trace();
636*4882a593Smuzhiyun 
637*4882a593Smuzhiyun 	cb_proc = NULL;
638*4882a593Smuzhiyun 	cb_arg = NULL;
639*4882a593Smuzhiyun 	cb_back = NULL;
640*4882a593Smuzhiyun }
641*4882a593Smuzhiyun 
/*
 * Halt: block signals and jump to the context saved by
 * start_idle_thread() with INIT_JMP_HALT, which makes that function
 * return 0.
 */
void halt_skas(void)
{
	block_signals_trace();
	UML_LONGJMP(&initial_jmpbuf, INIT_JMP_HALT);
}
647*4882a593Smuzhiyun 
/* Set by the "noreboot" option; makes reboot_skas() halt instead. */
static bool noreboot;

/*
 * Handler for the "noreboot" option: records the request in noreboot.
 * @str and @add are not used. Always returns 0.
 */
static int __init noreboot_cmd_param(char *str, int *add)
{
	noreboot = true;
	return 0;
}

__uml_setup("noreboot", noreboot_cmd_param,
"noreboot\n"
"    Rather than rebooting, exit always, akin to QEMU's -no-reboot option.\n"
"    This is useful if you're using CONFIG_PANIC_TIMEOUT in order to catch\n"
"    crashes in CI\n");
661*4882a593Smuzhiyun 
reboot_skas(void)662*4882a593Smuzhiyun void reboot_skas(void)
663*4882a593Smuzhiyun {
664*4882a593Smuzhiyun 	block_signals_trace();
665*4882a593Smuzhiyun 	UML_LONGJMP(&initial_jmpbuf, noreboot ? INIT_JMP_HALT : INIT_JMP_REBOOT);
666*4882a593Smuzhiyun }
667*4882a593Smuzhiyun 
__switch_mm(struct mm_id * mm_idp)668*4882a593Smuzhiyun void __switch_mm(struct mm_id *mm_idp)
669*4882a593Smuzhiyun {
670*4882a593Smuzhiyun 	userspace_pid[0] = mm_idp->u.pid;
671*4882a593Smuzhiyun 	kill_userspace_mm[0] = mm_idp->kill;
672*4882a593Smuzhiyun }
673