1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-only
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun * single_step_syscall.c - single-steps various x86 syscalls
4*4882a593Smuzhiyun * Copyright (c) 2014-2015 Andrew Lutomirski
5*4882a593Smuzhiyun *
6*4882a593Smuzhiyun * This is a very simple series of tests that makes system calls with
7*4882a593Smuzhiyun * the TF flag set. This exercises some nasty kernel code in the
8*4882a593Smuzhiyun * SYSENTER case: SYSENTER does not clear TF, so SYSENTER with TF set
9*4882a593Smuzhiyun * immediately issues #DB from CPL 0. This requires special handling in
10*4882a593Smuzhiyun * the kernel.
11*4882a593Smuzhiyun */
12*4882a593Smuzhiyun
13*4882a593Smuzhiyun #define _GNU_SOURCE
14*4882a593Smuzhiyun
15*4882a593Smuzhiyun #include <sys/time.h>
16*4882a593Smuzhiyun #include <time.h>
17*4882a593Smuzhiyun #include <stdlib.h>
18*4882a593Smuzhiyun #include <sys/syscall.h>
19*4882a593Smuzhiyun #include <unistd.h>
20*4882a593Smuzhiyun #include <stdio.h>
21*4882a593Smuzhiyun #include <string.h>
22*4882a593Smuzhiyun #include <inttypes.h>
23*4882a593Smuzhiyun #include <sys/mman.h>
24*4882a593Smuzhiyun #include <sys/signal.h>
25*4882a593Smuzhiyun #include <sys/ucontext.h>
26*4882a593Smuzhiyun #include <asm/ldt.h>
27*4882a593Smuzhiyun #include <err.h>
28*4882a593Smuzhiyun #include <setjmp.h>
29*4882a593Smuzhiyun #include <stddef.h>
30*4882a593Smuzhiyun #include <stdbool.h>
31*4882a593Smuzhiyun #include <sys/ptrace.h>
32*4882a593Smuzhiyun #include <sys/user.h>
33*4882a593Smuzhiyun
34*4882a593Smuzhiyun #include "helpers.h"
35*4882a593Smuzhiyun
/*
 * sethandler - install a three-argument (SA_SIGINFO) handler for a signal.
 * @sig:     signal number to hook
 * @handler: handler invoked with the signal number, siginfo and ucontext
 * @flags:   extra SA_* flags; SA_SIGINFO is always OR-ed in
 *
 * The handler runs with an empty additional signal mask.  The previous
 * disposition is not retrieved.  A sigaction() failure is fatal: err()
 * prints the errno text and exits with status 1.
 */
static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
		       int flags)
{
	struct sigaction sa;

	memset(&sa, 0, sizeof(sa));
	sa.sa_sigaction = handler;
	sa.sa_flags = SA_SIGINFO | flags;
	sigemptyset(&sa.sa_mask);

	/* NULL, not 0: the third argument is a pointer we do not need. */
	if (sigaction(sig, &sa, NULL))
		err(1, "sigaction");
}
47*4882a593Smuzhiyun
/*
 * clearhandler - restore the default disposition (SIG_DFL) for a signal.
 * @sig: signal number to reset
 *
 * A sigaction() failure is fatal: err() prints the errno text and exits
 * with status 1.
 */
static void clearhandler(int sig)
{
	struct sigaction sa;

	memset(&sa, 0, sizeof(sa));
	sa.sa_handler = SIG_DFL;
	sigemptyset(&sa.sa_mask);

	/* NULL, not 0: the previous action is not wanted. */
	if (sigaction(sig, &sa, NULL))
		err(1, "sigaction");
}
57*4882a593Smuzhiyun
58*4882a593Smuzhiyun static volatile sig_atomic_t sig_traps, sig_eflags;
59*4882a593Smuzhiyun sigjmp_buf jmpbuf;
60*4882a593Smuzhiyun static unsigned char altstack_data[SIGSTKSZ];
61*4882a593Smuzhiyun
/*
 * Per-architecture spellings used by the handlers and inline asm below:
 *   REG_IP         - gregs[] index of the instruction pointer
 *   WIDTH          - operand-size suffix pasted onto pushf/pop mnemonics
 *   INT80_CLOBBERS - clobber list for the "int $0x80" asm (empty on 32-bit)
 */
#ifndef __x86_64__
# define REG_IP REG_EIP
# define WIDTH "l"
# define INT80_CLOBBERS
#else
# define REG_IP REG_RIP
# define WIDTH "q"
# define INT80_CLOBBERS "r8", "r9", "r10", "r11"
#endif
71*4882a593Smuzhiyun
sigtrap(int sig,siginfo_t * info,void * ctx_void)72*4882a593Smuzhiyun static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
73*4882a593Smuzhiyun {
74*4882a593Smuzhiyun ucontext_t *ctx = (ucontext_t*)ctx_void;
75*4882a593Smuzhiyun
76*4882a593Smuzhiyun if (get_eflags() & X86_EFLAGS_TF) {
77*4882a593Smuzhiyun set_eflags(get_eflags() & ~X86_EFLAGS_TF);
78*4882a593Smuzhiyun printf("[WARN]\tSIGTRAP handler had TF set\n");
79*4882a593Smuzhiyun _exit(1);
80*4882a593Smuzhiyun }
81*4882a593Smuzhiyun
82*4882a593Smuzhiyun sig_traps++;
83*4882a593Smuzhiyun
84*4882a593Smuzhiyun if (sig_traps == 10000 || sig_traps == 10001) {
85*4882a593Smuzhiyun printf("[WARN]\tHit %d SIGTRAPs with si_addr 0x%lx, ip 0x%lx\n",
86*4882a593Smuzhiyun (int)sig_traps,
87*4882a593Smuzhiyun (unsigned long)info->si_addr,
88*4882a593Smuzhiyun (unsigned long)ctx->uc_mcontext.gregs[REG_IP]);
89*4882a593Smuzhiyun }
90*4882a593Smuzhiyun }
91*4882a593Smuzhiyun
/*
 * Printable names for the signals this test reports, indexed by signal
 * number.  Slots for any other signal number below the largest listed one
 * are NULL.
 */
static char const * const signames[] = {
	[SIGSEGV] = "SIGSEGV",
	[SIGBUS] = "SIGBUS",
	[SIGTRAP] = "SIGTRAP",
	[SIGILL] = "SIGILL",
};
98*4882a593Smuzhiyun
print_and_longjmp(int sig,siginfo_t * si,void * ctx_void)99*4882a593Smuzhiyun static void print_and_longjmp(int sig, siginfo_t *si, void *ctx_void)
100*4882a593Smuzhiyun {
101*4882a593Smuzhiyun ucontext_t *ctx = ctx_void;
102*4882a593Smuzhiyun
103*4882a593Smuzhiyun printf("\tGot %s with RIP=%lx, TF=%ld\n", signames[sig],
104*4882a593Smuzhiyun (unsigned long)ctx->uc_mcontext.gregs[REG_IP],
105*4882a593Smuzhiyun (unsigned long)ctx->uc_mcontext.gregs[REG_EFL] & X86_EFLAGS_TF);
106*4882a593Smuzhiyun
107*4882a593Smuzhiyun sig_eflags = (unsigned long)ctx->uc_mcontext.gregs[REG_EFL];
108*4882a593Smuzhiyun siglongjmp(jmpbuf, 1);
109*4882a593Smuzhiyun }
110*4882a593Smuzhiyun
check_result(void)111*4882a593Smuzhiyun static void check_result(void)
112*4882a593Smuzhiyun {
113*4882a593Smuzhiyun unsigned long new_eflags = get_eflags();
114*4882a593Smuzhiyun set_eflags(new_eflags & ~X86_EFLAGS_TF);
115*4882a593Smuzhiyun
116*4882a593Smuzhiyun if (!sig_traps) {
117*4882a593Smuzhiyun printf("[FAIL]\tNo SIGTRAP\n");
118*4882a593Smuzhiyun exit(1);
119*4882a593Smuzhiyun }
120*4882a593Smuzhiyun
121*4882a593Smuzhiyun if (!(new_eflags & X86_EFLAGS_TF)) {
122*4882a593Smuzhiyun printf("[FAIL]\tTF was cleared\n");
123*4882a593Smuzhiyun exit(1);
124*4882a593Smuzhiyun }
125*4882a593Smuzhiyun
126*4882a593Smuzhiyun printf("[OK]\tSurvived with TF set and %d traps\n", (int)sig_traps);
127*4882a593Smuzhiyun sig_traps = 0;
128*4882a593Smuzhiyun }
129*4882a593Smuzhiyun
fast_syscall_no_tf(void)130*4882a593Smuzhiyun static void fast_syscall_no_tf(void)
131*4882a593Smuzhiyun {
132*4882a593Smuzhiyun sig_traps = 0;
133*4882a593Smuzhiyun printf("[RUN]\tFast syscall with TF cleared\n");
134*4882a593Smuzhiyun fflush(stdout); /* Force a syscall */
135*4882a593Smuzhiyun if (get_eflags() & X86_EFLAGS_TF) {
136*4882a593Smuzhiyun printf("[FAIL]\tTF is now set\n");
137*4882a593Smuzhiyun exit(1);
138*4882a593Smuzhiyun }
139*4882a593Smuzhiyun if (sig_traps) {
140*4882a593Smuzhiyun printf("[FAIL]\tGot SIGTRAP\n");
141*4882a593Smuzhiyun exit(1);
142*4882a593Smuzhiyun }
143*4882a593Smuzhiyun printf("[OK]\tNothing unexpected happened\n");
144*4882a593Smuzhiyun }
145*4882a593Smuzhiyun
main()146*4882a593Smuzhiyun int main()
147*4882a593Smuzhiyun {
148*4882a593Smuzhiyun #ifdef CAN_BUILD_32
149*4882a593Smuzhiyun int tmp;
150*4882a593Smuzhiyun #endif
151*4882a593Smuzhiyun
152*4882a593Smuzhiyun sethandler(SIGTRAP, sigtrap, 0);
153*4882a593Smuzhiyun
154*4882a593Smuzhiyun printf("[RUN]\tSet TF and check nop\n");
155*4882a593Smuzhiyun set_eflags(get_eflags() | X86_EFLAGS_TF);
156*4882a593Smuzhiyun asm volatile ("nop");
157*4882a593Smuzhiyun check_result();
158*4882a593Smuzhiyun
159*4882a593Smuzhiyun #ifdef __x86_64__
160*4882a593Smuzhiyun printf("[RUN]\tSet TF and check syscall-less opportunistic sysret\n");
161*4882a593Smuzhiyun set_eflags(get_eflags() | X86_EFLAGS_TF);
162*4882a593Smuzhiyun extern unsigned char post_nop[];
163*4882a593Smuzhiyun asm volatile ("pushf" WIDTH "\n\t"
164*4882a593Smuzhiyun "pop" WIDTH " %%r11\n\t"
165*4882a593Smuzhiyun "nop\n\t"
166*4882a593Smuzhiyun "post_nop:"
167*4882a593Smuzhiyun : : "c" (post_nop) : "r11");
168*4882a593Smuzhiyun check_result();
169*4882a593Smuzhiyun #endif
170*4882a593Smuzhiyun #ifdef CAN_BUILD_32
171*4882a593Smuzhiyun printf("[RUN]\tSet TF and check int80\n");
172*4882a593Smuzhiyun set_eflags(get_eflags() | X86_EFLAGS_TF);
173*4882a593Smuzhiyun asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid)
174*4882a593Smuzhiyun : INT80_CLOBBERS);
175*4882a593Smuzhiyun check_result();
176*4882a593Smuzhiyun #endif
177*4882a593Smuzhiyun
178*4882a593Smuzhiyun /*
179*4882a593Smuzhiyun * This test is particularly interesting if fast syscalls use
180*4882a593Smuzhiyun * SYSENTER: it triggers a nasty design flaw in SYSENTER.
181*4882a593Smuzhiyun * Specifically, SYSENTER does not clear TF, so either SYSENTER
182*4882a593Smuzhiyun * or the next instruction traps at CPL0. (Of course, Intel
183*4882a593Smuzhiyun * mostly forgot to document exactly what happens here.) So we
184*4882a593Smuzhiyun * get a CPL0 fault with usergs (on 64-bit kernels) and possibly
185*4882a593Smuzhiyun * no stack. The only sane way the kernel can possibly handle
186*4882a593Smuzhiyun * it is to clear TF on return from the #DB handler, but this
187*4882a593Smuzhiyun * happens way too early to set TF in the saved pt_regs, so the
188*4882a593Smuzhiyun * kernel has to do something clever to avoid losing track of
189*4882a593Smuzhiyun * the TF bit.
190*4882a593Smuzhiyun *
191*4882a593Smuzhiyun * Needless to say, we've had bugs in this area.
192*4882a593Smuzhiyun */
193*4882a593Smuzhiyun syscall(SYS_getpid); /* Force symbol binding without TF set. */
194*4882a593Smuzhiyun printf("[RUN]\tSet TF and check a fast syscall\n");
195*4882a593Smuzhiyun set_eflags(get_eflags() | X86_EFLAGS_TF);
196*4882a593Smuzhiyun syscall(SYS_getpid);
197*4882a593Smuzhiyun check_result();
198*4882a593Smuzhiyun
199*4882a593Smuzhiyun /* Now make sure that another fast syscall doesn't set TF again. */
200*4882a593Smuzhiyun fast_syscall_no_tf();
201*4882a593Smuzhiyun
202*4882a593Smuzhiyun /*
203*4882a593Smuzhiyun * And do a forced SYSENTER to make sure that this works even if
204*4882a593Smuzhiyun * fast syscalls don't use SYSENTER.
205*4882a593Smuzhiyun *
206*4882a593Smuzhiyun * Invoking SYSENTER directly breaks all the rules. Just handle
207*4882a593Smuzhiyun * the SIGSEGV.
208*4882a593Smuzhiyun */
209*4882a593Smuzhiyun if (sigsetjmp(jmpbuf, 1) == 0) {
210*4882a593Smuzhiyun unsigned long nr = SYS_getpid;
211*4882a593Smuzhiyun printf("[RUN]\tSet TF and check SYSENTER\n");
212*4882a593Smuzhiyun stack_t stack = {
213*4882a593Smuzhiyun .ss_sp = altstack_data,
214*4882a593Smuzhiyun .ss_size = SIGSTKSZ,
215*4882a593Smuzhiyun };
216*4882a593Smuzhiyun if (sigaltstack(&stack, NULL) != 0)
217*4882a593Smuzhiyun err(1, "sigaltstack");
218*4882a593Smuzhiyun sethandler(SIGSEGV, print_and_longjmp,
219*4882a593Smuzhiyun SA_RESETHAND | SA_ONSTACK);
220*4882a593Smuzhiyun sethandler(SIGILL, print_and_longjmp, SA_RESETHAND);
221*4882a593Smuzhiyun set_eflags(get_eflags() | X86_EFLAGS_TF);
222*4882a593Smuzhiyun /* Clear EBP first to make sure we segfault cleanly. */
223*4882a593Smuzhiyun asm volatile ("xorl %%ebp, %%ebp; SYSENTER" : "+a" (nr) :: "flags", "rcx"
224*4882a593Smuzhiyun #ifdef __x86_64__
225*4882a593Smuzhiyun , "r11"
226*4882a593Smuzhiyun #endif
227*4882a593Smuzhiyun );
228*4882a593Smuzhiyun
229*4882a593Smuzhiyun /* We're unreachable here. SYSENTER forgets RIP. */
230*4882a593Smuzhiyun }
231*4882a593Smuzhiyun clearhandler(SIGSEGV);
232*4882a593Smuzhiyun clearhandler(SIGILL);
233*4882a593Smuzhiyun if (!(sig_eflags & X86_EFLAGS_TF)) {
234*4882a593Smuzhiyun printf("[FAIL]\tTF was cleared\n");
235*4882a593Smuzhiyun exit(1);
236*4882a593Smuzhiyun }
237*4882a593Smuzhiyun
238*4882a593Smuzhiyun /* Now make sure that another fast syscall doesn't set TF again. */
239*4882a593Smuzhiyun fast_syscall_no_tf();
240*4882a593Smuzhiyun
241*4882a593Smuzhiyun return 0;
242*4882a593Smuzhiyun }
243