1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-only
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun * fsgsbase.c, an fsgsbase test
4*4882a593Smuzhiyun * Copyright (c) 2014-2016 Andy Lutomirski
5*4882a593Smuzhiyun */
6*4882a593Smuzhiyun
7*4882a593Smuzhiyun #define _GNU_SOURCE
8*4882a593Smuzhiyun #include <stdio.h>
9*4882a593Smuzhiyun #include <stdlib.h>
10*4882a593Smuzhiyun #include <stdbool.h>
11*4882a593Smuzhiyun #include <string.h>
12*4882a593Smuzhiyun #include <sys/syscall.h>
13*4882a593Smuzhiyun #include <unistd.h>
14*4882a593Smuzhiyun #include <err.h>
15*4882a593Smuzhiyun #include <sys/user.h>
16*4882a593Smuzhiyun #include <asm/prctl.h>
17*4882a593Smuzhiyun #include <sys/prctl.h>
18*4882a593Smuzhiyun #include <signal.h>
19*4882a593Smuzhiyun #include <limits.h>
20*4882a593Smuzhiyun #include <sys/ucontext.h>
21*4882a593Smuzhiyun #include <sched.h>
22*4882a593Smuzhiyun #include <linux/futex.h>
23*4882a593Smuzhiyun #include <pthread.h>
24*4882a593Smuzhiyun #include <asm/ldt.h>
25*4882a593Smuzhiyun #include <sys/mman.h>
26*4882a593Smuzhiyun #include <stddef.h>
27*4882a593Smuzhiyun #include <sys/ptrace.h>
28*4882a593Smuzhiyun #include <sys/wait.h>
29*4882a593Smuzhiyun #include <setjmp.h>
30*4882a593Smuzhiyun
31*4882a593Smuzhiyun #ifndef __x86_64__
32*4882a593Smuzhiyun # error This test is 64-bit only
33*4882a593Smuzhiyun #endif
34*4882a593Smuzhiyun
/*
 * Armed (set to true) by read_base() right before a load that is expected
 * to fault; the SIGSEGV handler clears it once the fault has been consumed.
 */
static volatile sig_atomic_t want_segv;

/* Faulting address (si_addr) recorded by the SIGSEGV handler. */
static volatile unsigned long segv_addr;

/*
 * One MAP_SHARED page set up by main(); a forked child stores the selector
 * returned by load_gs() here so that the tracing parent can read it.
 */
static unsigned short *shared_scratch;

/* Number of failed checks; main() exits non-zero when this is not 0. */
static int nerrs;
41*4882a593Smuzhiyun
/*
 * Install @handler as a three-argument (SA_SIGINFO) handler for @sig.
 *
 * @sig:     signal number to hook
 * @handler: sa_sigaction-style callback
 * @flags:   extra SA_* flags OR-ed with SA_SIGINFO
 *
 * The handler runs with an empty additional signal mask.  Terminates the
 * process through err() if sigaction() fails.
 */
static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
		       int flags)
{
	struct sigaction act = {0};

	act.sa_flags = SA_SIGINFO | flags;
	act.sa_sigaction = handler;
	sigemptyset(&act.sa_mask);

	if (sigaction(sig, &act, 0))
		err(1, "sigaction");
}
53*4882a593Smuzhiyun
/*
 * Restore the default disposition (SIG_DFL) for @sig.
 * Terminates the process through err() if sigaction() fails.
 */
static void clearhandler(int sig)
{
	struct sigaction act = {0};

	act.sa_handler = SIG_DFL;
	sigemptyset(&act.sa_mask);

	if (sigaction(sig, &act, 0))
		err(1, "sigaction");
}
63*4882a593Smuzhiyun
sigsegv(int sig,siginfo_t * si,void * ctx_void)64*4882a593Smuzhiyun static void sigsegv(int sig, siginfo_t *si, void *ctx_void)
65*4882a593Smuzhiyun {
66*4882a593Smuzhiyun ucontext_t *ctx = (ucontext_t*)ctx_void;
67*4882a593Smuzhiyun
68*4882a593Smuzhiyun if (!want_segv) {
69*4882a593Smuzhiyun clearhandler(SIGSEGV);
70*4882a593Smuzhiyun return; /* Crash cleanly. */
71*4882a593Smuzhiyun }
72*4882a593Smuzhiyun
73*4882a593Smuzhiyun want_segv = false;
74*4882a593Smuzhiyun segv_addr = (unsigned long)si->si_addr;
75*4882a593Smuzhiyun
76*4882a593Smuzhiyun ctx->uc_mcontext.gregs[REG_RIP] += 4; /* Skip the faulting mov */
77*4882a593Smuzhiyun
78*4882a593Smuzhiyun }
79*4882a593Smuzhiyun
/*
 * Jump target for the SIGILL probe.  It is only ever used with
 * sigsetjmp()/siglongjmp(), which take a sigjmp_buf (not a plain jmp_buf).
 */
static sigjmp_buf jmpbuf;

/*
 * SIGILL handler: unwind to the sigsetjmp() point that filled in jmpbuf,
 * making that sigsetjmp() call return 1.  Does not return.
 */
static void sigill(int sig, siginfo_t *si, void *ctx_void)
{
	siglongjmp(jmpbuf, 1);
}
86*4882a593Smuzhiyun
/*
 * True once main() has executed RDFSBASE without taking SIGILL, i.e. the
 * FSGSBASE instructions are usable from user mode.
 */
static bool have_fsgsbase;
88*4882a593Smuzhiyun
rdgsbase(void)89*4882a593Smuzhiyun static inline unsigned long rdgsbase(void)
90*4882a593Smuzhiyun {
91*4882a593Smuzhiyun unsigned long gsbase;
92*4882a593Smuzhiyun
93*4882a593Smuzhiyun asm volatile("rdgsbase %0" : "=r" (gsbase) :: "memory");
94*4882a593Smuzhiyun
95*4882a593Smuzhiyun return gsbase;
96*4882a593Smuzhiyun }
97*4882a593Smuzhiyun
rdfsbase(void)98*4882a593Smuzhiyun static inline unsigned long rdfsbase(void)
99*4882a593Smuzhiyun {
100*4882a593Smuzhiyun unsigned long fsbase;
101*4882a593Smuzhiyun
102*4882a593Smuzhiyun asm volatile("rdfsbase %0" : "=r" (fsbase) :: "memory");
103*4882a593Smuzhiyun
104*4882a593Smuzhiyun return fsbase;
105*4882a593Smuzhiyun }
106*4882a593Smuzhiyun
wrgsbase(unsigned long gsbase)107*4882a593Smuzhiyun static inline void wrgsbase(unsigned long gsbase)
108*4882a593Smuzhiyun {
109*4882a593Smuzhiyun asm volatile("wrgsbase %0" :: "r" (gsbase) : "memory");
110*4882a593Smuzhiyun }
111*4882a593Smuzhiyun
wrfsbase(unsigned long fsbase)112*4882a593Smuzhiyun static inline void wrfsbase(unsigned long fsbase)
113*4882a593Smuzhiyun {
114*4882a593Smuzhiyun asm volatile("wrfsbase %0" :: "r" (fsbase) : "memory");
115*4882a593Smuzhiyun }
116*4882a593Smuzhiyun
/* Selects which segment base read_base() should probe. */
enum which_base {
	FS,
	GS
};
118*4882a593Smuzhiyun
read_base(enum which_base which)119*4882a593Smuzhiyun static unsigned long read_base(enum which_base which)
120*4882a593Smuzhiyun {
121*4882a593Smuzhiyun unsigned long offset;
122*4882a593Smuzhiyun /*
123*4882a593Smuzhiyun * Unless we have FSGSBASE, there's no direct way to do this from
124*4882a593Smuzhiyun * user mode. We can get at it indirectly using signals, though.
125*4882a593Smuzhiyun */
126*4882a593Smuzhiyun
127*4882a593Smuzhiyun want_segv = true;
128*4882a593Smuzhiyun
129*4882a593Smuzhiyun offset = 0;
130*4882a593Smuzhiyun if (which == FS) {
131*4882a593Smuzhiyun /* Use a constant-length instruction here. */
132*4882a593Smuzhiyun asm volatile ("mov %%fs:(%%rcx), %%rax" : : "c" (offset) : "rax");
133*4882a593Smuzhiyun } else {
134*4882a593Smuzhiyun asm volatile ("mov %%gs:(%%rcx), %%rax" : : "c" (offset) : "rax");
135*4882a593Smuzhiyun }
136*4882a593Smuzhiyun if (!want_segv)
137*4882a593Smuzhiyun return segv_addr + offset;
138*4882a593Smuzhiyun
139*4882a593Smuzhiyun /*
140*4882a593Smuzhiyun * If that didn't segfault, try the other end of the address space.
141*4882a593Smuzhiyun * Unless we get really unlucky and run into the vsyscall page, this
142*4882a593Smuzhiyun * is guaranteed to segfault.
143*4882a593Smuzhiyun */
144*4882a593Smuzhiyun
145*4882a593Smuzhiyun offset = (ULONG_MAX >> 1) + 1;
146*4882a593Smuzhiyun if (which == FS) {
147*4882a593Smuzhiyun asm volatile ("mov %%fs:(%%rcx), %%rax"
148*4882a593Smuzhiyun : : "c" (offset) : "rax");
149*4882a593Smuzhiyun } else {
150*4882a593Smuzhiyun asm volatile ("mov %%gs:(%%rcx), %%rax"
151*4882a593Smuzhiyun : : "c" (offset) : "rax");
152*4882a593Smuzhiyun }
153*4882a593Smuzhiyun if (!want_segv)
154*4882a593Smuzhiyun return segv_addr + offset;
155*4882a593Smuzhiyun
156*4882a593Smuzhiyun abort();
157*4882a593Smuzhiyun }
158*4882a593Smuzhiyun
check_gs_value(unsigned long value)159*4882a593Smuzhiyun static void check_gs_value(unsigned long value)
160*4882a593Smuzhiyun {
161*4882a593Smuzhiyun unsigned long base;
162*4882a593Smuzhiyun unsigned short sel;
163*4882a593Smuzhiyun
164*4882a593Smuzhiyun printf("[RUN]\tARCH_SET_GS to 0x%lx\n", value);
165*4882a593Smuzhiyun if (syscall(SYS_arch_prctl, ARCH_SET_GS, value) != 0)
166*4882a593Smuzhiyun err(1, "ARCH_SET_GS");
167*4882a593Smuzhiyun
168*4882a593Smuzhiyun asm volatile ("mov %%gs, %0" : "=rm" (sel));
169*4882a593Smuzhiyun base = read_base(GS);
170*4882a593Smuzhiyun if (base == value) {
171*4882a593Smuzhiyun printf("[OK]\tGSBASE was set as expected (selector 0x%hx)\n",
172*4882a593Smuzhiyun sel);
173*4882a593Smuzhiyun } else {
174*4882a593Smuzhiyun nerrs++;
175*4882a593Smuzhiyun printf("[FAIL]\tGSBASE was not as expected: got 0x%lx (selector 0x%hx)\n",
176*4882a593Smuzhiyun base, sel);
177*4882a593Smuzhiyun }
178*4882a593Smuzhiyun
179*4882a593Smuzhiyun if (syscall(SYS_arch_prctl, ARCH_GET_GS, &base) != 0)
180*4882a593Smuzhiyun err(1, "ARCH_GET_GS");
181*4882a593Smuzhiyun if (base == value) {
182*4882a593Smuzhiyun printf("[OK]\tARCH_GET_GS worked as expected (selector 0x%hx)\n",
183*4882a593Smuzhiyun sel);
184*4882a593Smuzhiyun } else {
185*4882a593Smuzhiyun nerrs++;
186*4882a593Smuzhiyun printf("[FAIL]\tARCH_GET_GS was not as expected: got 0x%lx (selector 0x%hx)\n",
187*4882a593Smuzhiyun base, sel);
188*4882a593Smuzhiyun }
189*4882a593Smuzhiyun }
190*4882a593Smuzhiyun
mov_0_gs(unsigned long initial_base,bool schedule)191*4882a593Smuzhiyun static void mov_0_gs(unsigned long initial_base, bool schedule)
192*4882a593Smuzhiyun {
193*4882a593Smuzhiyun unsigned long base, arch_base;
194*4882a593Smuzhiyun
195*4882a593Smuzhiyun printf("[RUN]\tARCH_SET_GS to 0x%lx then mov 0 to %%gs%s\n", initial_base, schedule ? " and schedule " : "");
196*4882a593Smuzhiyun if (syscall(SYS_arch_prctl, ARCH_SET_GS, initial_base) != 0)
197*4882a593Smuzhiyun err(1, "ARCH_SET_GS");
198*4882a593Smuzhiyun
199*4882a593Smuzhiyun if (schedule)
200*4882a593Smuzhiyun usleep(10);
201*4882a593Smuzhiyun
202*4882a593Smuzhiyun asm volatile ("mov %0, %%gs" : : "rm" (0));
203*4882a593Smuzhiyun base = read_base(GS);
204*4882a593Smuzhiyun if (syscall(SYS_arch_prctl, ARCH_GET_GS, &arch_base) != 0)
205*4882a593Smuzhiyun err(1, "ARCH_GET_GS");
206*4882a593Smuzhiyun if (base == arch_base) {
207*4882a593Smuzhiyun printf("[OK]\tGSBASE is 0x%lx\n", base);
208*4882a593Smuzhiyun } else {
209*4882a593Smuzhiyun nerrs++;
210*4882a593Smuzhiyun printf("[FAIL]\tGSBASE changed to 0x%lx but kernel reports 0x%lx\n", base, arch_base);
211*4882a593Smuzhiyun }
212*4882a593Smuzhiyun }
213*4882a593Smuzhiyun
/*
 * GS base the helper thread should program on its next "set base" command
 * (ftx == 1).  May hold HARD_ZERO.
 */
static volatile unsigned long remote_base;

/*
 * Futex word used as the command channel between the main thread and the
 * helper thread:
 *   0 - idle / command completed
 *   1 - helper: apply remote_base
 *   2 - helper: load a segment into GS, then clear the selector
 *   3 - helper: exit
 */
static volatile unsigned int ftx;
217*4882a593Smuzhiyun
/*
 * ARCH_SET_FS/GS(0) may or may not program a selector of zero.  HARD_ZERO
 * is a sentinel base value that means "set the base to 0 and additionally
 * force the selector to zero", to improve test coverage.
 */
#define HARD_ZERO 0xa1fa5f343cb85fa4UL
223*4882a593Smuzhiyun
do_remote_base()224*4882a593Smuzhiyun static void do_remote_base()
225*4882a593Smuzhiyun {
226*4882a593Smuzhiyun unsigned long to_set = remote_base;
227*4882a593Smuzhiyun bool hard_zero = false;
228*4882a593Smuzhiyun if (to_set == HARD_ZERO) {
229*4882a593Smuzhiyun to_set = 0;
230*4882a593Smuzhiyun hard_zero = true;
231*4882a593Smuzhiyun }
232*4882a593Smuzhiyun
233*4882a593Smuzhiyun if (syscall(SYS_arch_prctl, ARCH_SET_GS, to_set) != 0)
234*4882a593Smuzhiyun err(1, "ARCH_SET_GS");
235*4882a593Smuzhiyun
236*4882a593Smuzhiyun if (hard_zero)
237*4882a593Smuzhiyun asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0));
238*4882a593Smuzhiyun
239*4882a593Smuzhiyun unsigned short sel;
240*4882a593Smuzhiyun asm volatile ("mov %%gs, %0" : "=rm" (sel));
241*4882a593Smuzhiyun printf("\tother thread: ARCH_SET_GS(0x%lx)%s -- sel is 0x%hx\n",
242*4882a593Smuzhiyun to_set, hard_zero ? " and clear gs" : "", sel);
243*4882a593Smuzhiyun }
244*4882a593Smuzhiyun
245*4882a593Smuzhiyun static __thread int set_thread_area_entry_number = -1;
246*4882a593Smuzhiyun
load_gs(void)247*4882a593Smuzhiyun static unsigned short load_gs(void)
248*4882a593Smuzhiyun {
249*4882a593Smuzhiyun /*
250*4882a593Smuzhiyun * Sets GS != 0 and GSBASE != 0 but arranges for the kernel to think
251*4882a593Smuzhiyun * that GSBASE == 0 (i.e. thread.gsbase == 0).
252*4882a593Smuzhiyun */
253*4882a593Smuzhiyun
254*4882a593Smuzhiyun /* Step 1: tell the kernel that we have GSBASE == 0. */
255*4882a593Smuzhiyun if (syscall(SYS_arch_prctl, ARCH_SET_GS, 0) != 0)
256*4882a593Smuzhiyun err(1, "ARCH_SET_GS");
257*4882a593Smuzhiyun
258*4882a593Smuzhiyun /* Step 2: change GSBASE without telling the kernel. */
259*4882a593Smuzhiyun struct user_desc desc = {
260*4882a593Smuzhiyun .entry_number = 0,
261*4882a593Smuzhiyun .base_addr = 0xBAADF00D,
262*4882a593Smuzhiyun .limit = 0xfffff,
263*4882a593Smuzhiyun .seg_32bit = 1,
264*4882a593Smuzhiyun .contents = 0, /* Data, grow-up */
265*4882a593Smuzhiyun .read_exec_only = 0,
266*4882a593Smuzhiyun .limit_in_pages = 1,
267*4882a593Smuzhiyun .seg_not_present = 0,
268*4882a593Smuzhiyun .useable = 0
269*4882a593Smuzhiyun };
270*4882a593Smuzhiyun if (syscall(SYS_modify_ldt, 1, &desc, sizeof(desc)) == 0) {
271*4882a593Smuzhiyun printf("\tusing LDT slot 0\n");
272*4882a593Smuzhiyun asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0x7));
273*4882a593Smuzhiyun return 0x7;
274*4882a593Smuzhiyun } else {
275*4882a593Smuzhiyun /* No modify_ldt for us (configured out, perhaps) */
276*4882a593Smuzhiyun
277*4882a593Smuzhiyun struct user_desc *low_desc = mmap(
278*4882a593Smuzhiyun NULL, sizeof(desc),
279*4882a593Smuzhiyun PROT_READ | PROT_WRITE,
280*4882a593Smuzhiyun MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0);
281*4882a593Smuzhiyun memcpy(low_desc, &desc, sizeof(desc));
282*4882a593Smuzhiyun
283*4882a593Smuzhiyun low_desc->entry_number = set_thread_area_entry_number;
284*4882a593Smuzhiyun
285*4882a593Smuzhiyun /* 32-bit set_thread_area */
286*4882a593Smuzhiyun long ret;
287*4882a593Smuzhiyun asm volatile ("int $0x80"
288*4882a593Smuzhiyun : "=a" (ret), "+m" (*low_desc)
289*4882a593Smuzhiyun : "a" (243), "b" (low_desc)
290*4882a593Smuzhiyun : "r8", "r9", "r10", "r11");
291*4882a593Smuzhiyun memcpy(&desc, low_desc, sizeof(desc));
292*4882a593Smuzhiyun munmap(low_desc, sizeof(desc));
293*4882a593Smuzhiyun
294*4882a593Smuzhiyun if (ret != 0) {
295*4882a593Smuzhiyun printf("[NOTE]\tcould not create a segment -- test won't do anything\n");
296*4882a593Smuzhiyun return 0;
297*4882a593Smuzhiyun }
298*4882a593Smuzhiyun printf("\tusing GDT slot %d\n", desc.entry_number);
299*4882a593Smuzhiyun set_thread_area_entry_number = desc.entry_number;
300*4882a593Smuzhiyun
301*4882a593Smuzhiyun unsigned short gs = (unsigned short)((desc.entry_number << 3) | 0x3);
302*4882a593Smuzhiyun asm volatile ("mov %0, %%gs" : : "rm" (gs));
303*4882a593Smuzhiyun return gs;
304*4882a593Smuzhiyun }
305*4882a593Smuzhiyun }
306*4882a593Smuzhiyun
test_wrbase(unsigned short index,unsigned long base)307*4882a593Smuzhiyun void test_wrbase(unsigned short index, unsigned long base)
308*4882a593Smuzhiyun {
309*4882a593Smuzhiyun unsigned short newindex;
310*4882a593Smuzhiyun unsigned long newbase;
311*4882a593Smuzhiyun
312*4882a593Smuzhiyun printf("[RUN]\tGS = 0x%hx, GSBASE = 0x%lx\n", index, base);
313*4882a593Smuzhiyun
314*4882a593Smuzhiyun asm volatile ("mov %0, %%gs" : : "rm" (index));
315*4882a593Smuzhiyun wrgsbase(base);
316*4882a593Smuzhiyun
317*4882a593Smuzhiyun remote_base = 0;
318*4882a593Smuzhiyun ftx = 1;
319*4882a593Smuzhiyun syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
320*4882a593Smuzhiyun while (ftx != 0)
321*4882a593Smuzhiyun syscall(SYS_futex, &ftx, FUTEX_WAIT, 1, NULL, NULL, 0);
322*4882a593Smuzhiyun
323*4882a593Smuzhiyun asm volatile ("mov %%gs, %0" : "=rm" (newindex));
324*4882a593Smuzhiyun newbase = rdgsbase();
325*4882a593Smuzhiyun
326*4882a593Smuzhiyun if (newindex == index && newbase == base) {
327*4882a593Smuzhiyun printf("[OK]\tIndex and base were preserved\n");
328*4882a593Smuzhiyun } else {
329*4882a593Smuzhiyun printf("[FAIL]\tAfter switch, GS = 0x%hx and GSBASE = 0x%lx\n",
330*4882a593Smuzhiyun newindex, newbase);
331*4882a593Smuzhiyun nerrs++;
332*4882a593Smuzhiyun }
333*4882a593Smuzhiyun }
334*4882a593Smuzhiyun
threadproc(void * ctx)335*4882a593Smuzhiyun static void *threadproc(void *ctx)
336*4882a593Smuzhiyun {
337*4882a593Smuzhiyun while (1) {
338*4882a593Smuzhiyun while (ftx == 0)
339*4882a593Smuzhiyun syscall(SYS_futex, &ftx, FUTEX_WAIT, 0, NULL, NULL, 0);
340*4882a593Smuzhiyun if (ftx == 3)
341*4882a593Smuzhiyun return NULL;
342*4882a593Smuzhiyun
343*4882a593Smuzhiyun if (ftx == 1) {
344*4882a593Smuzhiyun do_remote_base();
345*4882a593Smuzhiyun } else if (ftx == 2) {
346*4882a593Smuzhiyun /*
347*4882a593Smuzhiyun * On AMD chips, this causes GSBASE != 0, GS == 0, and
348*4882a593Smuzhiyun * thread.gsbase == 0.
349*4882a593Smuzhiyun */
350*4882a593Smuzhiyun
351*4882a593Smuzhiyun load_gs();
352*4882a593Smuzhiyun asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0));
353*4882a593Smuzhiyun } else {
354*4882a593Smuzhiyun errx(1, "helper thread got bad command");
355*4882a593Smuzhiyun }
356*4882a593Smuzhiyun
357*4882a593Smuzhiyun ftx = 0;
358*4882a593Smuzhiyun syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
359*4882a593Smuzhiyun }
360*4882a593Smuzhiyun }
361*4882a593Smuzhiyun
set_gs_and_switch_to(unsigned long local,unsigned short force_sel,unsigned long remote)362*4882a593Smuzhiyun static void set_gs_and_switch_to(unsigned long local,
363*4882a593Smuzhiyun unsigned short force_sel,
364*4882a593Smuzhiyun unsigned long remote)
365*4882a593Smuzhiyun {
366*4882a593Smuzhiyun unsigned long base;
367*4882a593Smuzhiyun unsigned short sel_pre_sched, sel_post_sched;
368*4882a593Smuzhiyun
369*4882a593Smuzhiyun bool hard_zero = false;
370*4882a593Smuzhiyun if (local == HARD_ZERO) {
371*4882a593Smuzhiyun hard_zero = true;
372*4882a593Smuzhiyun local = 0;
373*4882a593Smuzhiyun }
374*4882a593Smuzhiyun
375*4882a593Smuzhiyun printf("[RUN]\tARCH_SET_GS(0x%lx)%s, then schedule to 0x%lx\n",
376*4882a593Smuzhiyun local, hard_zero ? " and clear gs" : "", remote);
377*4882a593Smuzhiyun if (force_sel)
378*4882a593Smuzhiyun printf("\tBefore schedule, set selector to 0x%hx\n", force_sel);
379*4882a593Smuzhiyun if (syscall(SYS_arch_prctl, ARCH_SET_GS, local) != 0)
380*4882a593Smuzhiyun err(1, "ARCH_SET_GS");
381*4882a593Smuzhiyun if (hard_zero)
382*4882a593Smuzhiyun asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0));
383*4882a593Smuzhiyun
384*4882a593Smuzhiyun if (read_base(GS) != local) {
385*4882a593Smuzhiyun nerrs++;
386*4882a593Smuzhiyun printf("[FAIL]\tGSBASE wasn't set as expected\n");
387*4882a593Smuzhiyun }
388*4882a593Smuzhiyun
389*4882a593Smuzhiyun if (force_sel) {
390*4882a593Smuzhiyun asm volatile ("mov %0, %%gs" : : "rm" (force_sel));
391*4882a593Smuzhiyun sel_pre_sched = force_sel;
392*4882a593Smuzhiyun local = read_base(GS);
393*4882a593Smuzhiyun
394*4882a593Smuzhiyun /*
395*4882a593Smuzhiyun * Signal delivery seems to mess up weird selectors. Put it
396*4882a593Smuzhiyun * back.
397*4882a593Smuzhiyun */
398*4882a593Smuzhiyun asm volatile ("mov %0, %%gs" : : "rm" (force_sel));
399*4882a593Smuzhiyun } else {
400*4882a593Smuzhiyun asm volatile ("mov %%gs, %0" : "=rm" (sel_pre_sched));
401*4882a593Smuzhiyun }
402*4882a593Smuzhiyun
403*4882a593Smuzhiyun remote_base = remote;
404*4882a593Smuzhiyun ftx = 1;
405*4882a593Smuzhiyun syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
406*4882a593Smuzhiyun while (ftx != 0)
407*4882a593Smuzhiyun syscall(SYS_futex, &ftx, FUTEX_WAIT, 1, NULL, NULL, 0);
408*4882a593Smuzhiyun
409*4882a593Smuzhiyun asm volatile ("mov %%gs, %0" : "=rm" (sel_post_sched));
410*4882a593Smuzhiyun base = read_base(GS);
411*4882a593Smuzhiyun if (base == local && sel_pre_sched == sel_post_sched) {
412*4882a593Smuzhiyun printf("[OK]\tGS/BASE remained 0x%hx/0x%lx\n",
413*4882a593Smuzhiyun sel_pre_sched, local);
414*4882a593Smuzhiyun } else {
415*4882a593Smuzhiyun nerrs++;
416*4882a593Smuzhiyun printf("[FAIL]\tGS/BASE changed from 0x%hx/0x%lx to 0x%hx/0x%lx\n",
417*4882a593Smuzhiyun sel_pre_sched, local, sel_post_sched, base);
418*4882a593Smuzhiyun }
419*4882a593Smuzhiyun }
420*4882a593Smuzhiyun
test_unexpected_base(void)421*4882a593Smuzhiyun static void test_unexpected_base(void)
422*4882a593Smuzhiyun {
423*4882a593Smuzhiyun unsigned long base;
424*4882a593Smuzhiyun
425*4882a593Smuzhiyun printf("[RUN]\tARCH_SET_GS(0), clear gs, then manipulate GSBASE in a different thread\n");
426*4882a593Smuzhiyun if (syscall(SYS_arch_prctl, ARCH_SET_GS, 0) != 0)
427*4882a593Smuzhiyun err(1, "ARCH_SET_GS");
428*4882a593Smuzhiyun asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0));
429*4882a593Smuzhiyun
430*4882a593Smuzhiyun ftx = 2;
431*4882a593Smuzhiyun syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
432*4882a593Smuzhiyun while (ftx != 0)
433*4882a593Smuzhiyun syscall(SYS_futex, &ftx, FUTEX_WAIT, 1, NULL, NULL, 0);
434*4882a593Smuzhiyun
435*4882a593Smuzhiyun base = read_base(GS);
436*4882a593Smuzhiyun if (base == 0) {
437*4882a593Smuzhiyun printf("[OK]\tGSBASE remained 0\n");
438*4882a593Smuzhiyun } else {
439*4882a593Smuzhiyun nerrs++;
440*4882a593Smuzhiyun printf("[FAIL]\tGSBASE changed to 0x%lx\n", base);
441*4882a593Smuzhiyun }
442*4882a593Smuzhiyun }
443*4882a593Smuzhiyun
/*
 * Byte offset of register @field inside struct user_regs_struct, for use
 * as the address argument of PTRACE_PEEKUSER / PTRACE_POKEUSER.
 */
#define USER_REGS_OFFSET(field) offsetof(struct user_regs_struct, field)
445*4882a593Smuzhiyun
test_ptrace_write_gs_read_base(void)446*4882a593Smuzhiyun static void test_ptrace_write_gs_read_base(void)
447*4882a593Smuzhiyun {
448*4882a593Smuzhiyun int status;
449*4882a593Smuzhiyun pid_t child = fork();
450*4882a593Smuzhiyun
451*4882a593Smuzhiyun if (child < 0)
452*4882a593Smuzhiyun err(1, "fork");
453*4882a593Smuzhiyun
454*4882a593Smuzhiyun if (child == 0) {
455*4882a593Smuzhiyun printf("[RUN]\tPTRACE_POKE GS, read GSBASE back\n");
456*4882a593Smuzhiyun
457*4882a593Smuzhiyun printf("[RUN]\tARCH_SET_GS to 1\n");
458*4882a593Smuzhiyun if (syscall(SYS_arch_prctl, ARCH_SET_GS, 1) != 0)
459*4882a593Smuzhiyun err(1, "ARCH_SET_GS");
460*4882a593Smuzhiyun
461*4882a593Smuzhiyun if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) != 0)
462*4882a593Smuzhiyun err(1, "PTRACE_TRACEME");
463*4882a593Smuzhiyun
464*4882a593Smuzhiyun raise(SIGTRAP);
465*4882a593Smuzhiyun _exit(0);
466*4882a593Smuzhiyun }
467*4882a593Smuzhiyun
468*4882a593Smuzhiyun wait(&status);
469*4882a593Smuzhiyun
470*4882a593Smuzhiyun if (WSTOPSIG(status) == SIGTRAP) {
471*4882a593Smuzhiyun unsigned long base;
472*4882a593Smuzhiyun unsigned long gs_offset = USER_REGS_OFFSET(gs);
473*4882a593Smuzhiyun unsigned long base_offset = USER_REGS_OFFSET(gs_base);
474*4882a593Smuzhiyun
475*4882a593Smuzhiyun /* Read the initial base. It should be 1. */
476*4882a593Smuzhiyun base = ptrace(PTRACE_PEEKUSER, child, base_offset, NULL);
477*4882a593Smuzhiyun if (base == 1) {
478*4882a593Smuzhiyun printf("[OK]\tGSBASE started at 1\n");
479*4882a593Smuzhiyun } else {
480*4882a593Smuzhiyun nerrs++;
481*4882a593Smuzhiyun printf("[FAIL]\tGSBASE started at 0x%lx\n", base);
482*4882a593Smuzhiyun }
483*4882a593Smuzhiyun
484*4882a593Smuzhiyun printf("[RUN]\tSet GS = 0x7, read GSBASE\n");
485*4882a593Smuzhiyun
486*4882a593Smuzhiyun /* Poke an LDT selector into GS. */
487*4882a593Smuzhiyun if (ptrace(PTRACE_POKEUSER, child, gs_offset, 0x7) != 0)
488*4882a593Smuzhiyun err(1, "PTRACE_POKEUSER");
489*4882a593Smuzhiyun
490*4882a593Smuzhiyun /* And read the base. */
491*4882a593Smuzhiyun base = ptrace(PTRACE_PEEKUSER, child, base_offset, NULL);
492*4882a593Smuzhiyun
493*4882a593Smuzhiyun if (base == 0 || base == 1) {
494*4882a593Smuzhiyun printf("[OK]\tGSBASE reads as 0x%lx with invalid GS\n", base);
495*4882a593Smuzhiyun } else {
496*4882a593Smuzhiyun nerrs++;
497*4882a593Smuzhiyun printf("[FAIL]\tGSBASE=0x%lx (should be 0 or 1)\n", base);
498*4882a593Smuzhiyun }
499*4882a593Smuzhiyun }
500*4882a593Smuzhiyun
501*4882a593Smuzhiyun ptrace(PTRACE_CONT, child, NULL, NULL);
502*4882a593Smuzhiyun
503*4882a593Smuzhiyun wait(&status);
504*4882a593Smuzhiyun if (!WIFEXITED(status))
505*4882a593Smuzhiyun printf("[WARN]\tChild didn't exit cleanly.\n");
506*4882a593Smuzhiyun }
507*4882a593Smuzhiyun
test_ptrace_write_gsbase(void)508*4882a593Smuzhiyun static void test_ptrace_write_gsbase(void)
509*4882a593Smuzhiyun {
510*4882a593Smuzhiyun int status;
511*4882a593Smuzhiyun pid_t child = fork();
512*4882a593Smuzhiyun
513*4882a593Smuzhiyun if (child < 0)
514*4882a593Smuzhiyun err(1, "fork");
515*4882a593Smuzhiyun
516*4882a593Smuzhiyun if (child == 0) {
517*4882a593Smuzhiyun printf("[RUN]\tPTRACE_POKE(), write GSBASE from ptracer\n");
518*4882a593Smuzhiyun
519*4882a593Smuzhiyun *shared_scratch = load_gs();
520*4882a593Smuzhiyun
521*4882a593Smuzhiyun if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) != 0)
522*4882a593Smuzhiyun err(1, "PTRACE_TRACEME");
523*4882a593Smuzhiyun
524*4882a593Smuzhiyun raise(SIGTRAP);
525*4882a593Smuzhiyun _exit(0);
526*4882a593Smuzhiyun }
527*4882a593Smuzhiyun
528*4882a593Smuzhiyun wait(&status);
529*4882a593Smuzhiyun
530*4882a593Smuzhiyun if (WSTOPSIG(status) == SIGTRAP) {
531*4882a593Smuzhiyun unsigned long gs, base;
532*4882a593Smuzhiyun unsigned long gs_offset = USER_REGS_OFFSET(gs);
533*4882a593Smuzhiyun unsigned long base_offset = USER_REGS_OFFSET(gs_base);
534*4882a593Smuzhiyun
535*4882a593Smuzhiyun gs = ptrace(PTRACE_PEEKUSER, child, gs_offset, NULL);
536*4882a593Smuzhiyun
537*4882a593Smuzhiyun if (gs != *shared_scratch) {
538*4882a593Smuzhiyun nerrs++;
539*4882a593Smuzhiyun printf("[FAIL]\tGS is not prepared with nonzero\n");
540*4882a593Smuzhiyun goto END;
541*4882a593Smuzhiyun }
542*4882a593Smuzhiyun
543*4882a593Smuzhiyun if (ptrace(PTRACE_POKEUSER, child, base_offset, 0xFF) != 0)
544*4882a593Smuzhiyun err(1, "PTRACE_POKEUSER");
545*4882a593Smuzhiyun
546*4882a593Smuzhiyun gs = ptrace(PTRACE_PEEKUSER, child, gs_offset, NULL);
547*4882a593Smuzhiyun base = ptrace(PTRACE_PEEKUSER, child, base_offset, NULL);
548*4882a593Smuzhiyun
549*4882a593Smuzhiyun /*
550*4882a593Smuzhiyun * In a non-FSGSBASE system, the nonzero selector will load
551*4882a593Smuzhiyun * GSBASE (again). But what is tested here is whether the
552*4882a593Smuzhiyun * selector value is changed or not by the GSBASE write in
553*4882a593Smuzhiyun * a ptracer.
554*4882a593Smuzhiyun */
555*4882a593Smuzhiyun if (gs != *shared_scratch) {
556*4882a593Smuzhiyun nerrs++;
557*4882a593Smuzhiyun printf("[FAIL]\tGS changed to %lx\n", gs);
558*4882a593Smuzhiyun
559*4882a593Smuzhiyun /*
560*4882a593Smuzhiyun * On older kernels, poking a nonzero value into the
561*4882a593Smuzhiyun * base would zero the selector. On newer kernels,
562*4882a593Smuzhiyun * this behavior has changed -- poking the base
563*4882a593Smuzhiyun * changes only the base and, if FSGSBASE is not
564*4882a593Smuzhiyun * available, this may have no effect once the tracee
565*4882a593Smuzhiyun * is resumed.
566*4882a593Smuzhiyun */
567*4882a593Smuzhiyun if (gs == 0)
568*4882a593Smuzhiyun printf("\tNote: this is expected behavior on older kernels.\n");
569*4882a593Smuzhiyun } else if (have_fsgsbase && (base != 0xFF)) {
570*4882a593Smuzhiyun nerrs++;
571*4882a593Smuzhiyun printf("[FAIL]\tGSBASE changed to %lx\n", base);
572*4882a593Smuzhiyun } else {
573*4882a593Smuzhiyun printf("[OK]\tGS remained 0x%hx", *shared_scratch);
574*4882a593Smuzhiyun if (have_fsgsbase)
575*4882a593Smuzhiyun printf(" and GSBASE changed to 0xFF");
576*4882a593Smuzhiyun printf("\n");
577*4882a593Smuzhiyun }
578*4882a593Smuzhiyun }
579*4882a593Smuzhiyun
580*4882a593Smuzhiyun END:
581*4882a593Smuzhiyun ptrace(PTRACE_CONT, child, NULL, NULL);
582*4882a593Smuzhiyun wait(&status);
583*4882a593Smuzhiyun if (!WIFEXITED(status))
584*4882a593Smuzhiyun printf("[WARN]\tChild didn't exit cleanly.\n");
585*4882a593Smuzhiyun }
586*4882a593Smuzhiyun
main()587*4882a593Smuzhiyun int main()
588*4882a593Smuzhiyun {
589*4882a593Smuzhiyun pthread_t thread;
590*4882a593Smuzhiyun
591*4882a593Smuzhiyun shared_scratch = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
592*4882a593Smuzhiyun MAP_ANONYMOUS | MAP_SHARED, -1, 0);
593*4882a593Smuzhiyun
594*4882a593Smuzhiyun /* Do these tests before we have an LDT. */
595*4882a593Smuzhiyun test_ptrace_write_gs_read_base();
596*4882a593Smuzhiyun
597*4882a593Smuzhiyun /* Probe FSGSBASE */
598*4882a593Smuzhiyun sethandler(SIGILL, sigill, 0);
599*4882a593Smuzhiyun if (sigsetjmp(jmpbuf, 1) == 0) {
600*4882a593Smuzhiyun rdfsbase();
601*4882a593Smuzhiyun have_fsgsbase = true;
602*4882a593Smuzhiyun printf("\tFSGSBASE instructions are enabled\n");
603*4882a593Smuzhiyun } else {
604*4882a593Smuzhiyun printf("\tFSGSBASE instructions are disabled\n");
605*4882a593Smuzhiyun }
606*4882a593Smuzhiyun clearhandler(SIGILL);
607*4882a593Smuzhiyun
608*4882a593Smuzhiyun sethandler(SIGSEGV, sigsegv, 0);
609*4882a593Smuzhiyun
610*4882a593Smuzhiyun check_gs_value(0);
611*4882a593Smuzhiyun check_gs_value(1);
612*4882a593Smuzhiyun check_gs_value(0x200000000);
613*4882a593Smuzhiyun check_gs_value(0);
614*4882a593Smuzhiyun check_gs_value(0x200000000);
615*4882a593Smuzhiyun check_gs_value(1);
616*4882a593Smuzhiyun
617*4882a593Smuzhiyun for (int sched = 0; sched < 2; sched++) {
618*4882a593Smuzhiyun mov_0_gs(0, !!sched);
619*4882a593Smuzhiyun mov_0_gs(1, !!sched);
620*4882a593Smuzhiyun mov_0_gs(0x200000000, !!sched);
621*4882a593Smuzhiyun }
622*4882a593Smuzhiyun
623*4882a593Smuzhiyun /* Set up for multithreading. */
624*4882a593Smuzhiyun
625*4882a593Smuzhiyun cpu_set_t cpuset;
626*4882a593Smuzhiyun CPU_ZERO(&cpuset);
627*4882a593Smuzhiyun CPU_SET(0, &cpuset);
628*4882a593Smuzhiyun if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
629*4882a593Smuzhiyun err(1, "sched_setaffinity to CPU 0"); /* should never fail */
630*4882a593Smuzhiyun
631*4882a593Smuzhiyun if (pthread_create(&thread, 0, threadproc, 0) != 0)
632*4882a593Smuzhiyun err(1, "pthread_create");
633*4882a593Smuzhiyun
634*4882a593Smuzhiyun static unsigned long bases_with_hard_zero[] = {
635*4882a593Smuzhiyun 0, HARD_ZERO, 1, 0x200000000,
636*4882a593Smuzhiyun };
637*4882a593Smuzhiyun
638*4882a593Smuzhiyun for (int local = 0; local < 4; local++) {
639*4882a593Smuzhiyun for (int remote = 0; remote < 4; remote++) {
640*4882a593Smuzhiyun for (unsigned short s = 0; s < 5; s++) {
641*4882a593Smuzhiyun unsigned short sel = s;
642*4882a593Smuzhiyun if (s == 4)
643*4882a593Smuzhiyun asm ("mov %%ss, %0" : "=rm" (sel));
644*4882a593Smuzhiyun set_gs_and_switch_to(
645*4882a593Smuzhiyun bases_with_hard_zero[local],
646*4882a593Smuzhiyun sel,
647*4882a593Smuzhiyun bases_with_hard_zero[remote]);
648*4882a593Smuzhiyun }
649*4882a593Smuzhiyun }
650*4882a593Smuzhiyun }
651*4882a593Smuzhiyun
652*4882a593Smuzhiyun test_unexpected_base();
653*4882a593Smuzhiyun
654*4882a593Smuzhiyun if (have_fsgsbase) {
655*4882a593Smuzhiyun unsigned short ss;
656*4882a593Smuzhiyun
657*4882a593Smuzhiyun asm volatile ("mov %%ss, %0" : "=rm" (ss));
658*4882a593Smuzhiyun
659*4882a593Smuzhiyun test_wrbase(0, 0);
660*4882a593Smuzhiyun test_wrbase(0, 1);
661*4882a593Smuzhiyun test_wrbase(0, 0x200000000);
662*4882a593Smuzhiyun test_wrbase(0, 0xffffffffffffffff);
663*4882a593Smuzhiyun test_wrbase(ss, 0);
664*4882a593Smuzhiyun test_wrbase(ss, 1);
665*4882a593Smuzhiyun test_wrbase(ss, 0x200000000);
666*4882a593Smuzhiyun test_wrbase(ss, 0xffffffffffffffff);
667*4882a593Smuzhiyun }
668*4882a593Smuzhiyun
669*4882a593Smuzhiyun ftx = 3; /* Kill the thread. */
670*4882a593Smuzhiyun syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
671*4882a593Smuzhiyun
672*4882a593Smuzhiyun if (pthread_join(thread, NULL) != 0)
673*4882a593Smuzhiyun err(1, "pthread_join");
674*4882a593Smuzhiyun
675*4882a593Smuzhiyun test_ptrace_write_gsbase();
676*4882a593Smuzhiyun
677*4882a593Smuzhiyun return nerrs == 0 ? 0 : 1;
678*4882a593Smuzhiyun }
679