xref: /OK3568_Linux_fs/kernel/tools/testing/selftests/x86/fsgsbase.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-only
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun  * fsgsbase.c, an fsgsbase test
4*4882a593Smuzhiyun  * Copyright (c) 2014-2016 Andy Lutomirski
5*4882a593Smuzhiyun  */
6*4882a593Smuzhiyun 
7*4882a593Smuzhiyun #define _GNU_SOURCE
8*4882a593Smuzhiyun #include <stdio.h>
9*4882a593Smuzhiyun #include <stdlib.h>
10*4882a593Smuzhiyun #include <stdbool.h>
11*4882a593Smuzhiyun #include <string.h>
12*4882a593Smuzhiyun #include <sys/syscall.h>
13*4882a593Smuzhiyun #include <unistd.h>
14*4882a593Smuzhiyun #include <err.h>
15*4882a593Smuzhiyun #include <sys/user.h>
16*4882a593Smuzhiyun #include <asm/prctl.h>
17*4882a593Smuzhiyun #include <sys/prctl.h>
18*4882a593Smuzhiyun #include <signal.h>
19*4882a593Smuzhiyun #include <limits.h>
20*4882a593Smuzhiyun #include <sys/ucontext.h>
21*4882a593Smuzhiyun #include <sched.h>
22*4882a593Smuzhiyun #include <linux/futex.h>
23*4882a593Smuzhiyun #include <pthread.h>
24*4882a593Smuzhiyun #include <asm/ldt.h>
25*4882a593Smuzhiyun #include <sys/mman.h>
26*4882a593Smuzhiyun #include <stddef.h>
27*4882a593Smuzhiyun #include <sys/ptrace.h>
28*4882a593Smuzhiyun #include <sys/wait.h>
29*4882a593Smuzhiyun #include <setjmp.h>
30*4882a593Smuzhiyun 
31*4882a593Smuzhiyun #ifndef __x86_64__
32*4882a593Smuzhiyun # error This test is 64-bit only
33*4882a593Smuzhiyun #endif
34*4882a593Smuzhiyun 
/* Set before a deliberately faulting access; cleared by the SIGSEGV handler. */
static volatile sig_atomic_t want_segv;
/* Fault address (si_addr) recorded by the SIGSEGV handler. */
static volatile unsigned long segv_addr;

/* Shared mapping through which a forked child reports its GS selector. */
static unsigned short *shared_scratch;

/* Number of failed checks; main() exits nonzero if this is not 0. */
static int nerrs;
41*4882a593Smuzhiyun 
/*
 * Install @handler as the SA_SIGINFO-style handler for @sig.  @flags is
 * OR'd into sa_flags and no additional signals are masked while the
 * handler runs.  Exits through err() if sigaction() fails.
 */
static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
		       int flags)
{
	struct sigaction action;

	memset(&action, 0, sizeof(action));
	sigemptyset(&action.sa_mask);
	action.sa_flags = SA_SIGINFO | flags;
	action.sa_sigaction = handler;

	if (sigaction(sig, &action, NULL) != 0)
		err(1, "sigaction");
}
53*4882a593Smuzhiyun 
/*
 * Restore the default disposition (SIG_DFL) for @sig.  Exits through err()
 * if sigaction() fails.
 */
static void clearhandler(int sig)
{
	struct sigaction action;

	memset(&action, 0, sizeof(action));
	sigemptyset(&action.sa_mask);
	action.sa_handler = SIG_DFL;

	if (sigaction(sig, &action, NULL) != 0)
		err(1, "sigaction");
}
63*4882a593Smuzhiyun 
sigsegv(int sig,siginfo_t * si,void * ctx_void)64*4882a593Smuzhiyun static void sigsegv(int sig, siginfo_t *si, void *ctx_void)
65*4882a593Smuzhiyun {
66*4882a593Smuzhiyun 	ucontext_t *ctx = (ucontext_t*)ctx_void;
67*4882a593Smuzhiyun 
68*4882a593Smuzhiyun 	if (!want_segv) {
69*4882a593Smuzhiyun 		clearhandler(SIGSEGV);
70*4882a593Smuzhiyun 		return;  /* Crash cleanly. */
71*4882a593Smuzhiyun 	}
72*4882a593Smuzhiyun 
73*4882a593Smuzhiyun 	want_segv = false;
74*4882a593Smuzhiyun 	segv_addr = (unsigned long)si->si_addr;
75*4882a593Smuzhiyun 
76*4882a593Smuzhiyun 	ctx->uc_mcontext.gregs[REG_RIP] += 4;	/* Skip the faulting mov */
77*4882a593Smuzhiyun 
78*4882a593Smuzhiyun }
79*4882a593Smuzhiyun 
/*
 * Jump target for the FSGSBASE probe.  It is filled in with sigsetjmp() and
 * consumed by siglongjmp(), so it is declared as sigjmp_buf, the buffer
 * type POSIX specifies for that pair of functions.
 */
static sigjmp_buf jmpbuf;

/*
 * SIGILL handler: unwind back to the sigsetjmp() point that armed jmpbuf,
 * making sigsetjmp() return 1 there.  The signal arguments are unused.
 */
static void sigill(int sig, siginfo_t *si, void *ctx_void)
{
	(void)sig;
	(void)si;
	(void)ctx_void;

	siglongjmp(jmpbuf, 1);
}
86*4882a593Smuzhiyun 
/* True once main() has executed a probe RDFSBASE without taking SIGILL. */
static bool have_fsgsbase;
88*4882a593Smuzhiyun 
rdgsbase(void)89*4882a593Smuzhiyun static inline unsigned long rdgsbase(void)
90*4882a593Smuzhiyun {
91*4882a593Smuzhiyun 	unsigned long gsbase;
92*4882a593Smuzhiyun 
93*4882a593Smuzhiyun 	asm volatile("rdgsbase %0" : "=r" (gsbase) :: "memory");
94*4882a593Smuzhiyun 
95*4882a593Smuzhiyun 	return gsbase;
96*4882a593Smuzhiyun }
97*4882a593Smuzhiyun 
rdfsbase(void)98*4882a593Smuzhiyun static inline unsigned long rdfsbase(void)
99*4882a593Smuzhiyun {
100*4882a593Smuzhiyun 	unsigned long fsbase;
101*4882a593Smuzhiyun 
102*4882a593Smuzhiyun 	asm volatile("rdfsbase %0" : "=r" (fsbase) :: "memory");
103*4882a593Smuzhiyun 
104*4882a593Smuzhiyun 	return fsbase;
105*4882a593Smuzhiyun }
106*4882a593Smuzhiyun 
wrgsbase(unsigned long gsbase)107*4882a593Smuzhiyun static inline void wrgsbase(unsigned long gsbase)
108*4882a593Smuzhiyun {
109*4882a593Smuzhiyun 	asm volatile("wrgsbase %0" :: "r" (gsbase) : "memory");
110*4882a593Smuzhiyun }
111*4882a593Smuzhiyun 
wrfsbase(unsigned long fsbase)112*4882a593Smuzhiyun static inline void wrfsbase(unsigned long fsbase)
113*4882a593Smuzhiyun {
114*4882a593Smuzhiyun 	asm volatile("wrfsbase %0" :: "r" (fsbase) : "memory");
115*4882a593Smuzhiyun }
116*4882a593Smuzhiyun 
117*4882a593Smuzhiyun enum which_base { FS, GS };
118*4882a593Smuzhiyun 
read_base(enum which_base which)119*4882a593Smuzhiyun static unsigned long read_base(enum which_base which)
120*4882a593Smuzhiyun {
121*4882a593Smuzhiyun 	unsigned long offset;
122*4882a593Smuzhiyun 	/*
123*4882a593Smuzhiyun 	 * Unless we have FSGSBASE, there's no direct way to do this from
124*4882a593Smuzhiyun 	 * user mode.  We can get at it indirectly using signals, though.
125*4882a593Smuzhiyun 	 */
126*4882a593Smuzhiyun 
127*4882a593Smuzhiyun 	want_segv = true;
128*4882a593Smuzhiyun 
129*4882a593Smuzhiyun 	offset = 0;
130*4882a593Smuzhiyun 	if (which == FS) {
131*4882a593Smuzhiyun 		/* Use a constant-length instruction here. */
132*4882a593Smuzhiyun 		asm volatile ("mov %%fs:(%%rcx), %%rax" : : "c" (offset) : "rax");
133*4882a593Smuzhiyun 	} else {
134*4882a593Smuzhiyun 		asm volatile ("mov %%gs:(%%rcx), %%rax" : : "c" (offset) : "rax");
135*4882a593Smuzhiyun 	}
136*4882a593Smuzhiyun 	if (!want_segv)
137*4882a593Smuzhiyun 		return segv_addr + offset;
138*4882a593Smuzhiyun 
139*4882a593Smuzhiyun 	/*
140*4882a593Smuzhiyun 	 * If that didn't segfault, try the other end of the address space.
141*4882a593Smuzhiyun 	 * Unless we get really unlucky and run into the vsyscall page, this
142*4882a593Smuzhiyun 	 * is guaranteed to segfault.
143*4882a593Smuzhiyun 	 */
144*4882a593Smuzhiyun 
145*4882a593Smuzhiyun 	offset = (ULONG_MAX >> 1) + 1;
146*4882a593Smuzhiyun 	if (which == FS) {
147*4882a593Smuzhiyun 		asm volatile ("mov %%fs:(%%rcx), %%rax"
148*4882a593Smuzhiyun 			      : : "c" (offset) : "rax");
149*4882a593Smuzhiyun 	} else {
150*4882a593Smuzhiyun 		asm volatile ("mov %%gs:(%%rcx), %%rax"
151*4882a593Smuzhiyun 			      : : "c" (offset) : "rax");
152*4882a593Smuzhiyun 	}
153*4882a593Smuzhiyun 	if (!want_segv)
154*4882a593Smuzhiyun 		return segv_addr + offset;
155*4882a593Smuzhiyun 
156*4882a593Smuzhiyun 	abort();
157*4882a593Smuzhiyun }
158*4882a593Smuzhiyun 
check_gs_value(unsigned long value)159*4882a593Smuzhiyun static void check_gs_value(unsigned long value)
160*4882a593Smuzhiyun {
161*4882a593Smuzhiyun 	unsigned long base;
162*4882a593Smuzhiyun 	unsigned short sel;
163*4882a593Smuzhiyun 
164*4882a593Smuzhiyun 	printf("[RUN]\tARCH_SET_GS to 0x%lx\n", value);
165*4882a593Smuzhiyun 	if (syscall(SYS_arch_prctl, ARCH_SET_GS, value) != 0)
166*4882a593Smuzhiyun 		err(1, "ARCH_SET_GS");
167*4882a593Smuzhiyun 
168*4882a593Smuzhiyun 	asm volatile ("mov %%gs, %0" : "=rm" (sel));
169*4882a593Smuzhiyun 	base = read_base(GS);
170*4882a593Smuzhiyun 	if (base == value) {
171*4882a593Smuzhiyun 		printf("[OK]\tGSBASE was set as expected (selector 0x%hx)\n",
172*4882a593Smuzhiyun 		       sel);
173*4882a593Smuzhiyun 	} else {
174*4882a593Smuzhiyun 		nerrs++;
175*4882a593Smuzhiyun 		printf("[FAIL]\tGSBASE was not as expected: got 0x%lx (selector 0x%hx)\n",
176*4882a593Smuzhiyun 		       base, sel);
177*4882a593Smuzhiyun 	}
178*4882a593Smuzhiyun 
179*4882a593Smuzhiyun 	if (syscall(SYS_arch_prctl, ARCH_GET_GS, &base) != 0)
180*4882a593Smuzhiyun 		err(1, "ARCH_GET_GS");
181*4882a593Smuzhiyun 	if (base == value) {
182*4882a593Smuzhiyun 		printf("[OK]\tARCH_GET_GS worked as expected (selector 0x%hx)\n",
183*4882a593Smuzhiyun 		       sel);
184*4882a593Smuzhiyun 	} else {
185*4882a593Smuzhiyun 		nerrs++;
186*4882a593Smuzhiyun 		printf("[FAIL]\tARCH_GET_GS was not as expected: got 0x%lx (selector 0x%hx)\n",
187*4882a593Smuzhiyun 		       base, sel);
188*4882a593Smuzhiyun 	}
189*4882a593Smuzhiyun }
190*4882a593Smuzhiyun 
mov_0_gs(unsigned long initial_base,bool schedule)191*4882a593Smuzhiyun static void mov_0_gs(unsigned long initial_base, bool schedule)
192*4882a593Smuzhiyun {
193*4882a593Smuzhiyun 	unsigned long base, arch_base;
194*4882a593Smuzhiyun 
195*4882a593Smuzhiyun 	printf("[RUN]\tARCH_SET_GS to 0x%lx then mov 0 to %%gs%s\n", initial_base, schedule ? " and schedule " : "");
196*4882a593Smuzhiyun 	if (syscall(SYS_arch_prctl, ARCH_SET_GS, initial_base) != 0)
197*4882a593Smuzhiyun 		err(1, "ARCH_SET_GS");
198*4882a593Smuzhiyun 
199*4882a593Smuzhiyun 	if (schedule)
200*4882a593Smuzhiyun 		usleep(10);
201*4882a593Smuzhiyun 
202*4882a593Smuzhiyun 	asm volatile ("mov %0, %%gs" : : "rm" (0));
203*4882a593Smuzhiyun 	base = read_base(GS);
204*4882a593Smuzhiyun 	if (syscall(SYS_arch_prctl, ARCH_GET_GS, &arch_base) != 0)
205*4882a593Smuzhiyun 		err(1, "ARCH_GET_GS");
206*4882a593Smuzhiyun 	if (base == arch_base) {
207*4882a593Smuzhiyun 		printf("[OK]\tGSBASE is 0x%lx\n", base);
208*4882a593Smuzhiyun 	} else {
209*4882a593Smuzhiyun 		nerrs++;
210*4882a593Smuzhiyun 		printf("[FAIL]\tGSBASE changed to 0x%lx but kernel reports 0x%lx\n", base, arch_base);
211*4882a593Smuzhiyun 	}
212*4882a593Smuzhiyun }
213*4882a593Smuzhiyun 
214*4882a593Smuzhiyun static volatile unsigned long remote_base;
215*4882a593Smuzhiyun static volatile bool remote_hard_zero;
216*4882a593Smuzhiyun static volatile unsigned int ftx;
217*4882a593Smuzhiyun 
218*4882a593Smuzhiyun /*
219*4882a593Smuzhiyun  * ARCH_SET_FS/GS(0) may or may not program a selector of zero.  HARD_ZERO
220*4882a593Smuzhiyun  * means to force the selector to zero to improve test coverage.
221*4882a593Smuzhiyun  */
222*4882a593Smuzhiyun #define HARD_ZERO 0xa1fa5f343cb85fa4
223*4882a593Smuzhiyun 
do_remote_base()224*4882a593Smuzhiyun static void do_remote_base()
225*4882a593Smuzhiyun {
226*4882a593Smuzhiyun 	unsigned long to_set = remote_base;
227*4882a593Smuzhiyun 	bool hard_zero = false;
228*4882a593Smuzhiyun 	if (to_set == HARD_ZERO) {
229*4882a593Smuzhiyun 		to_set = 0;
230*4882a593Smuzhiyun 		hard_zero = true;
231*4882a593Smuzhiyun 	}
232*4882a593Smuzhiyun 
233*4882a593Smuzhiyun 	if (syscall(SYS_arch_prctl, ARCH_SET_GS, to_set) != 0)
234*4882a593Smuzhiyun 		err(1, "ARCH_SET_GS");
235*4882a593Smuzhiyun 
236*4882a593Smuzhiyun 	if (hard_zero)
237*4882a593Smuzhiyun 		asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0));
238*4882a593Smuzhiyun 
239*4882a593Smuzhiyun 	unsigned short sel;
240*4882a593Smuzhiyun 	asm volatile ("mov %%gs, %0" : "=rm" (sel));
241*4882a593Smuzhiyun 	printf("\tother thread: ARCH_SET_GS(0x%lx)%s -- sel is 0x%hx\n",
242*4882a593Smuzhiyun 	       to_set, hard_zero ? " and clear gs" : "", sel);
243*4882a593Smuzhiyun }
244*4882a593Smuzhiyun 
245*4882a593Smuzhiyun static __thread int set_thread_area_entry_number = -1;
246*4882a593Smuzhiyun 
load_gs(void)247*4882a593Smuzhiyun static unsigned short load_gs(void)
248*4882a593Smuzhiyun {
249*4882a593Smuzhiyun 	/*
250*4882a593Smuzhiyun 	 * Sets GS != 0 and GSBASE != 0 but arranges for the kernel to think
251*4882a593Smuzhiyun 	 * that GSBASE == 0 (i.e. thread.gsbase == 0).
252*4882a593Smuzhiyun 	 */
253*4882a593Smuzhiyun 
254*4882a593Smuzhiyun 	/* Step 1: tell the kernel that we have GSBASE == 0. */
255*4882a593Smuzhiyun 	if (syscall(SYS_arch_prctl, ARCH_SET_GS, 0) != 0)
256*4882a593Smuzhiyun 		err(1, "ARCH_SET_GS");
257*4882a593Smuzhiyun 
258*4882a593Smuzhiyun 	/* Step 2: change GSBASE without telling the kernel. */
259*4882a593Smuzhiyun 	struct user_desc desc = {
260*4882a593Smuzhiyun 		.entry_number    = 0,
261*4882a593Smuzhiyun 		.base_addr       = 0xBAADF00D,
262*4882a593Smuzhiyun 		.limit           = 0xfffff,
263*4882a593Smuzhiyun 		.seg_32bit       = 1,
264*4882a593Smuzhiyun 		.contents        = 0, /* Data, grow-up */
265*4882a593Smuzhiyun 		.read_exec_only  = 0,
266*4882a593Smuzhiyun 		.limit_in_pages  = 1,
267*4882a593Smuzhiyun 		.seg_not_present = 0,
268*4882a593Smuzhiyun 		.useable         = 0
269*4882a593Smuzhiyun 	};
270*4882a593Smuzhiyun 	if (syscall(SYS_modify_ldt, 1, &desc, sizeof(desc)) == 0) {
271*4882a593Smuzhiyun 		printf("\tusing LDT slot 0\n");
272*4882a593Smuzhiyun 		asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0x7));
273*4882a593Smuzhiyun 		return 0x7;
274*4882a593Smuzhiyun 	} else {
275*4882a593Smuzhiyun 		/* No modify_ldt for us (configured out, perhaps) */
276*4882a593Smuzhiyun 
277*4882a593Smuzhiyun 		struct user_desc *low_desc = mmap(
278*4882a593Smuzhiyun 			NULL, sizeof(desc),
279*4882a593Smuzhiyun 			PROT_READ | PROT_WRITE,
280*4882a593Smuzhiyun 			MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0);
281*4882a593Smuzhiyun 		memcpy(low_desc, &desc, sizeof(desc));
282*4882a593Smuzhiyun 
283*4882a593Smuzhiyun 		low_desc->entry_number = set_thread_area_entry_number;
284*4882a593Smuzhiyun 
285*4882a593Smuzhiyun 		/* 32-bit set_thread_area */
286*4882a593Smuzhiyun 		long ret;
287*4882a593Smuzhiyun 		asm volatile ("int $0x80"
288*4882a593Smuzhiyun 			      : "=a" (ret), "+m" (*low_desc)
289*4882a593Smuzhiyun 			      : "a" (243), "b" (low_desc)
290*4882a593Smuzhiyun 			      : "r8", "r9", "r10", "r11");
291*4882a593Smuzhiyun 		memcpy(&desc, low_desc, sizeof(desc));
292*4882a593Smuzhiyun 		munmap(low_desc, sizeof(desc));
293*4882a593Smuzhiyun 
294*4882a593Smuzhiyun 		if (ret != 0) {
295*4882a593Smuzhiyun 			printf("[NOTE]\tcould not create a segment -- test won't do anything\n");
296*4882a593Smuzhiyun 			return 0;
297*4882a593Smuzhiyun 		}
298*4882a593Smuzhiyun 		printf("\tusing GDT slot %d\n", desc.entry_number);
299*4882a593Smuzhiyun 		set_thread_area_entry_number = desc.entry_number;
300*4882a593Smuzhiyun 
301*4882a593Smuzhiyun 		unsigned short gs = (unsigned short)((desc.entry_number << 3) | 0x3);
302*4882a593Smuzhiyun 		asm volatile ("mov %0, %%gs" : : "rm" (gs));
303*4882a593Smuzhiyun 		return gs;
304*4882a593Smuzhiyun 	}
305*4882a593Smuzhiyun }
306*4882a593Smuzhiyun 
test_wrbase(unsigned short index,unsigned long base)307*4882a593Smuzhiyun void test_wrbase(unsigned short index, unsigned long base)
308*4882a593Smuzhiyun {
309*4882a593Smuzhiyun 	unsigned short newindex;
310*4882a593Smuzhiyun 	unsigned long newbase;
311*4882a593Smuzhiyun 
312*4882a593Smuzhiyun 	printf("[RUN]\tGS = 0x%hx, GSBASE = 0x%lx\n", index, base);
313*4882a593Smuzhiyun 
314*4882a593Smuzhiyun 	asm volatile ("mov %0, %%gs" : : "rm" (index));
315*4882a593Smuzhiyun 	wrgsbase(base);
316*4882a593Smuzhiyun 
317*4882a593Smuzhiyun 	remote_base = 0;
318*4882a593Smuzhiyun 	ftx = 1;
319*4882a593Smuzhiyun 	syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
320*4882a593Smuzhiyun 	while (ftx != 0)
321*4882a593Smuzhiyun 		syscall(SYS_futex, &ftx, FUTEX_WAIT, 1, NULL, NULL, 0);
322*4882a593Smuzhiyun 
323*4882a593Smuzhiyun 	asm volatile ("mov %%gs, %0" : "=rm" (newindex));
324*4882a593Smuzhiyun 	newbase = rdgsbase();
325*4882a593Smuzhiyun 
326*4882a593Smuzhiyun 	if (newindex == index && newbase == base) {
327*4882a593Smuzhiyun 		printf("[OK]\tIndex and base were preserved\n");
328*4882a593Smuzhiyun 	} else {
329*4882a593Smuzhiyun 		printf("[FAIL]\tAfter switch, GS = 0x%hx and GSBASE = 0x%lx\n",
330*4882a593Smuzhiyun 		       newindex, newbase);
331*4882a593Smuzhiyun 		nerrs++;
332*4882a593Smuzhiyun 	}
333*4882a593Smuzhiyun }
334*4882a593Smuzhiyun 
threadproc(void * ctx)335*4882a593Smuzhiyun static void *threadproc(void *ctx)
336*4882a593Smuzhiyun {
337*4882a593Smuzhiyun 	while (1) {
338*4882a593Smuzhiyun 		while (ftx == 0)
339*4882a593Smuzhiyun 			syscall(SYS_futex, &ftx, FUTEX_WAIT, 0, NULL, NULL, 0);
340*4882a593Smuzhiyun 		if (ftx == 3)
341*4882a593Smuzhiyun 			return NULL;
342*4882a593Smuzhiyun 
343*4882a593Smuzhiyun 		if (ftx == 1) {
344*4882a593Smuzhiyun 			do_remote_base();
345*4882a593Smuzhiyun 		} else if (ftx == 2) {
346*4882a593Smuzhiyun 			/*
347*4882a593Smuzhiyun 			 * On AMD chips, this causes GSBASE != 0, GS == 0, and
348*4882a593Smuzhiyun 			 * thread.gsbase == 0.
349*4882a593Smuzhiyun 			 */
350*4882a593Smuzhiyun 
351*4882a593Smuzhiyun 			load_gs();
352*4882a593Smuzhiyun 			asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0));
353*4882a593Smuzhiyun 		} else {
354*4882a593Smuzhiyun 			errx(1, "helper thread got bad command");
355*4882a593Smuzhiyun 		}
356*4882a593Smuzhiyun 
357*4882a593Smuzhiyun 		ftx = 0;
358*4882a593Smuzhiyun 		syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
359*4882a593Smuzhiyun 	}
360*4882a593Smuzhiyun }
361*4882a593Smuzhiyun 
set_gs_and_switch_to(unsigned long local,unsigned short force_sel,unsigned long remote)362*4882a593Smuzhiyun static void set_gs_and_switch_to(unsigned long local,
363*4882a593Smuzhiyun 				 unsigned short force_sel,
364*4882a593Smuzhiyun 				 unsigned long remote)
365*4882a593Smuzhiyun {
366*4882a593Smuzhiyun 	unsigned long base;
367*4882a593Smuzhiyun 	unsigned short sel_pre_sched, sel_post_sched;
368*4882a593Smuzhiyun 
369*4882a593Smuzhiyun 	bool hard_zero = false;
370*4882a593Smuzhiyun 	if (local == HARD_ZERO) {
371*4882a593Smuzhiyun 		hard_zero = true;
372*4882a593Smuzhiyun 		local = 0;
373*4882a593Smuzhiyun 	}
374*4882a593Smuzhiyun 
375*4882a593Smuzhiyun 	printf("[RUN]\tARCH_SET_GS(0x%lx)%s, then schedule to 0x%lx\n",
376*4882a593Smuzhiyun 	       local, hard_zero ? " and clear gs" : "", remote);
377*4882a593Smuzhiyun 	if (force_sel)
378*4882a593Smuzhiyun 		printf("\tBefore schedule, set selector to 0x%hx\n", force_sel);
379*4882a593Smuzhiyun 	if (syscall(SYS_arch_prctl, ARCH_SET_GS, local) != 0)
380*4882a593Smuzhiyun 		err(1, "ARCH_SET_GS");
381*4882a593Smuzhiyun 	if (hard_zero)
382*4882a593Smuzhiyun 		asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0));
383*4882a593Smuzhiyun 
384*4882a593Smuzhiyun 	if (read_base(GS) != local) {
385*4882a593Smuzhiyun 		nerrs++;
386*4882a593Smuzhiyun 		printf("[FAIL]\tGSBASE wasn't set as expected\n");
387*4882a593Smuzhiyun 	}
388*4882a593Smuzhiyun 
389*4882a593Smuzhiyun 	if (force_sel) {
390*4882a593Smuzhiyun 		asm volatile ("mov %0, %%gs" : : "rm" (force_sel));
391*4882a593Smuzhiyun 		sel_pre_sched = force_sel;
392*4882a593Smuzhiyun 		local = read_base(GS);
393*4882a593Smuzhiyun 
394*4882a593Smuzhiyun 		/*
395*4882a593Smuzhiyun 		 * Signal delivery seems to mess up weird selectors.  Put it
396*4882a593Smuzhiyun 		 * back.
397*4882a593Smuzhiyun 		 */
398*4882a593Smuzhiyun 		asm volatile ("mov %0, %%gs" : : "rm" (force_sel));
399*4882a593Smuzhiyun 	} else {
400*4882a593Smuzhiyun 		asm volatile ("mov %%gs, %0" : "=rm" (sel_pre_sched));
401*4882a593Smuzhiyun 	}
402*4882a593Smuzhiyun 
403*4882a593Smuzhiyun 	remote_base = remote;
404*4882a593Smuzhiyun 	ftx = 1;
405*4882a593Smuzhiyun 	syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
406*4882a593Smuzhiyun 	while (ftx != 0)
407*4882a593Smuzhiyun 		syscall(SYS_futex, &ftx, FUTEX_WAIT, 1, NULL, NULL, 0);
408*4882a593Smuzhiyun 
409*4882a593Smuzhiyun 	asm volatile ("mov %%gs, %0" : "=rm" (sel_post_sched));
410*4882a593Smuzhiyun 	base = read_base(GS);
411*4882a593Smuzhiyun 	if (base == local && sel_pre_sched == sel_post_sched) {
412*4882a593Smuzhiyun 		printf("[OK]\tGS/BASE remained 0x%hx/0x%lx\n",
413*4882a593Smuzhiyun 		       sel_pre_sched, local);
414*4882a593Smuzhiyun 	} else {
415*4882a593Smuzhiyun 		nerrs++;
416*4882a593Smuzhiyun 		printf("[FAIL]\tGS/BASE changed from 0x%hx/0x%lx to 0x%hx/0x%lx\n",
417*4882a593Smuzhiyun 		       sel_pre_sched, local, sel_post_sched, base);
418*4882a593Smuzhiyun 	}
419*4882a593Smuzhiyun }
420*4882a593Smuzhiyun 
test_unexpected_base(void)421*4882a593Smuzhiyun static void test_unexpected_base(void)
422*4882a593Smuzhiyun {
423*4882a593Smuzhiyun 	unsigned long base;
424*4882a593Smuzhiyun 
425*4882a593Smuzhiyun 	printf("[RUN]\tARCH_SET_GS(0), clear gs, then manipulate GSBASE in a different thread\n");
426*4882a593Smuzhiyun 	if (syscall(SYS_arch_prctl, ARCH_SET_GS, 0) != 0)
427*4882a593Smuzhiyun 		err(1, "ARCH_SET_GS");
428*4882a593Smuzhiyun 	asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0));
429*4882a593Smuzhiyun 
430*4882a593Smuzhiyun 	ftx = 2;
431*4882a593Smuzhiyun 	syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
432*4882a593Smuzhiyun 	while (ftx != 0)
433*4882a593Smuzhiyun 		syscall(SYS_futex, &ftx, FUTEX_WAIT, 1, NULL, NULL, 0);
434*4882a593Smuzhiyun 
435*4882a593Smuzhiyun 	base = read_base(GS);
436*4882a593Smuzhiyun 	if (base == 0) {
437*4882a593Smuzhiyun 		printf("[OK]\tGSBASE remained 0\n");
438*4882a593Smuzhiyun 	} else {
439*4882a593Smuzhiyun 		nerrs++;
440*4882a593Smuzhiyun 		printf("[FAIL]\tGSBASE changed to 0x%lx\n", base);
441*4882a593Smuzhiyun 	}
442*4882a593Smuzhiyun }
443*4882a593Smuzhiyun 
444*4882a593Smuzhiyun #define USER_REGS_OFFSET(r) offsetof(struct user_regs_struct, r)
445*4882a593Smuzhiyun 
test_ptrace_write_gs_read_base(void)446*4882a593Smuzhiyun static void test_ptrace_write_gs_read_base(void)
447*4882a593Smuzhiyun {
448*4882a593Smuzhiyun 	int status;
449*4882a593Smuzhiyun 	pid_t child = fork();
450*4882a593Smuzhiyun 
451*4882a593Smuzhiyun 	if (child < 0)
452*4882a593Smuzhiyun 		err(1, "fork");
453*4882a593Smuzhiyun 
454*4882a593Smuzhiyun 	if (child == 0) {
455*4882a593Smuzhiyun 		printf("[RUN]\tPTRACE_POKE GS, read GSBASE back\n");
456*4882a593Smuzhiyun 
457*4882a593Smuzhiyun 		printf("[RUN]\tARCH_SET_GS to 1\n");
458*4882a593Smuzhiyun 		if (syscall(SYS_arch_prctl, ARCH_SET_GS, 1) != 0)
459*4882a593Smuzhiyun 			err(1, "ARCH_SET_GS");
460*4882a593Smuzhiyun 
461*4882a593Smuzhiyun 		if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) != 0)
462*4882a593Smuzhiyun 			err(1, "PTRACE_TRACEME");
463*4882a593Smuzhiyun 
464*4882a593Smuzhiyun 		raise(SIGTRAP);
465*4882a593Smuzhiyun 		_exit(0);
466*4882a593Smuzhiyun 	}
467*4882a593Smuzhiyun 
468*4882a593Smuzhiyun 	wait(&status);
469*4882a593Smuzhiyun 
470*4882a593Smuzhiyun 	if (WSTOPSIG(status) == SIGTRAP) {
471*4882a593Smuzhiyun 		unsigned long base;
472*4882a593Smuzhiyun 		unsigned long gs_offset = USER_REGS_OFFSET(gs);
473*4882a593Smuzhiyun 		unsigned long base_offset = USER_REGS_OFFSET(gs_base);
474*4882a593Smuzhiyun 
475*4882a593Smuzhiyun 		/* Read the initial base.  It should be 1. */
476*4882a593Smuzhiyun 		base = ptrace(PTRACE_PEEKUSER, child, base_offset, NULL);
477*4882a593Smuzhiyun 		if (base == 1) {
478*4882a593Smuzhiyun 			printf("[OK]\tGSBASE started at 1\n");
479*4882a593Smuzhiyun 		} else {
480*4882a593Smuzhiyun 			nerrs++;
481*4882a593Smuzhiyun 			printf("[FAIL]\tGSBASE started at 0x%lx\n", base);
482*4882a593Smuzhiyun 		}
483*4882a593Smuzhiyun 
484*4882a593Smuzhiyun 		printf("[RUN]\tSet GS = 0x7, read GSBASE\n");
485*4882a593Smuzhiyun 
486*4882a593Smuzhiyun 		/* Poke an LDT selector into GS. */
487*4882a593Smuzhiyun 		if (ptrace(PTRACE_POKEUSER, child, gs_offset, 0x7) != 0)
488*4882a593Smuzhiyun 			err(1, "PTRACE_POKEUSER");
489*4882a593Smuzhiyun 
490*4882a593Smuzhiyun 		/* And read the base. */
491*4882a593Smuzhiyun 		base = ptrace(PTRACE_PEEKUSER, child, base_offset, NULL);
492*4882a593Smuzhiyun 
493*4882a593Smuzhiyun 		if (base == 0 || base == 1) {
494*4882a593Smuzhiyun 			printf("[OK]\tGSBASE reads as 0x%lx with invalid GS\n", base);
495*4882a593Smuzhiyun 		} else {
496*4882a593Smuzhiyun 			nerrs++;
497*4882a593Smuzhiyun 			printf("[FAIL]\tGSBASE=0x%lx (should be 0 or 1)\n", base);
498*4882a593Smuzhiyun 		}
499*4882a593Smuzhiyun 	}
500*4882a593Smuzhiyun 
501*4882a593Smuzhiyun 	ptrace(PTRACE_CONT, child, NULL, NULL);
502*4882a593Smuzhiyun 
503*4882a593Smuzhiyun 	wait(&status);
504*4882a593Smuzhiyun 	if (!WIFEXITED(status))
505*4882a593Smuzhiyun 		printf("[WARN]\tChild didn't exit cleanly.\n");
506*4882a593Smuzhiyun }
507*4882a593Smuzhiyun 
test_ptrace_write_gsbase(void)508*4882a593Smuzhiyun static void test_ptrace_write_gsbase(void)
509*4882a593Smuzhiyun {
510*4882a593Smuzhiyun 	int status;
511*4882a593Smuzhiyun 	pid_t child = fork();
512*4882a593Smuzhiyun 
513*4882a593Smuzhiyun 	if (child < 0)
514*4882a593Smuzhiyun 		err(1, "fork");
515*4882a593Smuzhiyun 
516*4882a593Smuzhiyun 	if (child == 0) {
517*4882a593Smuzhiyun 		printf("[RUN]\tPTRACE_POKE(), write GSBASE from ptracer\n");
518*4882a593Smuzhiyun 
519*4882a593Smuzhiyun 		*shared_scratch = load_gs();
520*4882a593Smuzhiyun 
521*4882a593Smuzhiyun 		if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) != 0)
522*4882a593Smuzhiyun 			err(1, "PTRACE_TRACEME");
523*4882a593Smuzhiyun 
524*4882a593Smuzhiyun 		raise(SIGTRAP);
525*4882a593Smuzhiyun 		_exit(0);
526*4882a593Smuzhiyun 	}
527*4882a593Smuzhiyun 
528*4882a593Smuzhiyun 	wait(&status);
529*4882a593Smuzhiyun 
530*4882a593Smuzhiyun 	if (WSTOPSIG(status) == SIGTRAP) {
531*4882a593Smuzhiyun 		unsigned long gs, base;
532*4882a593Smuzhiyun 		unsigned long gs_offset = USER_REGS_OFFSET(gs);
533*4882a593Smuzhiyun 		unsigned long base_offset = USER_REGS_OFFSET(gs_base);
534*4882a593Smuzhiyun 
535*4882a593Smuzhiyun 		gs = ptrace(PTRACE_PEEKUSER, child, gs_offset, NULL);
536*4882a593Smuzhiyun 
537*4882a593Smuzhiyun 		if (gs != *shared_scratch) {
538*4882a593Smuzhiyun 			nerrs++;
539*4882a593Smuzhiyun 			printf("[FAIL]\tGS is not prepared with nonzero\n");
540*4882a593Smuzhiyun 			goto END;
541*4882a593Smuzhiyun 		}
542*4882a593Smuzhiyun 
543*4882a593Smuzhiyun 		if (ptrace(PTRACE_POKEUSER, child, base_offset, 0xFF) != 0)
544*4882a593Smuzhiyun 			err(1, "PTRACE_POKEUSER");
545*4882a593Smuzhiyun 
546*4882a593Smuzhiyun 		gs = ptrace(PTRACE_PEEKUSER, child, gs_offset, NULL);
547*4882a593Smuzhiyun 		base = ptrace(PTRACE_PEEKUSER, child, base_offset, NULL);
548*4882a593Smuzhiyun 
549*4882a593Smuzhiyun 		/*
550*4882a593Smuzhiyun 		 * In a non-FSGSBASE system, the nonzero selector will load
551*4882a593Smuzhiyun 		 * GSBASE (again). But what is tested here is whether the
552*4882a593Smuzhiyun 		 * selector value is changed or not by the GSBASE write in
553*4882a593Smuzhiyun 		 * a ptracer.
554*4882a593Smuzhiyun 		 */
555*4882a593Smuzhiyun 		if (gs != *shared_scratch) {
556*4882a593Smuzhiyun 			nerrs++;
557*4882a593Smuzhiyun 			printf("[FAIL]\tGS changed to %lx\n", gs);
558*4882a593Smuzhiyun 
559*4882a593Smuzhiyun 			/*
560*4882a593Smuzhiyun 			 * On older kernels, poking a nonzero value into the
561*4882a593Smuzhiyun 			 * base would zero the selector.  On newer kernels,
562*4882a593Smuzhiyun 			 * this behavior has changed -- poking the base
563*4882a593Smuzhiyun 			 * changes only the base and, if FSGSBASE is not
564*4882a593Smuzhiyun 			 * available, this may have no effect once the tracee
565*4882a593Smuzhiyun 			 * is resumed.
566*4882a593Smuzhiyun 			 */
567*4882a593Smuzhiyun 			if (gs == 0)
568*4882a593Smuzhiyun 				printf("\tNote: this is expected behavior on older kernels.\n");
569*4882a593Smuzhiyun 		} else if (have_fsgsbase && (base != 0xFF)) {
570*4882a593Smuzhiyun 			nerrs++;
571*4882a593Smuzhiyun 			printf("[FAIL]\tGSBASE changed to %lx\n", base);
572*4882a593Smuzhiyun 		} else {
573*4882a593Smuzhiyun 			printf("[OK]\tGS remained 0x%hx", *shared_scratch);
574*4882a593Smuzhiyun 			if (have_fsgsbase)
575*4882a593Smuzhiyun 				printf(" and GSBASE changed to 0xFF");
576*4882a593Smuzhiyun 			printf("\n");
577*4882a593Smuzhiyun 		}
578*4882a593Smuzhiyun 	}
579*4882a593Smuzhiyun 
580*4882a593Smuzhiyun END:
581*4882a593Smuzhiyun 	ptrace(PTRACE_CONT, child, NULL, NULL);
582*4882a593Smuzhiyun 	wait(&status);
583*4882a593Smuzhiyun 	if (!WIFEXITED(status))
584*4882a593Smuzhiyun 		printf("[WARN]\tChild didn't exit cleanly.\n");
585*4882a593Smuzhiyun }
586*4882a593Smuzhiyun 
main()587*4882a593Smuzhiyun int main()
588*4882a593Smuzhiyun {
589*4882a593Smuzhiyun 	pthread_t thread;
590*4882a593Smuzhiyun 
591*4882a593Smuzhiyun 	shared_scratch = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
592*4882a593Smuzhiyun 			      MAP_ANONYMOUS | MAP_SHARED, -1, 0);
593*4882a593Smuzhiyun 
594*4882a593Smuzhiyun 	/* Do these tests before we have an LDT. */
595*4882a593Smuzhiyun 	test_ptrace_write_gs_read_base();
596*4882a593Smuzhiyun 
597*4882a593Smuzhiyun 	/* Probe FSGSBASE */
598*4882a593Smuzhiyun 	sethandler(SIGILL, sigill, 0);
599*4882a593Smuzhiyun 	if (sigsetjmp(jmpbuf, 1) == 0) {
600*4882a593Smuzhiyun 		rdfsbase();
601*4882a593Smuzhiyun 		have_fsgsbase = true;
602*4882a593Smuzhiyun 		printf("\tFSGSBASE instructions are enabled\n");
603*4882a593Smuzhiyun 	} else {
604*4882a593Smuzhiyun 		printf("\tFSGSBASE instructions are disabled\n");
605*4882a593Smuzhiyun 	}
606*4882a593Smuzhiyun 	clearhandler(SIGILL);
607*4882a593Smuzhiyun 
608*4882a593Smuzhiyun 	sethandler(SIGSEGV, sigsegv, 0);
609*4882a593Smuzhiyun 
610*4882a593Smuzhiyun 	check_gs_value(0);
611*4882a593Smuzhiyun 	check_gs_value(1);
612*4882a593Smuzhiyun 	check_gs_value(0x200000000);
613*4882a593Smuzhiyun 	check_gs_value(0);
614*4882a593Smuzhiyun 	check_gs_value(0x200000000);
615*4882a593Smuzhiyun 	check_gs_value(1);
616*4882a593Smuzhiyun 
617*4882a593Smuzhiyun 	for (int sched = 0; sched < 2; sched++) {
618*4882a593Smuzhiyun 		mov_0_gs(0, !!sched);
619*4882a593Smuzhiyun 		mov_0_gs(1, !!sched);
620*4882a593Smuzhiyun 		mov_0_gs(0x200000000, !!sched);
621*4882a593Smuzhiyun 	}
622*4882a593Smuzhiyun 
623*4882a593Smuzhiyun 	/* Set up for multithreading. */
624*4882a593Smuzhiyun 
625*4882a593Smuzhiyun 	cpu_set_t cpuset;
626*4882a593Smuzhiyun 	CPU_ZERO(&cpuset);
627*4882a593Smuzhiyun 	CPU_SET(0, &cpuset);
628*4882a593Smuzhiyun 	if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
629*4882a593Smuzhiyun 		err(1, "sched_setaffinity to CPU 0");	/* should never fail */
630*4882a593Smuzhiyun 
631*4882a593Smuzhiyun 	if (pthread_create(&thread, 0, threadproc, 0) != 0)
632*4882a593Smuzhiyun 		err(1, "pthread_create");
633*4882a593Smuzhiyun 
634*4882a593Smuzhiyun 	static unsigned long bases_with_hard_zero[] = {
635*4882a593Smuzhiyun 		0, HARD_ZERO, 1, 0x200000000,
636*4882a593Smuzhiyun 	};
637*4882a593Smuzhiyun 
638*4882a593Smuzhiyun 	for (int local = 0; local < 4; local++) {
639*4882a593Smuzhiyun 		for (int remote = 0; remote < 4; remote++) {
640*4882a593Smuzhiyun 			for (unsigned short s = 0; s < 5; s++) {
641*4882a593Smuzhiyun 				unsigned short sel = s;
642*4882a593Smuzhiyun 				if (s == 4)
643*4882a593Smuzhiyun 					asm ("mov %%ss, %0" : "=rm" (sel));
644*4882a593Smuzhiyun 				set_gs_and_switch_to(
645*4882a593Smuzhiyun 					bases_with_hard_zero[local],
646*4882a593Smuzhiyun 					sel,
647*4882a593Smuzhiyun 					bases_with_hard_zero[remote]);
648*4882a593Smuzhiyun 			}
649*4882a593Smuzhiyun 		}
650*4882a593Smuzhiyun 	}
651*4882a593Smuzhiyun 
652*4882a593Smuzhiyun 	test_unexpected_base();
653*4882a593Smuzhiyun 
654*4882a593Smuzhiyun 	if (have_fsgsbase) {
655*4882a593Smuzhiyun 		unsigned short ss;
656*4882a593Smuzhiyun 
657*4882a593Smuzhiyun 		asm volatile ("mov %%ss, %0" : "=rm" (ss));
658*4882a593Smuzhiyun 
659*4882a593Smuzhiyun 		test_wrbase(0, 0);
660*4882a593Smuzhiyun 		test_wrbase(0, 1);
661*4882a593Smuzhiyun 		test_wrbase(0, 0x200000000);
662*4882a593Smuzhiyun 		test_wrbase(0, 0xffffffffffffffff);
663*4882a593Smuzhiyun 		test_wrbase(ss, 0);
664*4882a593Smuzhiyun 		test_wrbase(ss, 1);
665*4882a593Smuzhiyun 		test_wrbase(ss, 0x200000000);
666*4882a593Smuzhiyun 		test_wrbase(ss, 0xffffffffffffffff);
667*4882a593Smuzhiyun 	}
668*4882a593Smuzhiyun 
669*4882a593Smuzhiyun 	ftx = 3;  /* Kill the thread. */
670*4882a593Smuzhiyun 	syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
671*4882a593Smuzhiyun 
672*4882a593Smuzhiyun 	if (pthread_join(thread, NULL) != 0)
673*4882a593Smuzhiyun 		err(1, "pthread_join");
674*4882a593Smuzhiyun 
675*4882a593Smuzhiyun 	test_ptrace_write_gsbase();
676*4882a593Smuzhiyun 
677*4882a593Smuzhiyun 	return nerrs == 0 ? 0 : 1;
678*4882a593Smuzhiyun }
679