/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Asm versions of Xen pv-ops, suitable for direct use.
 *
 * We only bother with direct forms (ie, vcpu in percpu data) of the
 * operations here; the indirect forms are better handled in C.
 */

#include <asm/errno.h>
#include <asm/asm-offsets.h>
#include <asm/percpu.h>
#include <asm/processor-flags.h>
#include <asm/segment.h>
#include <asm/thread_info.h>
#include <asm/asm.h>
#include <asm/frame.h>
#include <asm/unwind_hints.h>

#include <xen/interface/xen.h>

#include <linux/init.h>
#include <linux/linkage.h>
#include <../entry/calling.h>

/*
 * Enable events.  This clears the event mask and then checks the
 * pending event status.  If there are pending events, enter the
 * hypervisor to get them handled.
 */
SYM_FUNC_START(xen_irq_enable_direct)
	FRAME_BEGIN
	/* Unmask events */
	movb $0, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask

	/*
	 * Preemption here doesn't matter, because preemption itself will
	 * deal with any pending interrupts.  The pending check may end up
	 * being run on the wrong CPU, but that doesn't hurt.
	 */

	/* Test for pending */
	testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
	jz 1f

	call check_events
1:
	FRAME_END
	RET
SYM_FUNC_END(xen_irq_enable_direct)


/*
 * Disabling events is simply a matter of making the event mask
 * non-zero.
 */
SYM_FUNC_START(xen_irq_disable_direct)
	movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
	RET
SYM_FUNC_END(xen_irq_disable_direct)

/*
 * (xen_)save_fl is used to get the current interrupt enable status.
 * Callers expect the status to be in X86_EFLAGS_IF, and other bits
 * may be set in the return value.  We take advantage of this by
 * making sure that X86_EFLAGS_IF has the right value (and other bits
 * in that byte are 0), but other bits in the return value are
 * undefined.  We need to toggle the state of the bit, because Xen and
 * x86 use opposite senses (mask vs enable).
 */
SYM_FUNC_START(xen_save_fl_direct)
	testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
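	/*
	 * %ah is bits 8..15 of %eax: setz turns "mask == 0" (events
	 * enabled) into 0x100, and doubling %ah turns that into
	 * 0x200 == X86_EFLAGS_IF.
	 */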
	setz %ah
	addb %ah, %ah
	RET
SYM_FUNC_END(xen_save_fl_direct)


/*
 * In principle the caller should be passing us a value returned from
 * xen_save_fl_direct, but for robustness' sake we test only the
 * X86_EFLAGS_IF flag rather than the whole byte.  After setting the
 * interrupt mask state, this checks for unmasked pending events and
 * enters the hypervisor to get them delivered if so.
 */
SYM_FUNC_START(xen_restore_fl_direct)
	FRAME_BEGIN
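	/*
	 * The flags to restore arrive in %rdi; only X86_EFLAGS_IF (bit 9,
	 * i.e. within %di) matters.  IF set means unmask (write 0 to the
	 * event mask), IF clear means mask (write 1).
	 */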
	testw $X86_EFLAGS_IF, %di
	setz PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
	/*
	 * Preemption here doesn't matter, because preemption itself will
	 * deal with any pending interrupts.  The pending check may end up
	 * being run on the wrong CPU, but that doesn't hurt.
	 */

	/* check for unmasked and pending */
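	/*
	 * The 16-bit compare covers both evtchn_upcall_pending (low byte)
	 * and evtchn_upcall_mask (high byte), so only the exact value
	 * 0x0001 means "pending and unmasked".
	 */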
	cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
	jnz 1f
	call check_events
1:
	FRAME_END
	RET
SYM_FUNC_END(xen_restore_fl_direct)


/*
 * Force an event check by making a hypercall, but preserve regs
 * before making the call.
 */
SYM_FUNC_START(check_events)
	FRAME_BEGIN
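	/*
	 * xen_force_evtchn_callback() is ordinary C code, so preserve every
	 * register the C ABI lets it clobber: the pvops callers of
	 * check_events don't expect any of these registers to change.
	 */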
	push %rax
	push %rcx
	push %rdx
	push %rsi
	push %rdi
	push %r8
	push %r9
	push %r10
	push %r11
	call xen_force_evtchn_callback
	pop %r11
	pop %r10
	pop %r9
	pop %r8
	pop %rdi
	pop %rsi
	pop %rdx
	pop %rcx
	pop %rax
	FRAME_END
	RET
SYM_FUNC_END(check_events)

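/*
 * A Xen PV guest can't simply read %cr2 on a page fault; the hypervisor
 * supplies the faulting address in vcpu_info->arch.cr2 instead.
 * xen_read_cr2() goes through the percpu xen_vcpu pointer, while the
 * _direct variant reads the percpu vcpu_info copy directly.
 */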
SYM_FUNC_START(xen_read_cr2)
	FRAME_BEGIN
	_ASM_MOV PER_CPU_VAR(xen_vcpu), %_ASM_AX
	_ASM_MOV XEN_vcpu_info_arch_cr2(%_ASM_AX), %_ASM_AX
	FRAME_END
	RET
SYM_FUNC_END(xen_read_cr2);

SYM_FUNC_START(xen_read_cr2_direct)
	FRAME_BEGIN
	_ASM_MOV PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_arch_cr2, %_ASM_AX
	FRAME_END
	RET
SYM_FUNC_END(xen_read_cr2_direct);

.macro xen_pv_trap name
SYM_CODE_START(xen_\name)
	UNWIND_HINT_ENTRY
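	/*
	 * Xen pushes %rcx and %r11 on top of the hardware iret frame when
	 * delivering an exception to a PV guest; drop them so the stack
	 * matches what the native handler expects.
	 */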
	pop %rcx
	pop %r11
	jmp  \name
SYM_CODE_END(xen_\name)
_ASM_NOKPROBE(xen_\name)
.endm

xen_pv_trap asm_exc_divide_error
xen_pv_trap asm_xenpv_exc_debug
xen_pv_trap asm_exc_int3
xen_pv_trap asm_xenpv_exc_nmi
xen_pv_trap asm_exc_overflow
xen_pv_trap asm_exc_bounds
xen_pv_trap asm_exc_invalid_op
xen_pv_trap asm_exc_device_not_available
xen_pv_trap asm_exc_double_fault
xen_pv_trap asm_exc_coproc_segment_overrun
xen_pv_trap asm_exc_invalid_tss
xen_pv_trap asm_exc_segment_not_present
xen_pv_trap asm_exc_stack_segment
xen_pv_trap asm_exc_general_protection
xen_pv_trap asm_exc_page_fault
xen_pv_trap asm_exc_spurious_interrupt_bug
xen_pv_trap asm_exc_coprocessor_error
xen_pv_trap asm_exc_alignment_check
#ifdef CONFIG_X86_MCE
xen_pv_trap asm_exc_machine_check
#endif /* CONFIG_X86_MCE */
xen_pv_trap asm_exc_simd_coprocessor_error
#ifdef CONFIG_IA32_EMULATION
xen_pv_trap entry_INT80_compat
#endif
xen_pv_trap asm_exc_xen_unknown_trap
xen_pv_trap asm_exc_xen_hypervisor_callback

	__INIT
SYM_CODE_START(xen_early_idt_handler_array)
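	/*
	 * One stub per early exception vector: strip the %rcx/%r11 that Xen
	 * pushed and tail-call the matching native early IDT handler.  Each
	 * stub is padded to XEN_EARLY_IDT_HANDLER_SIZE so the array can be
	 * indexed by vector number.
	 */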
	i = 0
	.rept NUM_EXCEPTION_VECTORS
	UNWIND_HINT_EMPTY
	pop %rcx
	pop %r11
	jmp early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE
	i = i + 1
	.fill xen_early_idt_handler_array + i*XEN_EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc
	.endr
SYM_CODE_END(xen_early_idt_handler_array)
	__FINIT

hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32
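/*
 * Every hypercall stub in the shared hypercall page is 32 bytes, so the
 * stub for hypercall N lives at hypercall_page + N * 32.
 */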
/*
 * Xen64 iret frame:
 *
 *	ss
 *	rsp
 *	rflags
 *	cs
 *	rip		<-- standard iret frame
 *
 *	flags
 *
 *	rcx		}
 *	r11		}<-- pushed by hypercall page
 * rsp->rax		}
 */
SYM_CODE_START(xen_iret)
	UNWIND_HINT_EMPTY
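	/*
	 * The iret hypercall consumes one extra "flags" word on top of the
	 * frame shown above: 0 here, VGCF_in_syscall in xen_sysret64 below.
	 */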
	pushq $0
	jmp hypercall_iret
SYM_CODE_END(xen_iret)

SYM_CODE_START(xen_sysret64)
	UNWIND_HINT_EMPTY
	/*
	 * We're already on the usermode stack at this point, but
	 * still with the kernel gs, so we can easily switch back.
	 *
	 * tss.sp2 is scratch space.
	 */
	movq %rsp, PER_CPU_VAR(cpu_tss_rw + TSS_sp2)
	movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp

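	/*
	 * Rebuild a full iret-style frame for the iret hypercall: the user
	 * %rsp was just stashed in tss.sp2, and the SYSCALL entry
	 * convention left the user %rflags in %r11 and the user %rip in
	 * %rcx.
	 */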
	pushq $__USER_DS
	pushq PER_CPU_VAR(cpu_tss_rw + TSS_sp2)
	pushq %r11
	pushq $__USER_CS
	pushq %rcx

	pushq $VGCF_in_syscall
	jmp hypercall_iret
SYM_CODE_END(xen_sysret64)

/*
 * XEN pv doesn't use a trampoline stack: PER_CPU_VAR(cpu_tss_rw + TSS_sp0) is
 * also the kernel stack.  Reusing swapgs_restore_regs_and_return_to_usermode()
 * in XEN pv would move %rsp up to the top of the kernel stack and leave the
 * IRET frame below %rsp, where it could be corrupted if an #NMI interrupts.
 * Having swapgs_restore_regs_and_return_to_usermode() push the IRET frame at
 * the same address would also be pointless.
 */
SYM_CODE_START(xenpv_restore_regs_and_return_to_usermode)
	UNWIND_HINT_REGS
	POP_REGS

	/* stackleak_erase() can work safely on the kernel stack. */
	STACKLEAK_ERASE_NOCLOBBER

	addq	$8, %rsp	/* skip regs->orig_ax */
	jmp xen_iret
SYM_CODE_END(xenpv_restore_regs_and_return_to_usermode)

/*
 * Xen handles syscall callbacks much like ordinary exceptions, which
 * means we have:
 * - kernel gs
 * - kernel rsp
 * - an iret-like stack frame on the stack (including rcx and r11):
 *	ss
 *	rsp
 *	rflags
 *	cs
 *	rip
 *	r11
 * rsp->rcx
 */

/* Normal 64-bit system call target */
SYM_CODE_START(xen_entry_SYSCALL_64)
	UNWIND_HINT_ENTRY
	popq %rcx
	popq %r11

	/*
	 * Neither Xen nor the kernel really knows what the old SS and
	 * CS were.  The kernel expects __USER_DS and __USER_CS, so
	 * report those values even though Xen will guess its own values.
	 */
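	/*
	 * After popping %rcx/%r11, the frame layout is: 0(%rsp) = %rip,
	 * 1*8 = %cs, 2*8 = %rflags, 3*8 = %rsp, 4*8 = %ss.
	 */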
	movq $__USER_DS, 4*8(%rsp)
	movq $__USER_CS, 1*8(%rsp)

	jmp entry_SYSCALL_64_after_hwframe
SYM_CODE_END(xen_entry_SYSCALL_64)

#ifdef CONFIG_IA32_EMULATION

/* 32-bit compat syscall target */
SYM_CODE_START(xen_entry_SYSCALL_compat)
	UNWIND_HINT_ENTRY
	popq %rcx
	popq %r11

	/*
	 * Neither Xen nor the kernel really knows what the old SS and
	 * CS were.  The kernel expects __USER32_DS and __USER32_CS, so
	 * report those values even though Xen will guess its own values.
	 */
	movq $__USER32_DS, 4*8(%rsp)
	movq $__USER32_CS, 1*8(%rsp)

	jmp entry_SYSCALL_compat_after_hwframe
SYM_CODE_END(xen_entry_SYSCALL_compat)

/* 32-bit compat sysenter target */
SYM_CODE_START(xen_entry_SYSENTER_compat)
	UNWIND_HINT_ENTRY
	/*
	 * NB: Xen is polite and clears TF from EFLAGS for us.  This means
	 * that we don't need to guard against single step exceptions here.
	 */
	popq %rcx
	popq %r11

	/*
	 * Neither Xen nor the kernel really knows what the old SS and
	 * CS were.  The kernel expects __USER32_DS and __USER32_CS, so
	 * report those values even though Xen will guess its own values.
	 */
	movq $__USER32_DS, 4*8(%rsp)
	movq $__USER32_CS, 1*8(%rsp)

	jmp entry_SYSENTER_compat_after_hwframe
SYM_CODE_END(xen_entry_SYSENTER_compat)

#else /* !CONFIG_IA32_EMULATION */

SYM_CODE_START(xen_entry_SYSCALL_compat)
SYM_CODE_START(xen_entry_SYSENTER_compat)
	UNWIND_HINT_ENTRY
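	/*
	 * Without IA32 emulation, compat syscall/sysenter entries simply
	 * fail: return -ENOSYS and go back to the caller via the iret
	 * hypercall.
	 */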
	lea 16(%rsp), %rsp	/* strip %rcx, %r11 */
	mov $-ENOSYS, %rax
	pushq $0
	jmp hypercall_iret
SYM_CODE_END(xen_entry_SYSENTER_compat)
SYM_CODE_END(xen_entry_SYSCALL_compat)

#endif	/* CONFIG_IA32_EMULATION */