/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Asm versions of Xen pv-ops, suitable for direct use.
 *
 * We only bother with direct forms (ie, vcpu in percpu data) of the
 * operations here; the indirect forms are better handled in C.
 */

#include <asm/errno.h>
#include <asm/asm-offsets.h>
#include <asm/percpu.h>
#include <asm/processor-flags.h>
#include <asm/segment.h>
#include <asm/thread_info.h>
#include <asm/asm.h>
#include <asm/frame.h>
#include <asm/unwind_hints.h>

#include <xen/interface/xen.h>

#include <linux/init.h>
#include <linux/linkage.h>
#include <../entry/calling.h>

/*
 * Enable events.  This clears the event mask and then tests the pending
 * event status.  If there are pending events, enter the hypervisor to get
 * them handled.
 */
SYM_FUNC_START(xen_irq_enable_direct)
	FRAME_BEGIN
	/* Unmask events */
	movb $0, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask

	/*
	 * Being preempted here doesn't matter, because preemption will
	 * itself deal with any pending interrupts.  The pending check may
	 * end up being run on the wrong CPU, but that doesn't hurt.
	 */

	/* Test for pending */
	testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
	jz 1f

	call check_events
1:
	FRAME_END
	RET
SYM_FUNC_END(xen_irq_enable_direct)


/*
 * Disabling events is simply a matter of making the event mask
 * non-zero.
 */
SYM_FUNC_START(xen_irq_disable_direct)
	movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
	RET
SYM_FUNC_END(xen_irq_disable_direct)

/*
 * (xen_)save_fl is used to get the current interrupt enable status.
 * Callers expect the status to be in X86_EFLAGS_IF, and other bits
 * may be set in the return value.  We take advantage of this by
 * making sure that X86_EFLAGS_IF has the right value (and other bits
 * in that byte are 0), but other bits in the return value are
 * undefined.  We need to toggle the state of the bit, because Xen and
 * x86 use opposite senses (mask vs enable).
 */
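/*
 * The testb below sets ZF when the mask byte is zero, i.e. when events
 * are enabled.  setz then writes that 0/1 into %ah (bits 8-15 of the
 * return value), and addb %ah, %ah doubles it, moving the 1 from bit 8
 * into bit 9, which is exactly X86_EFLAGS_IF.
 */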
SYM_FUNC_START(xen_save_fl_direct)
	testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
	setz %ah
	addb %ah, %ah
	RET
SYM_FUNC_END(xen_save_fl_direct)


/*
 * In principle the caller should be passing us a value returned from
 * xen_save_fl_direct, but for robustness' sake we test only the
 * X86_EFLAGS_IF flag rather than the whole byte.  After setting the
 * interrupt mask state, this checks for unmasked pending events and
 * enters the hypervisor to get them delivered if there are any.
 */
SYM_FUNC_START(xen_restore_fl_direct)
	FRAME_BEGIN
	testw $X86_EFLAGS_IF, %di
	setz PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
	/*
	 * Being preempted here doesn't matter, because preemption will
	 * itself deal with any pending interrupts.  The pending check may
	 * end up being run on the wrong CPU, but that doesn't hurt.
	 */

	/*
	 * Check for unmasked and pending: evtchn_upcall_pending and
	 * evtchn_upcall_mask are adjacent bytes, so comparing the 16-bit
	 * word against 0x0001 tests pending == 1 and mask == 0 at once.
	 */
	cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
	jnz 1f
	call check_events
1:
	FRAME_END
	RET
SYM_FUNC_END(xen_restore_fl_direct)


/*
 * Force an event check by making a hypercall, but preserve regs
 * before making the call.
 */
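/*
 * Only the registers that a C function may clobber need saving here:
 * %rax, %rcx, %rdx, %rsi, %rdi and %r8-%r11.  The callee-saved
 * registers survive the call to xen_force_evtchn_callback unchanged.
 */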
SYM_FUNC_START(check_events)
	FRAME_BEGIN
	push %rax
	push %rcx
	push %rdx
	push %rsi
	push %rdi
	push %r8
	push %r9
	push %r10
	push %r11
	call xen_force_evtchn_callback
	pop %r11
	pop %r10
	pop %r9
	pop %r8
	pop %rdi
	pop %rsi
	pop %rdx
	pop %rcx
	pop %rax
	FRAME_END
	RET
SYM_FUNC_END(check_events)

SYM_FUNC_START(xen_read_cr2)
	FRAME_BEGIN
	_ASM_MOV PER_CPU_VAR(xen_vcpu), %_ASM_AX
	_ASM_MOV XEN_vcpu_info_arch_cr2(%_ASM_AX), %_ASM_AX
	FRAME_END
	RET
SYM_FUNC_END(xen_read_cr2);

SYM_FUNC_START(xen_read_cr2_direct)
	FRAME_BEGIN
	_ASM_MOV PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_arch_cr2, %_ASM_AX
	FRAME_END
	RET
SYM_FUNC_END(xen_read_cr2_direct);

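/*
 * Xen delivers exceptions to a 64-bit PV guest with %rcx and %r11
 * pushed on top of the usual hardware iret frame (see the syscall
 * callback comment further down).  Each xen_* trap stub strips those
 * two words and then jumps to the corresponding native entry point,
 * which expects a plain hardware frame.
 */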
.macro xen_pv_trap name
SYM_CODE_START(xen_\name)
	UNWIND_HINT_ENTRY
	pop %rcx
	pop %r11
	jmp \name
SYM_CODE_END(xen_\name)
_ASM_NOKPROBE(xen_\name)
.endm

xen_pv_trap asm_exc_divide_error
xen_pv_trap asm_xenpv_exc_debug
xen_pv_trap asm_exc_int3
xen_pv_trap asm_xenpv_exc_nmi
xen_pv_trap asm_exc_overflow
xen_pv_trap asm_exc_bounds
xen_pv_trap asm_exc_invalid_op
xen_pv_trap asm_exc_device_not_available
xen_pv_trap asm_exc_double_fault
xen_pv_trap asm_exc_coproc_segment_overrun
xen_pv_trap asm_exc_invalid_tss
xen_pv_trap asm_exc_segment_not_present
xen_pv_trap asm_exc_stack_segment
xen_pv_trap asm_exc_general_protection
xen_pv_trap asm_exc_page_fault
xen_pv_trap asm_exc_spurious_interrupt_bug
xen_pv_trap asm_exc_coprocessor_error
xen_pv_trap asm_exc_alignment_check
#ifdef CONFIG_X86_MCE
xen_pv_trap asm_exc_machine_check
#endif /* CONFIG_X86_MCE */
xen_pv_trap asm_exc_simd_coprocessor_error
#ifdef CONFIG_IA32_EMULATION
xen_pv_trap entry_INT80_compat
#endif
xen_pv_trap asm_exc_xen_unknown_trap
xen_pv_trap asm_exc_xen_hypervisor_callback

	__INIT
SYM_CODE_START(xen_early_idt_handler_array)
	i = 0
	.rept NUM_EXCEPTION_VECTORS
	UNWIND_HINT_EMPTY
	pop %rcx
	pop %r11
	jmp early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE
	i = i + 1
	/* Pad each stub to XEN_EARLY_IDT_HANDLER_SIZE with int3 (0xcc). */
	.fill xen_early_idt_handler_array + i*XEN_EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc
	.endr
SYM_CODE_END(xen_early_idt_handler_array)
	__FINIT

hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32
/*
 * Xen64 iret frame:
 *
 *	ss
 *	rsp
 *	rflags
 *	cs
 *	rip		<-- standard iret frame
 *
 *	flags
 *
 *	rcx		}
 *	r11		}<-- pushed by hypercall page
 * rsp->rax		}
 */
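/*
 * The word pushed immediately before jumping to hypercall_iret is the
 * "flags" slot of the frame above.  xen_iret passes 0; xen_sysret64
 * passes VGCF_in_syscall, which marks the frame as a syscall return so
 * the hypervisor can take its faster sysret-style exit path.
 */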
SYM_CODE_START(xen_iret)
	UNWIND_HINT_EMPTY
	pushq $0
	jmp hypercall_iret
SYM_CODE_END(xen_iret)

SYM_CODE_START(xen_sysret64)
	UNWIND_HINT_EMPTY
	/*
	 * We're already on the usermode stack at this point, but
	 * still with the kernel gs, so we can easily switch back.
	 *
	 * tss.sp2 is scratch space.
	 */
	movq %rsp, PER_CPU_VAR(cpu_tss_rw + TSS_sp2)
	movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp

	pushq $__USER_DS
	pushq PER_CPU_VAR(cpu_tss_rw + TSS_sp2)
	pushq %r11
	pushq $__USER_CS
	pushq %rcx

	pushq $VGCF_in_syscall
	jmp hypercall_iret
SYM_CODE_END(xen_sysret64)

/*
 * Xen pv doesn't use a trampoline stack: PER_CPU_VAR(cpu_tss_rw + TSS_sp0)
 * is also the kernel stack.  Reusing swapgs_restore_regs_and_return_to_usermode()
 * under Xen pv would move %rsp up to the top of the kernel stack and leave
 * the IRET frame below %rsp, where it could be corrupted if an #NMI arrived.
 * Having swapgs_restore_regs_and_return_to_usermode() push the IRET frame at
 * the same address would also be pointless.
 */
SYM_CODE_START(xenpv_restore_regs_and_return_to_usermode)
	UNWIND_HINT_REGS
	POP_REGS

	/* stackleak_erase() can work safely on the kernel stack. */
	STACKLEAK_ERASE_NOCLOBBER

	addq $8, %rsp	/* skip regs->orig_ax */
	jmp xen_iret
SYM_CODE_END(xenpv_restore_regs_and_return_to_usermode)

/*
 * Xen handles syscall callbacks much like ordinary exceptions, which
 * means we have:
 * - kernel gs
 * - kernel rsp
 * - an iret-like stack frame on the stack (including rcx and r11):
 *	ss
 *	rsp
 *	rflags
 *	cs
 *	rip
 *	r11
 * rsp->rcx
 */

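/*
 * After the entry stubs below pop %rcx and %r11, %rsp points at the
 * saved rip, so cs sits at 1*8(%rsp) and ss at 4*8(%rsp).  Those are
 * the two slots each stub rewrites with the selector values the kernel
 * expects.
 */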
/* Normal 64-bit system call target */
SYM_CODE_START(xen_entry_SYSCALL_64)
	UNWIND_HINT_ENTRY
	popq %rcx
	popq %r11

	/*
	 * Neither Xen nor the kernel really knows what the old SS and
	 * CS were.  The kernel expects __USER_DS and __USER_CS, so
	 * report those values even though Xen will guess its own values.
	 */
	movq $__USER_DS, 4*8(%rsp)
	movq $__USER_CS, 1*8(%rsp)

	jmp entry_SYSCALL_64_after_hwframe
SYM_CODE_END(xen_entry_SYSCALL_64)

#ifdef CONFIG_IA32_EMULATION

/* 32-bit compat syscall target */
SYM_CODE_START(xen_entry_SYSCALL_compat)
	UNWIND_HINT_ENTRY
	popq %rcx
	popq %r11

	/*
	 * Neither Xen nor the kernel really knows what the old SS and
	 * CS were.  The kernel expects __USER32_DS and __USER32_CS, so
	 * report those values even though Xen will guess its own values.
	 */
	movq $__USER32_DS, 4*8(%rsp)
	movq $__USER32_CS, 1*8(%rsp)

	jmp entry_SYSCALL_compat_after_hwframe
SYM_CODE_END(xen_entry_SYSCALL_compat)

/* 32-bit compat sysenter target */
SYM_CODE_START(xen_entry_SYSENTER_compat)
	UNWIND_HINT_ENTRY
	/*
	 * NB: Xen is polite and clears TF from EFLAGS for us.  This means
	 * that we don't need to guard against single step exceptions here.
	 */
	popq %rcx
	popq %r11

	/*
	 * Neither Xen nor the kernel really knows what the old SS and
	 * CS were.  The kernel expects __USER32_DS and __USER32_CS, so
	 * report those values even though Xen will guess its own values.
	 */
	movq $__USER32_DS, 4*8(%rsp)
	movq $__USER32_CS, 1*8(%rsp)

	jmp entry_SYSENTER_compat_after_hwframe
SYM_CODE_END(xen_entry_SYSENTER_compat)

#else /* !CONFIG_IA32_EMULATION */

/*
 * 32-bit compat entry points when IA32 emulation is disabled: discard
 * the %rcx/%r11 words pushed by Xen, fail the call with -ENOSYS and
 * return straight to user mode via the iret hypercall.
 */
SYM_CODE_START(xen_entry_SYSCALL_compat)
SYM_CODE_START(xen_entry_SYSENTER_compat)
	UNWIND_HINT_ENTRY
	lea 16(%rsp), %rsp	/* strip %rcx, %r11 */
	mov $-ENOSYS, %rax
	pushq $0
	jmp hypercall_iret
SYM_CODE_END(xen_entry_SYSENTER_compat)
SYM_CODE_END(xen_entry_SYSCALL_compat)

#endif /* CONFIG_IA32_EMULATION */