1*4882a593Smuzhiyun /* SPDX-License-Identifier: GPL-2.0 */ 2*4882a593Smuzhiyun #include <linux/jump_label.h> 3*4882a593Smuzhiyun #include <asm/unwind_hints.h> 4*4882a593Smuzhiyun #include <asm/cpufeatures.h> 5*4882a593Smuzhiyun #include <asm/page_types.h> 6*4882a593Smuzhiyun #include <asm/percpu.h> 7*4882a593Smuzhiyun #include <asm/asm-offsets.h> 8*4882a593Smuzhiyun #include <asm/processor-flags.h> 9*4882a593Smuzhiyun #include <asm/msr.h> 10*4882a593Smuzhiyun #include <asm/nospec-branch.h> 11*4882a593Smuzhiyun 12*4882a593Smuzhiyun /* 13*4882a593Smuzhiyun 14*4882a593Smuzhiyun x86 function call convention, 64-bit: 15*4882a593Smuzhiyun ------------------------------------- 16*4882a593Smuzhiyun arguments | callee-saved | extra caller-saved | return 17*4882a593Smuzhiyun [callee-clobbered] | | [callee-clobbered] | 18*4882a593Smuzhiyun --------------------------------------------------------------------------- 19*4882a593Smuzhiyun rdi rsi rdx rcx r8-9 | rbx rbp [*] r12-15 | r10-11 | rax, rdx [**] 20*4882a593Smuzhiyun 21*4882a593Smuzhiyun ( rsp is obviously invariant across normal function calls. (gcc can 'merge' 22*4882a593Smuzhiyun functions when it sees tail-call optimization possibilities) rflags is 23*4882a593Smuzhiyun clobbered. Leftover arguments are passed over the stack frame.) 24*4882a593Smuzhiyun 25*4882a593Smuzhiyun [*] In the frame-pointers case rbp is fixed to the stack frame. 26*4882a593Smuzhiyun 27*4882a593Smuzhiyun [**] for struct return values wider than 64 bits the return convention is a 28*4882a593Smuzhiyun bit more complex: up to 128 bits width we return small structures 29*4882a593Smuzhiyun straight in rax, rdx. For structures larger than that (3 words or 30*4882a593Smuzhiyun larger) the caller puts a pointer to an on-stack return struct 31*4882a593Smuzhiyun [allocated in the caller's stack frame] into the first argument - i.e. 32*4882a593Smuzhiyun into rdi. All other arguments shift up by one in this case. 33*4882a593Smuzhiyun Fortunately this case is rare in the kernel. 34*4882a593Smuzhiyun 35*4882a593Smuzhiyun For 32-bit we have the following conventions - kernel is built with 36*4882a593Smuzhiyun -mregparm=3 and -freg-struct-return: 37*4882a593Smuzhiyun 38*4882a593Smuzhiyun x86 function calling convention, 32-bit: 39*4882a593Smuzhiyun ---------------------------------------- 40*4882a593Smuzhiyun arguments | callee-saved | extra caller-saved | return 41*4882a593Smuzhiyun [callee-clobbered] | | [callee-clobbered] | 42*4882a593Smuzhiyun ------------------------------------------------------------------------- 43*4882a593Smuzhiyun eax edx ecx | ebx edi esi ebp [*] | <none> | eax, edx [**] 44*4882a593Smuzhiyun 45*4882a593Smuzhiyun ( here too esp is obviously invariant across normal function calls. eflags 46*4882a593Smuzhiyun is clobbered. Leftover arguments are passed over the stack frame. ) 47*4882a593Smuzhiyun 48*4882a593Smuzhiyun [*] In the frame-pointers case ebp is fixed to the stack frame. 49*4882a593Smuzhiyun 50*4882a593Smuzhiyun [**] We build with -freg-struct-return, which on 32-bit means similar 51*4882a593Smuzhiyun semantics as on 64-bit: edx can be used for a second return value 52*4882a593Smuzhiyun (i.e. covering integer and structure sizes up to 64 bits) - after that 53*4882a593Smuzhiyun it gets more complex and more expensive: 3-word or larger struct returns 54*4882a593Smuzhiyun get done in the caller's frame and the pointer to the return struct goes 55*4882a593Smuzhiyun into regparm0, i.e. eax - the other arguments shift up and the 56*4882a593Smuzhiyun function's register parameters degenerate to regparm=2 in essence. 57*4882a593Smuzhiyun 58*4882a593Smuzhiyun */ 59*4882a593Smuzhiyun 60*4882a593Smuzhiyun #ifdef CONFIG_X86_64 61*4882a593Smuzhiyun 62*4882a593Smuzhiyun /* 63*4882a593Smuzhiyun * 64-bit system call stack frame layout defines and helpers, 64*4882a593Smuzhiyun * for assembly code: 65*4882a593Smuzhiyun */ 66*4882a593Smuzhiyun 67*4882a593Smuzhiyun /* The layout forms the "struct pt_regs" on the stack: */ 68*4882a593Smuzhiyun /* 69*4882a593Smuzhiyun * C ABI says these regs are callee-preserved. They aren't saved on kernel entry 70*4882a593Smuzhiyun * unless syscall needs a complete, fully filled "struct pt_regs". 71*4882a593Smuzhiyun */ 72*4882a593Smuzhiyun #define R15 0*8 73*4882a593Smuzhiyun #define R14 1*8 74*4882a593Smuzhiyun #define R13 2*8 75*4882a593Smuzhiyun #define R12 3*8 76*4882a593Smuzhiyun #define RBP 4*8 77*4882a593Smuzhiyun #define RBX 5*8 78*4882a593Smuzhiyun /* These regs are callee-clobbered. Always saved on kernel entry. */ 79*4882a593Smuzhiyun #define R11 6*8 80*4882a593Smuzhiyun #define R10 7*8 81*4882a593Smuzhiyun #define R9 8*8 82*4882a593Smuzhiyun #define R8 9*8 83*4882a593Smuzhiyun #define RAX 10*8 84*4882a593Smuzhiyun #define RCX 11*8 85*4882a593Smuzhiyun #define RDX 12*8 86*4882a593Smuzhiyun #define RSI 13*8 87*4882a593Smuzhiyun #define RDI 14*8 88*4882a593Smuzhiyun /* 89*4882a593Smuzhiyun * On syscall entry, this is syscall#. On CPU exception, this is error code. 90*4882a593Smuzhiyun * On hw interrupt, it's IRQ number: 91*4882a593Smuzhiyun */ 92*4882a593Smuzhiyun #define ORIG_RAX 15*8 93*4882a593Smuzhiyun /* Return frame for iretq */ 94*4882a593Smuzhiyun #define RIP 16*8 95*4882a593Smuzhiyun #define CS 17*8 96*4882a593Smuzhiyun #define EFLAGS 18*8 97*4882a593Smuzhiyun #define RSP 19*8 98*4882a593Smuzhiyun #define SS 20*8 99*4882a593Smuzhiyun 100*4882a593Smuzhiyun #define SIZEOF_PTREGS 21*8 101*4882a593Smuzhiyun 102*4882a593Smuzhiyun .macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax save_ret=0 103*4882a593Smuzhiyun .if \save_ret 104*4882a593Smuzhiyun pushq %rsi /* pt_regs->si */ 105*4882a593Smuzhiyun movq 8(%rsp), %rsi /* temporarily store the return address in %rsi */ 106*4882a593Smuzhiyun movq %rdi, 8(%rsp) /* pt_regs->di (overwriting original return address) */ 107*4882a593Smuzhiyun .else 108*4882a593Smuzhiyun pushq %rdi /* pt_regs->di */ 109*4882a593Smuzhiyun pushq %rsi /* pt_regs->si */ 110*4882a593Smuzhiyun .endif 111*4882a593Smuzhiyun pushq \rdx /* pt_regs->dx */ 112*4882a593Smuzhiyun pushq %rcx /* pt_regs->cx */ 113*4882a593Smuzhiyun pushq \rax /* pt_regs->ax */ 114*4882a593Smuzhiyun pushq %r8 /* pt_regs->r8 */ 115*4882a593Smuzhiyun pushq %r9 /* pt_regs->r9 */ 116*4882a593Smuzhiyun pushq %r10 /* pt_regs->r10 */ 117*4882a593Smuzhiyun pushq %r11 /* pt_regs->r11 */ 118*4882a593Smuzhiyun pushq %rbx /* pt_regs->rbx */ 119*4882a593Smuzhiyun pushq %rbp /* pt_regs->rbp */ 120*4882a593Smuzhiyun pushq %r12 /* pt_regs->r12 */ 121*4882a593Smuzhiyun pushq %r13 /* pt_regs->r13 */ 122*4882a593Smuzhiyun pushq %r14 /* pt_regs->r14 */ 123*4882a593Smuzhiyun pushq %r15 /* pt_regs->r15 */ 124*4882a593Smuzhiyun UNWIND_HINT_REGS 125*4882a593Smuzhiyun 126*4882a593Smuzhiyun .if \save_ret 127*4882a593Smuzhiyun pushq %rsi /* return address on top of stack */ 128*4882a593Smuzhiyun .endif 129*4882a593Smuzhiyun 130*4882a593Smuzhiyun /* 131*4882a593Smuzhiyun * Sanitize registers of values that a speculation attack might 132*4882a593Smuzhiyun * otherwise want to exploit. The lower registers are likely clobbered 133*4882a593Smuzhiyun * well before they could be put to use in a speculative execution 134*4882a593Smuzhiyun * gadget. 135*4882a593Smuzhiyun */ 136*4882a593Smuzhiyun xorl %edx, %edx /* nospec dx */ 137*4882a593Smuzhiyun xorl %ecx, %ecx /* nospec cx */ 138*4882a593Smuzhiyun xorl %r8d, %r8d /* nospec r8 */ 139*4882a593Smuzhiyun xorl %r9d, %r9d /* nospec r9 */ 140*4882a593Smuzhiyun xorl %r10d, %r10d /* nospec r10 */ 141*4882a593Smuzhiyun xorl %r11d, %r11d /* nospec r11 */ 142*4882a593Smuzhiyun xorl %ebx, %ebx /* nospec rbx */ 143*4882a593Smuzhiyun xorl %ebp, %ebp /* nospec rbp */ 144*4882a593Smuzhiyun xorl %r12d, %r12d /* nospec r12 */ 145*4882a593Smuzhiyun xorl %r13d, %r13d /* nospec r13 */ 146*4882a593Smuzhiyun xorl %r14d, %r14d /* nospec r14 */ 147*4882a593Smuzhiyun xorl %r15d, %r15d /* nospec r15 */ 148*4882a593Smuzhiyun 149*4882a593Smuzhiyun .endm 150*4882a593Smuzhiyun 151*4882a593Smuzhiyun .macro POP_REGS pop_rdi=1 152*4882a593Smuzhiyun popq %r15 153*4882a593Smuzhiyun popq %r14 154*4882a593Smuzhiyun popq %r13 155*4882a593Smuzhiyun popq %r12 156*4882a593Smuzhiyun popq %rbp 157*4882a593Smuzhiyun popq %rbx 158*4882a593Smuzhiyun popq %r11 159*4882a593Smuzhiyun popq %r10 160*4882a593Smuzhiyun popq %r9 161*4882a593Smuzhiyun popq %r8 162*4882a593Smuzhiyun popq %rax 163*4882a593Smuzhiyun popq %rcx 164*4882a593Smuzhiyun popq %rdx 165*4882a593Smuzhiyun popq %rsi 166*4882a593Smuzhiyun .if \pop_rdi 167*4882a593Smuzhiyun popq %rdi 168*4882a593Smuzhiyun .endif 169*4882a593Smuzhiyun .endm 170*4882a593Smuzhiyun 171*4882a593Smuzhiyun #ifdef CONFIG_PAGE_TABLE_ISOLATION 172*4882a593Smuzhiyun 173*4882a593Smuzhiyun /* 174*4882a593Smuzhiyun * PAGE_TABLE_ISOLATION PGDs are 8k. Flip bit 12 to switch between the two 175*4882a593Smuzhiyun * halves: 176*4882a593Smuzhiyun */ 177*4882a593Smuzhiyun #define PTI_USER_PGTABLE_BIT PAGE_SHIFT 178*4882a593Smuzhiyun #define PTI_USER_PGTABLE_MASK (1 << PTI_USER_PGTABLE_BIT) 179*4882a593Smuzhiyun #define PTI_USER_PCID_BIT X86_CR3_PTI_PCID_USER_BIT 180*4882a593Smuzhiyun #define PTI_USER_PCID_MASK (1 << PTI_USER_PCID_BIT) 181*4882a593Smuzhiyun #define PTI_USER_PGTABLE_AND_PCID_MASK (PTI_USER_PCID_MASK | PTI_USER_PGTABLE_MASK) 182*4882a593Smuzhiyun 183*4882a593Smuzhiyun .macro SET_NOFLUSH_BIT reg:req 184*4882a593Smuzhiyun bts $X86_CR3_PCID_NOFLUSH_BIT, \reg 185*4882a593Smuzhiyun .endm 186*4882a593Smuzhiyun 187*4882a593Smuzhiyun .macro ADJUST_KERNEL_CR3 reg:req 188*4882a593Smuzhiyun ALTERNATIVE "", "SET_NOFLUSH_BIT \reg", X86_FEATURE_PCID 189*4882a593Smuzhiyun /* Clear PCID and "PAGE_TABLE_ISOLATION bit", point CR3 at kernel pagetables: */ 190*4882a593Smuzhiyun andq $(~PTI_USER_PGTABLE_AND_PCID_MASK), \reg 191*4882a593Smuzhiyun .endm 192*4882a593Smuzhiyun 193*4882a593Smuzhiyun .macro SWITCH_TO_KERNEL_CR3 scratch_reg:req 194*4882a593Smuzhiyun ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI 195*4882a593Smuzhiyun mov %cr3, \scratch_reg 196*4882a593Smuzhiyun ADJUST_KERNEL_CR3 \scratch_reg 197*4882a593Smuzhiyun mov \scratch_reg, %cr3 198*4882a593Smuzhiyun .Lend_\@: 199*4882a593Smuzhiyun .endm 200*4882a593Smuzhiyun 201*4882a593Smuzhiyun #define THIS_CPU_user_pcid_flush_mask \ 202*4882a593Smuzhiyun PER_CPU_VAR(cpu_tlbstate) + TLB_STATE_user_pcid_flush_mask 203*4882a593Smuzhiyun 204*4882a593Smuzhiyun .macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req 205*4882a593Smuzhiyun ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI 206*4882a593Smuzhiyun mov %cr3, \scratch_reg 207*4882a593Smuzhiyun 208*4882a593Smuzhiyun ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID 209*4882a593Smuzhiyun 210*4882a593Smuzhiyun /* 211*4882a593Smuzhiyun * Test if the ASID needs a flush. 212*4882a593Smuzhiyun */ 213*4882a593Smuzhiyun movq \scratch_reg, \scratch_reg2 214*4882a593Smuzhiyun andq $(0x7FF), \scratch_reg /* mask ASID */ 215*4882a593Smuzhiyun bt \scratch_reg, THIS_CPU_user_pcid_flush_mask 216*4882a593Smuzhiyun jnc .Lnoflush_\@ 217*4882a593Smuzhiyun 218*4882a593Smuzhiyun /* Flush needed, clear the bit */ 219*4882a593Smuzhiyun btr \scratch_reg, THIS_CPU_user_pcid_flush_mask 220*4882a593Smuzhiyun movq \scratch_reg2, \scratch_reg 221*4882a593Smuzhiyun jmp .Lwrcr3_pcid_\@ 222*4882a593Smuzhiyun 223*4882a593Smuzhiyun .Lnoflush_\@: 224*4882a593Smuzhiyun movq \scratch_reg2, \scratch_reg 225*4882a593Smuzhiyun SET_NOFLUSH_BIT \scratch_reg 226*4882a593Smuzhiyun 227*4882a593Smuzhiyun .Lwrcr3_pcid_\@: 228*4882a593Smuzhiyun /* Flip the ASID to the user version */ 229*4882a593Smuzhiyun orq $(PTI_USER_PCID_MASK), \scratch_reg 230*4882a593Smuzhiyun 231*4882a593Smuzhiyun .Lwrcr3_\@: 232*4882a593Smuzhiyun /* Flip the PGD to the user version */ 233*4882a593Smuzhiyun orq $(PTI_USER_PGTABLE_MASK), \scratch_reg 234*4882a593Smuzhiyun mov \scratch_reg, %cr3 235*4882a593Smuzhiyun .Lend_\@: 236*4882a593Smuzhiyun .endm 237*4882a593Smuzhiyun 238*4882a593Smuzhiyun .macro SWITCH_TO_USER_CR3_STACK scratch_reg:req 239*4882a593Smuzhiyun pushq %rax 240*4882a593Smuzhiyun SWITCH_TO_USER_CR3_NOSTACK scratch_reg=\scratch_reg scratch_reg2=%rax 241*4882a593Smuzhiyun popq %rax 242*4882a593Smuzhiyun .endm 243*4882a593Smuzhiyun 244*4882a593Smuzhiyun .macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req 245*4882a593Smuzhiyun ALTERNATIVE "jmp .Ldone_\@", "", X86_FEATURE_PTI 246*4882a593Smuzhiyun movq %cr3, \scratch_reg 247*4882a593Smuzhiyun movq \scratch_reg, \save_reg 248*4882a593Smuzhiyun /* 249*4882a593Smuzhiyun * Test the user pagetable bit. If set, then the user page tables 250*4882a593Smuzhiyun * are active. If clear CR3 already has the kernel page table 251*4882a593Smuzhiyun * active. 252*4882a593Smuzhiyun */ 253*4882a593Smuzhiyun bt $PTI_USER_PGTABLE_BIT, \scratch_reg 254*4882a593Smuzhiyun jnc .Ldone_\@ 255*4882a593Smuzhiyun 256*4882a593Smuzhiyun ADJUST_KERNEL_CR3 \scratch_reg 257*4882a593Smuzhiyun movq \scratch_reg, %cr3 258*4882a593Smuzhiyun 259*4882a593Smuzhiyun .Ldone_\@: 260*4882a593Smuzhiyun .endm 261*4882a593Smuzhiyun 262*4882a593Smuzhiyun .macro RESTORE_CR3 scratch_reg:req save_reg:req 263*4882a593Smuzhiyun ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI 264*4882a593Smuzhiyun 265*4882a593Smuzhiyun ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID 266*4882a593Smuzhiyun 267*4882a593Smuzhiyun /* 268*4882a593Smuzhiyun * KERNEL pages can always resume with NOFLUSH as we do 269*4882a593Smuzhiyun * explicit flushes. 270*4882a593Smuzhiyun */ 271*4882a593Smuzhiyun bt $PTI_USER_PGTABLE_BIT, \save_reg 272*4882a593Smuzhiyun jnc .Lnoflush_\@ 273*4882a593Smuzhiyun 274*4882a593Smuzhiyun /* 275*4882a593Smuzhiyun * Check if there's a pending flush for the user ASID we're 276*4882a593Smuzhiyun * about to set. 277*4882a593Smuzhiyun */ 278*4882a593Smuzhiyun movq \save_reg, \scratch_reg 279*4882a593Smuzhiyun andq $(0x7FF), \scratch_reg 280*4882a593Smuzhiyun bt \scratch_reg, THIS_CPU_user_pcid_flush_mask 281*4882a593Smuzhiyun jnc .Lnoflush_\@ 282*4882a593Smuzhiyun 283*4882a593Smuzhiyun btr \scratch_reg, THIS_CPU_user_pcid_flush_mask 284*4882a593Smuzhiyun jmp .Lwrcr3_\@ 285*4882a593Smuzhiyun 286*4882a593Smuzhiyun .Lnoflush_\@: 287*4882a593Smuzhiyun SET_NOFLUSH_BIT \save_reg 288*4882a593Smuzhiyun 289*4882a593Smuzhiyun .Lwrcr3_\@: 290*4882a593Smuzhiyun /* 291*4882a593Smuzhiyun * The CR3 write could be avoided when not changing its value, 292*4882a593Smuzhiyun * but would require a CR3 read *and* a scratch register. 293*4882a593Smuzhiyun */ 294*4882a593Smuzhiyun movq \save_reg, %cr3 295*4882a593Smuzhiyun .Lend_\@: 296*4882a593Smuzhiyun .endm 297*4882a593Smuzhiyun 298*4882a593Smuzhiyun #else /* CONFIG_PAGE_TABLE_ISOLATION=n: */ 299*4882a593Smuzhiyun 300*4882a593Smuzhiyun .macro SWITCH_TO_KERNEL_CR3 scratch_reg:req 301*4882a593Smuzhiyun .endm 302*4882a593Smuzhiyun .macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req 303*4882a593Smuzhiyun .endm 304*4882a593Smuzhiyun .macro SWITCH_TO_USER_CR3_STACK scratch_reg:req 305*4882a593Smuzhiyun .endm 306*4882a593Smuzhiyun .macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req 307*4882a593Smuzhiyun .endm 308*4882a593Smuzhiyun .macro RESTORE_CR3 scratch_reg:req save_reg:req 309*4882a593Smuzhiyun .endm 310*4882a593Smuzhiyun 311*4882a593Smuzhiyun #endif 312*4882a593Smuzhiyun 313*4882a593Smuzhiyun /* 314*4882a593Smuzhiyun * IBRS kernel mitigation for Spectre_v2. 315*4882a593Smuzhiyun * 316*4882a593Smuzhiyun * Assumes full context is established (PUSH_REGS, CR3 and GS) and it clobbers 317*4882a593Smuzhiyun * the regs it uses (AX, CX, DX). Must be called before the first RET 318*4882a593Smuzhiyun * instruction (NOTE! UNTRAIN_RET includes a RET instruction) 319*4882a593Smuzhiyun * 320*4882a593Smuzhiyun * The optional argument is used to save/restore the current value, 321*4882a593Smuzhiyun * which is used on the paranoid paths. 322*4882a593Smuzhiyun * 323*4882a593Smuzhiyun * Assumes x86_spec_ctrl_{base,current} to have SPEC_CTRL_IBRS set. 324*4882a593Smuzhiyun */ 325*4882a593Smuzhiyun .macro IBRS_ENTER save_reg 326*4882a593Smuzhiyun #ifdef CONFIG_CPU_IBRS_ENTRY 327*4882a593Smuzhiyun ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS 328*4882a593Smuzhiyun movl $MSR_IA32_SPEC_CTRL, %ecx 329*4882a593Smuzhiyun 330*4882a593Smuzhiyun .ifnb \save_reg 331*4882a593Smuzhiyun rdmsr 332*4882a593Smuzhiyun shl $32, %rdx 333*4882a593Smuzhiyun or %rdx, %rax 334*4882a593Smuzhiyun mov %rax, \save_reg 335*4882a593Smuzhiyun test $SPEC_CTRL_IBRS, %eax 336*4882a593Smuzhiyun jz .Ldo_wrmsr_\@ 337*4882a593Smuzhiyun lfence 338*4882a593Smuzhiyun jmp .Lend_\@ 339*4882a593Smuzhiyun .Ldo_wrmsr_\@: 340*4882a593Smuzhiyun .endif 341*4882a593Smuzhiyun 342*4882a593Smuzhiyun movq PER_CPU_VAR(x86_spec_ctrl_current), %rdx 343*4882a593Smuzhiyun movl %edx, %eax 344*4882a593Smuzhiyun shr $32, %rdx 345*4882a593Smuzhiyun wrmsr 346*4882a593Smuzhiyun .Lend_\@: 347*4882a593Smuzhiyun #endif 348*4882a593Smuzhiyun .endm 349*4882a593Smuzhiyun 350*4882a593Smuzhiyun /* 351*4882a593Smuzhiyun * Similar to IBRS_ENTER, requires KERNEL GS,CR3 and clobbers (AX, CX, DX) 352*4882a593Smuzhiyun * regs. Must be called after the last RET. 353*4882a593Smuzhiyun */ 354*4882a593Smuzhiyun .macro IBRS_EXIT save_reg 355*4882a593Smuzhiyun #ifdef CONFIG_CPU_IBRS_ENTRY 356*4882a593Smuzhiyun ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS 357*4882a593Smuzhiyun movl $MSR_IA32_SPEC_CTRL, %ecx 358*4882a593Smuzhiyun 359*4882a593Smuzhiyun .ifnb \save_reg 360*4882a593Smuzhiyun mov \save_reg, %rdx 361*4882a593Smuzhiyun .else 362*4882a593Smuzhiyun movq PER_CPU_VAR(x86_spec_ctrl_current), %rdx 363*4882a593Smuzhiyun andl $(~SPEC_CTRL_IBRS), %edx 364*4882a593Smuzhiyun .endif 365*4882a593Smuzhiyun 366*4882a593Smuzhiyun movl %edx, %eax 367*4882a593Smuzhiyun shr $32, %rdx 368*4882a593Smuzhiyun wrmsr 369*4882a593Smuzhiyun .Lend_\@: 370*4882a593Smuzhiyun #endif 371*4882a593Smuzhiyun .endm 372*4882a593Smuzhiyun 373*4882a593Smuzhiyun /* 374*4882a593Smuzhiyun * Mitigate Spectre v1 for conditional swapgs code paths. 375*4882a593Smuzhiyun * 376*4882a593Smuzhiyun * FENCE_SWAPGS_USER_ENTRY is used in the user entry swapgs code path, to 377*4882a593Smuzhiyun * prevent a speculative swapgs when coming from kernel space. 378*4882a593Smuzhiyun * 379*4882a593Smuzhiyun * FENCE_SWAPGS_KERNEL_ENTRY is used in the kernel entry non-swapgs code path, 380*4882a593Smuzhiyun * to prevent the swapgs from getting speculatively skipped when coming from 381*4882a593Smuzhiyun * user space. 382*4882a593Smuzhiyun */ 383*4882a593Smuzhiyun .macro FENCE_SWAPGS_USER_ENTRY 384*4882a593Smuzhiyun ALTERNATIVE "", "lfence", X86_FEATURE_FENCE_SWAPGS_USER 385*4882a593Smuzhiyun .endm 386*4882a593Smuzhiyun .macro FENCE_SWAPGS_KERNEL_ENTRY 387*4882a593Smuzhiyun ALTERNATIVE "", "lfence", X86_FEATURE_FENCE_SWAPGS_KERNEL 388*4882a593Smuzhiyun .endm 389*4882a593Smuzhiyun 390*4882a593Smuzhiyun .macro STACKLEAK_ERASE_NOCLOBBER 391*4882a593Smuzhiyun #ifdef CONFIG_GCC_PLUGIN_STACKLEAK 392*4882a593Smuzhiyun PUSH_AND_CLEAR_REGS 393*4882a593Smuzhiyun call stackleak_erase 394*4882a593Smuzhiyun POP_REGS 395*4882a593Smuzhiyun #endif 396*4882a593Smuzhiyun .endm 397*4882a593Smuzhiyun 398*4882a593Smuzhiyun .macro SAVE_AND_SET_GSBASE scratch_reg:req save_reg:req 399*4882a593Smuzhiyun rdgsbase \save_reg 400*4882a593Smuzhiyun GET_PERCPU_BASE \scratch_reg 401*4882a593Smuzhiyun wrgsbase \scratch_reg 402*4882a593Smuzhiyun .endm 403*4882a593Smuzhiyun 404*4882a593Smuzhiyun #else /* CONFIG_X86_64 */ 405*4882a593Smuzhiyun # undef UNWIND_HINT_IRET_REGS 406*4882a593Smuzhiyun # define UNWIND_HINT_IRET_REGS 407*4882a593Smuzhiyun #endif /* !CONFIG_X86_64 */ 408*4882a593Smuzhiyun 409*4882a593Smuzhiyun .macro STACKLEAK_ERASE 410*4882a593Smuzhiyun #ifdef CONFIG_GCC_PLUGIN_STACKLEAK 411*4882a593Smuzhiyun call stackleak_erase 412*4882a593Smuzhiyun #endif 413*4882a593Smuzhiyun .endm 414*4882a593Smuzhiyun 415*4882a593Smuzhiyun #ifdef CONFIG_SMP 416*4882a593Smuzhiyun 417*4882a593Smuzhiyun /* 418*4882a593Smuzhiyun * CPU/node NR is loaded from the limit (size) field of a special segment 419*4882a593Smuzhiyun * descriptor entry in GDT. 420*4882a593Smuzhiyun */ 421*4882a593Smuzhiyun .macro LOAD_CPU_AND_NODE_SEG_LIMIT reg:req 422*4882a593Smuzhiyun movq $__CPUNODE_SEG, \reg 423*4882a593Smuzhiyun #ifdef __clang__ 424*4882a593Smuzhiyun .long 0xc0030f48 425*4882a593Smuzhiyun #else 426*4882a593Smuzhiyun lsl \reg, \reg 427*4882a593Smuzhiyun #endif 428*4882a593Smuzhiyun .endm 429*4882a593Smuzhiyun 430*4882a593Smuzhiyun /* 431*4882a593Smuzhiyun * Fetch the per-CPU GSBASE value for this processor and put it in @reg. 432*4882a593Smuzhiyun * We normally use %gs for accessing per-CPU data, but we are setting up 433*4882a593Smuzhiyun * %gs here and obviously can not use %gs itself to access per-CPU data. 434*4882a593Smuzhiyun * 435*4882a593Smuzhiyun * Do not use RDPID, because KVM loads guest's TSC_AUX on vm-entry and 436*4882a593Smuzhiyun * may not restore the host's value until the CPU returns to userspace. 437*4882a593Smuzhiyun * Thus the kernel would consume a guest's TSC_AUX if an NMI arrives 438*4882a593Smuzhiyun * while running KVM's run loop. 439*4882a593Smuzhiyun */ 440*4882a593Smuzhiyun .macro GET_PERCPU_BASE reg:req 441*4882a593Smuzhiyun LOAD_CPU_AND_NODE_SEG_LIMIT \reg 442*4882a593Smuzhiyun andq $VDSO_CPUNODE_MASK, \reg 443*4882a593Smuzhiyun movq __per_cpu_offset(, \reg, 8), \reg 444*4882a593Smuzhiyun .endm 445*4882a593Smuzhiyun 446*4882a593Smuzhiyun #else 447*4882a593Smuzhiyun 448*4882a593Smuzhiyun .macro GET_PERCPU_BASE reg:req 449*4882a593Smuzhiyun movq pcpu_unit_offsets(%rip), \reg 450*4882a593Smuzhiyun .endm 451*4882a593Smuzhiyun 452*4882a593Smuzhiyun #endif /* CONFIG_SMP */ 453