/* SPDX-License-Identifier: GPL-2.0 */

#include <linux/stringify.h>
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
#include <asm/export.h>
#include <asm/nospec-branch.h>
#include <asm/unwind_hints.h>
#include <asm/frame.h>

	.section .text.__x86.indirect_thunk

.macro RETPOLINE reg
	ANNOTATE_INTRA_FUNCTION_CALL
	call	.Ldo_rop_\@
.Lspec_trap_\@:
	UNWIND_HINT_EMPTY
	pause
	lfence
	jmp	.Lspec_trap_\@
.Ldo_rop_\@:
	mov	%\reg, (%_ASM_SP)
	UNWIND_HINT_FUNC
	RET
.endm

.macro THUNK reg

	.align RETPOLINE_THUNK_SIZE
SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL)
	UNWIND_HINT_EMPTY

	ALTERNATIVE_2 __stringify(RETPOLINE \reg), \
		      __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_LFENCE, \
		      __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), ALT_NOT(X86_FEATURE_RETPOLINE)

.endm
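/*
 * Illustrative sketch (not assembled here): with X86_FEATURE_RETPOLINE
 * selected, the thunk the macros above generate for e.g. %rax behaves as:
 *
 *	__x86_indirect_thunk_rax:
 *		call	1f		# pushes &0f as the return address
 *	0:	pause
 *		lfence			# speculation trap: the RSB predicts
 *		jmp	0b		# the RET below returns to 0f
 *	1:	mov	%rax, (%rsp)	# replace return address with target
 *		ret			# "return" to *%rax
 *
 * i.e. an indirect jump routed through the return stack, so the indirect
 * branch predictor is never consulted. The LFENCE alternative reduces to
 * "lfence; jmp *%rax; int3", and with retpolines disabled entirely the
 * thunk is a plain "jmp *%rax".
 */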
/*
 * Despite being an assembler file we can't just use .irp here
 * because __KSYM_DEPS__ only uses the C preprocessor and would
 * only see one instance of "__x86_indirect_thunk_\reg" rather
 * than one per register with the correct names. So we do it
 * the simple and nasty way...
 *
 * Worse, you can only have a single EXPORT_SYMBOL per line,
 * and CPP can't insert newlines, so we have to repeat everything
 * at least twice.
 */

#define __EXPORT_THUNK(sym)	_ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym)
#define EXPORT_THUNK(reg)	__EXPORT_THUNK(__x86_indirect_thunk_ ## reg)

	.align RETPOLINE_THUNK_SIZE
SYM_CODE_START(__x86_indirect_thunk_array)

#define GEN(reg) THUNK reg
#include <asm/GEN-for-each-reg.h>
#undef GEN

	.align RETPOLINE_THUNK_SIZE
SYM_CODE_END(__x86_indirect_thunk_array)

#define GEN(reg) EXPORT_THUNK(reg)
#include <asm/GEN-for-each-reg.h>
#undef GEN

/*
 * This function name is magical and is used by -mfunction-return=thunk-extern
 * for the compiler to generate JMPs to it.
 */
#ifdef CONFIG_RETHUNK

	.section .text.__x86.return_thunk

/*
 * Safety details here pertain to the AMD Zen{1,2} microarchitecture:
 * 1) The RET at __x86_return_thunk must be on a 64 byte boundary, for
 *    alignment within the BTB.
 * 2) The instruction at zen_untrain_ret must contain, and not end with,
 *    the 0xc3 byte of the RET.
 * 3) STIBP must be enabled, or SMT disabled, to prevent the sibling thread
 *    from re-poisoning the BTB prediction.
 */
	.align 64
	.skip 63, 0xcc
SYM_FUNC_START_NOALIGN(zen_untrain_ret);

	/*
	 * As executed from zen_untrain_ret, this is:
	 *
	 *   TEST $0xcc, %bl
	 *   LFENCE
	 *   JMP __x86_return_thunk
	 *
	 * Executing the TEST instruction has a side effect of evicting any BTB
	 * prediction (potentially attacker controlled) attached to the RET, as
	 * __x86_return_thunk + 1 isn't an instruction boundary at the moment.
	 */
	.byte	0xf6

	/*
	 * As executed from __x86_return_thunk, this is a plain RET.
	 *
	 * As part of the TEST above, RET is the ModRM byte, and INT3 the imm8.
	 *
	 * We subsequently jump backwards and architecturally execute the RET.
	 * This creates a correct BTB prediction (type=ret), but in the
	 * meantime we suffer Straight Line Speculation (because the type was
	 * no branch) which is halted by the INT3.
	 *
	 * With SMT enabled and STIBP active, a sibling thread cannot poison
	 * RET's prediction to a type of its choice, but can evict the
	 * prediction due to competitive sharing. If the prediction is
	 * evicted, __x86_return_thunk will suffer Straight Line Speculation
	 * which will be contained safely by the INT3.
	 */
SYM_INNER_LABEL(__x86_return_thunk, SYM_L_GLOBAL)
	ret
	int3
SYM_CODE_END(__x86_return_thunk)

	/*
	 * Ensure the TEST decoding / BTB invalidation is complete.
	 */
	lfence

	/*
	 * Jump back and execute the RET in the middle of the TEST instruction.
	 * INT3 is for SLS protection.
	 */
	jmp	__x86_return_thunk
	int3
SYM_FUNC_END(zen_untrain_ret)
__EXPORT_THUNK(zen_untrain_ret)

EXPORT_SYMBOL(__x86_return_thunk)

#endif /* CONFIG_RETHUNK */
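/*
 * Illustrative byte layout of the untraining sequence above (a sketch
 * derived from the TEST r/m8, imm8 encoding, not generated output):
 *
 *	base+63:  f6	zen_untrain_ret: TEST opcode
 *	base+64:  c3	__x86_return_thunk: ModRM of the TEST, or a plain RET
 *	base+65:  cc	imm8 of the TEST, or INT3
 *
 * Decoded starting at zen_untrain_ret this is "test $0xcc, %bl"; decoded
 * starting at __x86_return_thunk (the 64 byte boundary) it is "ret; int3".
 */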