/* SPDX-License-Identifier: GPL-2.0 */

#include <linux/stringify.h>
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
#include <asm/export.h>
#include <asm/nospec-branch.h>
#include <asm/unwind_hints.h>
#include <asm/frame.h>

	.section .text.__x86.indirect_thunk

.macro RETPOLINE reg
	ANNOTATE_INTRA_FUNCTION_CALL
	call    .Ldo_rop_\@
.Lspec_trap_\@:
	UNWIND_HINT_EMPTY
	pause
	lfence
	jmp .Lspec_trap_\@
.Ldo_rop_\@:
	mov     %\reg, (%_ASM_SP)
	UNWIND_HINT_FUNC
	RET
.endm
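
/*
 * Illustrative expansion of RETPOLINE for \reg = rax (a sketch; the real
 * labels are unique per macro invocation):
 *
 *	call .Ldo_rop		; pushes &.Lspec_trap; RSB predicts a return there
 * .Lspec_trap:
 *	pause			; speculation is trapped in this harmless loop
 *	lfence
 *	jmp .Lspec_trap
 * .Ldo_rop:
 *	mov %rax, (%rsp)	; overwrite the return address with the real target
 *	RET			; architecturally jumps to *%rax
 */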

.macro THUNK reg

	.align RETPOLINE_THUNK_SIZE
SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL)
	UNWIND_HINT_EMPTY

	ALTERNATIVE_2 __stringify(RETPOLINE \reg), \
		      __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_LFENCE, \
		      __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), ALT_NOT(X86_FEATURE_RETPOLINE)

.endm
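
/*
 * Sketch of what ALTERNATIVE_2 leaves in place after patching, again
 * assuming \reg = rax:
 *
 *   default:                      RETPOLINE rax  (the RSB trap above)
 *   X86_FEATURE_RETPOLINE_LFENCE: lfence; jmp *%rax; int3
 *   no X86_FEATURE_RETPOLINE:     jmp *%rax
 *
 * ANNOTATE_RETPOLINE_SAFE only marks the site for objtool; it emits no code.
 */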

/*
 * Despite being an assembler file we can't just use .irp here
 * because __KSYM_DEPS__ only uses the C preprocessor and would
 * only see one instance of "__x86_indirect_thunk_\reg" rather
 * than one per register with the correct names. So we do it
 * the simple and nasty way...
 *
 * Worse, you can only have a single EXPORT_SYMBOL per line,
 * and CPP can't insert newlines, so we have to repeat everything
 * at least twice.
 */

#define __EXPORT_THUNK(sym)	_ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym)
#define EXPORT_THUNK(reg)	__EXPORT_THUNK(__x86_indirect_thunk_ ## reg)
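
/*
 * For illustration, EXPORT_THUNK(rax) expands to:
 *
 *	_ASM_NOKPROBE(__x86_indirect_thunk_rax); EXPORT_SYMBOL(__x86_indirect_thunk_rax)
 */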

	.align RETPOLINE_THUNK_SIZE
SYM_CODE_START(__x86_indirect_thunk_array)

#define GEN(reg) THUNK reg
#include <asm/GEN-for-each-reg.h>
#undef GEN
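
/*
 * <asm/GEN-for-each-reg.h> invokes GEN() once per general purpose register
 * (every GPR except the stack pointer), so the include above emits,
 * roughly, on 64-bit:
 *
 *	THUNK rax
 *	THUNK rbx
 *	...
 *	THUNK r15
 */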

	.align RETPOLINE_THUNK_SIZE
SYM_CODE_END(__x86_indirect_thunk_array)

#define GEN(reg) EXPORT_THUNK(reg)
#include <asm/GEN-for-each-reg.h>
#undef GEN

/*
 * This function name is magical and is used by -mfunction-return=thunk-extern
 * for the compiler to generate JMPs to it.
 */
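
/*
 * Illustrative sketch: with -mfunction-return=thunk-extern, a function
 * epilogue that would end in
 *
 *	ret
 *
 * is instead emitted as
 *
 *	jmp __x86_return_thunk
 */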
#ifdef CONFIG_RETHUNK

	.section .text.__x86.return_thunk

/*
 * Safety details here pertain to the AMD Zen{1,2} microarchitecture:
 * 1) The RET at __x86_return_thunk must be on a 64 byte boundary, for
 *    alignment within the BTB.
 * 2) The instruction at zen_untrain_ret must contain, and not
 *    end with, the 0xc3 byte of the RET.
 * 3) STIBP must be enabled, or SMT disabled, to prevent the sibling thread
 *    from re-poisoning the BTB prediction.
 */
	.align 64
	.skip 63, 0xcc
SYM_FUNC_START_NOALIGN(zen_untrain_ret);

	/*
	 * As executed from zen_untrain_ret, this is:
	 *
	 *   TEST $0xcc, %bl
	 *   LFENCE
	 *   JMP __x86_return_thunk
	 *
	 * Executing the TEST instruction has a side effect of evicting any BTB
	 * prediction (potentially attacker controlled) attached to the RET, as
	 * __x86_return_thunk + 1 isn't an instruction boundary at the moment.
	 */
	.byte	0xf6

	/*
	 * As executed from __x86_return_thunk, this is a plain RET.
	 *
	 * As part of the TEST above, RET is the ModRM byte, and INT3 the imm8.
	 *
	 * We subsequently jump backwards and architecturally execute the RET.
	 * This creates a correct BTB prediction (type=ret), but in the
	 * meantime we suffer Straight Line Speculation (because the predicted
	 * type was not a branch) which is halted by the INT3.
	 *
	 * With SMT enabled and STIBP active, a sibling thread cannot poison
	 * RET's prediction to a type of its choice, but can evict the
	 * prediction due to competitive sharing. If the prediction is
	 * evicted, __x86_return_thunk will suffer Straight Line Speculation
	 * which will be contained safely by the INT3.
	 */
SYM_INNER_LABEL(__x86_return_thunk, SYM_L_GLOBAL)
	ret
	int3
SYM_CODE_END(__x86_return_thunk)
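
	/*
	 * Resulting byte layout (illustrative):
	 *
	 *   zen_untrain_ret:    f6	; TEST opcode
	 *   __x86_return_thunk: c3	; TEST ModRM (%bl) / RET
	 *                       cc	; TEST imm8 / INT3
	 *
	 * "f6 c3 cc" decodes as test $0xcc, %bl when entered at
	 * zen_untrain_ret, and as "ret; int3" when entered at
	 * __x86_return_thunk.
	 */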

	/*
	 * Ensure the TEST decoding / BTB invalidation is complete.
	 */
	lfence

	/*
	 * Jump back and execute the RET in the middle of the TEST instruction.
	 * INT3 is for SLS protection.
	 */
	jmp __x86_return_thunk
	int3
SYM_FUNC_END(zen_untrain_ret)
__EXPORT_THUNK(zen_untrain_ret)

EXPORT_SYMBOL(__x86_return_thunk)

#endif /* CONFIG_RETHUNK */