// SPDX-License-Identifier: GPL-2.0
#include <linux/perf_event.h>
#include <linux/types.h>

#include <asm/perf_event.h>
#include <asm/msr.h>
#include <asm/insn.h>

#include "../perf_event.h"

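/*
 * Per-format descriptor: which extra flag bits each legacy LBR format
 * encodes in the MSR_LAST_BRANCH_FROM_x value (mispredict in bit 63;
 * for the TSX-aware format, in-transaction/abort in bits 61:62).
 */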
static const enum {
	LBR_EIP_FLAGS		= 1,
	LBR_TSX			= 2,
} lbr_desc[LBR_FORMAT_MAX_KNOWN + 1] = {
	[LBR_FORMAT_EIP_FLAGS]  = LBR_EIP_FLAGS,
	[LBR_FORMAT_EIP_FLAGS2] = LBR_EIP_FLAGS | LBR_TSX,
};

/*
 * Intel LBR_SELECT bits
 * Intel Vol3a, April 2011, Section 16.7 Table 16-10
 *
 * Hardware branch filter (not available on all CPUs)
 */
#define LBR_KERNEL_BIT		0 /* do not capture at ring0 */
#define LBR_USER_BIT		1 /* do not capture at ring > 0 */
#define LBR_JCC_BIT		2 /* do not capture conditional branches */
#define LBR_REL_CALL_BIT	3 /* do not capture relative calls */
#define LBR_IND_CALL_BIT	4 /* do not capture indirect calls */
#define LBR_RETURN_BIT		5 /* do not capture near returns */
#define LBR_IND_JMP_BIT		6 /* do not capture indirect jumps */
#define LBR_REL_JMP_BIT		7 /* do not capture relative jumps */
#define LBR_FAR_BIT		8 /* do not capture far branches */
#define LBR_CALL_STACK_BIT	9 /* enable call stack */

/*
 * The following bit only exists in Linux; we mask it out before writing
 * it to the actual MSR, but it helps the perf constraint code understand
 * that this is a separate configuration.
 */
#define LBR_NO_INFO_BIT	       63 /* don't read LBR_INFO. */

#define LBR_KERNEL	(1 << LBR_KERNEL_BIT)
#define LBR_USER	(1 << LBR_USER_BIT)
#define LBR_JCC		(1 << LBR_JCC_BIT)
#define LBR_REL_CALL	(1 << LBR_REL_CALL_BIT)
#define LBR_IND_CALL	(1 << LBR_IND_CALL_BIT)
#define LBR_RETURN	(1 << LBR_RETURN_BIT)
#define LBR_REL_JMP	(1 << LBR_REL_JMP_BIT)
#define LBR_IND_JMP	(1 << LBR_IND_JMP_BIT)
#define LBR_FAR		(1 << LBR_FAR_BIT)
#define LBR_CALL_STACK	(1 << LBR_CALL_STACK_BIT)
#define LBR_NO_INFO	(1ULL << LBR_NO_INFO_BIT)

#define LBR_PLM (LBR_KERNEL | LBR_USER)

#define LBR_SEL_MASK	0x3ff	/* valid bits in LBR_SELECT */
#define LBR_NOT_SUPP	-1	/* LBR filter not supported */
#define LBR_IGN		0	/* ignored */

#define LBR_ANY		 \
	(LBR_JCC	|\
	 LBR_REL_CALL	|\
	 LBR_IND_CALL	|\
	 LBR_RETURN	|\
	 LBR_REL_JMP	|\
	 LBR_IND_JMP	|\
	 LBR_FAR)

#define LBR_FROM_FLAG_MISPRED	BIT_ULL(63)
#define LBR_FROM_FLAG_IN_TX	BIT_ULL(62)
#define LBR_FROM_FLAG_ABORT	BIT_ULL(61)

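/*
 * Bits 59:60 are guaranteed to be part of the sign extension of a
 * canonical address, so the wrmsr quirk below can copy them up into
 * bits 61:62 when those must act as sign bits.
 */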
#define LBR_FROM_SIGNEXT_2MSB	(BIT_ULL(60) | BIT_ULL(59))

/*
 * x86 control flow change classification
 * x86 control flow changes include branches, interrupts, traps, faults
 */
enum {
	X86_BR_NONE		= 0,      /* unknown */

	X86_BR_USER		= 1 << 0, /* branch target is user */
	X86_BR_KERNEL		= 1 << 1, /* branch target is kernel */

	X86_BR_CALL		= 1 << 2, /* call */
	X86_BR_RET		= 1 << 3, /* return */
	X86_BR_SYSCALL		= 1 << 4, /* syscall */
	X86_BR_SYSRET		= 1 << 5, /* syscall return */
	X86_BR_INT		= 1 << 6, /* sw interrupt */
	X86_BR_IRET		= 1 << 7, /* return from interrupt */
	X86_BR_JCC		= 1 << 8, /* conditional */
	X86_BR_JMP		= 1 << 9, /* jump */
	X86_BR_IRQ		= 1 << 10,/* hw interrupt or trap or fault */
	X86_BR_IND_CALL		= 1 << 11,/* indirect calls */
	X86_BR_ABORT		= 1 << 12,/* transaction abort */
	X86_BR_IN_TX		= 1 << 13,/* in transaction */
	X86_BR_NO_TX		= 1 << 14,/* not in transaction */
	X86_BR_ZERO_CALL	= 1 << 15,/* zero length call */
	X86_BR_CALL_STACK	= 1 << 16,/* call stack */
	X86_BR_IND_JMP		= 1 << 17,/* indirect jump */

	X86_BR_TYPE_SAVE	= 1 << 18,/* indicate to save branch type */

};

#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
#define X86_BR_ANYTX (X86_BR_NO_TX | X86_BR_IN_TX)

#define X86_BR_ANY       \
	(X86_BR_CALL    |\
	 X86_BR_RET     |\
	 X86_BR_SYSCALL |\
	 X86_BR_SYSRET  |\
	 X86_BR_INT     |\
	 X86_BR_IRET    |\
	 X86_BR_JCC     |\
	 X86_BR_JMP     |\
	 X86_BR_IRQ     |\
	 X86_BR_ABORT   |\
	 X86_BR_IND_CALL |\
	 X86_BR_IND_JMP  |\
	 X86_BR_ZERO_CALL)

#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)

#define X86_BR_ANY_CALL		 \
	(X86_BR_CALL		|\
	 X86_BR_IND_CALL	|\
	 X86_BR_ZERO_CALL	|\
	 X86_BR_SYSCALL		|\
	 X86_BR_IRQ		|\
	 X86_BR_INT)

/*
 * Intel LBR_CTL bits
 *
 * Hardware branch filter for Arch LBR
 */
#define ARCH_LBR_KERNEL_BIT		1  /* capture at ring0 */
#define ARCH_LBR_USER_BIT		2  /* capture at ring > 0 */
#define ARCH_LBR_CALL_STACK_BIT		3  /* enable call stack */
#define ARCH_LBR_JCC_BIT		16 /* capture conditional branches */
#define ARCH_LBR_REL_JMP_BIT		17 /* capture relative jumps */
#define ARCH_LBR_IND_JMP_BIT		18 /* capture indirect jumps */
#define ARCH_LBR_REL_CALL_BIT		19 /* capture relative calls */
#define ARCH_LBR_IND_CALL_BIT		20 /* capture indirect calls */
#define ARCH_LBR_RETURN_BIT		21 /* capture near returns */
#define ARCH_LBR_OTHER_BRANCH_BIT	22 /* capture other branches */

#define ARCH_LBR_KERNEL			(1ULL << ARCH_LBR_KERNEL_BIT)
#define ARCH_LBR_USER			(1ULL << ARCH_LBR_USER_BIT)
#define ARCH_LBR_CALL_STACK		(1ULL << ARCH_LBR_CALL_STACK_BIT)
#define ARCH_LBR_JCC			(1ULL << ARCH_LBR_JCC_BIT)
#define ARCH_LBR_REL_JMP		(1ULL << ARCH_LBR_REL_JMP_BIT)
#define ARCH_LBR_IND_JMP		(1ULL << ARCH_LBR_IND_JMP_BIT)
#define ARCH_LBR_REL_CALL		(1ULL << ARCH_LBR_REL_CALL_BIT)
#define ARCH_LBR_IND_CALL		(1ULL << ARCH_LBR_IND_CALL_BIT)
#define ARCH_LBR_RETURN			(1ULL << ARCH_LBR_RETURN_BIT)
#define ARCH_LBR_OTHER_BRANCH		(1ULL << ARCH_LBR_OTHER_BRANCH_BIT)

#define ARCH_LBR_ANY			 \
	(ARCH_LBR_JCC			|\
	 ARCH_LBR_REL_JMP		|\
	 ARCH_LBR_IND_JMP		|\
	 ARCH_LBR_REL_CALL		|\
	 ARCH_LBR_IND_CALL		|\
	 ARCH_LBR_RETURN		|\
	 ARCH_LBR_OTHER_BRANCH)

#define ARCH_LBR_CTL_MASK			0x7f000e

static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc);

static __always_inline bool is_lbr_call_stack_bit_set(u64 config)
{
	if (static_cpu_has(X86_FEATURE_ARCH_LBR))
		return !!(config & ARCH_LBR_CALL_STACK);

	return !!(config & LBR_CALL_STACK);
}

/*
 * We only support LBR implementations that have FREEZE_LBRS_ON_PMI;
 * otherwise it becomes nearly impossible to get a reliable stack.
 */

static void __intel_pmu_lbr_enable(bool pmi)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	u64 debugctl, lbr_select = 0, orig_debugctl;

	/*
	 * No need to unfreeze manually, as v4 can do that as part
	 * of the GLOBAL_STATUS ack.
	 */
	if (pmi && x86_pmu.version >= 4)
		return;

	/*
	 * No need to reprogram LBR_SELECT in a PMI, as it
	 * did not change.
	 */
	if (cpuc->lbr_sel)
		lbr_select = cpuc->lbr_sel->config & x86_pmu.lbr_sel_mask;
	if (!static_cpu_has(X86_FEATURE_ARCH_LBR) && !pmi && cpuc->lbr_sel)
		wrmsrl(MSR_LBR_SELECT, lbr_select);

	rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
	orig_debugctl = debugctl;

	if (!static_cpu_has(X86_FEATURE_ARCH_LBR))
		debugctl |= DEBUGCTLMSR_LBR;
	/*
	 * LBR callstack does not work well with FREEZE_LBRS_ON_PMI.
	 * If FREEZE_LBRS_ON_PMI is set, PMI near call/return instructions
	 * may cause superfluous increase/decrease of LBR_TOS.
	 */
	if (is_lbr_call_stack_bit_set(lbr_select))
		debugctl &= ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;
	else
		debugctl |= DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;

	if (orig_debugctl != debugctl)
		wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);

	if (static_cpu_has(X86_FEATURE_ARCH_LBR))
		wrmsrl(MSR_ARCH_LBR_CTL, lbr_select | ARCH_LBR_CTL_LBREN);
}

static void __intel_pmu_lbr_disable(void)
{
	u64 debugctl;

	if (static_cpu_has(X86_FEATURE_ARCH_LBR)) {
		wrmsrl(MSR_ARCH_LBR_CTL, 0);
		return;
	}

	rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
	debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
	wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
}

void intel_pmu_lbr_reset_32(void)
{
	int i;

	for (i = 0; i < x86_pmu.lbr_nr; i++)
		wrmsrl(x86_pmu.lbr_from + i, 0);
}

void intel_pmu_lbr_reset_64(void)
{
	int i;

	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		wrmsrl(x86_pmu.lbr_from + i, 0);
		wrmsrl(x86_pmu.lbr_to   + i, 0);
		if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
			wrmsrl(x86_pmu.lbr_info + i, 0);
	}
}

static void intel_pmu_arch_lbr_reset(void)
{
	/* A write to the ARCH_LBR_DEPTH MSR resets all LBR entries to 0 */
	wrmsrl(MSR_ARCH_LBR_DEPTH, x86_pmu.lbr_nr);
}

void intel_pmu_lbr_reset(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (!x86_pmu.lbr_nr)
		return;

	x86_pmu.lbr_reset();

	cpuc->last_task_ctx = NULL;
	cpuc->last_log_id = 0;
}

/*
 * TOS = most recently recorded branch
 */
static inline u64 intel_pmu_lbr_tos(void)
{
	u64 tos;

	rdmsrl(x86_pmu.lbr_tos, tos);
	return tos;
}

enum {
	LBR_NONE,
	LBR_VALID,
};

/*
 * For formats with LBR_TSX flags (e.g. LBR_FORMAT_EIP_FLAGS2), bits 61:62 in
 * MSR_LAST_BRANCH_FROM_x are the TSX flags when TSX is supported, but when
 * TSX is not supported they have no consistent behavior:
 *
 *   - For wrmsr(), bits 61:62 are considered part of the sign extension.
 *   - For HW updates (branch captures) bits 61:62 are always OFF and are not
 *     part of the sign extension.
 *
 * Therefore, if:
 *
 *   1) LBR has TSX format
 *   2) CPU has no TSX support enabled
 *
 * ... then any value passed to wrmsr() must be sign extended to 63 bits and any
 * value from rdmsr() must be converted to have a 61-bit sign extension,
 * ignoring the TSX flags.
 */
static inline bool lbr_from_signext_quirk_needed(void)
{
	int lbr_format = x86_pmu.intel_cap.lbr_format;
	bool tsx_support = boot_cpu_has(X86_FEATURE_HLE) ||
			   boot_cpu_has(X86_FEATURE_RTM);

	return !tsx_support && (lbr_desc[lbr_format] & LBR_TSX);
}

static DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key);

/* If quirk is enabled, ensure sign extension is 63 bits: */
inline u64 lbr_from_signext_quirk_wr(u64 val)
{
	if (static_branch_unlikely(&lbr_from_quirk_key)) {
		/*
		 * Sign extend into bits 61:62 while preserving bit 63.
		 *
		 * Quirk is enabled when TSX is disabled. Therefore TSX bits
		 * in val are always OFF and must be changed to be sign
		 * extension bits. Since bits 59:60 are guaranteed to be
		 * part of the sign extension bits, we can just copy them
		 * to 61:62.
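		 *
		 * E.g. (hypothetical value) val = 0x9fffffff81000000:
		 * bit 63 is set but bits 61:62 are clear; copying bits
		 * 59:60 up by two positions yields 0xffffffff81000000,
		 * with bit 63 left intact.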
		 */
		val |= (LBR_FROM_SIGNEXT_2MSB & val) << 2;
	}
	return val;
}

/*
 * If quirk is needed, ensure sign extension is 61 bits:
 */
static u64 lbr_from_signext_quirk_rd(u64 val)
{
	if (static_branch_unlikely(&lbr_from_quirk_key)) {
		/*
		 * Quirk is on when TSX is not enabled. Therefore TSX
		 * flags must be read as OFF.
		 */
		val &= ~(LBR_FROM_FLAG_IN_TX | LBR_FROM_FLAG_ABORT);
	}
	return val;
}

static __always_inline void wrlbr_from(unsigned int idx, u64 val)
{
	val = lbr_from_signext_quirk_wr(val);
	wrmsrl(x86_pmu.lbr_from + idx, val);
}

static __always_inline void wrlbr_to(unsigned int idx, u64 val)
{
	wrmsrl(x86_pmu.lbr_to + idx, val);
}

static __always_inline void wrlbr_info(unsigned int idx, u64 val)
{
	wrmsrl(x86_pmu.lbr_info + idx, val);
}

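/*
 * The rdlbr_*() helpers read an entry either from a saved snapshot
 * (when a struct lbr_entry pointer is passed, e.g. the arch-LBR XSAVE
 * buffer) or straight from the MSRs (when it is NULL).
 */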
static __always_inline u64 rdlbr_from(unsigned int idx, struct lbr_entry *lbr)
{
	u64 val;

	if (lbr)
		return lbr->from;

	rdmsrl(x86_pmu.lbr_from + idx, val);

	return lbr_from_signext_quirk_rd(val);
}

static __always_inline u64 rdlbr_to(unsigned int idx, struct lbr_entry *lbr)
{
	u64 val;

	if (lbr)
		return lbr->to;

	rdmsrl(x86_pmu.lbr_to + idx, val);

	return val;
}

static __always_inline u64 rdlbr_info(unsigned int idx, struct lbr_entry *lbr)
{
	u64 val;

	if (lbr)
		return lbr->info;

	rdmsrl(x86_pmu.lbr_info + idx, val);

	return val;
}

static inline void
wrlbr_all(struct lbr_entry *lbr, unsigned int idx, bool need_info)
{
	wrlbr_from(idx, lbr->from);
	wrlbr_to(idx, lbr->to);
	if (need_info)
		wrlbr_info(idx, lbr->info);
}

static inline bool
rdlbr_all(struct lbr_entry *lbr, unsigned int idx, bool need_info)
{
	u64 from = rdlbr_from(idx, NULL);

	/* Don't read invalid entry */
	if (!from)
		return false;

	lbr->from = from;
	lbr->to = rdlbr_to(idx, NULL);
	if (need_info)
		lbr->info = rdlbr_info(idx, NULL);

	return true;
}

void intel_pmu_lbr_restore(void *ctx)
{
	bool need_info = x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO;
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct x86_perf_task_context *task_ctx = ctx;
	int i;
	unsigned lbr_idx, mask;
	u64 tos = task_ctx->tos;

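	/* lbr_nr is a power of two, so the mask maps (tos - i) onto the LBR ring */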
	mask = x86_pmu.lbr_nr - 1;
	for (i = 0; i < task_ctx->valid_lbrs; i++) {
		lbr_idx = (tos - i) & mask;
		wrlbr_all(&task_ctx->lbr[i], lbr_idx, need_info);
	}

	for (; i < x86_pmu.lbr_nr; i++) {
		lbr_idx = (tos - i) & mask;
		wrlbr_from(lbr_idx, 0);
		wrlbr_to(lbr_idx, 0);
		if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
			wrlbr_info(lbr_idx, 0);
	}

	wrmsrl(x86_pmu.lbr_tos, tos);

	if (cpuc->lbr_select)
		wrmsrl(MSR_LBR_SELECT, task_ctx->lbr_sel);
}

static void intel_pmu_arch_lbr_restore(void *ctx)
{
	struct x86_perf_task_context_arch_lbr *task_ctx = ctx;
	struct lbr_entry *entries = task_ctx->entries;
	int i;

	/* Fast reset the LBRs before restore if the call stack is not full. */
	if (!entries[x86_pmu.lbr_nr - 1].from)
		intel_pmu_arch_lbr_reset();

	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		if (!entries[i].from)
			break;
		wrlbr_all(&entries[i], i, true);
	}
}

/*
 * Restore the Architecture LBR state from the xsave area in the perf
 * context data for the task via the XRSTORS instruction.
 */
static void intel_pmu_arch_lbr_xrstors(void *ctx)
{
	struct x86_perf_task_context_arch_lbr_xsave *task_ctx = ctx;

	copy_kernel_to_dynamic_supervisor(&task_ctx->xsave, XFEATURE_MASK_LBR);
}

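/*
 * Deep C-states can power off the LBR registers; detect that by
 * checking whether the most recently written FROM entry reads back
 * as zero.
 */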
static __always_inline bool lbr_is_reset_in_cstate(void *ctx)
{
	if (static_cpu_has(X86_FEATURE_ARCH_LBR))
		return x86_pmu.lbr_deep_c_reset && !rdlbr_from(0, NULL);

	return !rdlbr_from(((struct x86_perf_task_context *)ctx)->tos, NULL);
}

static void __intel_pmu_lbr_restore(void *ctx)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (task_context_opt(ctx)->lbr_callstack_users == 0 ||
	    task_context_opt(ctx)->lbr_stack_state == LBR_NONE) {
		intel_pmu_lbr_reset();
		return;
	}

	/*
	 * Do not restore the LBR registers if:
	 * - no one else touched them, and
	 * - they were not cleared in a C-state
	 */
	if ((ctx == cpuc->last_task_ctx) &&
	    (task_context_opt(ctx)->log_id == cpuc->last_log_id) &&
	    !lbr_is_reset_in_cstate(ctx)) {
		task_context_opt(ctx)->lbr_stack_state = LBR_NONE;
		return;
	}

	x86_pmu.lbr_restore(ctx);

	task_context_opt(ctx)->lbr_stack_state = LBR_NONE;
}

void intel_pmu_lbr_save(void *ctx)
{
	bool need_info = x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO;
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct x86_perf_task_context *task_ctx = ctx;
	unsigned lbr_idx, mask;
	u64 tos;
	int i;

	mask = x86_pmu.lbr_nr - 1;
	tos = intel_pmu_lbr_tos();
	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		lbr_idx = (tos - i) & mask;
		if (!rdlbr_all(&task_ctx->lbr[i], lbr_idx, need_info))
			break;
	}
	task_ctx->valid_lbrs = i;
	task_ctx->tos = tos;

	if (cpuc->lbr_select)
		rdmsrl(MSR_LBR_SELECT, task_ctx->lbr_sel);
}

static void intel_pmu_arch_lbr_save(void *ctx)
{
	struct x86_perf_task_context_arch_lbr *task_ctx = ctx;
	struct lbr_entry *entries = task_ctx->entries;
	int i;

	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		if (!rdlbr_all(&entries[i], i, true))
			break;
	}

	/* LBR call stack is not full. Reset is required in restore. */
	if (i < x86_pmu.lbr_nr)
		entries[x86_pmu.lbr_nr - 1].from = 0;
}

/*
 * Save the Architecture LBR state to the xsave area in the perf
 * context data for the task via the XSAVES instruction.
 */
static void intel_pmu_arch_lbr_xsaves(void *ctx)
{
	struct x86_perf_task_context_arch_lbr_xsave *task_ctx = ctx;

	copy_dynamic_supervisor_to_kernel(&task_ctx->xsave, XFEATURE_MASK_LBR);
}

static void __intel_pmu_lbr_save(void *ctx)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (task_context_opt(ctx)->lbr_callstack_users == 0) {
		task_context_opt(ctx)->lbr_stack_state = LBR_NONE;
		return;
	}

	x86_pmu.lbr_save(ctx);

	task_context_opt(ctx)->lbr_stack_state = LBR_VALID;

	cpuc->last_task_ctx = ctx;
	cpuc->last_log_id = ++task_context_opt(ctx)->log_id;
}

void intel_pmu_lbr_swap_task_ctx(struct perf_event_context *prev,
				 struct perf_event_context *next)
{
	void *prev_ctx_data, *next_ctx_data;

	swap(prev->task_ctx_data, next->task_ctx_data);

	/*
	 * Architecture specific synchronization makes sense in
	 * case both prev->task_ctx_data and next->task_ctx_data
	 * pointers are allocated.
	 */

	prev_ctx_data = next->task_ctx_data;
	next_ctx_data = prev->task_ctx_data;

	if (!prev_ctx_data || !next_ctx_data)
		return;

	swap(task_context_opt(prev_ctx_data)->lbr_callstack_users,
	     task_context_opt(next_ctx_data)->lbr_callstack_users);
}

void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	void *task_ctx;

	if (!cpuc->lbr_users)
		return;

	/*
	 * If LBR callstack feature is enabled and the stack was saved when
	 * the task was scheduled out, restore the stack. Otherwise flush
	 * the LBR stack.
	 */
	task_ctx = ctx ? ctx->task_ctx_data : NULL;
	if (task_ctx) {
		if (sched_in)
			__intel_pmu_lbr_restore(task_ctx);
		else
			__intel_pmu_lbr_save(task_ctx);
		return;
	}

	/*
	 * Since a context switch can flip the address space and LBR entries
	 * are not tagged with an identifier, we need to wipe the LBR, even for
	 * per-cpu events. You simply cannot resolve the branches from the old
	 * address space.
	 */
	if (sched_in)
		intel_pmu_lbr_reset();
}

static inline bool branch_user_callstack(unsigned br_sel)
{
	return (br_sel & X86_BR_USER) && (br_sel & X86_BR_CALL_STACK);
}

void intel_pmu_lbr_add(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (!x86_pmu.lbr_nr)
		return;

	if (event->hw.flags & PERF_X86_EVENT_LBR_SELECT)
		cpuc->lbr_select = 1;

	cpuc->br_sel = event->hw.branch_reg.reg;

	if (branch_user_callstack(cpuc->br_sel) && event->ctx->task_ctx_data)
		task_context_opt(event->ctx->task_ctx_data)->lbr_callstack_users++;

	/*
	 * Request pmu::sched_task() callback, which will fire inside the
	 * regular perf event scheduling, so that call will:
	 *
	 *  - restore or wipe; when LBR-callstack,
	 *  - wipe; otherwise,
	 *
	 * when this is from __perf_event_task_sched_in().
	 *
	 * However, if this is from perf_install_in_context(), no such callback
	 * will follow and we'll need to reset the LBR here if this is the
	 * first LBR event.
	 *
	 * The problem is, we cannot tell these cases apart... but we can
	 * exclude the biggest chunk of cases by looking at
	 * event->total_time_running. An event that has accrued runtime cannot
	 * be 'new'. Conversely, a new event can get installed through the
	 * context switch path for the first time.
	 */
	if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip > 0)
		cpuc->lbr_pebs_users++;
	perf_sched_cb_inc(event->ctx->pmu);
	if (!cpuc->lbr_users++ && !event->total_time_running)
		intel_pmu_lbr_reset();
}

void release_lbr_buffers(void)
{
	struct kmem_cache *kmem_cache;
	struct cpu_hw_events *cpuc;
	int cpu;

	if (!static_cpu_has(X86_FEATURE_ARCH_LBR))
		return;

	for_each_possible_cpu(cpu) {
		cpuc = per_cpu_ptr(&cpu_hw_events, cpu);
		kmem_cache = x86_get_pmu(cpu)->task_ctx_cache;
		if (kmem_cache && cpuc->lbr_xsave) {
			kmem_cache_free(kmem_cache, cpuc->lbr_xsave);
			cpuc->lbr_xsave = NULL;
		}
	}
}

void reserve_lbr_buffers(void)
{
	struct kmem_cache *kmem_cache;
	struct cpu_hw_events *cpuc;
	int cpu;

	if (!static_cpu_has(X86_FEATURE_ARCH_LBR))
		return;

	for_each_possible_cpu(cpu) {
		cpuc = per_cpu_ptr(&cpu_hw_events, cpu);
		kmem_cache = x86_get_pmu(cpu)->task_ctx_cache;
		if (!kmem_cache || cpuc->lbr_xsave)
			continue;

		cpuc->lbr_xsave = kmem_cache_alloc_node(kmem_cache,
							GFP_KERNEL | __GFP_ZERO,
							cpu_to_node(cpu));
	}
}

void intel_pmu_lbr_del(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (!x86_pmu.lbr_nr)
		return;

	if (branch_user_callstack(cpuc->br_sel) &&
	    event->ctx->task_ctx_data)
		task_context_opt(event->ctx->task_ctx_data)->lbr_callstack_users--;

	if (event->hw.flags & PERF_X86_EVENT_LBR_SELECT)
		cpuc->lbr_select = 0;

	if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip > 0)
		cpuc->lbr_pebs_users--;
	cpuc->lbr_users--;
	WARN_ON_ONCE(cpuc->lbr_users < 0);
	WARN_ON_ONCE(cpuc->lbr_pebs_users < 0);
	perf_sched_cb_dec(event->ctx->pmu);
}

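/*
 * When the fixed vLBR counter is claimed for a guest, the host must
 * leave the LBR MSRs alone.
 */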
static inline bool vlbr_exclude_host(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	return test_bit(INTEL_PMC_IDX_FIXED_VLBR,
		(unsigned long *)&cpuc->intel_ctrl_guest_mask);
}

void intel_pmu_lbr_enable_all(bool pmi)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (cpuc->lbr_users && !vlbr_exclude_host())
		__intel_pmu_lbr_enable(pmi);
}

void intel_pmu_lbr_disable_all(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (cpuc->lbr_users && !vlbr_exclude_host())
		__intel_pmu_lbr_disable();
}

void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
{
	unsigned long mask = x86_pmu.lbr_nr - 1;
	u64 tos = intel_pmu_lbr_tos();
	int i;

	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		unsigned long lbr_idx = (tos - i) & mask;
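		/*
		 * On the 32-bit LBR format, one MSR packs both addresses:
		 * FROM in the low 32 bits, TO in the high 32 bits.
		 */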
		union {
			struct {
				u32 from;
				u32 to;
			};
			u64     lbr;
		} msr_lastbranch;

		rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);

		cpuc->lbr_entries[i].from	= msr_lastbranch.from;
		cpuc->lbr_entries[i].to		= msr_lastbranch.to;
		cpuc->lbr_entries[i].mispred	= 0;
		cpuc->lbr_entries[i].predicted	= 0;
		cpuc->lbr_entries[i].in_tx	= 0;
		cpuc->lbr_entries[i].abort	= 0;
		cpuc->lbr_entries[i].cycles	= 0;
		cpuc->lbr_entries[i].type	= 0;
		cpuc->lbr_entries[i].reserved	= 0;
	}
	cpuc->lbr_stack.nr = i;
	cpuc->lbr_stack.hw_idx = tos;
}

/*
 * Due to lack of segmentation in Linux the effective address (offset)
 * is the same as the linear address, allowing us to merge the LIP and EIP
 * LBR formats.
 */
void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
{
	bool need_info = false, call_stack = false;
	unsigned long mask = x86_pmu.lbr_nr - 1;
	int lbr_format = x86_pmu.intel_cap.lbr_format;
	u64 tos = intel_pmu_lbr_tos();
	int i;
	int out = 0;
	int num = x86_pmu.lbr_nr;

	if (cpuc->lbr_sel) {
		need_info = !(cpuc->lbr_sel->config & LBR_NO_INFO);
		if (cpuc->lbr_sel->config & LBR_CALL_STACK)
			call_stack = true;
	}

	for (i = 0; i < num; i++) {
		unsigned long lbr_idx = (tos - i) & mask;
		u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0;
		int skip = 0;
		u16 cycles = 0;
		int lbr_flags = lbr_desc[lbr_format];

		from = rdlbr_from(lbr_idx, NULL);
		to   = rdlbr_to(lbr_idx, NULL);

		/*
		 * Read LBR call stack entries
		 * until invalid entry (0s) is detected.
		 */
		if (call_stack && !from)
			break;

		if (lbr_format == LBR_FORMAT_INFO && need_info) {
			u64 info;

			info = rdlbr_info(lbr_idx, NULL);
			mis = !!(info & LBR_INFO_MISPRED);
			pred = !mis;
			in_tx = !!(info & LBR_INFO_IN_TX);
			abort = !!(info & LBR_INFO_ABORT);
			cycles = (info & LBR_INFO_CYCLES);
		}

		if (lbr_format == LBR_FORMAT_TIME) {
			mis = !!(from & LBR_FROM_FLAG_MISPRED);
			pred = !mis;
			skip = 1;
			cycles = ((to >> 48) & LBR_INFO_CYCLES);

			to = (u64)((((s64)to) << 16) >> 16);
		}

		if (lbr_flags & LBR_EIP_FLAGS) {
			mis = !!(from & LBR_FROM_FLAG_MISPRED);
			pred = !mis;
			skip = 1;
		}
		if (lbr_flags & LBR_TSX) {
			in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
			abort = !!(from & LBR_FROM_FLAG_ABORT);
			skip = 3;
		}
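		/*
		 * Strip the flag bits and restore the sign: the left
		 * shift discards the top 'skip' flag bits, then the
		 * arithmetic right shift sign-extends what remains.
		 */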
		from = (u64)((((s64)from) << skip) >> skip);

		/*
		 * Some CPUs report duplicated abort records,
		 * with the second entry not having an abort bit set.
		 * Skip them here. This loop runs backwards,
		 * so we need to undo the previous record.
		 * If the abort just happened outside the window
		 * the extra entry cannot be removed.
		 */
		if (abort && x86_pmu.lbr_double_abort && out > 0)
			out--;

		cpuc->lbr_entries[out].from	 = from;
		cpuc->lbr_entries[out].to	 = to;
		cpuc->lbr_entries[out].mispred	 = mis;
		cpuc->lbr_entries[out].predicted = pred;
		cpuc->lbr_entries[out].in_tx	 = in_tx;
		cpuc->lbr_entries[out].abort	 = abort;
		cpuc->lbr_entries[out].cycles	 = cycles;
		cpuc->lbr_entries[out].type	 = 0;
		cpuc->lbr_entries[out].reserved	 = 0;
		out++;
	}
	cpuc->lbr_stack.nr = out;
	cpuc->lbr_stack.hw_idx = tos;
}

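/*
 * Decode fields from an LBR_INFO word. On arch-LBR parts each field is
 * only reported when the corresponding capability bit is enumerated.
 */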
static __always_inline int get_lbr_br_type(u64 info)
{
	if (!static_cpu_has(X86_FEATURE_ARCH_LBR) || !x86_pmu.lbr_br_type)
		return 0;

	return (info & LBR_INFO_BR_TYPE) >> LBR_INFO_BR_TYPE_OFFSET;
}

static __always_inline bool get_lbr_mispred(u64 info)
{
	if (static_cpu_has(X86_FEATURE_ARCH_LBR) && !x86_pmu.lbr_mispred)
		return 0;

	return !!(info & LBR_INFO_MISPRED);
}

static __always_inline bool get_lbr_predicted(u64 info)
{
	if (static_cpu_has(X86_FEATURE_ARCH_LBR) && !x86_pmu.lbr_mispred)
		return 0;

	return !(info & LBR_INFO_MISPRED);
}

static __always_inline u16 get_lbr_cycles(u64 info)
{
	if (static_cpu_has(X86_FEATURE_ARCH_LBR) &&
	    !(x86_pmu.lbr_timed_lbr && info & LBR_INFO_CYC_CNT_VALID))
		return 0;

	return info & LBR_INFO_CYCLES;
}

static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
				struct lbr_entry *entries)
{
	struct perf_branch_entry *e;
	struct lbr_entry *lbr;
	u64 from, to, info;
	int i;

	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		lbr = entries ? &entries[i] : NULL;
		e = &cpuc->lbr_entries[i];

		from = rdlbr_from(i, lbr);
		/*
		 * Read LBR entries until invalid entry (0s) is detected.
		 */
		if (!from)
			break;

		to = rdlbr_to(i, lbr);
		info = rdlbr_info(i, lbr);

		e->from		= from;
		e->to		= to;
		e->mispred	= get_lbr_mispred(info);
		e->predicted	= get_lbr_predicted(info);
		e->in_tx	= !!(info & LBR_INFO_IN_TX);
		e->abort	= !!(info & LBR_INFO_ABORT);
		e->cycles	= get_lbr_cycles(info);
		e->type		= get_lbr_br_type(info);
		e->reserved	= 0;
	}

	cpuc->lbr_stack.nr = i;
}

static void intel_pmu_arch_lbr_read(struct cpu_hw_events *cpuc)
{
	intel_pmu_store_lbr(cpuc, NULL);
}

static void intel_pmu_arch_lbr_read_xsave(struct cpu_hw_events *cpuc)
{
	struct x86_perf_task_context_arch_lbr_xsave *xsave = cpuc->lbr_xsave;

	if (!xsave) {
		intel_pmu_store_lbr(cpuc, NULL);
		return;
	}
	copy_dynamic_supervisor_to_kernel(&xsave->xsave, XFEATURE_MASK_LBR);

	intel_pmu_store_lbr(cpuc, xsave->lbr.entries);
}

void intel_pmu_lbr_read(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	/*
	 * Don't read when all LBR users are using adaptive PEBS.
	 *
	 * This could be smarter and actually check the event,
	 * but this simple approach seems to work for now.
	 */
	if (!cpuc->lbr_users || vlbr_exclude_host() ||
	    cpuc->lbr_users == cpuc->lbr_pebs_users)
		return;

	x86_pmu.lbr_read(cpuc);

	intel_pmu_lbr_filter(cpuc);
}

/*
 * SW filter is used:
 * - in case there is no HW filter
 * - in case the HW filter has errata or limitations
 */
static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
{
	u64 br_type = event->attr.branch_sample_type;
	int mask = 0;

	if (br_type & PERF_SAMPLE_BRANCH_USER)
		mask |= X86_BR_USER;

	if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
		mask |= X86_BR_KERNEL;

	/* we ignore BRANCH_HV here */

	if (br_type & PERF_SAMPLE_BRANCH_ANY)
		mask |= X86_BR_ANY;

	if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL)
		mask |= X86_BR_ANY_CALL;

	if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
		mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET;

	if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
		mask |= X86_BR_IND_CALL;

	if (br_type & PERF_SAMPLE_BRANCH_ABORT_TX)
		mask |= X86_BR_ABORT;

	if (br_type & PERF_SAMPLE_BRANCH_IN_TX)
		mask |= X86_BR_IN_TX;

	if (br_type & PERF_SAMPLE_BRANCH_NO_TX)
		mask |= X86_BR_NO_TX;

	if (br_type & PERF_SAMPLE_BRANCH_COND)
		mask |= X86_BR_JCC;

	if (br_type & PERF_SAMPLE_BRANCH_CALL_STACK) {
		if (!x86_pmu_has_lbr_callstack())
			return -EOPNOTSUPP;
		if (mask & ~(X86_BR_USER | X86_BR_KERNEL))
			return -EINVAL;
		mask |= X86_BR_CALL | X86_BR_IND_CALL | X86_BR_RET |
			X86_BR_CALL_STACK;
	}

	if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP)
		mask |= X86_BR_IND_JMP;

	if (br_type & PERF_SAMPLE_BRANCH_CALL)
		mask |= X86_BR_CALL | X86_BR_ZERO_CALL;

	if (br_type & PERF_SAMPLE_BRANCH_TYPE_SAVE)
		mask |= X86_BR_TYPE_SAVE;

	/*
	 * Stash the actual user request into reg; it may be used by
	 * fixup code for some CPUs.
	 */
	event->hw.branch_reg.reg = mask;
	return 0;
}

/*
 * Set up the HW LBR filter.
 * Used only when available; it may not be enough to disambiguate
 * all branches and may need the help of the SW filter.
 */
static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
{
	struct hw_perf_event_extra *reg;
	u64 br_type = event->attr.branch_sample_type;
	u64 mask = 0, v;
	int i;

	for (i = 0; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) {
		if (!(br_type & (1ULL << i)))
			continue;

		v = x86_pmu.lbr_sel_map[i];
		if (v == LBR_NOT_SUPP)
			return -EOPNOTSUPP;

		if (v != LBR_IGN)
			mask |= v;
	}

	reg = &event->hw.branch_reg;
	reg->idx = EXTRA_REG_LBR;

	if (static_cpu_has(X86_FEATURE_ARCH_LBR)) {
		reg->config = mask;

		/*
		 * The Arch LBR HW can retrieve the common branch types
		 * from the LBR_INFO. It doesn't require the high overhead
		 * SW disassemble.
		 * Enable the branch type by default for the Arch LBR.
		 */
		reg->reg |= X86_BR_TYPE_SAVE;
		return 0;
	}

	/*
	 * The first 9 bits (LBR_SEL_MASK) in LBR_SELECT operate
	 * in suppress mode. So LBR_SELECT should be set to
	 * (~mask & LBR_SEL_MASK) | (mask & ~LBR_SEL_MASK)
	 * But the 10th bit LBR_CALL_STACK does not operate
	 * in suppress mode.
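	 *
	 * The XOR below computes exactly that: it flips the nine
	 * suppress-mode bits while passing LBR_CALL_STACK through
	 * unchanged.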
	 */
	reg->config = mask ^ (x86_pmu.lbr_sel_mask & ~LBR_CALL_STACK);

	if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) &&
	    (br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) &&
	    (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO))
		reg->config |= LBR_NO_INFO;

	return 0;
}

intel_pmu_setup_lbr_filter(struct perf_event * event)1145*4882a593Smuzhiyun int intel_pmu_setup_lbr_filter(struct perf_event *event)
1146*4882a593Smuzhiyun {
1147*4882a593Smuzhiyun 	int ret = 0;
1148*4882a593Smuzhiyun 
1149*4882a593Smuzhiyun 	/*
1150*4882a593Smuzhiyun 	 * no LBR on this PMU
1151*4882a593Smuzhiyun 	 */
1152*4882a593Smuzhiyun 	if (!x86_pmu.lbr_nr)
1153*4882a593Smuzhiyun 		return -EOPNOTSUPP;
1154*4882a593Smuzhiyun 
1155*4882a593Smuzhiyun 	/*
1156*4882a593Smuzhiyun 	 * setup SW LBR filter
1157*4882a593Smuzhiyun 	 */
1158*4882a593Smuzhiyun 	ret = intel_pmu_setup_sw_lbr_filter(event);
1159*4882a593Smuzhiyun 	if (ret)
1160*4882a593Smuzhiyun 		return ret;
1161*4882a593Smuzhiyun 
1162*4882a593Smuzhiyun 	/*
1163*4882a593Smuzhiyun 	 * setup HW LBR filter, if any
1164*4882a593Smuzhiyun 	 */
1165*4882a593Smuzhiyun 	if (x86_pmu.lbr_sel_map)
1166*4882a593Smuzhiyun 		ret = intel_pmu_setup_hw_lbr_filter(event);
1167*4882a593Smuzhiyun 
1168*4882a593Smuzhiyun 	return ret;
1169*4882a593Smuzhiyun }
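
/*
 * Hedged usage sketch (illustrative, not part of the original file):
 * the event-configuration path is assumed to call
 * intel_pmu_setup_lbr_filter() once per event. For an event with
 * attr.branch_sample_type = PERF_SAMPLE_BRANCH_ANY_CALL |
 * PERF_SAMPLE_BRANCH_USER, the SW stage first stashes the X86_BR_*
 * mask in event->hw.branch_reg.reg; then, on CPUs with a lbr_sel_map,
 * the HW stage programs an over-inclusive LBR_SELECT (see the NHM/WSM
 * erratum notes in nhm_lbr_sel_map below) and intel_pmu_lbr_filter()
 * later discards the extra jumps the hardware was forced to capture.
 */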

/*
 * Return the type of control flow change at address "from".
 * The instruction is not necessarily a branch (in case of interrupt).
 *
 * The branch type returned also includes the priv level of the
 * target of the control flow change (X86_BR_USER, X86_BR_KERNEL).
 *
 * If a branch type is unknown OR the instruction cannot be
 * decoded (e.g., text page not present), then X86_BR_NONE is
 * returned.
 */
static int branch_type(unsigned long from, unsigned long to, int abort)
{
	struct insn insn;
	void *addr;
	int bytes_read, bytes_left;
	int ret = X86_BR_NONE;
	int ext, to_plm, from_plm;
	u8 buf[MAX_INSN_SIZE];
	int is64 = 0;

	to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER;
	from_plm = kernel_ip(from) ? X86_BR_KERNEL : X86_BR_USER;

	/*
	 * may be zero if the LBR did not fill up after a reset by the time
	 * we get a PMU interrupt
	 */
	if (from == 0 || to == 0)
		return X86_BR_NONE;

	if (abort)
		return X86_BR_ABORT | to_plm;

	if (from_plm == X86_BR_USER) {
		/*
		 * can happen if measuring at the user level only
		 * and we interrupt in a kernel thread, e.g., idle.
		 */
		if (!current->mm)
			return X86_BR_NONE;

		/* may fail if text not present */
		bytes_left = copy_from_user_nmi(buf, (void __user *)from,
						MAX_INSN_SIZE);
		bytes_read = MAX_INSN_SIZE - bytes_left;
		if (!bytes_read)
			return X86_BR_NONE;

		addr = buf;
	} else {
		/*
		 * The LBR logs any address in the IP, even if the IP just
		 * faulted. This means userspace can control the from address.
		 * Ensure we don't blindly read any address by validating it is
		 * a known text address.
		 */
		if (kernel_text_address(from)) {
			addr = (void *)from;
			/*
			 * Assume we can get the maximum possible size
			 * when grabbing kernel data.  This is not
			 * _strictly_ true since we could possibly be
			 * executing up next to a memory hole, but
			 * it is very unlikely to be a problem.
			 */
			bytes_read = MAX_INSN_SIZE;
		} else {
			return X86_BR_NONE;
		}
	}

	/*
	 * the decoder needs to know the ABI, especially
	 * on 64-bit systems running 32-bit apps
	 */
#ifdef CONFIG_X86_64
	is64 = kernel_ip((unsigned long)addr) || !test_thread_flag(TIF_IA32);
#endif
	insn_init(&insn, addr, bytes_read, is64);
	insn_get_opcode(&insn);
	if (!insn.opcode.got)
		return X86_BR_ABORT;

	switch (insn.opcode.bytes[0]) {
	case 0xf:
		switch (insn.opcode.bytes[1]) {
		case 0x05: /* syscall */
		case 0x34: /* sysenter */
			ret = X86_BR_SYSCALL;
			break;
		case 0x07: /* sysret */
		case 0x35: /* sysexit */
			ret = X86_BR_SYSRET;
			break;
		case 0x80 ... 0x8f: /* conditional */
			ret = X86_BR_JCC;
			break;
		default:
			ret = X86_BR_NONE;
		}
		break;
	case 0x70 ... 0x7f: /* conditional */
		ret = X86_BR_JCC;
		break;
	case 0xc2: /* near ret */
	case 0xc3: /* near ret */
	case 0xca: /* far ret */
	case 0xcb: /* far ret */
		ret = X86_BR_RET;
		break;
	case 0xcf: /* iret */
		ret = X86_BR_IRET;
		break;
	case 0xcc ... 0xce: /* int */
		ret = X86_BR_INT;
		break;
	case 0xe8: /* call near rel */
		insn_get_immediate(&insn);
		if (insn.immediate1.value == 0) {
			/* zero length call */
			ret = X86_BR_ZERO_CALL;
			break;
		}
		fallthrough;
	case 0x9a: /* call far absolute */
		ret = X86_BR_CALL;
		break;
	case 0xe0 ... 0xe3: /* loop jmp */
		ret = X86_BR_JCC;
		break;
	case 0xe9 ... 0xeb: /* jmp */
		ret = X86_BR_JMP;
		break;
	case 0xff: /* call near absolute, call far absolute ind */
		insn_get_modrm(&insn);
		ext = (insn.modrm.bytes[0] >> 3) & 0x7;
		switch (ext) {
		case 2: /* near ind call */
		case 3: /* far ind call */
			ret = X86_BR_IND_CALL;
			break;
		case 4:
		case 5:
			ret = X86_BR_IND_JMP;
			break;
		}
		break;
	default:
		ret = X86_BR_NONE;
	}
	/*
	 * Interrupts, traps and faults (and thus ring transitions) may
	 * occur on any instruction. Thus, to classify them correctly,
	 * we need to first look at the from and to priv levels. If they
	 * are different and to is in the kernel, then it indicates
	 * a ring transition. If the from instruction is not a ring
	 * transition instr (syscall, sysenter, int), then it means
	 * it was an irq, trap or fault.
	 *
	 * we have no way of detecting kernel to kernel faults.
	 */
	if (from_plm == X86_BR_USER && to_plm == X86_BR_KERNEL
	    && ret != X86_BR_SYSCALL && ret != X86_BR_INT)
		ret = X86_BR_IRQ;

	/*
	 * branch priv level determined by target, as
	 * is done by HW when LBR_SELECT is implemented
	 */
	if (ret != X86_BR_NONE)
		ret |= to_plm;

	return ret;
}
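
/*
 * Illustrative walkthrough (not from the original source): for an LBR
 * record whose user-space "from" address holds the bytes
 * e8 00 00 00 00 (call rel32 with a zero displacement, the classic
 * get-PC thunk), the decoder takes the 0xe8 case, finds
 * insn.immediate1.value == 0 and classifies the record as
 * X86_BR_ZERO_CALL; if "to" is also a user address, the caller sees
 * X86_BR_ZERO_CALL | X86_BR_USER.
 */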

#define X86_BR_TYPE_MAP_MAX	16

static int branch_map[X86_BR_TYPE_MAP_MAX] = {
	PERF_BR_CALL,		/* X86_BR_CALL */
	PERF_BR_RET,		/* X86_BR_RET */
	PERF_BR_SYSCALL,	/* X86_BR_SYSCALL */
	PERF_BR_SYSRET,		/* X86_BR_SYSRET */
	PERF_BR_UNKNOWN,	/* X86_BR_INT */
	PERF_BR_UNKNOWN,	/* X86_BR_IRET */
	PERF_BR_COND,		/* X86_BR_JCC */
	PERF_BR_UNCOND,		/* X86_BR_JMP */
	PERF_BR_UNKNOWN,	/* X86_BR_IRQ */
	PERF_BR_IND_CALL,	/* X86_BR_IND_CALL */
	PERF_BR_UNKNOWN,	/* X86_BR_ABORT */
	PERF_BR_UNKNOWN,	/* X86_BR_IN_TX */
	PERF_BR_UNKNOWN,	/* X86_BR_NO_TX */
	PERF_BR_CALL,		/* X86_BR_ZERO_CALL */
	PERF_BR_UNKNOWN,	/* X86_BR_CALL_STACK */
	PERF_BR_IND,		/* X86_BR_IND_JMP */
};

static int
common_branch_type(int type)
{
	int i;

	type >>= 2; /* skip X86_BR_USER and X86_BR_KERNEL */

	if (type) {
		i = __ffs(type);
		if (i < X86_BR_TYPE_MAP_MAX)
			return branch_map[i];
	}

	return PERF_BR_UNKNOWN;
}
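
/*
 * Illustrative worked example (not from the original source): with
 * type = X86_BR_JCC | X86_BR_KERNEL, shifting right by two drops the
 * two priv-level bits and leaves the X86_BR_JCC bit as the lowest set
 * bit; __ffs() returns its index (6, per the branch_map ordering
 * above) and branch_map[6] maps it to PERF_BR_COND, the generic
 * conditional-branch type reported to userspace.
 */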

enum {
	ARCH_LBR_BR_TYPE_JCC			= 0,
	ARCH_LBR_BR_TYPE_NEAR_IND_JMP		= 1,
	ARCH_LBR_BR_TYPE_NEAR_REL_JMP		= 2,
	ARCH_LBR_BR_TYPE_NEAR_IND_CALL		= 3,
	ARCH_LBR_BR_TYPE_NEAR_REL_CALL		= 4,
	ARCH_LBR_BR_TYPE_NEAR_RET		= 5,
	ARCH_LBR_BR_TYPE_KNOWN_MAX		= ARCH_LBR_BR_TYPE_NEAR_RET,

	ARCH_LBR_BR_TYPE_MAP_MAX		= 16,
};

static const int arch_lbr_br_type_map[ARCH_LBR_BR_TYPE_MAP_MAX] = {
	[ARCH_LBR_BR_TYPE_JCC]			= X86_BR_JCC,
	[ARCH_LBR_BR_TYPE_NEAR_IND_JMP]		= X86_BR_IND_JMP,
	[ARCH_LBR_BR_TYPE_NEAR_REL_JMP]		= X86_BR_JMP,
	[ARCH_LBR_BR_TYPE_NEAR_IND_CALL]	= X86_BR_IND_CALL,
	[ARCH_LBR_BR_TYPE_NEAR_REL_CALL]	= X86_BR_CALL,
	[ARCH_LBR_BR_TYPE_NEAR_RET]		= X86_BR_RET,
};

/*
 * Implement the actual branch filter based on user demand.
 * Hardware may not exactly satisfy that request, thus
 * we need to inspect opcodes. Mismatched branches are
 * discarded. Therefore, the number of branches returned
 * in a PERF_SAMPLE_BRANCH_STACK sample may vary.
 */
static void
intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
{
	u64 from, to;
	int br_sel = cpuc->br_sel;
	int i, j, type, to_plm;
	bool compress = false;

	/* if sampling all branches, then nothing to filter */
	if (((br_sel & X86_BR_ALL) == X86_BR_ALL) &&
	    ((br_sel & X86_BR_TYPE_SAVE) != X86_BR_TYPE_SAVE))
		return;

	for (i = 0; i < cpuc->lbr_stack.nr; i++) {

		from = cpuc->lbr_entries[i].from;
		to = cpuc->lbr_entries[i].to;
		type = cpuc->lbr_entries[i].type;

		/*
		 * Parse the branch type recorded in the LBR_x_INFO MSR.
		 * Doesn't support OTHER_BRANCH decoding for now.
		 * The OTHER_BRANCH branch type still relies on software decoding.
		 */
		if (static_cpu_has(X86_FEATURE_ARCH_LBR) &&
		    type <= ARCH_LBR_BR_TYPE_KNOWN_MAX) {
			to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER;
			type = arch_lbr_br_type_map[type] | to_plm;
		} else
			type = branch_type(from, to, cpuc->lbr_entries[i].abort);
		if (type != X86_BR_NONE && (br_sel & X86_BR_ANYTX)) {
			if (cpuc->lbr_entries[i].in_tx)
				type |= X86_BR_IN_TX;
			else
				type |= X86_BR_NO_TX;
		}

		/* if type does not correspond, then discard */
		if (type == X86_BR_NONE || (br_sel & type) != type) {
			cpuc->lbr_entries[i].from = 0;
			compress = true;
		}

		if ((br_sel & X86_BR_TYPE_SAVE) == X86_BR_TYPE_SAVE)
			cpuc->lbr_entries[i].type = common_branch_type(type);
	}

	if (!compress)
		return;

	/* remove all entries with from=0 */
	for (i = 0; i < cpuc->lbr_stack.nr; ) {
		if (!cpuc->lbr_entries[i].from) {
			j = i;
			while (++j < cpuc->lbr_stack.nr)
				cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j];
			cpuc->lbr_stack.nr--;
			if (!cpuc->lbr_entries[i].from)
				continue;
		}
		i++;
	}
}
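
/*
 * Illustrative compaction example (not from the original source): with
 * four entries whose "from" fields are [A, 0, B, 0] after filtering,
 * the removal loop shifts the survivors down in place and decrements
 * lbr_stack.nr each time, ending with [A, B] and nr == 2. The re-check
 * of entries[i].from after a shift handles the case where the entry
 * just copied into slot i is itself marked for removal.
 */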

void intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	/* Cannot get TOS for large PEBS and Arch LBR */
	if (static_cpu_has(X86_FEATURE_ARCH_LBR) ||
	    (cpuc->n_pebs == cpuc->n_large_pebs))
		cpuc->lbr_stack.hw_idx = -1ULL;
	else
		cpuc->lbr_stack.hw_idx = intel_pmu_lbr_tos();

	intel_pmu_store_lbr(cpuc, lbr);
	intel_pmu_lbr_filter(cpuc);
}

/*
 * Map interface branch filters onto LBR filters
 */
static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
	[PERF_SAMPLE_BRANCH_ANY_SHIFT]		= LBR_ANY,
	[PERF_SAMPLE_BRANCH_USER_SHIFT]		= LBR_USER,
	[PERF_SAMPLE_BRANCH_KERNEL_SHIFT]	= LBR_KERNEL,
	[PERF_SAMPLE_BRANCH_HV_SHIFT]		= LBR_IGN,
	[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]	= LBR_RETURN | LBR_REL_JMP
						| LBR_IND_JMP | LBR_FAR,
	/*
	 * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches
	 */
	[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] =
	 LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR,
	/*
	 * NHM/WSM erratum: must include IND_JMP to capture IND_CALL
	 */
	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL | LBR_IND_JMP,
	[PERF_SAMPLE_BRANCH_COND_SHIFT]     = LBR_JCC,
	[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP,
};

static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
	[PERF_SAMPLE_BRANCH_ANY_SHIFT]		= LBR_ANY,
	[PERF_SAMPLE_BRANCH_USER_SHIFT]		= LBR_USER,
	[PERF_SAMPLE_BRANCH_KERNEL_SHIFT]	= LBR_KERNEL,
	[PERF_SAMPLE_BRANCH_HV_SHIFT]		= LBR_IGN,
	[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]	= LBR_RETURN | LBR_FAR,
	[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]	= LBR_REL_CALL | LBR_IND_CALL
						| LBR_FAR,
	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]	= LBR_IND_CALL,
	[PERF_SAMPLE_BRANCH_COND_SHIFT]		= LBR_JCC,
	[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]	= LBR_IND_JMP,
	[PERF_SAMPLE_BRANCH_CALL_SHIFT]		= LBR_REL_CALL,
};

static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
	[PERF_SAMPLE_BRANCH_ANY_SHIFT]		= LBR_ANY,
	[PERF_SAMPLE_BRANCH_USER_SHIFT]		= LBR_USER,
	[PERF_SAMPLE_BRANCH_KERNEL_SHIFT]	= LBR_KERNEL,
	[PERF_SAMPLE_BRANCH_HV_SHIFT]		= LBR_IGN,
	[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]	= LBR_RETURN | LBR_FAR,
	[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]	= LBR_REL_CALL | LBR_IND_CALL
						| LBR_FAR,
	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]	= LBR_IND_CALL,
	[PERF_SAMPLE_BRANCH_COND_SHIFT]		= LBR_JCC,
	[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT]	= LBR_REL_CALL | LBR_IND_CALL
						| LBR_RETURN | LBR_CALL_STACK,
	[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]	= LBR_IND_JMP,
	[PERF_SAMPLE_BRANCH_CALL_SHIFT]		= LBR_REL_CALL,
};

static int arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
	[PERF_SAMPLE_BRANCH_ANY_SHIFT]		= ARCH_LBR_ANY,
	[PERF_SAMPLE_BRANCH_USER_SHIFT]		= ARCH_LBR_USER,
	[PERF_SAMPLE_BRANCH_KERNEL_SHIFT]	= ARCH_LBR_KERNEL,
	[PERF_SAMPLE_BRANCH_HV_SHIFT]		= LBR_IGN,
	[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]	= ARCH_LBR_RETURN |
						  ARCH_LBR_OTHER_BRANCH,
	[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]     = ARCH_LBR_REL_CALL |
						  ARCH_LBR_IND_CALL |
						  ARCH_LBR_OTHER_BRANCH,
	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]     = ARCH_LBR_IND_CALL,
	[PERF_SAMPLE_BRANCH_COND_SHIFT]         = ARCH_LBR_JCC,
	[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT]   = ARCH_LBR_REL_CALL |
						  ARCH_LBR_IND_CALL |
						  ARCH_LBR_RETURN |
						  ARCH_LBR_CALL_STACK,
	[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]	= ARCH_LBR_IND_JMP,
	[PERF_SAMPLE_BRANCH_CALL_SHIFT]		= ARCH_LBR_REL_CALL,
};
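
/*
 * Illustrative lookup (not from the original source): for an event
 * requesting PERF_SAMPLE_BRANCH_CALL_STACK | PERF_SAMPLE_BRANCH_USER,
 * the setup loop in intel_pmu_setup_hw_lbr_filter() ORs the two table
 * entries together into ARCH_LBR_REL_CALL | ARCH_LBR_IND_CALL |
 * ARCH_LBR_RETURN | ARCH_LBR_CALL_STACK | ARCH_LBR_USER, which is
 * assumed to be what eventually lands in the Arch LBR control MSR
 * when the event is scheduled in.
 */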

/* core */
void __init intel_pmu_lbr_init_core(void)
{
	x86_pmu.lbr_nr     = 4;
	x86_pmu.lbr_tos    = MSR_LBR_TOS;
	x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
	x86_pmu.lbr_to     = MSR_LBR_CORE_TO;

	/*
	 * SW branch filter usage:
	 * - compensate for lack of HW filter
	 */
}

/* nehalem/westmere */
void __init intel_pmu_lbr_init_nhm(void)
{
	x86_pmu.lbr_nr     = 16;
	x86_pmu.lbr_tos    = MSR_LBR_TOS;
	x86_pmu.lbr_from   = MSR_LBR_NHM_FROM;
	x86_pmu.lbr_to     = MSR_LBR_NHM_TO;

	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
	x86_pmu.lbr_sel_map  = nhm_lbr_sel_map;

	/*
	 * SW branch filter usage:
	 * - workaround LBR_SEL errata (see above)
	 * - support syscall, sysret capture.
	 *   That requires LBR_FAR but that means far
	 *   jmps need to be filtered out
	 */
}

/* sandy bridge */
void __init intel_pmu_lbr_init_snb(void)
{
	x86_pmu.lbr_nr	 = 16;
	x86_pmu.lbr_tos	 = MSR_LBR_TOS;
	x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
	x86_pmu.lbr_to   = MSR_LBR_NHM_TO;

	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
	x86_pmu.lbr_sel_map  = snb_lbr_sel_map;

	/*
	 * SW branch filter usage:
	 * - support syscall, sysret capture.
	 *   That requires LBR_FAR but that means far
	 *   jmps need to be filtered out
	 */
}

static inline struct kmem_cache *
create_lbr_kmem_cache(size_t size, size_t align)
{
	return kmem_cache_create("x86_lbr", size, align, 0, NULL);
}

/* haswell */
void intel_pmu_lbr_init_hsw(void)
{
	size_t size = sizeof(struct x86_perf_task_context);

	x86_pmu.lbr_nr	 = 16;
	x86_pmu.lbr_tos	 = MSR_LBR_TOS;
	x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
	x86_pmu.lbr_to   = MSR_LBR_NHM_TO;

	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
	x86_pmu.lbr_sel_map  = hsw_lbr_sel_map;

	x86_get_pmu(smp_processor_id())->task_ctx_cache = create_lbr_kmem_cache(size, 0);

	if (lbr_from_signext_quirk_needed())
		static_branch_enable(&lbr_from_quirk_key);
}

/* skylake */
__init void intel_pmu_lbr_init_skl(void)
{
	size_t size = sizeof(struct x86_perf_task_context);

	x86_pmu.lbr_nr	 = 32;
	x86_pmu.lbr_tos	 = MSR_LBR_TOS;
	x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
	x86_pmu.lbr_to   = MSR_LBR_NHM_TO;
	x86_pmu.lbr_info = MSR_LBR_INFO_0;

	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
	x86_pmu.lbr_sel_map  = hsw_lbr_sel_map;

	x86_get_pmu(smp_processor_id())->task_ctx_cache = create_lbr_kmem_cache(size, 0);

	/*
	 * SW branch filter usage:
	 * - support syscall, sysret capture.
	 *   That requires LBR_FAR but that means far
	 *   jmps need to be filtered out
	 */
}

/* atom */
void __init intel_pmu_lbr_init_atom(void)
{
	/*
	 * only models starting at stepping 10 seem
	 * to have an operational LBR which can freeze
	 * on PMU interrupt
	 */
	if (boot_cpu_data.x86_model == 28
	    && boot_cpu_data.x86_stepping < 10) {
		pr_cont("LBR disabled due to erratum");
		return;
	}

	x86_pmu.lbr_nr	   = 8;
	x86_pmu.lbr_tos    = MSR_LBR_TOS;
	x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
	x86_pmu.lbr_to     = MSR_LBR_CORE_TO;

	/*
	 * SW branch filter usage:
	 * - compensate for lack of HW filter
	 */
}

/* slm */
void __init intel_pmu_lbr_init_slm(void)
{
	x86_pmu.lbr_nr	   = 8;
	x86_pmu.lbr_tos    = MSR_LBR_TOS;
	x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
	x86_pmu.lbr_to     = MSR_LBR_CORE_TO;

	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
	x86_pmu.lbr_sel_map  = nhm_lbr_sel_map;

	/*
	 * SW branch filter usage:
	 * - compensate for lack of HW filter
	 */
	pr_cont("8-deep LBR, ");
}

/* Knights Landing */
void intel_pmu_lbr_init_knl(void)
{
	x86_pmu.lbr_nr	   = 8;
	x86_pmu.lbr_tos    = MSR_LBR_TOS;
	x86_pmu.lbr_from   = MSR_LBR_NHM_FROM;
	x86_pmu.lbr_to     = MSR_LBR_NHM_TO;

	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
	x86_pmu.lbr_sel_map  = snb_lbr_sel_map;

	/* Knights Landing does have MISPREDICT bit */
	if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_LIP)
		x86_pmu.intel_cap.lbr_format = LBR_FORMAT_EIP_FLAGS;
}

/*
 * LBR state size is variable based on the max number of registers.
 * This calculates the expected state size, which should match
 * what the hardware enumerates for the size of XFEATURE_LBR.
 */
static inline unsigned int get_lbr_state_size(void)
{
	return sizeof(struct arch_lbr_state) +
	       x86_pmu.lbr_nr * sizeof(struct lbr_entry);
}
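
/*
 * Illustrative arithmetic (not from the original source): struct
 * lbr_entry is assumed to hold three u64 fields (from, to, info),
 * i.e. 24 bytes, so with lbr_nr == 32 the entry array alone is
 * 32 * 24 = 768 bytes on top of the fixed arch_lbr_state header.
 * XSAVES support is only kept if this computed total matches the
 * XFEATURE_LBR size the hardware enumerates.
 */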

static bool is_arch_lbr_xsave_available(void)
{
	if (!boot_cpu_has(X86_FEATURE_XSAVES))
		return false;

	/*
	 * Check the LBR state with the corresponding software structure.
	 * Disable LBR XSAVES support if the size doesn't match.
	 */
	if (xfeature_size(XFEATURE_LBR) == 0)
		return false;

	if (WARN_ON(xfeature_size(XFEATURE_LBR) != get_lbr_state_size()))
		return false;

	return true;
}

void __init intel_pmu_arch_lbr_init(void)
{
	struct pmu *pmu = x86_get_pmu(smp_processor_id());
	union cpuid28_eax eax;
	union cpuid28_ebx ebx;
	union cpuid28_ecx ecx;
	unsigned int unused_edx;
	bool arch_lbr_xsave;
	size_t size;
	u64 lbr_nr;

	/* Arch LBR Capabilities */
	cpuid(28, &eax.full, &ebx.full, &ecx.full, &unused_edx);

	lbr_nr = fls(eax.split.lbr_depth_mask) * 8;
	if (!lbr_nr)
		goto clear_arch_lbr;
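
	/*
	 * Illustrative example (not from the original source): CPUID
	 * leaf 0x1C reports the supported depths as a bitmask in
	 * multiples of eight, so a lbr_depth_mask of 0x7 (depths 8, 16
	 * and 24 supported) gives fls() == 3 and a maximum depth of
	 * 24 entries.
	 */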

	/* Apply the max depth of Arch LBR */
	if (wrmsrl_safe(MSR_ARCH_LBR_DEPTH, lbr_nr))
		goto clear_arch_lbr;

	x86_pmu.lbr_depth_mask = eax.split.lbr_depth_mask;
	x86_pmu.lbr_deep_c_reset = eax.split.lbr_deep_c_reset;
	x86_pmu.lbr_lip = eax.split.lbr_lip;
	x86_pmu.lbr_cpl = ebx.split.lbr_cpl;
	x86_pmu.lbr_filter = ebx.split.lbr_filter;
	x86_pmu.lbr_call_stack = ebx.split.lbr_call_stack;
	x86_pmu.lbr_mispred = ecx.split.lbr_mispred;
	x86_pmu.lbr_timed_lbr = ecx.split.lbr_timed_lbr;
	x86_pmu.lbr_br_type = ecx.split.lbr_br_type;
	x86_pmu.lbr_nr = lbr_nr;

	arch_lbr_xsave = is_arch_lbr_xsave_available();
	if (arch_lbr_xsave) {
		size = sizeof(struct x86_perf_task_context_arch_lbr_xsave) +
		       get_lbr_state_size();
		pmu->task_ctx_cache = create_lbr_kmem_cache(size,
							    XSAVE_ALIGNMENT);
	}

	if (!pmu->task_ctx_cache) {
		arch_lbr_xsave = false;

		size = sizeof(struct x86_perf_task_context_arch_lbr) +
		       lbr_nr * sizeof(struct lbr_entry);
		pmu->task_ctx_cache = create_lbr_kmem_cache(size, 0);
	}

	x86_pmu.lbr_from = MSR_ARCH_LBR_FROM_0;
	x86_pmu.lbr_to = MSR_ARCH_LBR_TO_0;
	x86_pmu.lbr_info = MSR_ARCH_LBR_INFO_0;

	/* LBR callstack requires both CPL and Branch Filtering support */
	if (!x86_pmu.lbr_cpl ||
	    !x86_pmu.lbr_filter ||
	    !x86_pmu.lbr_call_stack)
		arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] = LBR_NOT_SUPP;

	if (!x86_pmu.lbr_cpl) {
		arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_NOT_SUPP;
		arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_NOT_SUPP;
	} else if (!x86_pmu.lbr_filter) {
		arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_NOT_SUPP;
		arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_NOT_SUPP;
		arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = LBR_NOT_SUPP;
		arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_NOT_SUPP;
		arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_NOT_SUPP;
		arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_NOT_SUPP;
		arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_CALL_SHIFT] = LBR_NOT_SUPP;
	}

	x86_pmu.lbr_ctl_mask = ARCH_LBR_CTL_MASK;
	x86_pmu.lbr_ctl_map  = arch_lbr_ctl_map;

	if (!x86_pmu.lbr_cpl && !x86_pmu.lbr_filter)
		x86_pmu.lbr_ctl_map = NULL;

	x86_pmu.lbr_reset = intel_pmu_arch_lbr_reset;
	if (arch_lbr_xsave) {
		x86_pmu.lbr_save = intel_pmu_arch_lbr_xsaves;
		x86_pmu.lbr_restore = intel_pmu_arch_lbr_xrstors;
		x86_pmu.lbr_read = intel_pmu_arch_lbr_read_xsave;
		pr_cont("XSAVE ");
	} else {
		x86_pmu.lbr_save = intel_pmu_arch_lbr_save;
		x86_pmu.lbr_restore = intel_pmu_arch_lbr_restore;
		x86_pmu.lbr_read = intel_pmu_arch_lbr_read;
	}

	pr_cont("Architectural LBR, ");

	return;

clear_arch_lbr:
	setup_clear_cpu_cap(X86_FEATURE_ARCH_LBR);
}

/**
 * x86_perf_get_lbr - get the LBR records information
 *
 * @lbr: the caller's memory to store the LBR records information
 *
 * Returns: 0 indicates the LBR info has been successfully obtained
 */
int x86_perf_get_lbr(struct x86_pmu_lbr *lbr)
{
	int lbr_fmt = x86_pmu.intel_cap.lbr_format;

	lbr->nr = x86_pmu.lbr_nr;
	lbr->from = x86_pmu.lbr_from;
	lbr->to = x86_pmu.lbr_to;
	lbr->info = (lbr_fmt == LBR_FORMAT_INFO) ? x86_pmu.lbr_info : 0;

	return 0;
}
EXPORT_SYMBOL_GPL(x86_perf_get_lbr);
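
/*
 * Hedged usage sketch (illustrative, not part of the original file):
 * an in-kernel consumer such as a KVM-style vPMU might query the host
 * LBR layout like this:
 *
 *	struct x86_pmu_lbr lbr;
 *
 *	x86_perf_get_lbr(&lbr);
 *	if (lbr.nr)
 *		pr_info("LBR: %u entries, FROM MSR base 0x%x\n",
 *			lbr.nr, lbr.from);
 *
 * lbr.info is non-zero only when the LBR_FORMAT_INFO format is in
 * use, so callers should check it before touching any LBR_INFO MSRs.
 */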

struct event_constraint vlbr_constraint =
	__EVENT_CONSTRAINT(INTEL_FIXED_VLBR_EVENT, (1ULL << INTEL_PMC_IDX_FIXED_VLBR),
			  FIXED_EVENT_FLAGS, 1, 0, PERF_X86_EVENT_LBR_SELECT);