/*
 * Performance events x86 architecture code
 *
 * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
 * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
 * Copyright (C) 2009 Jaswinder Singh Rajput
 * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
 * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra
 * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
 * Copyright (C) 2009 Google, Inc., Stephane Eranian
 *
 * For licencing details see kernel-base/COPYING
 */

#include <linux/perf_event.h>
#include <linux/capability.h>
#include <linux/notifier.h>
#include <linux/hardirq.h>
#include <linux/kprobes.h>
#include <linux/export.h>
#include <linux/init.h>
#include <linux/kdebug.h>
#include <linux/sched/mm.h>
#include <linux/sched/clock.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/bitops.h>
#include <linux/device.h>
#include <linux/nospec.h>
#include <linux/static_call.h>

#include <asm/apic.h>
#include <asm/stacktrace.h>
#include <asm/nmi.h>
#include <asm/smp.h>
#include <asm/alternative.h>
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>
#include <asm/timer.h>
#include <asm/desc.h>
#include <asm/ldt.h>
#include <asm/unwind.h>

#include "perf_event.h"

struct x86_pmu x86_pmu __read_mostly;
static struct pmu pmu;

DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
	.enabled = 1,
	.pmu = &pmu,
};

DEFINE_STATIC_KEY_FALSE(rdpmc_never_available_key);
DEFINE_STATIC_KEY_FALSE(rdpmc_always_available_key);
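
/*
 * These static keys track the sysfs rdpmc setting; together with the per-mm
 * perf_rdpmc_allowed count they are consulted when deciding whether CR4.PCE,
 * and thus userspace RDPMC, should be enabled (see the rdpmc attribute
 * handling elsewhere in this file).
 */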

/*
 * This here uses DEFINE_STATIC_CALL_NULL() to get a static_call defined
 * from just a typename, as opposed to an actual function.
 */
DEFINE_STATIC_CALL_NULL(x86_pmu_handle_irq, *x86_pmu.handle_irq);
DEFINE_STATIC_CALL_NULL(x86_pmu_disable_all, *x86_pmu.disable_all);
DEFINE_STATIC_CALL_NULL(x86_pmu_enable_all, *x86_pmu.enable_all);
DEFINE_STATIC_CALL_NULL(x86_pmu_enable, *x86_pmu.enable);
DEFINE_STATIC_CALL_NULL(x86_pmu_disable, *x86_pmu.disable);

DEFINE_STATIC_CALL_NULL(x86_pmu_add, *x86_pmu.add);
DEFINE_STATIC_CALL_NULL(x86_pmu_del, *x86_pmu.del);
DEFINE_STATIC_CALL_NULL(x86_pmu_read, *x86_pmu.read);

DEFINE_STATIC_CALL_NULL(x86_pmu_schedule_events, *x86_pmu.schedule_events);
DEFINE_STATIC_CALL_NULL(x86_pmu_get_event_constraints, *x86_pmu.get_event_constraints);
DEFINE_STATIC_CALL_NULL(x86_pmu_put_event_constraints, *x86_pmu.put_event_constraints);

DEFINE_STATIC_CALL_NULL(x86_pmu_start_scheduling, *x86_pmu.start_scheduling);
DEFINE_STATIC_CALL_NULL(x86_pmu_commit_scheduling, *x86_pmu.commit_scheduling);
DEFINE_STATIC_CALL_NULL(x86_pmu_stop_scheduling, *x86_pmu.stop_scheduling);

DEFINE_STATIC_CALL_NULL(x86_pmu_sched_task, *x86_pmu.sched_task);
DEFINE_STATIC_CALL_NULL(x86_pmu_swap_task_ctx, *x86_pmu.swap_task_ctx);

DEFINE_STATIC_CALL_NULL(x86_pmu_drain_pebs, *x86_pmu.drain_pebs);
DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_aliases, *x86_pmu.pebs_aliases);

u64 __read_mostly hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX];
u64 __read_mostly hw_cache_extra_regs
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX];

/*
 * Propagate event elapsed time into the generic event.
 * Can only be executed on the CPU where the event is active.
 * Returns the new raw count.
 */
u64 x86_perf_event_update(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	int shift = 64 - x86_pmu.cntval_bits;
	u64 prev_raw_count, new_raw_count;
	u64 delta;

	if (unlikely(!hwc->event_base))
		return 0;

	if (unlikely(is_topdown_count(event)) && x86_pmu.update_topdown_event)
		return x86_pmu.update_topdown_event(event);

	/*
	 * Careful: an NMI might modify the previous event value.
	 *
	 * Our tactic to handle this is to first atomically read and
	 * exchange a new raw count - then add that new-prev delta
	 * count to the generic event atomically:
	 */
again:
	prev_raw_count = local64_read(&hwc->prev_count);
	rdpmcl(hwc->event_base_rdpmc, new_raw_count);

	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
			    new_raw_count) != prev_raw_count)
		goto again;

	/*
	 * Now we have the new raw value and have updated the prev
	 * timestamp already. We can now calculate the elapsed delta
	 * (event-)time and add that to the generic event.
	 *
	 * Careful, not all hw sign-extends above the physical width
	 * of the count.
	 */
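	/*
	 * Shift both counts up so that any bits above cntval_bits are
	 * discarded, then shift the difference back down: the delta is
	 * computed within the effective counter width regardless of what
	 * the upper bits held.
	 */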
	delta = (new_raw_count << shift) - (prev_raw_count << shift);
	delta >>= shift;

	local64_add(delta, &event->count);
	local64_sub(delta, &hwc->period_left);

	return new_raw_count;
}

/*
 * Find and validate any extra registers to set up.
 */
static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
{
	struct hw_perf_event_extra *reg;
	struct extra_reg *er;

	reg = &event->hw.extra_reg;

	if (!x86_pmu.extra_regs)
		return 0;

	for (er = x86_pmu.extra_regs; er->msr; er++) {
		if (er->event != (config & er->config_mask))
			continue;
		if (event->attr.config1 & ~er->valid_mask)
			return -EINVAL;
		/* Check if the extra msrs can be safely accessed */
		if (!er->extra_msr_access)
			return -ENXIO;

		reg->idx = er->idx;
		reg->config = event->attr.config1;
		reg->reg = er->msr;
		break;
	}
	return 0;
}

static atomic_t active_events;
static atomic_t pmc_refcount;
static DEFINE_MUTEX(pmc_reserve_mutex);

#ifdef CONFIG_X86_LOCAL_APIC

static bool reserve_pmc_hardware(void)
{
	int i;

	for (i = 0; i < x86_pmu.num_counters; i++) {
		if (!reserve_perfctr_nmi(x86_pmu_event_addr(i)))
			goto perfctr_fail;
	}

	for (i = 0; i < x86_pmu.num_counters; i++) {
		if (!reserve_evntsel_nmi(x86_pmu_config_addr(i)))
			goto eventsel_fail;
	}

	return true;

eventsel_fail:
	for (i--; i >= 0; i--)
		release_evntsel_nmi(x86_pmu_config_addr(i));

	i = x86_pmu.num_counters;

perfctr_fail:
	for (i--; i >= 0; i--)
		release_perfctr_nmi(x86_pmu_event_addr(i));

	return false;
}

static void release_pmc_hardware(void)
{
	int i;

	for (i = 0; i < x86_pmu.num_counters; i++) {
		release_perfctr_nmi(x86_pmu_event_addr(i));
		release_evntsel_nmi(x86_pmu_config_addr(i));
	}
}

#else

static bool reserve_pmc_hardware(void) { return true; }
static void release_pmc_hardware(void) {}

#endif

static bool check_hw_exists(void)
{
	u64 val, val_fail = -1, val_new = ~0;
	int i, reg, reg_fail = -1, ret = 0;
	int bios_fail = 0;
	int reg_safe = -1;

	/*
	 * Check to see if the BIOS enabled any of the counters, if so
	 * complain and bail.
	 */
	for (i = 0; i < x86_pmu.num_counters; i++) {
		reg = x86_pmu_config_addr(i);
		ret = rdmsrl_safe(reg, &val);
		if (ret)
			goto msr_fail;
		if (val & ARCH_PERFMON_EVENTSEL_ENABLE) {
			bios_fail = 1;
			val_fail = val;
			reg_fail = reg;
		} else {
			reg_safe = i;
		}
	}

	if (x86_pmu.num_counters_fixed) {
		reg = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
		ret = rdmsrl_safe(reg, &val);
		if (ret)
			goto msr_fail;
		for (i = 0; i < x86_pmu.num_counters_fixed; i++) {
			if (val & (0x03 << i*4)) {
				bios_fail = 1;
				val_fail = val;
				reg_fail = reg;
			}
		}
	}

	/*
	 * If all the counters are enabled, the below test will always
	 * fail. The tools will also become useless in this scenario.
	 * Just fail and disable the hardware counters.
	 */

	if (reg_safe == -1) {
		reg = reg_safe;
		goto msr_fail;
	}

	/*
	 * Read the current value, change it and read it back to see if it
	 * matches, this is needed to detect certain hardware emulators
	 * (qemu/kvm) that don't trap on the MSR access and always return 0s.
	 */
	reg = x86_pmu_event_addr(reg_safe);
	if (rdmsrl_safe(reg, &val))
		goto msr_fail;
	val ^= 0xffffUL;
	ret = wrmsrl_safe(reg, val);
	ret |= rdmsrl_safe(reg, &val_new);
	if (ret || val != val_new)
		goto msr_fail;

	/*
	 * We still allow the PMU driver to operate:
	 */
	if (bios_fail) {
		pr_cont("Broken BIOS detected, complain to your hardware vendor.\n");
		pr_err(FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n",
		       reg_fail, val_fail);
	}

	return true;

msr_fail:
	if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
		pr_cont("PMU not available due to virtualization, using software events only.\n");
	} else {
		pr_cont("Broken PMU hardware detected, using software events only.\n");
		pr_err("Failed to access perfctr msr (MSR %x is %Lx)\n",
		       reg, val_new);
	}

	return false;
}

static void hw_perf_event_destroy(struct perf_event *event)
{
	x86_release_hardware();
	atomic_dec(&active_events);
}

void hw_perf_lbr_event_destroy(struct perf_event *event)
{
	hw_perf_event_destroy(event);

	/* undo the lbr/bts event accounting */
	x86_del_exclusive(x86_lbr_exclusive_lbr);
}

static inline int x86_pmu_initialized(void)
{
	return x86_pmu.handle_irq != NULL;
}

static inline int
set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event)
{
	struct perf_event_attr *attr = &event->attr;
	unsigned int cache_type, cache_op, cache_result;
	u64 config, val;

	config = attr->config;

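	/*
	 * A PERF_TYPE_HW_CACHE config encodes the cache type in bits 0-7,
	 * the operation in bits 8-15 and the result in bits 16-23; each
	 * field is bounds-checked and clamped with array_index_nospec()
	 * before it is used as a table index.
	 */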
	cache_type = (config >> 0) & 0xff;
	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
		return -EINVAL;
	cache_type = array_index_nospec(cache_type, PERF_COUNT_HW_CACHE_MAX);

	cache_op = (config >> 8) & 0xff;
	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
		return -EINVAL;
	cache_op = array_index_nospec(cache_op, PERF_COUNT_HW_CACHE_OP_MAX);

	cache_result = (config >> 16) & 0xff;
	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
		return -EINVAL;
	cache_result = array_index_nospec(cache_result, PERF_COUNT_HW_CACHE_RESULT_MAX);

	val = hw_cache_event_ids[cache_type][cache_op][cache_result];

	if (val == 0)
		return -ENOENT;

	if (val == -1)
		return -EINVAL;

	hwc->config |= val;
	attr->config1 = hw_cache_extra_regs[cache_type][cache_op][cache_result];
	return x86_pmu_extra_regs(val, event);
}

int x86_reserve_hardware(void)
{
	int err = 0;

	if (!atomic_inc_not_zero(&pmc_refcount)) {
		mutex_lock(&pmc_reserve_mutex);
		if (atomic_read(&pmc_refcount) == 0) {
			if (!reserve_pmc_hardware()) {
				err = -EBUSY;
			} else {
				reserve_ds_buffers();
				reserve_lbr_buffers();
			}
		}
		if (!err)
			atomic_inc(&pmc_refcount);
		mutex_unlock(&pmc_reserve_mutex);
	}

	return err;
}

void x86_release_hardware(void)
{
	if (atomic_dec_and_mutex_lock(&pmc_refcount, &pmc_reserve_mutex)) {
		release_pmc_hardware();
		release_ds_buffers();
		release_lbr_buffers();
		mutex_unlock(&pmc_reserve_mutex);
	}
}

/*
 * Check if we can create event of a certain type (that no conflicting events
 * are present).
 */
int x86_add_exclusive(unsigned int what)
{
	int i;

	/*
	 * When lbr_pt_coexist we allow PT to coexist with either LBR or BTS.
	 * LBR and BTS are still mutually exclusive.
	 */
	if (x86_pmu.lbr_pt_coexist && what == x86_lbr_exclusive_pt)
		goto out;

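	/*
	 * Fast path: if this exclusive class already has users, just bump
	 * its count. Otherwise take the mutex and verify that no other
	 * exclusive class is active before claiming this one.
	 */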
	if (!atomic_inc_not_zero(&x86_pmu.lbr_exclusive[what])) {
		mutex_lock(&pmc_reserve_mutex);
		for (i = 0; i < ARRAY_SIZE(x86_pmu.lbr_exclusive); i++) {
			if (i != what && atomic_read(&x86_pmu.lbr_exclusive[i]))
				goto fail_unlock;
		}
		atomic_inc(&x86_pmu.lbr_exclusive[what]);
		mutex_unlock(&pmc_reserve_mutex);
	}

out:
	atomic_inc(&active_events);
	return 0;

fail_unlock:
	mutex_unlock(&pmc_reserve_mutex);
	return -EBUSY;
}

void x86_del_exclusive(unsigned int what)
{
	atomic_dec(&active_events);

	/*
	 * See the comment in x86_add_exclusive().
	 */
	if (x86_pmu.lbr_pt_coexist && what == x86_lbr_exclusive_pt)
		return;

	atomic_dec(&x86_pmu.lbr_exclusive[what]);
}

int x86_setup_perfctr(struct perf_event *event)
{
	struct perf_event_attr *attr = &event->attr;
	struct hw_perf_event *hwc = &event->hw;
	u64 config;

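	/*
	 * Counting (non-sampling) events still need a period so the counter
	 * can be programmed; use the maximum period to keep the number of
	 * overflow interrupts to a minimum.
	 */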
	if (!is_sampling_event(event)) {
		hwc->sample_period = x86_pmu.max_period;
		hwc->last_period = hwc->sample_period;
		local64_set(&hwc->period_left, hwc->sample_period);
	}

	if (attr->type == PERF_TYPE_RAW)
		return x86_pmu_extra_regs(event->attr.config, event);

	if (attr->type == PERF_TYPE_HW_CACHE)
		return set_ext_hw_attr(hwc, event);

	if (attr->config >= x86_pmu.max_events)
		return -EINVAL;

	attr->config = array_index_nospec((unsigned long)attr->config, x86_pmu.max_events);

	/*
	 * The generic map:
	 */
	config = x86_pmu.event_map(attr->config);

	if (config == 0)
		return -ENOENT;

	if (config == -1LL)
		return -EINVAL;

	hwc->config |= config;

	return 0;
}

/*
 * check that branch_sample_type is compatible with
 * settings needed for precise_ip > 1 which implies
 * using the LBR to capture ALL taken branches at the
 * priv levels of the measurement
 */
static inline int precise_br_compat(struct perf_event *event)
{
	u64 m = event->attr.branch_sample_type;
	u64 b = 0;

	/* must capture all branches */
	if (!(m & PERF_SAMPLE_BRANCH_ANY))
		return 0;

	m &= PERF_SAMPLE_BRANCH_KERNEL | PERF_SAMPLE_BRANCH_USER;

	if (!event->attr.exclude_user)
		b |= PERF_SAMPLE_BRANCH_USER;

	if (!event->attr.exclude_kernel)
		b |= PERF_SAMPLE_BRANCH_KERNEL;

	/*
	 * ignore PERF_SAMPLE_BRANCH_HV, not supported on x86
	 */

	return m == b;
}

int x86_pmu_max_precise(void)
{
	int precise = 0;

	/* Support for constant skid */
	if (x86_pmu.pebs_active && !x86_pmu.pebs_broken) {
		precise++;

		/* Support for IP fixup */
		if (x86_pmu.lbr_nr || x86_pmu.intel_cap.pebs_format >= 2)
			precise++;

		if (x86_pmu.pebs_prec_dist)
			precise++;
	}
	return precise;
}

int x86_pmu_hw_config(struct perf_event *event)
{
	if (event->attr.precise_ip) {
		int precise = x86_pmu_max_precise();

		if (event->attr.precise_ip > precise)
			return -EOPNOTSUPP;

		/* There's no sense in having PEBS for non sampling events: */
		if (!is_sampling_event(event))
			return -EINVAL;
	}
	/*
	 * check that PEBS LBR correction does not conflict with
	 * whatever the user is asking with attr->branch_sample_type
	 */
	if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format < 2) {
		u64 *br_type = &event->attr.branch_sample_type;

		if (has_branch_stack(event)) {
			if (!precise_br_compat(event))
				return -EOPNOTSUPP;

			/* branch_sample_type is compatible */

		} else {
			/*
			 * user did not specify branch_sample_type
			 *
			 * For PEBS fixups, we capture all
			 * the branches at the priv level of the
			 * event.
			 */
			*br_type = PERF_SAMPLE_BRANCH_ANY;

			if (!event->attr.exclude_user)
				*br_type |= PERF_SAMPLE_BRANCH_USER;

			if (!event->attr.exclude_kernel)
				*br_type |= PERF_SAMPLE_BRANCH_KERNEL;
		}
	}

	if (event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK)
		event->attach_state |= PERF_ATTACH_TASK_DATA;

	/*
	 * Generate PMC IRQs:
	 * (keep 'enabled' bit clear for now)
	 */
	event->hw.config = ARCH_PERFMON_EVENTSEL_INT;

	/*
	 * Count user and OS events unless requested not to
	 */
	if (!event->attr.exclude_user)
		event->hw.config |= ARCH_PERFMON_EVENTSEL_USR;
	if (!event->attr.exclude_kernel)
		event->hw.config |= ARCH_PERFMON_EVENTSEL_OS;

	if (event->attr.type == PERF_TYPE_RAW)
		event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK;

	if (event->attr.sample_period && x86_pmu.limit_period) {
		if (x86_pmu.limit_period(event, event->attr.sample_period) >
				event->attr.sample_period)
			return -EINVAL;
	}

	/* sample_regs_user never supports XMM registers */
	if (unlikely(event->attr.sample_regs_user & PERF_REG_EXTENDED_MASK))
		return -EINVAL;
	/*
	 * Besides the general purpose registers, XMM registers may
	 * be collected in PEBS on some platforms, e.g. Icelake
	 */
	if (unlikely(event->attr.sample_regs_intr & PERF_REG_EXTENDED_MASK)) {
		if (!(event->pmu->capabilities & PERF_PMU_CAP_EXTENDED_REGS))
			return -EINVAL;

		if (!event->attr.precise_ip)
			return -EINVAL;
	}

	return x86_setup_perfctr(event);
}

/*
 * Setup the hardware configuration for a given attr_type
 */
static int __x86_pmu_event_init(struct perf_event *event)
{
	int err;

	if (!x86_pmu_initialized())
		return -ENODEV;

	err = x86_reserve_hardware();
	if (err)
		return err;

	atomic_inc(&active_events);
	event->destroy = hw_perf_event_destroy;

	event->hw.idx = -1;
	event->hw.last_cpu = -1;
	event->hw.last_tag = ~0ULL;

	/* mark unused */
	event->hw.extra_reg.idx = EXTRA_REG_NONE;
	event->hw.branch_reg.idx = EXTRA_REG_NONE;

	return x86_pmu.hw_config(event);
}

void x86_pmu_disable_all(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	int idx;

	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
		struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
		u64 val;

		if (!test_bit(idx, cpuc->active_mask))
			continue;
		rdmsrl(x86_pmu_config_addr(idx), val);
		if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE))
			continue;
		val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
		wrmsrl(x86_pmu_config_addr(idx), val);
		if (is_counter_pair(hwc))
			wrmsrl(x86_pmu_config_addr(idx + 1), 0);
	}
}

/*
 * A PMI may still land after enabled has been set to 0; it can hit either
 * before or after disable_all.
 *
 * If the PMI hits before disable_all, the PMU is disabled in the NMI handler
 * and is not re-enabled there, because enabled=0. After handling the NMI,
 * disable_all is called, which does not change the state either. If the PMI
 * hits after disable_all, the PMU is already disabled before entering the
 * NMI handler, and the handler does not change the state either.
 *
 * So either situation is harmless.
 */
static void x86_pmu_disable(struct pmu *pmu)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (!x86_pmu_initialized())
		return;

	if (!cpuc->enabled)
		return;

	cpuc->n_added = 0;
	cpuc->enabled = 0;
	barrier();

	static_call(x86_pmu_disable_all)();
}

void x86_pmu_enable_all(int added)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	int idx;

	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
		struct hw_perf_event *hwc = &cpuc->events[idx]->hw;

		if (!test_bit(idx, cpuc->active_mask))
			continue;

		__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
	}
}

static inline int is_x86_event(struct perf_event *event)
{
	return event->pmu == &pmu;
}

struct pmu *x86_get_pmu(unsigned int cpu)
{
	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);

	/*
	 * All CPUs of this hybrid PMU type have been offlined;
	 * x86_get_pmu() should not be invoked in that case.
	 */
	if (WARN_ON_ONCE(!cpuc->pmu))
		return &pmu;

	return cpuc->pmu;
}
/*
 * Event scheduler state:
 *
 * Assign events iterating over all events and counters, beginning
 * with events with least weights first. Keep the current iterator
 * state in struct sched_state.
 */
struct sched_state {
	int	weight;
	int	event;		/* event index */
	int	counter;	/* counter index */
	int	unassigned;	/* number of events to be assigned left */
	int	nr_gp;		/* number of GP counters used */
	u64	used;
};

/* Total max is X86_PMC_IDX_MAX, but we are O(n!) limited */
#define SCHED_STATES_MAX	2

struct perf_sched {
	int			max_weight;
	int			max_events;
	int			max_gp;
	int			saved_states;
	struct event_constraint	**constraints;
	struct sched_state	state;
	struct sched_state	saved[SCHED_STATES_MAX];
};

/*
 * Initialize iterator that runs through all events and counters.
 */
static void perf_sched_init(struct perf_sched *sched, struct event_constraint **constraints,
			    int num, int wmin, int wmax, int gpmax)
{
	int idx;

	memset(sched, 0, sizeof(*sched));
	sched->max_events = num;
	sched->max_weight = wmax;
	sched->max_gp = gpmax;
	sched->constraints = constraints;

	for (idx = 0; idx < num; idx++) {
		if (constraints[idx]->weight == wmin)
			break;
	}

	sched->state.event = idx;	/* start with min weight */
	sched->state.weight = wmin;
	sched->state.unassigned = num;
}

static void perf_sched_save_state(struct perf_sched *sched)
{
	if (WARN_ON_ONCE(sched->saved_states >= SCHED_STATES_MAX))
		return;

	sched->saved[sched->saved_states] = sched->state;
	sched->saved_states++;
}

static bool perf_sched_restore_state(struct perf_sched *sched)
{
	if (!sched->saved_states)
		return false;

	sched->saved_states--;
	sched->state = sched->saved[sched->saved_states];

	/* this assignment didn't work out */
	/* XXX broken vs EVENT_PAIR */
	sched->state.used &= ~BIT_ULL(sched->state.counter);

	/* try the next one */
	sched->state.counter++;

	return true;
}

/*
 * Select a counter for the current event to schedule. Return true on
 * success.
 */
static bool __perf_sched_find_counter(struct perf_sched *sched)
{
	struct event_constraint *c;
	int idx;

	if (!sched->state.unassigned)
		return false;

	if (sched->state.event >= sched->max_events)
		return false;

	c = sched->constraints[sched->state.event];
	/* Prefer fixed purpose counters */
	if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) {
		idx = INTEL_PMC_IDX_FIXED;
		for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_MAX) {
			u64 mask = BIT_ULL(idx);

			if (sched->state.used & mask)
				continue;

			sched->state.used |= mask;
			goto done;
		}
	}

	/* Grab the first unused counter starting with idx */
	idx = sched->state.counter;
	for_each_set_bit_from(idx, c->idxmsk, INTEL_PMC_IDX_FIXED) {
		u64 mask = BIT_ULL(idx);

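		/*
		 * A paired (large increment) event also occupies the adjacent
		 * counter, so both bits must be free before this slot can be
		 * used.
		 */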
		if (c->flags & PERF_X86_EVENT_PAIR)
			mask |= mask << 1;

		if (sched->state.used & mask)
			continue;

		if (sched->state.nr_gp++ >= sched->max_gp)
			return false;

		sched->state.used |= mask;
		goto done;
	}

	return false;

done:
	sched->state.counter = idx;

	if (c->overlap)
		perf_sched_save_state(sched);

	return true;
}

static bool perf_sched_find_counter(struct perf_sched *sched)
{
	while (!__perf_sched_find_counter(sched)) {
		if (!perf_sched_restore_state(sched))
			return false;
	}

	return true;
}

/*
 * Go through all unassigned events and find the next one to schedule.
 * Take events with the least weight first. Return true on success.
 */
static bool perf_sched_next_event(struct perf_sched *sched)
{
	struct event_constraint *c;

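	/*
	 * The event we were called for has just been assigned a counter;
	 * account for it here and bail if it was the last unassigned one.
	 */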
	if (!sched->state.unassigned || !--sched->state.unassigned)
		return false;

	do {
		/* next event */
		sched->state.event++;
		if (sched->state.event >= sched->max_events) {
			/* next weight */
			sched->state.event = 0;
			sched->state.weight++;
			if (sched->state.weight > sched->max_weight)
				return false;
		}
		c = sched->constraints[sched->state.event];
	} while (c->weight != sched->state.weight);

	sched->state.counter = 0;	/* start with first counter */

	return true;
}

/*
 * Assign a counter for each event.
 */
int perf_assign_events(struct event_constraint **constraints, int n,
		       int wmin, int wmax, int gpmax, int *assign)
{
	struct perf_sched sched;

	perf_sched_init(&sched, constraints, n, wmin, wmax, gpmax);

	do {
		if (!perf_sched_find_counter(&sched))
			break;	/* failed */
		if (assign)
			assign[sched.state.event] = sched.state.counter;
	} while (perf_sched_next_event(&sched));

	return sched.state.unassigned;
}
EXPORT_SYMBOL_GPL(perf_assign_events);

int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
{
	struct event_constraint *c;
	struct perf_event *e;
	int n0, i, wmin, wmax, unsched = 0;
	struct hw_perf_event *hwc;
	u64 used_mask = 0;

	/*
	 * Compute the number of events already present; see x86_pmu_add(),
	 * validate_group() and x86_pmu_commit_txn(). For the former two
	 * cpuc->n_events hasn't been updated yet, while for the latter
	 * cpuc->n_txn contains the number of events added in the current
	 * transaction.
	 */
	n0 = cpuc->n_events;
	if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
		n0 -= cpuc->n_txn;

	static_call_cond(x86_pmu_start_scheduling)(cpuc);

	for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
		c = cpuc->event_constraint[i];

		/*
		 * Previously scheduled events should have a cached constraint,
		 * while new events should not have one.
		 */
		WARN_ON_ONCE((c && i >= n0) || (!c && i < n0));

		/*
		 * Request constraints for new events; or for those events that
		 * have a dynamic constraint -- for those the constraint can
		 * change due to external factors (sibling state, allow_tfa).
		 */
		if (!c || (c->flags & PERF_X86_EVENT_DYNAMIC)) {
			c = static_call(x86_pmu_get_event_constraints)(cpuc, i, cpuc->event_list[i]);
			cpuc->event_constraint[i] = c;
		}

		wmin = min(wmin, c->weight);
		wmax = max(wmax, c->weight);
	}

	/*
	 * fastpath, try to reuse previous register
	 */
	for (i = 0; i < n; i++) {
		u64 mask;

		hwc = &cpuc->event_list[i]->hw;
		c = cpuc->event_constraint[i];

		/* never assigned */
		if (hwc->idx == -1)
			break;

		/* constraint still honored */
		if (!test_bit(hwc->idx, c->idxmsk))
			break;

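		/*
		 * A counter pair also occupies the adjacent counter; reserve
		 * both bits so the sibling slot cannot be handed out twice.
		 */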
		mask = BIT_ULL(hwc->idx);
		if (is_counter_pair(hwc))
			mask |= mask << 1;

		/* not already used */
		if (used_mask & mask)
			break;

		used_mask |= mask;

		if (assign)
			assign[i] = hwc->idx;
	}

	/* slow path */
	if (i != n) {
		int gpmax = x86_pmu.num_counters;

		/*
		 * Do not allow scheduling of more than half the available
		 * generic counters.
		 *
		 * This helps avoid counter starvation of sibling threads by
		 * ensuring at most half the counters cannot be in exclusive
		 * mode. There are no designated counters for the limits; any
		 * N/2 counters can be used. This helps with events with
		 * specific counter constraints.
		 */
		if (is_ht_workaround_enabled() && !cpuc->is_fake &&
		    READ_ONCE(cpuc->excl_cntrs->exclusive_present))
			gpmax /= 2;

		/*
		 * Reduce the amount of available counters to allow fitting
		 * the extra Merge events needed by large increment events.
		 */
		if (x86_pmu.flags & PMU_FL_PAIR) {
			gpmax = x86_pmu.num_counters - cpuc->n_pair;
			WARN_ON(gpmax <= 0);
		}

		unsched = perf_assign_events(cpuc->event_constraint, n, wmin,
					     wmax, gpmax, assign);
	}

	/*
	 * In case of success (unsched = 0), mark events as committed,
	 * so we do not put_constraint() in case new events are added
	 * and fail to be scheduled
	 *
	 * We invoke the lower level commit callback to lock the resource
	 *
	 * We do not need to do all of this in case we are called to
	 * validate an event group (assign == NULL)
	 */
	if (!unsched && assign) {
		for (i = 0; i < n; i++) {
			e = cpuc->event_list[i];
			static_call_cond(x86_pmu_commit_scheduling)(cpuc, i, assign[i]);
		}
	} else {
		for (i = n0; i < n; i++) {
			e = cpuc->event_list[i];

			/*
			 * release events that failed scheduling
			 */
			static_call_cond(x86_pmu_put_event_constraints)(cpuc, e);

			cpuc->event_constraint[i] = NULL;
		}
	}

	static_call_cond(x86_pmu_stop_scheduling)(cpuc);

	return unsched ? -EINVAL : 0;
}

static int add_nr_metric_event(struct cpu_hw_events *cpuc,
			       struct perf_event *event)
{
	if (is_metric_event(event)) {
		if (cpuc->n_metric == INTEL_TD_METRIC_NUM)
			return -EINVAL;
		cpuc->n_metric++;
		cpuc->n_txn_metric++;
	}

	return 0;
}

static void del_nr_metric_event(struct cpu_hw_events *cpuc,
				struct perf_event *event)
{
	if (is_metric_event(event))
		cpuc->n_metric--;
}

static int collect_event(struct cpu_hw_events *cpuc, struct perf_event *event,
			 int max_count, int n)
{

	if (x86_pmu.intel_cap.perf_metrics && add_nr_metric_event(cpuc, event))
		return -EINVAL;

	if (n >= max_count + cpuc->n_metric)
		return -EINVAL;

	cpuc->event_list[n] = event;
	if (is_counter_pair(&event->hw)) {
		cpuc->n_pair++;
		cpuc->n_txn_pair++;
	}

	return 0;
}

/*
 * dogrp: true if must collect siblings events (group)
 * returns total number of events and error code
 */
static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, bool dogrp)
{
	struct perf_event *event;
	int n, max_count;

	max_count = x86_pmu.num_counters + x86_pmu.num_counters_fixed;

	/* current number of events already accepted */
	n = cpuc->n_events;
	if (!cpuc->n_events)
		cpuc->pebs_output = 0;

	if (!cpuc->is_fake && leader->attr.precise_ip) {
		/*
		 * For PEBS->PT, if !aux_event, the group leader (PT) went
		 * away, the group was broken down and this singleton event
		 * can't schedule any more.
		 */
		if (is_pebs_pt(leader) && !leader->aux_event)
			return -EINVAL;

		/*
		 * pebs_output: 0: no PEBS so far, 1: PT, 2: DS
		 */
		if (cpuc->pebs_output &&
		    cpuc->pebs_output != is_pebs_pt(leader) + 1)
			return -EINVAL;

		cpuc->pebs_output = is_pebs_pt(leader) + 1;
	}

	if (is_x86_event(leader)) {
		if (collect_event(cpuc, leader, max_count, n))
			return -EINVAL;
		n++;
	}

	if (!dogrp)
		return n;

	for_each_sibling_event(event, leader) {
		if (!is_x86_event(event) || event->state <= PERF_EVENT_STATE_OFF)
			continue;

		if (collect_event(cpuc, event, max_count, n))
			return -EINVAL;

		n++;
	}
	return n;
}

static inline void x86_assign_hw_event(struct perf_event *event,
				       struct cpu_hw_events *cpuc, int i)
{
	struct hw_perf_event *hwc = &event->hw;
	int idx;

	idx = hwc->idx = cpuc->assign[i];
	hwc->last_cpu = smp_processor_id();
	hwc->last_tag = ++cpuc->tags[i];

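	/*
	 * Translate the assigned counter index into the config/event MSR
	 * bases (and the rdpmc index) that the rest of the code programs.
	 */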
1178*4882a593Smuzhiyun switch (hwc->idx) {
1179*4882a593Smuzhiyun case INTEL_PMC_IDX_FIXED_BTS:
1180*4882a593Smuzhiyun case INTEL_PMC_IDX_FIXED_VLBR:
1181*4882a593Smuzhiyun hwc->config_base = 0;
1182*4882a593Smuzhiyun hwc->event_base = 0;
1183*4882a593Smuzhiyun break;
1184*4882a593Smuzhiyun
1185*4882a593Smuzhiyun case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
1186*4882a593Smuzhiyun /* All the metric events are mapped onto the fixed counter 3. */
1187*4882a593Smuzhiyun idx = INTEL_PMC_IDX_FIXED_SLOTS;
1188*4882a593Smuzhiyun /* fall through */
1189*4882a593Smuzhiyun case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS-1:
1190*4882a593Smuzhiyun hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
1191*4882a593Smuzhiyun hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 +
1192*4882a593Smuzhiyun (idx - INTEL_PMC_IDX_FIXED);
1193*4882a593Smuzhiyun hwc->event_base_rdpmc = (idx - INTEL_PMC_IDX_FIXED) |
1194*4882a593Smuzhiyun INTEL_PMC_FIXED_RDPMC_BASE;
1195*4882a593Smuzhiyun break;
1196*4882a593Smuzhiyun
1197*4882a593Smuzhiyun default:
1198*4882a593Smuzhiyun hwc->config_base = x86_pmu_config_addr(hwc->idx);
1199*4882a593Smuzhiyun hwc->event_base = x86_pmu_event_addr(hwc->idx);
1200*4882a593Smuzhiyun hwc->event_base_rdpmc = x86_pmu_rdpmc_index(hwc->idx);
1201*4882a593Smuzhiyun break;
1202*4882a593Smuzhiyun }
1203*4882a593Smuzhiyun }
1204*4882a593Smuzhiyun
1205*4882a593Smuzhiyun /**
1206*4882a593Smuzhiyun * x86_perf_rdpmc_index - Return PMC counter used for event
1207*4882a593Smuzhiyun * @event: the perf_event to which the PMC counter was assigned
1208*4882a593Smuzhiyun *
1209*4882a593Smuzhiyun * The counter assigned to this performance event may change if interrupts
1210*4882a593Smuzhiyun * are enabled. This counter should thus never be used while interrupts are
1211*4882a593Smuzhiyun * enabled. Before this function is used to obtain the assigned counter the
1212*4882a593Smuzhiyun * event should be checked for validity using, for example,
1213*4882a593Smuzhiyun * perf_event_read_local(), within the same interrupt disabled section in
1214*4882a593Smuzhiyun * which this counter is planned to be used.
1215*4882a593Smuzhiyun *
1216*4882a593Smuzhiyun * Return: The index of the performance monitoring counter assigned to
1217*4882a593Smuzhiyun * @perf_event.
1218*4882a593Smuzhiyun */
1219*4882a593Smuzhiyun int x86_perf_rdpmc_index(struct perf_event *event)
1220*4882a593Smuzhiyun {
1221*4882a593Smuzhiyun lockdep_assert_irqs_disabled();
1222*4882a593Smuzhiyun
1223*4882a593Smuzhiyun return event->hw.event_base_rdpmc;
1224*4882a593Smuzhiyun }
1225*4882a593Smuzhiyun
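/*
 * Hypothetical usage sketch (caller code, not part of this file): the
 * index is only stable with interrupts off, so a caller would validate
 * the event and read the index inside a single IRQ-disabled section:
 *
 *	unsigned long flags;
 *	u64 value;
 *	int idx = -1;
 *
 *	local_irq_save(flags);
 *	if (!perf_event_read_local(event, &value, NULL, NULL))
 *		idx = x86_perf_rdpmc_index(event);
 *	local_irq_restore(flags);
 */
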
1226*4882a593Smuzhiyun static inline int match_prev_assignment(struct hw_perf_event *hwc,
1227*4882a593Smuzhiyun struct cpu_hw_events *cpuc,
1228*4882a593Smuzhiyun int i)
1229*4882a593Smuzhiyun {
1230*4882a593Smuzhiyun return hwc->idx == cpuc->assign[i] &&
1231*4882a593Smuzhiyun hwc->last_cpu == smp_processor_id() &&
1232*4882a593Smuzhiyun hwc->last_tag == cpuc->tags[i];
1233*4882a593Smuzhiyun }
1234*4882a593Smuzhiyun
1235*4882a593Smuzhiyun static void x86_pmu_start(struct perf_event *event, int flags);
1236*4882a593Smuzhiyun
1237*4882a593Smuzhiyun static void x86_pmu_enable(struct pmu *pmu)
1238*4882a593Smuzhiyun {
1239*4882a593Smuzhiyun struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1240*4882a593Smuzhiyun struct perf_event *event;
1241*4882a593Smuzhiyun struct hw_perf_event *hwc;
1242*4882a593Smuzhiyun int i, added = cpuc->n_added;
1243*4882a593Smuzhiyun
1244*4882a593Smuzhiyun if (!x86_pmu_initialized())
1245*4882a593Smuzhiyun return;
1246*4882a593Smuzhiyun
1247*4882a593Smuzhiyun if (cpuc->enabled)
1248*4882a593Smuzhiyun return;
1249*4882a593Smuzhiyun
1250*4882a593Smuzhiyun if (cpuc->n_added) {
1251*4882a593Smuzhiyun int n_running = cpuc->n_events - cpuc->n_added;
1252*4882a593Smuzhiyun /*
1253*4882a593Smuzhiyun * apply assignment obtained either from
1254*4882a593Smuzhiyun * hw_perf_group_sched_in() or x86_pmu_enable()
1255*4882a593Smuzhiyun *
1256*4882a593Smuzhiyun * step1: save events moving to new counters
1257*4882a593Smuzhiyun */
1258*4882a593Smuzhiyun for (i = 0; i < n_running; i++) {
1259*4882a593Smuzhiyun event = cpuc->event_list[i];
1260*4882a593Smuzhiyun hwc = &event->hw;
1261*4882a593Smuzhiyun
1262*4882a593Smuzhiyun /*
1263*4882a593Smuzhiyun * we can avoid reprogramming the counter if:
1264*4882a593Smuzhiyun * - assigned same counter as last time
1265*4882a593Smuzhiyun * - running on same CPU as last time
1266*4882a593Smuzhiyun * - no other event has used the counter since
1267*4882a593Smuzhiyun */
1268*4882a593Smuzhiyun if (hwc->idx == -1 ||
1269*4882a593Smuzhiyun match_prev_assignment(hwc, cpuc, i))
1270*4882a593Smuzhiyun continue;
1271*4882a593Smuzhiyun
1272*4882a593Smuzhiyun /*
1273*4882a593Smuzhiyun * Ensure we don't accidentally enable a stopped
1274*4882a593Smuzhiyun * counter simply because we rescheduled.
1275*4882a593Smuzhiyun */
1276*4882a593Smuzhiyun if (hwc->state & PERF_HES_STOPPED)
1277*4882a593Smuzhiyun hwc->state |= PERF_HES_ARCH;
1278*4882a593Smuzhiyun
1279*4882a593Smuzhiyun x86_pmu_stop(event, PERF_EF_UPDATE);
1280*4882a593Smuzhiyun }
1281*4882a593Smuzhiyun
1282*4882a593Smuzhiyun /*
1283*4882a593Smuzhiyun * step2: reprogram moved events into new counters
1284*4882a593Smuzhiyun */
1285*4882a593Smuzhiyun for (i = 0; i < cpuc->n_events; i++) {
1286*4882a593Smuzhiyun event = cpuc->event_list[i];
1287*4882a593Smuzhiyun hwc = &event->hw;
1288*4882a593Smuzhiyun
1289*4882a593Smuzhiyun if (!match_prev_assignment(hwc, cpuc, i))
1290*4882a593Smuzhiyun x86_assign_hw_event(event, cpuc, i);
1291*4882a593Smuzhiyun else if (i < n_running)
1292*4882a593Smuzhiyun continue;
1293*4882a593Smuzhiyun
1294*4882a593Smuzhiyun if (hwc->state & PERF_HES_ARCH)
1295*4882a593Smuzhiyun continue;
1296*4882a593Smuzhiyun
1297*4882a593Smuzhiyun x86_pmu_start(event, PERF_EF_RELOAD);
1298*4882a593Smuzhiyun }
1299*4882a593Smuzhiyun cpuc->n_added = 0;
1300*4882a593Smuzhiyun perf_events_lapic_init();
1301*4882a593Smuzhiyun }
1302*4882a593Smuzhiyun
1303*4882a593Smuzhiyun cpuc->enabled = 1;
1304*4882a593Smuzhiyun barrier();
1305*4882a593Smuzhiyun
1306*4882a593Smuzhiyun static_call(x86_pmu_enable_all)(added);
1307*4882a593Smuzhiyun }
1308*4882a593Smuzhiyun
1309*4882a593Smuzhiyun static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
1310*4882a593Smuzhiyun
1311*4882a593Smuzhiyun /*
1312*4882a593Smuzhiyun * Set the next IRQ period, based on the hwc->period_left value.
1313*4882a593Smuzhiyun * To be called with the event disabled in hw:
1314*4882a593Smuzhiyun */
1315*4882a593Smuzhiyun int x86_perf_event_set_period(struct perf_event *event)
1316*4882a593Smuzhiyun {
1317*4882a593Smuzhiyun struct hw_perf_event *hwc = &event->hw;
1318*4882a593Smuzhiyun s64 left = local64_read(&hwc->period_left);
1319*4882a593Smuzhiyun s64 period = hwc->sample_period;
1320*4882a593Smuzhiyun int ret = 0, idx = hwc->idx;
1321*4882a593Smuzhiyun
1322*4882a593Smuzhiyun if (unlikely(!hwc->event_base))
1323*4882a593Smuzhiyun return 0;
1324*4882a593Smuzhiyun
1325*4882a593Smuzhiyun if (unlikely(is_topdown_count(event)) &&
1326*4882a593Smuzhiyun x86_pmu.set_topdown_event_period)
1327*4882a593Smuzhiyun return x86_pmu.set_topdown_event_period(event);
1328*4882a593Smuzhiyun
1329*4882a593Smuzhiyun /*
1330*4882a593Smuzhiyun * If we are way outside a reasonable range then just skip forward:
1331*4882a593Smuzhiyun */
1332*4882a593Smuzhiyun if (unlikely(left <= -period)) {
1333*4882a593Smuzhiyun left = period;
1334*4882a593Smuzhiyun local64_set(&hwc->period_left, left);
1335*4882a593Smuzhiyun hwc->last_period = period;
1336*4882a593Smuzhiyun ret = 1;
1337*4882a593Smuzhiyun }
1338*4882a593Smuzhiyun
1339*4882a593Smuzhiyun if (unlikely(left <= 0)) {
1340*4882a593Smuzhiyun left += period;
1341*4882a593Smuzhiyun local64_set(&hwc->period_left, left);
1342*4882a593Smuzhiyun hwc->last_period = period;
1343*4882a593Smuzhiyun ret = 1;
1344*4882a593Smuzhiyun }
1345*4882a593Smuzhiyun /*
1346*4882a593Smuzhiyun * Quirk: certain CPUs don't like it if just 1 hw_event is left:
1347*4882a593Smuzhiyun */
1348*4882a593Smuzhiyun if (unlikely(left < 2))
1349*4882a593Smuzhiyun left = 2;
1350*4882a593Smuzhiyun
1351*4882a593Smuzhiyun if (left > x86_pmu.max_period)
1352*4882a593Smuzhiyun left = x86_pmu.max_period;
1353*4882a593Smuzhiyun
1354*4882a593Smuzhiyun if (x86_pmu.limit_period)
1355*4882a593Smuzhiyun left = x86_pmu.limit_period(event, left);
1356*4882a593Smuzhiyun
1357*4882a593Smuzhiyun per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;
1358*4882a593Smuzhiyun
1359*4882a593Smuzhiyun /*
1360*4882a593Smuzhiyun * The hw event starts counting from this event offset,
1361*4882a593Smuzhiyun * mark it to be able to extract future deltas:
1362*4882a593Smuzhiyun */
1363*4882a593Smuzhiyun local64_set(&hwc->prev_count, (u64)-left);
1364*4882a593Smuzhiyun
1365*4882a593Smuzhiyun wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
1366*4882a593Smuzhiyun
1367*4882a593Smuzhiyun /*
1368*4882a593Smuzhiyun * Sign extend the Merge event counter's upper 16 bits since
1369*4882a593Smuzhiyun * we currently declare a 48-bit counter width
1370*4882a593Smuzhiyun */
1371*4882a593Smuzhiyun if (is_counter_pair(hwc))
1372*4882a593Smuzhiyun wrmsrl(x86_pmu_event_addr(idx + 1), 0xffff);
1373*4882a593Smuzhiyun
1374*4882a593Smuzhiyun /*
1375*4882a593Smuzhiyun * Due to an erratum on certain CPUs we need
1376*4882a593Smuzhiyun * a second write to be sure the register
1377*4882a593Smuzhiyun * is updated properly
1378*4882a593Smuzhiyun */
1379*4882a593Smuzhiyun if (x86_pmu.perfctr_second_write) {
1380*4882a593Smuzhiyun wrmsrl(hwc->event_base,
1381*4882a593Smuzhiyun (u64)(-left) & x86_pmu.cntval_mask);
1382*4882a593Smuzhiyun }
1383*4882a593Smuzhiyun
1384*4882a593Smuzhiyun perf_event_update_userpage(event);
1385*4882a593Smuzhiyun
1386*4882a593Smuzhiyun return ret;
1387*4882a593Smuzhiyun }
1388*4882a593Smuzhiyun
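/*
 * Worked example with illustrative numbers (not taken from this file):
 * with cntval_bits == 48 and sample_period == 100000, the MSR is written
 * with (-100000) & ((1ULL << 48) - 1) == 0xfffffffe7960, so the counter
 * wraps and raises the PMI after exactly 100000 increments; prev_count is
 * set to (u64)-left so the next x86_perf_event_update() can compute the
 * delta from it.
 */
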
1389*4882a593Smuzhiyun void x86_pmu_enable_event(struct perf_event *event)
1390*4882a593Smuzhiyun {
1391*4882a593Smuzhiyun if (__this_cpu_read(cpu_hw_events.enabled))
1392*4882a593Smuzhiyun __x86_pmu_enable_event(&event->hw,
1393*4882a593Smuzhiyun ARCH_PERFMON_EVENTSEL_ENABLE);
1394*4882a593Smuzhiyun }
1395*4882a593Smuzhiyun
1396*4882a593Smuzhiyun /*
1397*4882a593Smuzhiyun * Add a single event to the PMU.
1398*4882a593Smuzhiyun *
1399*4882a593Smuzhiyun * The event is added to the group of enabled events
1400*4882a593Smuzhiyun * but only if it can be scheduled with existing events.
1401*4882a593Smuzhiyun */
1402*4882a593Smuzhiyun static int x86_pmu_add(struct perf_event *event, int flags)
1403*4882a593Smuzhiyun {
1404*4882a593Smuzhiyun struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1405*4882a593Smuzhiyun struct hw_perf_event *hwc;
1406*4882a593Smuzhiyun int assign[X86_PMC_IDX_MAX];
1407*4882a593Smuzhiyun int n, n0, ret;
1408*4882a593Smuzhiyun
1409*4882a593Smuzhiyun hwc = &event->hw;
1410*4882a593Smuzhiyun
1411*4882a593Smuzhiyun n0 = cpuc->n_events;
1412*4882a593Smuzhiyun ret = n = collect_events(cpuc, event, false);
1413*4882a593Smuzhiyun if (ret < 0)
1414*4882a593Smuzhiyun goto out;
1415*4882a593Smuzhiyun
1416*4882a593Smuzhiyun hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
1417*4882a593Smuzhiyun if (!(flags & PERF_EF_START))
1418*4882a593Smuzhiyun hwc->state |= PERF_HES_ARCH;
1419*4882a593Smuzhiyun
1420*4882a593Smuzhiyun /*
1421*4882a593Smuzhiyun * If group events scheduling transaction was started,
1422*4882a593Smuzhiyun * skip the schedulability test here, it will be performed
1423*4882a593Smuzhiyun * at commit time (->commit_txn) as a whole.
1424*4882a593Smuzhiyun *
1425*4882a593Smuzhiyun * If commit fails, we'll call ->del() on all events
1426*4882a593Smuzhiyun * for which ->add() was called.
1427*4882a593Smuzhiyun */
1428*4882a593Smuzhiyun if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
1429*4882a593Smuzhiyun goto done_collect;
1430*4882a593Smuzhiyun
1431*4882a593Smuzhiyun ret = static_call(x86_pmu_schedule_events)(cpuc, n, assign);
1432*4882a593Smuzhiyun if (ret)
1433*4882a593Smuzhiyun goto out;
1434*4882a593Smuzhiyun /*
1435*4882a593Smuzhiyun * copy the new assignment; now that we know it is possible,
1436*4882a593Smuzhiyun * it will be used by hw_perf_enable()
1437*4882a593Smuzhiyun */
1438*4882a593Smuzhiyun memcpy(cpuc->assign, assign, n*sizeof(int));
1439*4882a593Smuzhiyun
1440*4882a593Smuzhiyun done_collect:
1441*4882a593Smuzhiyun /*
1442*4882a593Smuzhiyun * Commit the collect_events() state. See x86_pmu_del() and
1443*4882a593Smuzhiyun * x86_pmu_*_txn().
1444*4882a593Smuzhiyun */
1445*4882a593Smuzhiyun cpuc->n_events = n;
1446*4882a593Smuzhiyun cpuc->n_added += n - n0;
1447*4882a593Smuzhiyun cpuc->n_txn += n - n0;
1448*4882a593Smuzhiyun
1449*4882a593Smuzhiyun /*
1450*4882a593Smuzhiyun * This is before x86_pmu_enable() will call x86_pmu_start(),
1451*4882a593Smuzhiyun * so we enable LBRs before an event needs them.
1452*4882a593Smuzhiyun */
1453*4882a593Smuzhiyun static_call_cond(x86_pmu_add)(event);
1454*4882a593Smuzhiyun
1455*4882a593Smuzhiyun ret = 0;
1456*4882a593Smuzhiyun out:
1457*4882a593Smuzhiyun return ret;
1458*4882a593Smuzhiyun }
1459*4882a593Smuzhiyun
1460*4882a593Smuzhiyun static void x86_pmu_start(struct perf_event *event, int flags)
1461*4882a593Smuzhiyun {
1462*4882a593Smuzhiyun struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1463*4882a593Smuzhiyun int idx = event->hw.idx;
1464*4882a593Smuzhiyun
1465*4882a593Smuzhiyun if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
1466*4882a593Smuzhiyun return;
1467*4882a593Smuzhiyun
1468*4882a593Smuzhiyun if (WARN_ON_ONCE(idx == -1))
1469*4882a593Smuzhiyun return;
1470*4882a593Smuzhiyun
1471*4882a593Smuzhiyun if (flags & PERF_EF_RELOAD) {
1472*4882a593Smuzhiyun WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
1473*4882a593Smuzhiyun x86_perf_event_set_period(event);
1474*4882a593Smuzhiyun }
1475*4882a593Smuzhiyun
1476*4882a593Smuzhiyun event->hw.state = 0;
1477*4882a593Smuzhiyun
1478*4882a593Smuzhiyun cpuc->events[idx] = event;
1479*4882a593Smuzhiyun __set_bit(idx, cpuc->active_mask);
1480*4882a593Smuzhiyun __set_bit(idx, cpuc->running);
1481*4882a593Smuzhiyun static_call(x86_pmu_enable)(event);
1482*4882a593Smuzhiyun perf_event_update_userpage(event);
1483*4882a593Smuzhiyun }
1484*4882a593Smuzhiyun
1485*4882a593Smuzhiyun void perf_event_print_debug(void)
1486*4882a593Smuzhiyun {
1487*4882a593Smuzhiyun u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
1488*4882a593Smuzhiyun u64 pebs, debugctl;
1489*4882a593Smuzhiyun struct cpu_hw_events *cpuc;
1490*4882a593Smuzhiyun unsigned long flags;
1491*4882a593Smuzhiyun int cpu, idx;
1492*4882a593Smuzhiyun
1493*4882a593Smuzhiyun if (!x86_pmu.num_counters)
1494*4882a593Smuzhiyun return;
1495*4882a593Smuzhiyun
1496*4882a593Smuzhiyun local_irq_save(flags);
1497*4882a593Smuzhiyun
1498*4882a593Smuzhiyun cpu = smp_processor_id();
1499*4882a593Smuzhiyun cpuc = &per_cpu(cpu_hw_events, cpu);
1500*4882a593Smuzhiyun
1501*4882a593Smuzhiyun if (x86_pmu.version >= 2) {
1502*4882a593Smuzhiyun rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
1503*4882a593Smuzhiyun rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
1504*4882a593Smuzhiyun rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
1505*4882a593Smuzhiyun rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
1506*4882a593Smuzhiyun
1507*4882a593Smuzhiyun pr_info("\n");
1508*4882a593Smuzhiyun pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl);
1509*4882a593Smuzhiyun pr_info("CPU#%d: status: %016llx\n", cpu, status);
1510*4882a593Smuzhiyun pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow);
1511*4882a593Smuzhiyun pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed);
1512*4882a593Smuzhiyun if (x86_pmu.pebs_constraints) {
1513*4882a593Smuzhiyun rdmsrl(MSR_IA32_PEBS_ENABLE, pebs);
1514*4882a593Smuzhiyun pr_info("CPU#%d: pebs: %016llx\n", cpu, pebs);
1515*4882a593Smuzhiyun }
1516*4882a593Smuzhiyun if (x86_pmu.lbr_nr) {
1517*4882a593Smuzhiyun rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
1518*4882a593Smuzhiyun pr_info("CPU#%d: debugctl: %016llx\n", cpu, debugctl);
1519*4882a593Smuzhiyun }
1520*4882a593Smuzhiyun }
1521*4882a593Smuzhiyun pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask);
1522*4882a593Smuzhiyun
1523*4882a593Smuzhiyun for (idx = 0; idx < x86_pmu.num_counters; idx++) {
1524*4882a593Smuzhiyun rdmsrl(x86_pmu_config_addr(idx), pmc_ctrl);
1525*4882a593Smuzhiyun rdmsrl(x86_pmu_event_addr(idx), pmc_count);
1526*4882a593Smuzhiyun
1527*4882a593Smuzhiyun prev_left = per_cpu(pmc_prev_left[idx], cpu);
1528*4882a593Smuzhiyun
1529*4882a593Smuzhiyun pr_info("CPU#%d: gen-PMC%d ctrl: %016llx\n",
1530*4882a593Smuzhiyun cpu, idx, pmc_ctrl);
1531*4882a593Smuzhiyun pr_info("CPU#%d: gen-PMC%d count: %016llx\n",
1532*4882a593Smuzhiyun cpu, idx, pmc_count);
1533*4882a593Smuzhiyun pr_info("CPU#%d: gen-PMC%d left: %016llx\n",
1534*4882a593Smuzhiyun cpu, idx, prev_left);
1535*4882a593Smuzhiyun }
1536*4882a593Smuzhiyun for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
1537*4882a593Smuzhiyun rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
1538*4882a593Smuzhiyun
1539*4882a593Smuzhiyun pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
1540*4882a593Smuzhiyun cpu, idx, pmc_count);
1541*4882a593Smuzhiyun }
1542*4882a593Smuzhiyun local_irq_restore(flags);
1543*4882a593Smuzhiyun }
1544*4882a593Smuzhiyun
1545*4882a593Smuzhiyun void x86_pmu_stop(struct perf_event *event, int flags)
1546*4882a593Smuzhiyun {
1547*4882a593Smuzhiyun struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1548*4882a593Smuzhiyun struct hw_perf_event *hwc = &event->hw;
1549*4882a593Smuzhiyun
1550*4882a593Smuzhiyun if (test_bit(hwc->idx, cpuc->active_mask)) {
1551*4882a593Smuzhiyun static_call(x86_pmu_disable)(event);
1552*4882a593Smuzhiyun __clear_bit(hwc->idx, cpuc->active_mask);
1553*4882a593Smuzhiyun cpuc->events[hwc->idx] = NULL;
1554*4882a593Smuzhiyun WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
1555*4882a593Smuzhiyun hwc->state |= PERF_HES_STOPPED;
1556*4882a593Smuzhiyun }
1557*4882a593Smuzhiyun
1558*4882a593Smuzhiyun if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
1559*4882a593Smuzhiyun /*
1560*4882a593Smuzhiyun * Drain the remaining delta count out of an event
1561*4882a593Smuzhiyun * that we are disabling:
1562*4882a593Smuzhiyun */
1563*4882a593Smuzhiyun x86_perf_event_update(event);
1564*4882a593Smuzhiyun hwc->state |= PERF_HES_UPTODATE;
1565*4882a593Smuzhiyun }
1566*4882a593Smuzhiyun }
1567*4882a593Smuzhiyun
1568*4882a593Smuzhiyun static void x86_pmu_del(struct perf_event *event, int flags)
1569*4882a593Smuzhiyun {
1570*4882a593Smuzhiyun struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1571*4882a593Smuzhiyun int i;
1572*4882a593Smuzhiyun
1573*4882a593Smuzhiyun /*
1574*4882a593Smuzhiyun * If we're called during a txn, we only need to undo x86_pmu.add.
1575*4882a593Smuzhiyun * The events never got scheduled and ->cancel_txn will truncate
1576*4882a593Smuzhiyun * the event_list.
1577*4882a593Smuzhiyun *
1578*4882a593Smuzhiyun * XXX assumes any ->del() called during a TXN will only be on
1579*4882a593Smuzhiyun * an event added during that same TXN.
1580*4882a593Smuzhiyun */
1581*4882a593Smuzhiyun if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
1582*4882a593Smuzhiyun goto do_del;
1583*4882a593Smuzhiyun
1584*4882a593Smuzhiyun /*
1585*4882a593Smuzhiyun * Not a TXN, therefore cleanup properly.
1586*4882a593Smuzhiyun */
1587*4882a593Smuzhiyun x86_pmu_stop(event, PERF_EF_UPDATE);
1588*4882a593Smuzhiyun
1589*4882a593Smuzhiyun for (i = 0; i < cpuc->n_events; i++) {
1590*4882a593Smuzhiyun if (event == cpuc->event_list[i])
1591*4882a593Smuzhiyun break;
1592*4882a593Smuzhiyun }
1593*4882a593Smuzhiyun
1594*4882a593Smuzhiyun if (WARN_ON_ONCE(i == cpuc->n_events)) /* called ->del() without ->add() ? */
1595*4882a593Smuzhiyun return;
1596*4882a593Smuzhiyun
1597*4882a593Smuzhiyun /* If we have a newly added event, make sure to decrease n_added. */
1598*4882a593Smuzhiyun if (i >= cpuc->n_events - cpuc->n_added)
1599*4882a593Smuzhiyun --cpuc->n_added;
1600*4882a593Smuzhiyun
1601*4882a593Smuzhiyun static_call_cond(x86_pmu_put_event_constraints)(cpuc, event);
1602*4882a593Smuzhiyun
1603*4882a593Smuzhiyun /* Delete the array entry. */
1604*4882a593Smuzhiyun while (++i < cpuc->n_events) {
1605*4882a593Smuzhiyun cpuc->event_list[i-1] = cpuc->event_list[i];
1606*4882a593Smuzhiyun cpuc->event_constraint[i-1] = cpuc->event_constraint[i];
1607*4882a593Smuzhiyun }
1608*4882a593Smuzhiyun cpuc->event_constraint[i-1] = NULL;
1609*4882a593Smuzhiyun --cpuc->n_events;
1610*4882a593Smuzhiyun if (x86_pmu.intel_cap.perf_metrics)
1611*4882a593Smuzhiyun del_nr_metric_event(cpuc, event);
1612*4882a593Smuzhiyun
1613*4882a593Smuzhiyun perf_event_update_userpage(event);
1614*4882a593Smuzhiyun
1615*4882a593Smuzhiyun do_del:
1616*4882a593Smuzhiyun
1617*4882a593Smuzhiyun /*
1618*4882a593Smuzhiyun * This is after x86_pmu_stop(); so we disable LBRs only after
1619*4882a593Smuzhiyun * no event can still need them.
1620*4882a593Smuzhiyun */
1621*4882a593Smuzhiyun static_call_cond(x86_pmu_del)(event);
1622*4882a593Smuzhiyun }
1623*4882a593Smuzhiyun
1624*4882a593Smuzhiyun int x86_pmu_handle_irq(struct pt_regs *regs)
1625*4882a593Smuzhiyun {
1626*4882a593Smuzhiyun struct perf_sample_data data;
1627*4882a593Smuzhiyun struct cpu_hw_events *cpuc;
1628*4882a593Smuzhiyun struct perf_event *event;
1629*4882a593Smuzhiyun int idx, handled = 0;
1630*4882a593Smuzhiyun u64 val;
1631*4882a593Smuzhiyun
1632*4882a593Smuzhiyun cpuc = this_cpu_ptr(&cpu_hw_events);
1633*4882a593Smuzhiyun
1634*4882a593Smuzhiyun /*
1635*4882a593Smuzhiyun * Some chipsets need to unmask the LVTPC in a particular spot
1636*4882a593Smuzhiyun * inside the nmi handler. As a result, the unmasking was pushed
1637*4882a593Smuzhiyun * into all the nmi handlers.
1638*4882a593Smuzhiyun *
1639*4882a593Smuzhiyun * This generic handler doesn't seem to have any issues where the
1640*4882a593Smuzhiyun * unmasking occurs so it was left at the top.
1641*4882a593Smuzhiyun */
1642*4882a593Smuzhiyun apic_write(APIC_LVTPC, APIC_DM_NMI);
1643*4882a593Smuzhiyun
1644*4882a593Smuzhiyun for (idx = 0; idx < x86_pmu.num_counters; idx++) {
1645*4882a593Smuzhiyun if (!test_bit(idx, cpuc->active_mask))
1646*4882a593Smuzhiyun continue;
1647*4882a593Smuzhiyun
1648*4882a593Smuzhiyun event = cpuc->events[idx];
1649*4882a593Smuzhiyun
1650*4882a593Smuzhiyun val = x86_perf_event_update(event);
1651*4882a593Smuzhiyun if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
1652*4882a593Smuzhiyun continue;
1653*4882a593Smuzhiyun
1654*4882a593Smuzhiyun /*
1655*4882a593Smuzhiyun * event overflow
1656*4882a593Smuzhiyun */
1657*4882a593Smuzhiyun handled++;
1658*4882a593Smuzhiyun perf_sample_data_init(&data, 0, event->hw.last_period);
1659*4882a593Smuzhiyun
1660*4882a593Smuzhiyun if (!x86_perf_event_set_period(event))
1661*4882a593Smuzhiyun continue;
1662*4882a593Smuzhiyun
1663*4882a593Smuzhiyun if (perf_event_overflow(event, &data, regs))
1664*4882a593Smuzhiyun x86_pmu_stop(event, 0);
1665*4882a593Smuzhiyun }
1666*4882a593Smuzhiyun
1667*4882a593Smuzhiyun if (handled)
1668*4882a593Smuzhiyun inc_irq_stat(apic_perf_irqs);
1669*4882a593Smuzhiyun
1670*4882a593Smuzhiyun return handled;
1671*4882a593Smuzhiyun }
1672*4882a593Smuzhiyun
1673*4882a593Smuzhiyun void perf_events_lapic_init(void)
1674*4882a593Smuzhiyun {
1675*4882a593Smuzhiyun if (!x86_pmu.apic || !x86_pmu_initialized())
1676*4882a593Smuzhiyun return;
1677*4882a593Smuzhiyun
1678*4882a593Smuzhiyun /*
1679*4882a593Smuzhiyun * Always use NMI for PMU
1680*4882a593Smuzhiyun */
1681*4882a593Smuzhiyun apic_write(APIC_LVTPC, APIC_DM_NMI);
1682*4882a593Smuzhiyun }
1683*4882a593Smuzhiyun
1684*4882a593Smuzhiyun static int
1685*4882a593Smuzhiyun perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs)
1686*4882a593Smuzhiyun {
1687*4882a593Smuzhiyun u64 start_clock;
1688*4882a593Smuzhiyun u64 finish_clock;
1689*4882a593Smuzhiyun int ret;
1690*4882a593Smuzhiyun
1691*4882a593Smuzhiyun /*
1692*4882a593Smuzhiyun * All PMUs/events that share this PMI handler should make sure to
1693*4882a593Smuzhiyun * increment active_events for their events.
1694*4882a593Smuzhiyun */
1695*4882a593Smuzhiyun if (!atomic_read(&active_events))
1696*4882a593Smuzhiyun return NMI_DONE;
1697*4882a593Smuzhiyun
1698*4882a593Smuzhiyun start_clock = sched_clock();
1699*4882a593Smuzhiyun ret = static_call(x86_pmu_handle_irq)(regs);
1700*4882a593Smuzhiyun finish_clock = sched_clock();
1701*4882a593Smuzhiyun
1702*4882a593Smuzhiyun perf_sample_event_took(finish_clock - start_clock);
1703*4882a593Smuzhiyun
1704*4882a593Smuzhiyun return ret;
1705*4882a593Smuzhiyun }
1706*4882a593Smuzhiyun NOKPROBE_SYMBOL(perf_event_nmi_handler);
1707*4882a593Smuzhiyun
1708*4882a593Smuzhiyun struct event_constraint emptyconstraint;
1709*4882a593Smuzhiyun struct event_constraint unconstrained;
1710*4882a593Smuzhiyun
1711*4882a593Smuzhiyun static int x86_pmu_prepare_cpu(unsigned int cpu)
1712*4882a593Smuzhiyun {
1713*4882a593Smuzhiyun struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
1714*4882a593Smuzhiyun int i;
1715*4882a593Smuzhiyun
1716*4882a593Smuzhiyun for (i = 0 ; i < X86_PERF_KFREE_MAX; i++)
1717*4882a593Smuzhiyun cpuc->kfree_on_online[i] = NULL;
1718*4882a593Smuzhiyun if (x86_pmu.cpu_prepare)
1719*4882a593Smuzhiyun return x86_pmu.cpu_prepare(cpu);
1720*4882a593Smuzhiyun return 0;
1721*4882a593Smuzhiyun }
1722*4882a593Smuzhiyun
1723*4882a593Smuzhiyun static int x86_pmu_dead_cpu(unsigned int cpu)
1724*4882a593Smuzhiyun {
1725*4882a593Smuzhiyun if (x86_pmu.cpu_dead)
1726*4882a593Smuzhiyun x86_pmu.cpu_dead(cpu);
1727*4882a593Smuzhiyun return 0;
1728*4882a593Smuzhiyun }
1729*4882a593Smuzhiyun
1730*4882a593Smuzhiyun static int x86_pmu_online_cpu(unsigned int cpu)
1731*4882a593Smuzhiyun {
1732*4882a593Smuzhiyun struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
1733*4882a593Smuzhiyun int i;
1734*4882a593Smuzhiyun
1735*4882a593Smuzhiyun for (i = 0 ; i < X86_PERF_KFREE_MAX; i++) {
1736*4882a593Smuzhiyun kfree(cpuc->kfree_on_online[i]);
1737*4882a593Smuzhiyun cpuc->kfree_on_online[i] = NULL;
1738*4882a593Smuzhiyun }
1739*4882a593Smuzhiyun return 0;
1740*4882a593Smuzhiyun }
1741*4882a593Smuzhiyun
1742*4882a593Smuzhiyun static int x86_pmu_starting_cpu(unsigned int cpu)
1743*4882a593Smuzhiyun {
1744*4882a593Smuzhiyun if (x86_pmu.cpu_starting)
1745*4882a593Smuzhiyun x86_pmu.cpu_starting(cpu);
1746*4882a593Smuzhiyun return 0;
1747*4882a593Smuzhiyun }
1748*4882a593Smuzhiyun
1749*4882a593Smuzhiyun static int x86_pmu_dying_cpu(unsigned int cpu)
1750*4882a593Smuzhiyun {
1751*4882a593Smuzhiyun if (x86_pmu.cpu_dying)
1752*4882a593Smuzhiyun x86_pmu.cpu_dying(cpu);
1753*4882a593Smuzhiyun return 0;
1754*4882a593Smuzhiyun }
1755*4882a593Smuzhiyun
1756*4882a593Smuzhiyun static void __init pmu_check_apic(void)
1757*4882a593Smuzhiyun {
1758*4882a593Smuzhiyun if (boot_cpu_has(X86_FEATURE_APIC))
1759*4882a593Smuzhiyun return;
1760*4882a593Smuzhiyun
1761*4882a593Smuzhiyun x86_pmu.apic = 0;
1762*4882a593Smuzhiyun pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
1763*4882a593Smuzhiyun pr_info("no hardware sampling interrupt available.\n");
1764*4882a593Smuzhiyun
1765*4882a593Smuzhiyun /*
1766*4882a593Smuzhiyun * If we have a PMU initialized but no APIC
1767*4882a593Smuzhiyun * interrupts, we cannot sample hardware
1768*4882a593Smuzhiyun * events (user-space has to fall back and
1769*4882a593Smuzhiyun * sample via a hrtimer based software event):
1770*4882a593Smuzhiyun */
1771*4882a593Smuzhiyun pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
1772*4882a593Smuzhiyun
1773*4882a593Smuzhiyun }
1774*4882a593Smuzhiyun
1775*4882a593Smuzhiyun static struct attribute_group x86_pmu_format_group __ro_after_init = {
1776*4882a593Smuzhiyun .name = "format",
1777*4882a593Smuzhiyun .attrs = NULL,
1778*4882a593Smuzhiyun };
1779*4882a593Smuzhiyun
1780*4882a593Smuzhiyun ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, char *page)
1781*4882a593Smuzhiyun {
1782*4882a593Smuzhiyun struct perf_pmu_events_attr *pmu_attr =
1783*4882a593Smuzhiyun container_of(attr, struct perf_pmu_events_attr, attr);
1784*4882a593Smuzhiyun u64 config = 0;
1785*4882a593Smuzhiyun
1786*4882a593Smuzhiyun if (pmu_attr->id < x86_pmu.max_events)
1787*4882a593Smuzhiyun config = x86_pmu.event_map(pmu_attr->id);
1788*4882a593Smuzhiyun
1789*4882a593Smuzhiyun /* string trumps id */
1790*4882a593Smuzhiyun if (pmu_attr->event_str)
1791*4882a593Smuzhiyun return sprintf(page, "%s", pmu_attr->event_str);
1792*4882a593Smuzhiyun
1793*4882a593Smuzhiyun return x86_pmu.events_sysfs_show(page, config);
1794*4882a593Smuzhiyun }
1795*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(events_sysfs_show);
1796*4882a593Smuzhiyun
1797*4882a593Smuzhiyun ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr,
1798*4882a593Smuzhiyun char *page)
1799*4882a593Smuzhiyun {
1800*4882a593Smuzhiyun struct perf_pmu_events_ht_attr *pmu_attr =
1801*4882a593Smuzhiyun container_of(attr, struct perf_pmu_events_ht_attr, attr);
1802*4882a593Smuzhiyun
1803*4882a593Smuzhiyun /*
1804*4882a593Smuzhiyun * Report conditional events depending on Hyper-Threading.
1805*4882a593Smuzhiyun *
1806*4882a593Smuzhiyun * This is overly conservative as usually the HT special
1807*4882a593Smuzhiyun * handling is not needed if the other CPU thread is idle.
1808*4882a593Smuzhiyun *
1809*4882a593Smuzhiyun * Note this does not (and cannot) handle the case when thread
1810*4882a593Smuzhiyun * siblings are invisible, for example with virtualization
1811*4882a593Smuzhiyun * if they are owned by some other guest. The user tool
1812*4882a593Smuzhiyun * has to re-read when a thread sibling gets onlined later.
1813*4882a593Smuzhiyun */
1814*4882a593Smuzhiyun return sprintf(page, "%s",
1815*4882a593Smuzhiyun topology_max_smt_threads() > 1 ?
1816*4882a593Smuzhiyun pmu_attr->event_str_ht :
1817*4882a593Smuzhiyun pmu_attr->event_str_noht);
1818*4882a593Smuzhiyun }
1819*4882a593Smuzhiyun
1820*4882a593Smuzhiyun EVENT_ATTR(cpu-cycles, CPU_CYCLES );
1821*4882a593Smuzhiyun EVENT_ATTR(instructions, INSTRUCTIONS );
1822*4882a593Smuzhiyun EVENT_ATTR(cache-references, CACHE_REFERENCES );
1823*4882a593Smuzhiyun EVENT_ATTR(cache-misses, CACHE_MISSES );
1824*4882a593Smuzhiyun EVENT_ATTR(branch-instructions, BRANCH_INSTRUCTIONS );
1825*4882a593Smuzhiyun EVENT_ATTR(branch-misses, BRANCH_MISSES );
1826*4882a593Smuzhiyun EVENT_ATTR(bus-cycles, BUS_CYCLES );
1827*4882a593Smuzhiyun EVENT_ATTR(stalled-cycles-frontend, STALLED_CYCLES_FRONTEND );
1828*4882a593Smuzhiyun EVENT_ATTR(stalled-cycles-backend, STALLED_CYCLES_BACKEND );
1829*4882a593Smuzhiyun EVENT_ATTR(ref-cycles, REF_CPU_CYCLES );
1830*4882a593Smuzhiyun
1831*4882a593Smuzhiyun static struct attribute *empty_attrs;
1832*4882a593Smuzhiyun
1833*4882a593Smuzhiyun static struct attribute *events_attr[] = {
1834*4882a593Smuzhiyun EVENT_PTR(CPU_CYCLES),
1835*4882a593Smuzhiyun EVENT_PTR(INSTRUCTIONS),
1836*4882a593Smuzhiyun EVENT_PTR(CACHE_REFERENCES),
1837*4882a593Smuzhiyun EVENT_PTR(CACHE_MISSES),
1838*4882a593Smuzhiyun EVENT_PTR(BRANCH_INSTRUCTIONS),
1839*4882a593Smuzhiyun EVENT_PTR(BRANCH_MISSES),
1840*4882a593Smuzhiyun EVENT_PTR(BUS_CYCLES),
1841*4882a593Smuzhiyun EVENT_PTR(STALLED_CYCLES_FRONTEND),
1842*4882a593Smuzhiyun EVENT_PTR(STALLED_CYCLES_BACKEND),
1843*4882a593Smuzhiyun EVENT_PTR(REF_CPU_CYCLES),
1844*4882a593Smuzhiyun NULL,
1845*4882a593Smuzhiyun };
1846*4882a593Smuzhiyun
1847*4882a593Smuzhiyun /*
1848*4882a593Smuzhiyun * Remove all undefined events (x86_pmu.event_map(id) == 0)
1849*4882a593Smuzhiyun * out of events_attr attributes.
1850*4882a593Smuzhiyun */
1851*4882a593Smuzhiyun static umode_t
1852*4882a593Smuzhiyun is_visible(struct kobject *kobj, struct attribute *attr, int idx)
1853*4882a593Smuzhiyun {
1854*4882a593Smuzhiyun struct perf_pmu_events_attr *pmu_attr;
1855*4882a593Smuzhiyun
1856*4882a593Smuzhiyun if (idx >= x86_pmu.max_events)
1857*4882a593Smuzhiyun return 0;
1858*4882a593Smuzhiyun
1859*4882a593Smuzhiyun pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr.attr);
1860*4882a593Smuzhiyun /* str trumps id */
1861*4882a593Smuzhiyun return pmu_attr->event_str || x86_pmu.event_map(idx) ? attr->mode : 0;
1862*4882a593Smuzhiyun }
1863*4882a593Smuzhiyun
1864*4882a593Smuzhiyun static struct attribute_group x86_pmu_events_group __ro_after_init = {
1865*4882a593Smuzhiyun .name = "events",
1866*4882a593Smuzhiyun .attrs = events_attr,
1867*4882a593Smuzhiyun .is_visible = is_visible,
1868*4882a593Smuzhiyun };
1869*4882a593Smuzhiyun
1870*4882a593Smuzhiyun ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event)
1871*4882a593Smuzhiyun {
1872*4882a593Smuzhiyun u64 umask = (config & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
1873*4882a593Smuzhiyun u64 cmask = (config & ARCH_PERFMON_EVENTSEL_CMASK) >> 24;
1874*4882a593Smuzhiyun bool edge = (config & ARCH_PERFMON_EVENTSEL_EDGE);
1875*4882a593Smuzhiyun bool pc = (config & ARCH_PERFMON_EVENTSEL_PIN_CONTROL);
1876*4882a593Smuzhiyun bool any = (config & ARCH_PERFMON_EVENTSEL_ANY);
1877*4882a593Smuzhiyun bool inv = (config & ARCH_PERFMON_EVENTSEL_INV);
1878*4882a593Smuzhiyun ssize_t ret;
1879*4882a593Smuzhiyun
1880*4882a593Smuzhiyun /*
1881*4882a593Smuzhiyun * We have a whole page to spend and just a little data
1882*4882a593Smuzhiyun * to write, so we can safely use sprintf.
1883*4882a593Smuzhiyun */
1884*4882a593Smuzhiyun ret = sprintf(page, "event=0x%02llx", event);
1885*4882a593Smuzhiyun
1886*4882a593Smuzhiyun if (umask)
1887*4882a593Smuzhiyun ret += sprintf(page + ret, ",umask=0x%02llx", umask);
1888*4882a593Smuzhiyun
1889*4882a593Smuzhiyun if (edge)
1890*4882a593Smuzhiyun ret += sprintf(page + ret, ",edge");
1891*4882a593Smuzhiyun
1892*4882a593Smuzhiyun if (pc)
1893*4882a593Smuzhiyun ret += sprintf(page + ret, ",pc");
1894*4882a593Smuzhiyun
1895*4882a593Smuzhiyun if (any)
1896*4882a593Smuzhiyun ret += sprintf(page + ret, ",any");
1897*4882a593Smuzhiyun
1898*4882a593Smuzhiyun if (inv)
1899*4882a593Smuzhiyun ret += sprintf(page + ret, ",inv");
1900*4882a593Smuzhiyun
1901*4882a593Smuzhiyun if (cmask)
1902*4882a593Smuzhiyun ret += sprintf(page + ret, ",cmask=0x%02llx", cmask);
1903*4882a593Smuzhiyun
1904*4882a593Smuzhiyun ret += sprintf(page + ret, "\n");
1905*4882a593Smuzhiyun
1906*4882a593Smuzhiyun return ret;
1907*4882a593Smuzhiyun }
1908*4882a593Smuzhiyun
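/*
 * Illustrative output (made-up values): for config == 0x4f2e and
 * event == 0x2e only the umask field is set, so this emits
 * "event=0x2e,umask=0x4f\n"; edge/pc/any/inv/cmask are all clear and
 * therefore omitted.
 */
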
1909*4882a593Smuzhiyun static struct attribute_group x86_pmu_attr_group;
1910*4882a593Smuzhiyun static struct attribute_group x86_pmu_caps_group;
1911*4882a593Smuzhiyun
1912*4882a593Smuzhiyun static void x86_pmu_static_call_update(void)
1913*4882a593Smuzhiyun {
1914*4882a593Smuzhiyun static_call_update(x86_pmu_handle_irq, x86_pmu.handle_irq);
1915*4882a593Smuzhiyun static_call_update(x86_pmu_disable_all, x86_pmu.disable_all);
1916*4882a593Smuzhiyun static_call_update(x86_pmu_enable_all, x86_pmu.enable_all);
1917*4882a593Smuzhiyun static_call_update(x86_pmu_enable, x86_pmu.enable);
1918*4882a593Smuzhiyun static_call_update(x86_pmu_disable, x86_pmu.disable);
1919*4882a593Smuzhiyun
1920*4882a593Smuzhiyun static_call_update(x86_pmu_add, x86_pmu.add);
1921*4882a593Smuzhiyun static_call_update(x86_pmu_del, x86_pmu.del);
1922*4882a593Smuzhiyun static_call_update(x86_pmu_read, x86_pmu.read);
1923*4882a593Smuzhiyun
1924*4882a593Smuzhiyun static_call_update(x86_pmu_schedule_events, x86_pmu.schedule_events);
1925*4882a593Smuzhiyun static_call_update(x86_pmu_get_event_constraints, x86_pmu.get_event_constraints);
1926*4882a593Smuzhiyun static_call_update(x86_pmu_put_event_constraints, x86_pmu.put_event_constraints);
1927*4882a593Smuzhiyun
1928*4882a593Smuzhiyun static_call_update(x86_pmu_start_scheduling, x86_pmu.start_scheduling);
1929*4882a593Smuzhiyun static_call_update(x86_pmu_commit_scheduling, x86_pmu.commit_scheduling);
1930*4882a593Smuzhiyun static_call_update(x86_pmu_stop_scheduling, x86_pmu.stop_scheduling);
1931*4882a593Smuzhiyun
1932*4882a593Smuzhiyun static_call_update(x86_pmu_sched_task, x86_pmu.sched_task);
1933*4882a593Smuzhiyun static_call_update(x86_pmu_swap_task_ctx, x86_pmu.swap_task_ctx);
1934*4882a593Smuzhiyun
1935*4882a593Smuzhiyun static_call_update(x86_pmu_drain_pebs, x86_pmu.drain_pebs);
1936*4882a593Smuzhiyun static_call_update(x86_pmu_pebs_aliases, x86_pmu.pebs_aliases);
1937*4882a593Smuzhiyun }
1938*4882a593Smuzhiyun
1939*4882a593Smuzhiyun static void _x86_pmu_read(struct perf_event *event)
1940*4882a593Smuzhiyun {
1941*4882a593Smuzhiyun x86_perf_event_update(event);
1942*4882a593Smuzhiyun }
1943*4882a593Smuzhiyun
1944*4882a593Smuzhiyun static int __init init_hw_perf_events(void)
1945*4882a593Smuzhiyun {
1946*4882a593Smuzhiyun struct x86_pmu_quirk *quirk;
1947*4882a593Smuzhiyun int err;
1948*4882a593Smuzhiyun
1949*4882a593Smuzhiyun pr_info("Performance Events: ");
1950*4882a593Smuzhiyun
1951*4882a593Smuzhiyun switch (boot_cpu_data.x86_vendor) {
1952*4882a593Smuzhiyun case X86_VENDOR_INTEL:
1953*4882a593Smuzhiyun err = intel_pmu_init();
1954*4882a593Smuzhiyun break;
1955*4882a593Smuzhiyun case X86_VENDOR_AMD:
1956*4882a593Smuzhiyun err = amd_pmu_init();
1957*4882a593Smuzhiyun break;
1958*4882a593Smuzhiyun case X86_VENDOR_HYGON:
1959*4882a593Smuzhiyun err = amd_pmu_init();
1960*4882a593Smuzhiyun x86_pmu.name = "HYGON";
1961*4882a593Smuzhiyun break;
1962*4882a593Smuzhiyun case X86_VENDOR_ZHAOXIN:
1963*4882a593Smuzhiyun case X86_VENDOR_CENTAUR:
1964*4882a593Smuzhiyun err = zhaoxin_pmu_init();
1965*4882a593Smuzhiyun break;
1966*4882a593Smuzhiyun default:
1967*4882a593Smuzhiyun err = -ENOTSUPP;
1968*4882a593Smuzhiyun }
1969*4882a593Smuzhiyun if (err != 0) {
1970*4882a593Smuzhiyun pr_cont("no PMU driver, software events only.\n");
1971*4882a593Smuzhiyun return 0;
1972*4882a593Smuzhiyun }
1973*4882a593Smuzhiyun
1974*4882a593Smuzhiyun pmu_check_apic();
1975*4882a593Smuzhiyun
1976*4882a593Smuzhiyun /* sanity check that the hardware exists or is emulated */
1977*4882a593Smuzhiyun if (!check_hw_exists())
1978*4882a593Smuzhiyun return 0;
1979*4882a593Smuzhiyun
1980*4882a593Smuzhiyun pr_cont("%s PMU driver.\n", x86_pmu.name);
1981*4882a593Smuzhiyun
1982*4882a593Smuzhiyun x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */
1983*4882a593Smuzhiyun
1984*4882a593Smuzhiyun for (quirk = x86_pmu.quirks; quirk; quirk = quirk->next)
1985*4882a593Smuzhiyun quirk->func();
1986*4882a593Smuzhiyun
1987*4882a593Smuzhiyun if (!x86_pmu.intel_ctrl)
1988*4882a593Smuzhiyun x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
1989*4882a593Smuzhiyun
1990*4882a593Smuzhiyun perf_events_lapic_init();
1991*4882a593Smuzhiyun register_nmi_handler(NMI_LOCAL, perf_event_nmi_handler, 0, "PMI");
1992*4882a593Smuzhiyun
1993*4882a593Smuzhiyun unconstrained = (struct event_constraint)
1994*4882a593Smuzhiyun __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
1995*4882a593Smuzhiyun 0, x86_pmu.num_counters, 0, 0);
1996*4882a593Smuzhiyun
1997*4882a593Smuzhiyun x86_pmu_format_group.attrs = x86_pmu.format_attrs;
1998*4882a593Smuzhiyun
1999*4882a593Smuzhiyun if (!x86_pmu.events_sysfs_show)
2000*4882a593Smuzhiyun x86_pmu_events_group.attrs = &empty_attrs;
2001*4882a593Smuzhiyun
2002*4882a593Smuzhiyun pmu.attr_update = x86_pmu.attr_update;
2003*4882a593Smuzhiyun
2004*4882a593Smuzhiyun pr_info("... version: %d\n", x86_pmu.version);
2005*4882a593Smuzhiyun pr_info("... bit width: %d\n", x86_pmu.cntval_bits);
2006*4882a593Smuzhiyun pr_info("... generic registers: %d\n", x86_pmu.num_counters);
2007*4882a593Smuzhiyun pr_info("... value mask: %016Lx\n", x86_pmu.cntval_mask);
2008*4882a593Smuzhiyun pr_info("... max period: %016Lx\n", x86_pmu.max_period);
2009*4882a593Smuzhiyun pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed);
2010*4882a593Smuzhiyun pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl);
2011*4882a593Smuzhiyun
2012*4882a593Smuzhiyun if (!x86_pmu.read)
2013*4882a593Smuzhiyun x86_pmu.read = _x86_pmu_read;
2014*4882a593Smuzhiyun
2015*4882a593Smuzhiyun x86_pmu_static_call_update();
2016*4882a593Smuzhiyun
2017*4882a593Smuzhiyun /*
2018*4882a593Smuzhiyun * Install callbacks. Core will call them for each online
2019*4882a593Smuzhiyun * cpu.
2020*4882a593Smuzhiyun */
2021*4882a593Smuzhiyun err = cpuhp_setup_state(CPUHP_PERF_X86_PREPARE, "perf/x86:prepare",
2022*4882a593Smuzhiyun x86_pmu_prepare_cpu, x86_pmu_dead_cpu);
2023*4882a593Smuzhiyun if (err)
2024*4882a593Smuzhiyun return err;
2025*4882a593Smuzhiyun
2026*4882a593Smuzhiyun err = cpuhp_setup_state(CPUHP_AP_PERF_X86_STARTING,
2027*4882a593Smuzhiyun "perf/x86:starting", x86_pmu_starting_cpu,
2028*4882a593Smuzhiyun x86_pmu_dying_cpu);
2029*4882a593Smuzhiyun if (err)
2030*4882a593Smuzhiyun goto out;
2031*4882a593Smuzhiyun
2032*4882a593Smuzhiyun err = cpuhp_setup_state(CPUHP_AP_PERF_X86_ONLINE, "perf/x86:online",
2033*4882a593Smuzhiyun x86_pmu_online_cpu, NULL);
2034*4882a593Smuzhiyun if (err)
2035*4882a593Smuzhiyun goto out1;
2036*4882a593Smuzhiyun
2037*4882a593Smuzhiyun err = perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
2038*4882a593Smuzhiyun if (err)
2039*4882a593Smuzhiyun goto out2;
2040*4882a593Smuzhiyun
2041*4882a593Smuzhiyun return 0;
2042*4882a593Smuzhiyun
2043*4882a593Smuzhiyun out2:
2044*4882a593Smuzhiyun cpuhp_remove_state(CPUHP_AP_PERF_X86_ONLINE);
2045*4882a593Smuzhiyun out1:
2046*4882a593Smuzhiyun cpuhp_remove_state(CPUHP_AP_PERF_X86_STARTING);
2047*4882a593Smuzhiyun out:
2048*4882a593Smuzhiyun cpuhp_remove_state(CPUHP_PERF_X86_PREPARE);
2049*4882a593Smuzhiyun return err;
2050*4882a593Smuzhiyun }
2051*4882a593Smuzhiyun early_initcall(init_hw_perf_events);
2052*4882a593Smuzhiyun
2053*4882a593Smuzhiyun static void x86_pmu_read(struct perf_event *event)
2054*4882a593Smuzhiyun {
2055*4882a593Smuzhiyun static_call(x86_pmu_read)(event);
2056*4882a593Smuzhiyun }
2057*4882a593Smuzhiyun
2058*4882a593Smuzhiyun /*
2059*4882a593Smuzhiyun * Start group events scheduling transaction
2060*4882a593Smuzhiyun * Set the flag to make pmu::enable() not perform the
2061*4882a593Smuzhiyun * schedulability test; it will be performed at commit time
2062*4882a593Smuzhiyun *
2063*4882a593Smuzhiyun * We only support PERF_PMU_TXN_ADD transactions. Save the
2064*4882a593Smuzhiyun * transaction flags but otherwise ignore non-PERF_PMU_TXN_ADD
2065*4882a593Smuzhiyun * transactions.
2066*4882a593Smuzhiyun */
2067*4882a593Smuzhiyun static void x86_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags)
2068*4882a593Smuzhiyun {
2069*4882a593Smuzhiyun struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
2070*4882a593Smuzhiyun
2071*4882a593Smuzhiyun WARN_ON_ONCE(cpuc->txn_flags); /* txn already in flight */
2072*4882a593Smuzhiyun
2073*4882a593Smuzhiyun cpuc->txn_flags = txn_flags;
2074*4882a593Smuzhiyun if (txn_flags & ~PERF_PMU_TXN_ADD)
2075*4882a593Smuzhiyun return;
2076*4882a593Smuzhiyun
2077*4882a593Smuzhiyun perf_pmu_disable(pmu);
2078*4882a593Smuzhiyun __this_cpu_write(cpu_hw_events.n_txn, 0);
2079*4882a593Smuzhiyun __this_cpu_write(cpu_hw_events.n_txn_pair, 0);
2080*4882a593Smuzhiyun __this_cpu_write(cpu_hw_events.n_txn_metric, 0);
2081*4882a593Smuzhiyun }
2082*4882a593Smuzhiyun
2083*4882a593Smuzhiyun /*
2084*4882a593Smuzhiyun * Stop group events scheduling transaction
2085*4882a593Smuzhiyun * Clear the flag and pmu::enable() will perform the
2086*4882a593Smuzhiyun * schedulability test.
2087*4882a593Smuzhiyun */
2088*4882a593Smuzhiyun static void x86_pmu_cancel_txn(struct pmu *pmu)
2089*4882a593Smuzhiyun {
2090*4882a593Smuzhiyun unsigned int txn_flags;
2091*4882a593Smuzhiyun struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
2092*4882a593Smuzhiyun
2093*4882a593Smuzhiyun WARN_ON_ONCE(!cpuc->txn_flags); /* no txn in flight */
2094*4882a593Smuzhiyun
2095*4882a593Smuzhiyun txn_flags = cpuc->txn_flags;
2096*4882a593Smuzhiyun cpuc->txn_flags = 0;
2097*4882a593Smuzhiyun if (txn_flags & ~PERF_PMU_TXN_ADD)
2098*4882a593Smuzhiyun return;
2099*4882a593Smuzhiyun
2100*4882a593Smuzhiyun /*
2101*4882a593Smuzhiyun * Truncate collected array by the number of events added in this
2102*4882a593Smuzhiyun * transaction. See x86_pmu_add() and x86_pmu_*_txn().
2103*4882a593Smuzhiyun */
2104*4882a593Smuzhiyun __this_cpu_sub(cpu_hw_events.n_added, __this_cpu_read(cpu_hw_events.n_txn));
2105*4882a593Smuzhiyun __this_cpu_sub(cpu_hw_events.n_events, __this_cpu_read(cpu_hw_events.n_txn));
2106*4882a593Smuzhiyun __this_cpu_sub(cpu_hw_events.n_pair, __this_cpu_read(cpu_hw_events.n_txn_pair));
2107*4882a593Smuzhiyun __this_cpu_sub(cpu_hw_events.n_metric, __this_cpu_read(cpu_hw_events.n_txn_metric));
2108*4882a593Smuzhiyun perf_pmu_enable(pmu);
2109*4882a593Smuzhiyun }
2110*4882a593Smuzhiyun
2111*4882a593Smuzhiyun /*
2112*4882a593Smuzhiyun * Commit group events scheduling transaction
2113*4882a593Smuzhiyun * Perform the group schedulability test as a whole
2114*4882a593Smuzhiyun * Return 0 if success
2115*4882a593Smuzhiyun *
2116*4882a593Smuzhiyun * Does not cancel the transaction on failure; expects the caller to do this.
2117*4882a593Smuzhiyun */
2118*4882a593Smuzhiyun static int x86_pmu_commit_txn(struct pmu *pmu)
2119*4882a593Smuzhiyun {
2120*4882a593Smuzhiyun struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
2121*4882a593Smuzhiyun int assign[X86_PMC_IDX_MAX];
2122*4882a593Smuzhiyun int n, ret;
2123*4882a593Smuzhiyun
2124*4882a593Smuzhiyun WARN_ON_ONCE(!cpuc->txn_flags); /* no txn in flight */
2125*4882a593Smuzhiyun
2126*4882a593Smuzhiyun if (cpuc->txn_flags & ~PERF_PMU_TXN_ADD) {
2127*4882a593Smuzhiyun cpuc->txn_flags = 0;
2128*4882a593Smuzhiyun return 0;
2129*4882a593Smuzhiyun }
2130*4882a593Smuzhiyun
2131*4882a593Smuzhiyun n = cpuc->n_events;
2132*4882a593Smuzhiyun
2133*4882a593Smuzhiyun if (!x86_pmu_initialized())
2134*4882a593Smuzhiyun return -EAGAIN;
2135*4882a593Smuzhiyun
2136*4882a593Smuzhiyun ret = static_call(x86_pmu_schedule_events)(cpuc, n, assign);
2137*4882a593Smuzhiyun if (ret)
2138*4882a593Smuzhiyun return ret;
2139*4882a593Smuzhiyun
2140*4882a593Smuzhiyun /*
2141*4882a593Smuzhiyun * copy the new assignment; now that we know it is possible,
2142*4882a593Smuzhiyun * it will be used by hw_perf_enable()
2143*4882a593Smuzhiyun */
2144*4882a593Smuzhiyun memcpy(cpuc->assign, assign, n*sizeof(int));
2145*4882a593Smuzhiyun
2146*4882a593Smuzhiyun cpuc->txn_flags = 0;
2147*4882a593Smuzhiyun perf_pmu_enable(pmu);
2148*4882a593Smuzhiyun return 0;
2149*4882a593Smuzhiyun }
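
/*
 * Minimal sketch of how the generic core is expected to drive the
 * transaction callbacks above (pseudo-caller, not code from this file):
 *
 *	pmu->start_txn(pmu, PERF_PMU_TXN_ADD);
 *	// pmu->add() for the leader and each sibling; the schedulability
 *	// test is skipped because PERF_PMU_TXN_ADD is in flight
 *	if (pmu->commit_txn(pmu))	// test the whole group at once
 *		pmu->cancel_txn(pmu);	// undo the adds, truncate event_list
 */
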
2150*4882a593Smuzhiyun /*
2151*4882a593Smuzhiyun * a fake_cpuc is used to validate event groups. Due to
2152*4882a593Smuzhiyun * the extra reg logic, we need to also allocate a fake
2153*4882a593Smuzhiyun * per_core and per_cpu structure. Otherwise, group events
2154*4882a593Smuzhiyun * using extra reg may conflict without the kernel being
2155*4882a593Smuzhiyun * able to catch this when the last event gets added to
2156*4882a593Smuzhiyun * the group.
2157*4882a593Smuzhiyun */
2158*4882a593Smuzhiyun static void free_fake_cpuc(struct cpu_hw_events *cpuc)
2159*4882a593Smuzhiyun {
2160*4882a593Smuzhiyun intel_cpuc_finish(cpuc);
2161*4882a593Smuzhiyun kfree(cpuc);
2162*4882a593Smuzhiyun }
2163*4882a593Smuzhiyun
2164*4882a593Smuzhiyun static struct cpu_hw_events *allocate_fake_cpuc(void)
2165*4882a593Smuzhiyun {
2166*4882a593Smuzhiyun struct cpu_hw_events *cpuc;
2167*4882a593Smuzhiyun int cpu = raw_smp_processor_id();
2168*4882a593Smuzhiyun
2169*4882a593Smuzhiyun cpuc = kzalloc(sizeof(*cpuc), GFP_KERNEL);
2170*4882a593Smuzhiyun if (!cpuc)
2171*4882a593Smuzhiyun return ERR_PTR(-ENOMEM);
2172*4882a593Smuzhiyun cpuc->is_fake = 1;
2173*4882a593Smuzhiyun
2174*4882a593Smuzhiyun if (intel_cpuc_prepare(cpuc, cpu))
2175*4882a593Smuzhiyun goto error;
2176*4882a593Smuzhiyun
2177*4882a593Smuzhiyun return cpuc;
2178*4882a593Smuzhiyun error:
2179*4882a593Smuzhiyun free_fake_cpuc(cpuc);
2180*4882a593Smuzhiyun return ERR_PTR(-ENOMEM);
2181*4882a593Smuzhiyun }
2182*4882a593Smuzhiyun
2183*4882a593Smuzhiyun /*
2184*4882a593Smuzhiyun * validate that we can schedule this event
2185*4882a593Smuzhiyun */
2186*4882a593Smuzhiyun static int validate_event(struct perf_event *event)
2187*4882a593Smuzhiyun {
2188*4882a593Smuzhiyun struct cpu_hw_events *fake_cpuc;
2189*4882a593Smuzhiyun struct event_constraint *c;
2190*4882a593Smuzhiyun int ret = 0;
2191*4882a593Smuzhiyun
2192*4882a593Smuzhiyun fake_cpuc = allocate_fake_cpuc();
2193*4882a593Smuzhiyun if (IS_ERR(fake_cpuc))
2194*4882a593Smuzhiyun return PTR_ERR(fake_cpuc);
2195*4882a593Smuzhiyun
2196*4882a593Smuzhiyun c = x86_pmu.get_event_constraints(fake_cpuc, 0, event);
2197*4882a593Smuzhiyun
2198*4882a593Smuzhiyun if (!c || !c->weight)
2199*4882a593Smuzhiyun ret = -EINVAL;
2200*4882a593Smuzhiyun
2201*4882a593Smuzhiyun if (x86_pmu.put_event_constraints)
2202*4882a593Smuzhiyun x86_pmu.put_event_constraints(fake_cpuc, event);
2203*4882a593Smuzhiyun
2204*4882a593Smuzhiyun free_fake_cpuc(fake_cpuc);
2205*4882a593Smuzhiyun
2206*4882a593Smuzhiyun return ret;
2207*4882a593Smuzhiyun }
2208*4882a593Smuzhiyun
2209*4882a593Smuzhiyun /*
2210*4882a593Smuzhiyun * validate a single event group
2211*4882a593Smuzhiyun *
2212*4882a593Smuzhiyun * validation includes:
2213*4882a593Smuzhiyun * - check events are compatible with each other
2214*4882a593Smuzhiyun * - events do not compete for the same counter
2215*4882a593Smuzhiyun * - number of events <= number of counters
2216*4882a593Smuzhiyun *
2217*4882a593Smuzhiyun * validation ensures the group can be loaded onto the
2218*4882a593Smuzhiyun * PMU if it was the only group available.
2219*4882a593Smuzhiyun */
2220*4882a593Smuzhiyun static int validate_group(struct perf_event *event)
2221*4882a593Smuzhiyun {
2222*4882a593Smuzhiyun struct perf_event *leader = event->group_leader;
2223*4882a593Smuzhiyun struct cpu_hw_events *fake_cpuc;
2224*4882a593Smuzhiyun int ret = -EINVAL, n;
2225*4882a593Smuzhiyun
2226*4882a593Smuzhiyun fake_cpuc = allocate_fake_cpuc();
2227*4882a593Smuzhiyun if (IS_ERR(fake_cpuc))
2228*4882a593Smuzhiyun return PTR_ERR(fake_cpuc);
2229*4882a593Smuzhiyun /*
2230*4882a593Smuzhiyun * the event is not yet connected with its
2231*4882a593Smuzhiyun * siblings; therefore we must first collect
2232*4882a593Smuzhiyun * existing siblings, then add the new event
2233*4882a593Smuzhiyun * before we can simulate the scheduling
2234*4882a593Smuzhiyun */
2235*4882a593Smuzhiyun n = collect_events(fake_cpuc, leader, true);
2236*4882a593Smuzhiyun if (n < 0)
2237*4882a593Smuzhiyun goto out;
2238*4882a593Smuzhiyun
2239*4882a593Smuzhiyun fake_cpuc->n_events = n;
2240*4882a593Smuzhiyun n = collect_events(fake_cpuc, event, false);
2241*4882a593Smuzhiyun if (n < 0)
2242*4882a593Smuzhiyun goto out;
2243*4882a593Smuzhiyun
2244*4882a593Smuzhiyun fake_cpuc->n_events = 0;
2245*4882a593Smuzhiyun ret = x86_pmu.schedule_events(fake_cpuc, n, NULL);
2246*4882a593Smuzhiyun
2247*4882a593Smuzhiyun out:
2248*4882a593Smuzhiyun free_fake_cpuc(fake_cpuc);
2249*4882a593Smuzhiyun return ret;
2250*4882a593Smuzhiyun }
2251*4882a593Smuzhiyun
2252*4882a593Smuzhiyun static int x86_pmu_event_init(struct perf_event *event)
2253*4882a593Smuzhiyun {
2254*4882a593Smuzhiyun struct pmu *tmp;
2255*4882a593Smuzhiyun int err;
2256*4882a593Smuzhiyun
2257*4882a593Smuzhiyun switch (event->attr.type) {
2258*4882a593Smuzhiyun case PERF_TYPE_RAW:
2259*4882a593Smuzhiyun case PERF_TYPE_HARDWARE:
2260*4882a593Smuzhiyun case PERF_TYPE_HW_CACHE:
2261*4882a593Smuzhiyun break;
2262*4882a593Smuzhiyun
2263*4882a593Smuzhiyun default:
2264*4882a593Smuzhiyun return -ENOENT;
2265*4882a593Smuzhiyun }
2266*4882a593Smuzhiyun
2267*4882a593Smuzhiyun err = __x86_pmu_event_init(event);
2268*4882a593Smuzhiyun if (!err) {
2269*4882a593Smuzhiyun /*
2270*4882a593Smuzhiyun * we temporarily connect event to its pmu
2271*4882a593Smuzhiyun * such that validate_group() can classify
2272*4882a593Smuzhiyun * it as an x86 event using is_x86_event()
2273*4882a593Smuzhiyun */
2274*4882a593Smuzhiyun tmp = event->pmu;
2275*4882a593Smuzhiyun event->pmu = &pmu;
2276*4882a593Smuzhiyun
2277*4882a593Smuzhiyun if (event->group_leader != event)
2278*4882a593Smuzhiyun err = validate_group(event);
2279*4882a593Smuzhiyun else
2280*4882a593Smuzhiyun err = validate_event(event);
2281*4882a593Smuzhiyun
2282*4882a593Smuzhiyun event->pmu = tmp;
2283*4882a593Smuzhiyun }
2284*4882a593Smuzhiyun if (err) {
2285*4882a593Smuzhiyun if (event->destroy)
2286*4882a593Smuzhiyun event->destroy(event);
2287*4882a593Smuzhiyun event->destroy = NULL;
2288*4882a593Smuzhiyun }
2289*4882a593Smuzhiyun
2290*4882a593Smuzhiyun if (READ_ONCE(x86_pmu.attr_rdpmc) &&
2291*4882a593Smuzhiyun !(event->hw.flags & PERF_X86_EVENT_LARGE_PEBS))
2292*4882a593Smuzhiyun event->hw.flags |= PERF_X86_EVENT_RDPMC_ALLOWED;
2293*4882a593Smuzhiyun
2294*4882a593Smuzhiyun return err;
2295*4882a593Smuzhiyun }
2296*4882a593Smuzhiyun
2297*4882a593Smuzhiyun static void x86_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm)
2298*4882a593Smuzhiyun {
2299*4882a593Smuzhiyun if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
2300*4882a593Smuzhiyun return;
2301*4882a593Smuzhiyun
2302*4882a593Smuzhiyun /*
2303*4882a593Smuzhiyun * This function relies on not being called concurrently in two
2304*4882a593Smuzhiyun * tasks in the same mm. Otherwise one task could observe
2305*4882a593Smuzhiyun * perf_rdpmc_allowed > 1 and return all the way back to
2306*4882a593Smuzhiyun * userspace with CR4.PCE clear while another task is still
2307*4882a593Smuzhiyun * doing on_each_cpu_mask() to propagate CR4.PCE.
2308*4882a593Smuzhiyun *
2309*4882a593Smuzhiyun * For now, this can't happen because all callers hold mmap_lock
2310*4882a593Smuzhiyun * for write. If this changes, we'll need a different solution.
2311*4882a593Smuzhiyun */
2312*4882a593Smuzhiyun mmap_assert_write_locked(mm);
2313*4882a593Smuzhiyun
2314*4882a593Smuzhiyun if (atomic_inc_return(&mm->context.perf_rdpmc_allowed) == 1)
2315*4882a593Smuzhiyun on_each_cpu_mask(mm_cpumask(mm), cr4_update_pce, NULL, 1);
2316*4882a593Smuzhiyun }
2317*4882a593Smuzhiyun
2318*4882a593Smuzhiyun static void x86_pmu_event_unmapped(struct perf_event *event, struct mm_struct *mm)
2319*4882a593Smuzhiyun {
2320*4882a593Smuzhiyun
2321*4882a593Smuzhiyun if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
2322*4882a593Smuzhiyun return;
2323*4882a593Smuzhiyun
2324*4882a593Smuzhiyun if (atomic_dec_and_test(&mm->context.perf_rdpmc_allowed))
2325*4882a593Smuzhiyun on_each_cpu_mask(mm_cpumask(mm), cr4_update_pce, NULL, 1);
2326*4882a593Smuzhiyun }
2327*4882a593Smuzhiyun
2328*4882a593Smuzhiyun static int x86_pmu_event_idx(struct perf_event *event)
2329*4882a593Smuzhiyun {
2330*4882a593Smuzhiyun struct hw_perf_event *hwc = &event->hw;
2331*4882a593Smuzhiyun
2332*4882a593Smuzhiyun if (!(hwc->flags & PERF_X86_EVENT_RDPMC_ALLOWED))
2333*4882a593Smuzhiyun return 0;
2334*4882a593Smuzhiyun
2335*4882a593Smuzhiyun if (is_metric_idx(hwc->idx))
2336*4882a593Smuzhiyun return INTEL_PMC_FIXED_RDPMC_METRICS + 1;
2337*4882a593Smuzhiyun else
2338*4882a593Smuzhiyun return hwc->event_base_rdpmc + 1;
2339*4882a593Smuzhiyun }
2340*4882a593Smuzhiyun
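/*
 * Userspace consumes the index returned above through the mmap()ed
 * perf_event_mmap_page. A simplified sketch of the documented
 * self-monitoring read sequence (no time scaling; rdpmc() stands in for
 * the raw instruction):
 *
 *	struct perf_event_mmap_page *pc = mmapped_page;
 *	u32 seq, idx;
 *	u64 count;
 *
 *	do {
 *		seq = pc->lock;
 *		barrier();
 *		idx = pc->index;
 *		count = pc->offset;
 *		if (idx)
 *			count += rdpmc(idx - 1);
 *		barrier();
 *	} while (pc->lock != seq);
 */
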
2341*4882a593Smuzhiyun static ssize_t get_attr_rdpmc(struct device *cdev,
2342*4882a593Smuzhiyun struct device_attribute *attr,
2343*4882a593Smuzhiyun char *buf)
2344*4882a593Smuzhiyun {
2345*4882a593Smuzhiyun return snprintf(buf, 40, "%d\n", x86_pmu.attr_rdpmc);
2346*4882a593Smuzhiyun }
2347*4882a593Smuzhiyun
2348*4882a593Smuzhiyun static ssize_t set_attr_rdpmc(struct device *cdev,
2349*4882a593Smuzhiyun struct device_attribute *attr,
2350*4882a593Smuzhiyun const char *buf, size_t count)
2351*4882a593Smuzhiyun {
2352*4882a593Smuzhiyun unsigned long val;
2353*4882a593Smuzhiyun ssize_t ret;
2354*4882a593Smuzhiyun
2355*4882a593Smuzhiyun ret = kstrtoul(buf, 0, &val);
2356*4882a593Smuzhiyun if (ret)
2357*4882a593Smuzhiyun return ret;
2358*4882a593Smuzhiyun
2359*4882a593Smuzhiyun if (val > 2)
2360*4882a593Smuzhiyun return -EINVAL;
2361*4882a593Smuzhiyun
2362*4882a593Smuzhiyun if (x86_pmu.attr_rdpmc_broken)
2363*4882a593Smuzhiyun return -ENOTSUPP;
2364*4882a593Smuzhiyun
2365*4882a593Smuzhiyun if (val != x86_pmu.attr_rdpmc) {
2366*4882a593Smuzhiyun /*
2367*4882a593Smuzhiyun * Changing into or out of never available or always available,
2368*4882a593Smuzhiyun * aka perf-event-bypassing mode. This path is extremely slow,
2369*4882a593Smuzhiyun * but only root can trigger it, so it's okay.
2370*4882a593Smuzhiyun */
2371*4882a593Smuzhiyun if (val == 0)
2372*4882a593Smuzhiyun static_branch_inc(&rdpmc_never_available_key);
2373*4882a593Smuzhiyun else if (x86_pmu.attr_rdpmc == 0)
2374*4882a593Smuzhiyun static_branch_dec(&rdpmc_never_available_key);
2375*4882a593Smuzhiyun
2376*4882a593Smuzhiyun if (val == 2)
2377*4882a593Smuzhiyun static_branch_inc(&rdpmc_always_available_key);
2378*4882a593Smuzhiyun else if (x86_pmu.attr_rdpmc == 2)
2379*4882a593Smuzhiyun static_branch_dec(&rdpmc_always_available_key);
2380*4882a593Smuzhiyun
2381*4882a593Smuzhiyun on_each_cpu(cr4_update_pce, NULL, 1);
2382*4882a593Smuzhiyun x86_pmu.attr_rdpmc = val;
2383*4882a593Smuzhiyun }
2384*4882a593Smuzhiyun
2385*4882a593Smuzhiyun return count;
2386*4882a593Smuzhiyun }
2387*4882a593Smuzhiyun
2388*4882a593Smuzhiyun static DEVICE_ATTR(rdpmc, S_IRUSR | S_IWUSR, get_attr_rdpmc, set_attr_rdpmc);
2389*4882a593Smuzhiyun
2390*4882a593Smuzhiyun static struct attribute *x86_pmu_attrs[] = {
2391*4882a593Smuzhiyun &dev_attr_rdpmc.attr,
2392*4882a593Smuzhiyun NULL,
2393*4882a593Smuzhiyun };
2394*4882a593Smuzhiyun
2395*4882a593Smuzhiyun static struct attribute_group x86_pmu_attr_group __ro_after_init = {
2396*4882a593Smuzhiyun .attrs = x86_pmu_attrs,
2397*4882a593Smuzhiyun };
2398*4882a593Smuzhiyun
2399*4882a593Smuzhiyun static ssize_t max_precise_show(struct device *cdev,
2400*4882a593Smuzhiyun struct device_attribute *attr,
2401*4882a593Smuzhiyun char *buf)
2402*4882a593Smuzhiyun {
2403*4882a593Smuzhiyun return snprintf(buf, PAGE_SIZE, "%d\n", x86_pmu_max_precise());
2404*4882a593Smuzhiyun }
2405*4882a593Smuzhiyun
2406*4882a593Smuzhiyun static DEVICE_ATTR_RO(max_precise);
2407*4882a593Smuzhiyun
2408*4882a593Smuzhiyun static struct attribute *x86_pmu_caps_attrs[] = {
2409*4882a593Smuzhiyun &dev_attr_max_precise.attr,
2410*4882a593Smuzhiyun NULL
2411*4882a593Smuzhiyun };
2412*4882a593Smuzhiyun
2413*4882a593Smuzhiyun static struct attribute_group x86_pmu_caps_group __ro_after_init = {
2414*4882a593Smuzhiyun .name = "caps",
2415*4882a593Smuzhiyun .attrs = x86_pmu_caps_attrs,
2416*4882a593Smuzhiyun };
2417*4882a593Smuzhiyun
2418*4882a593Smuzhiyun static const struct attribute_group *x86_pmu_attr_groups[] = {
2419*4882a593Smuzhiyun &x86_pmu_attr_group,
2420*4882a593Smuzhiyun &x86_pmu_format_group,
2421*4882a593Smuzhiyun &x86_pmu_events_group,
2422*4882a593Smuzhiyun &x86_pmu_caps_group,
2423*4882a593Smuzhiyun NULL,
2424*4882a593Smuzhiyun };
2425*4882a593Smuzhiyun
2426*4882a593Smuzhiyun static void x86_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
2427*4882a593Smuzhiyun {
2428*4882a593Smuzhiyun static_call_cond(x86_pmu_sched_task)(ctx, sched_in);
2429*4882a593Smuzhiyun }
2430*4882a593Smuzhiyun
2431*4882a593Smuzhiyun static void x86_pmu_swap_task_ctx(struct perf_event_context *prev,
2432*4882a593Smuzhiyun struct perf_event_context *next)
2433*4882a593Smuzhiyun {
2434*4882a593Smuzhiyun static_call_cond(x86_pmu_swap_task_ctx)(prev, next);
2435*4882a593Smuzhiyun }
2436*4882a593Smuzhiyun
2437*4882a593Smuzhiyun void perf_check_microcode(void)
2438*4882a593Smuzhiyun {
2439*4882a593Smuzhiyun if (x86_pmu.check_microcode)
2440*4882a593Smuzhiyun x86_pmu.check_microcode();
2441*4882a593Smuzhiyun }
2442*4882a593Smuzhiyun
2443*4882a593Smuzhiyun static int x86_pmu_check_period(struct perf_event *event, u64 value)
2444*4882a593Smuzhiyun {
2445*4882a593Smuzhiyun if (x86_pmu.check_period && x86_pmu.check_period(event, value))
2446*4882a593Smuzhiyun return -EINVAL;
2447*4882a593Smuzhiyun
2448*4882a593Smuzhiyun if (value && x86_pmu.limit_period) {
2449*4882a593Smuzhiyun if (x86_pmu.limit_period(event, value) > value)
2450*4882a593Smuzhiyun return -EINVAL;
2451*4882a593Smuzhiyun }
2452*4882a593Smuzhiyun
2453*4882a593Smuzhiyun return 0;
2454*4882a593Smuzhiyun }
2455*4882a593Smuzhiyun
2456*4882a593Smuzhiyun static int x86_pmu_aux_output_match(struct perf_event *event)
2457*4882a593Smuzhiyun {
2458*4882a593Smuzhiyun if (!(pmu.capabilities & PERF_PMU_CAP_AUX_OUTPUT))
2459*4882a593Smuzhiyun return 0;
2460*4882a593Smuzhiyun
2461*4882a593Smuzhiyun if (x86_pmu.aux_output_match)
2462*4882a593Smuzhiyun return x86_pmu.aux_output_match(event);
2463*4882a593Smuzhiyun
2464*4882a593Smuzhiyun return 0;
2465*4882a593Smuzhiyun }
2466*4882a593Smuzhiyun
2467*4882a593Smuzhiyun static struct pmu pmu = {
2468*4882a593Smuzhiyun .pmu_enable = x86_pmu_enable,
2469*4882a593Smuzhiyun .pmu_disable = x86_pmu_disable,
2470*4882a593Smuzhiyun
2471*4882a593Smuzhiyun .attr_groups = x86_pmu_attr_groups,
2472*4882a593Smuzhiyun
2473*4882a593Smuzhiyun .event_init = x86_pmu_event_init,
2474*4882a593Smuzhiyun
2475*4882a593Smuzhiyun .event_mapped = x86_pmu_event_mapped,
2476*4882a593Smuzhiyun .event_unmapped = x86_pmu_event_unmapped,
2477*4882a593Smuzhiyun
2478*4882a593Smuzhiyun .add = x86_pmu_add,
2479*4882a593Smuzhiyun .del = x86_pmu_del,
2480*4882a593Smuzhiyun .start = x86_pmu_start,
2481*4882a593Smuzhiyun .stop = x86_pmu_stop,
2482*4882a593Smuzhiyun .read = x86_pmu_read,
2483*4882a593Smuzhiyun
2484*4882a593Smuzhiyun .start_txn = x86_pmu_start_txn,
2485*4882a593Smuzhiyun .cancel_txn = x86_pmu_cancel_txn,
2486*4882a593Smuzhiyun .commit_txn = x86_pmu_commit_txn,
2487*4882a593Smuzhiyun
2488*4882a593Smuzhiyun .event_idx = x86_pmu_event_idx,
2489*4882a593Smuzhiyun .sched_task = x86_pmu_sched_task,
2490*4882a593Smuzhiyun .swap_task_ctx = x86_pmu_swap_task_ctx,
2491*4882a593Smuzhiyun .check_period = x86_pmu_check_period,
2492*4882a593Smuzhiyun
2493*4882a593Smuzhiyun .aux_output_match = x86_pmu_aux_output_match,
2494*4882a593Smuzhiyun };
2495*4882a593Smuzhiyun
2496*4882a593Smuzhiyun void arch_perf_update_userpage(struct perf_event *event,
2497*4882a593Smuzhiyun struct perf_event_mmap_page *userpg, u64 now)
2498*4882a593Smuzhiyun {
2499*4882a593Smuzhiyun struct cyc2ns_data data;
2500*4882a593Smuzhiyun u64 offset;
2501*4882a593Smuzhiyun
2502*4882a593Smuzhiyun userpg->cap_user_time = 0;
2503*4882a593Smuzhiyun userpg->cap_user_time_zero = 0;
2504*4882a593Smuzhiyun userpg->cap_user_rdpmc =
2505*4882a593Smuzhiyun !!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED);
2506*4882a593Smuzhiyun userpg->pmc_width = x86_pmu.cntval_bits;
2507*4882a593Smuzhiyun
2508*4882a593Smuzhiyun if (!using_native_sched_clock() || !sched_clock_stable())
2509*4882a593Smuzhiyun return;
2510*4882a593Smuzhiyun
2511*4882a593Smuzhiyun cyc2ns_read_begin(&data);
2512*4882a593Smuzhiyun
2513*4882a593Smuzhiyun offset = data.cyc2ns_offset + __sched_clock_offset;
2514*4882a593Smuzhiyun
2515*4882a593Smuzhiyun /*
2516*4882a593Smuzhiyun * Internal timekeeping for enabled/running/stopped times
2517*4882a593Smuzhiyun * is always in the local_clock domain.
2518*4882a593Smuzhiyun */
2519*4882a593Smuzhiyun userpg->cap_user_time = 1;
2520*4882a593Smuzhiyun userpg->time_mult = data.cyc2ns_mul;
2521*4882a593Smuzhiyun userpg->time_shift = data.cyc2ns_shift;
2522*4882a593Smuzhiyun userpg->time_offset = offset - now;
2523*4882a593Smuzhiyun
2524*4882a593Smuzhiyun /*
2525*4882a593Smuzhiyun * cap_user_time_zero doesn't make sense when we're using a different
2526*4882a593Smuzhiyun * time base for the records.
2527*4882a593Smuzhiyun */
2528*4882a593Smuzhiyun if (!event->attr.use_clockid) {
2529*4882a593Smuzhiyun userpg->cap_user_time_zero = 1;
2530*4882a593Smuzhiyun userpg->time_zero = offset;
2531*4882a593Smuzhiyun }
2532*4882a593Smuzhiyun
2533*4882a593Smuzhiyun cyc2ns_read_end();
2534*4882a593Smuzhiyun }
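/*
 * A rough sketch of how userspace is expected to use the time_* fields
 * published above, following the conversion documented for
 * struct perf_event_mmap_page; rdtsc() stands for any userspace TSC read
 * (e.g. the __rdtsc() intrinsic) and is illustrative only:
 *
 *	__u64 cyc, quot, rem, delta;
 *
 *	cyc   = rdtsc();
 *	quot  = cyc >> pc->time_shift;
 *	rem   = cyc & (((__u64)1 << pc->time_shift) - 1);
 *	delta = pc->time_offset + quot * pc->time_mult +
 *		((rem * pc->time_mult) >> pc->time_shift);
 *
 * With cap_user_time_zero set, an absolute perf timestamp for the current
 * TSC value is obtained the same way with pc->time_zero in place of
 * pc->time_offset, which lets userspace reads be correlated with sample
 * timestamps.  All of these fields must be read under the pc->lock
 * seqcount loop described in the perf_event_mmap_page documentation.
 */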
2535*4882a593Smuzhiyun
2536*4882a593Smuzhiyun /*
2537*4882a593Smuzhiyun * Determine whether the regs were taken from an irq/exception handler rather
2538*4882a593Smuzhiyun * than from perf_arch_fetch_caller_regs().
2539*4882a593Smuzhiyun */
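/*
 * A hardware-saved EFLAGS always has the architecturally fixed bit
 * (X86_EFLAGS_FIXED) set, while perf_arch_fetch_caller_regs() leaves
 * regs->flags zeroed; that is what the check below relies on.
 */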
2540*4882a593Smuzhiyun static bool perf_hw_regs(struct pt_regs *regs)
2541*4882a593Smuzhiyun {
2542*4882a593Smuzhiyun return regs->flags & X86_EFLAGS_FIXED;
2543*4882a593Smuzhiyun }
2544*4882a593Smuzhiyun
2545*4882a593Smuzhiyun void
2546*4882a593Smuzhiyun perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
2547*4882a593Smuzhiyun {
2548*4882a593Smuzhiyun struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
2549*4882a593Smuzhiyun struct unwind_state state;
2550*4882a593Smuzhiyun unsigned long addr;
2551*4882a593Smuzhiyun
2552*4882a593Smuzhiyun if (guest_cbs && guest_cbs->is_in_guest()) {
2553*4882a593Smuzhiyun /* TODO: We don't support guest OS callchains yet */
2554*4882a593Smuzhiyun return;
2555*4882a593Smuzhiyun }
2556*4882a593Smuzhiyun
2557*4882a593Smuzhiyun if (perf_callchain_store(entry, regs->ip))
2558*4882a593Smuzhiyun return;
2559*4882a593Smuzhiyun
2560*4882a593Smuzhiyun if (perf_hw_regs(regs))
2561*4882a593Smuzhiyun unwind_start(&state, current, regs, NULL);
2562*4882a593Smuzhiyun else
2563*4882a593Smuzhiyun unwind_start(&state, current, NULL, (void *)regs->sp);
2564*4882a593Smuzhiyun
2565*4882a593Smuzhiyun for (; !unwind_done(&state); unwind_next_frame(&state)) {
2566*4882a593Smuzhiyun addr = unwind_get_return_address(&state);
2567*4882a593Smuzhiyun if (!addr || perf_callchain_store(entry, addr))
2568*4882a593Smuzhiyun return;
2569*4882a593Smuzhiyun }
2570*4882a593Smuzhiyun }
2571*4882a593Smuzhiyun
2572*4882a593Smuzhiyun static inline int
2573*4882a593Smuzhiyun valid_user_frame(const void __user *fp, unsigned long size)
2574*4882a593Smuzhiyun {
2575*4882a593Smuzhiyun return (__range_not_ok(fp, size, TASK_SIZE) == 0);
2576*4882a593Smuzhiyun }
2577*4882a593Smuzhiyun
2578*4882a593Smuzhiyun static unsigned long get_segment_base(unsigned int segment)
2579*4882a593Smuzhiyun {
2580*4882a593Smuzhiyun struct desc_struct *desc;
2581*4882a593Smuzhiyun unsigned int idx = segment >> 3;
2582*4882a593Smuzhiyun
2583*4882a593Smuzhiyun if ((segment & SEGMENT_TI_MASK) == SEGMENT_LDT) {
2584*4882a593Smuzhiyun #ifdef CONFIG_MODIFY_LDT_SYSCALL
2585*4882a593Smuzhiyun struct ldt_struct *ldt;
2586*4882a593Smuzhiyun
2587*4882a593Smuzhiyun /* IRQs are off, so this synchronizes with smp_store_release */
2588*4882a593Smuzhiyun ldt = READ_ONCE(current->active_mm->context.ldt);
2589*4882a593Smuzhiyun if (!ldt || idx >= ldt->nr_entries)
2590*4882a593Smuzhiyun return 0;
2591*4882a593Smuzhiyun
2592*4882a593Smuzhiyun desc = &ldt->entries[idx];
2593*4882a593Smuzhiyun #else
2594*4882a593Smuzhiyun return 0;
2595*4882a593Smuzhiyun #endif
2596*4882a593Smuzhiyun } else {
2597*4882a593Smuzhiyun if (idx >= GDT_ENTRIES)
2598*4882a593Smuzhiyun return 0;
2599*4882a593Smuzhiyun
2600*4882a593Smuzhiyun desc = raw_cpu_ptr(gdt_page.gdt) + idx;
2601*4882a593Smuzhiyun }
2602*4882a593Smuzhiyun
2603*4882a593Smuzhiyun return get_desc_base(desc);
2604*4882a593Smuzhiyun }
2605*4882a593Smuzhiyun
2606*4882a593Smuzhiyun #ifdef CONFIG_IA32_EMULATION
2607*4882a593Smuzhiyun
2608*4882a593Smuzhiyun #include <linux/compat.h>
2609*4882a593Smuzhiyun
2610*4882a593Smuzhiyun static inline int
2611*4882a593Smuzhiyun perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry_ctx *entry)
2612*4882a593Smuzhiyun {
2613*4882a593Smuzhiyun /* 32-bit process in 64-bit kernel. */
2614*4882a593Smuzhiyun unsigned long ss_base, cs_base;
2615*4882a593Smuzhiyun struct stack_frame_ia32 frame;
2616*4882a593Smuzhiyun const struct stack_frame_ia32 __user *fp;
2617*4882a593Smuzhiyun
2618*4882a593Smuzhiyun if (!test_thread_flag(TIF_IA32))
2619*4882a593Smuzhiyun return 0;
2620*4882a593Smuzhiyun
2621*4882a593Smuzhiyun cs_base = get_segment_base(regs->cs);
2622*4882a593Smuzhiyun ss_base = get_segment_base(regs->ss);
2623*4882a593Smuzhiyun
2624*4882a593Smuzhiyun fp = compat_ptr(ss_base + regs->bp);
2625*4882a593Smuzhiyun pagefault_disable();
2626*4882a593Smuzhiyun while (entry->nr < entry->max_stack) {
2627*4882a593Smuzhiyun if (!valid_user_frame(fp, sizeof(frame)))
2628*4882a593Smuzhiyun break;
2629*4882a593Smuzhiyun
2630*4882a593Smuzhiyun if (__get_user(frame.next_frame, &fp->next_frame))
2631*4882a593Smuzhiyun break;
2632*4882a593Smuzhiyun if (__get_user(frame.return_address, &fp->return_address))
2633*4882a593Smuzhiyun break;
2634*4882a593Smuzhiyun
2635*4882a593Smuzhiyun perf_callchain_store(entry, cs_base + frame.return_address);
2636*4882a593Smuzhiyun fp = compat_ptr(ss_base + frame.next_frame);
2637*4882a593Smuzhiyun }
2638*4882a593Smuzhiyun pagefault_enable();
2639*4882a593Smuzhiyun return 1;
2640*4882a593Smuzhiyun }
2641*4882a593Smuzhiyun #else
2642*4882a593Smuzhiyun static inline int
2643*4882a593Smuzhiyun perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry_ctx *entry)
2644*4882a593Smuzhiyun {
2645*4882a593Smuzhiyun return 0;
2646*4882a593Smuzhiyun }
2647*4882a593Smuzhiyun #endif
2648*4882a593Smuzhiyun
2649*4882a593Smuzhiyun void
2650*4882a593Smuzhiyun perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
2651*4882a593Smuzhiyun {
2652*4882a593Smuzhiyun struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
2653*4882a593Smuzhiyun struct stack_frame frame;
2654*4882a593Smuzhiyun const struct stack_frame __user *fp;
2655*4882a593Smuzhiyun
2656*4882a593Smuzhiyun if (guest_cbs && guest_cbs->is_in_guest()) {
2657*4882a593Smuzhiyun /* TODO: We don't support guest OS callchains yet */
2658*4882a593Smuzhiyun return;
2659*4882a593Smuzhiyun }
2660*4882a593Smuzhiyun
2661*4882a593Smuzhiyun /*
2662*4882a593Smuzhiyun * We don't know what to do with VM86 stacks; ignore them for now.
2663*4882a593Smuzhiyun */
2664*4882a593Smuzhiyun if (regs->flags & (X86_VM_MASK | PERF_EFLAGS_VM))
2665*4882a593Smuzhiyun return;
2666*4882a593Smuzhiyun
2667*4882a593Smuzhiyun fp = (void __user *)regs->bp;
2668*4882a593Smuzhiyun
2669*4882a593Smuzhiyun perf_callchain_store(entry, regs->ip);
2670*4882a593Smuzhiyun
2671*4882a593Smuzhiyun if (!nmi_uaccess_okay())
2672*4882a593Smuzhiyun return;
2673*4882a593Smuzhiyun
2674*4882a593Smuzhiyun if (perf_callchain_user32(regs, entry))
2675*4882a593Smuzhiyun return;
2676*4882a593Smuzhiyun
2677*4882a593Smuzhiyun pagefault_disable();
2678*4882a593Smuzhiyun while (entry->nr < entry->max_stack) {
2679*4882a593Smuzhiyun if (!valid_user_frame(fp, sizeof(frame)))
2680*4882a593Smuzhiyun break;
2681*4882a593Smuzhiyun
2682*4882a593Smuzhiyun if (__get_user(frame.next_frame, &fp->next_frame))
2683*4882a593Smuzhiyun break;
2684*4882a593Smuzhiyun if (__get_user(frame.return_address, &fp->return_address))
2685*4882a593Smuzhiyun break;
2686*4882a593Smuzhiyun
2687*4882a593Smuzhiyun perf_callchain_store(entry, frame.return_address);
2688*4882a593Smuzhiyun fp = (void __user *)frame.next_frame;
2689*4882a593Smuzhiyun }
2690*4882a593Smuzhiyun pagefault_enable();
2691*4882a593Smuzhiyun }
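/*
 * Both user-space walkers above follow saved frame pointers (regs->bp and
 * the next_frame chain), so they only produce useful callchains for
 * binaries built with frame pointers (e.g. -fno-omit-frame-pointer);
 * otherwise the walk typically stops after the first frame.
 */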
2692*4882a593Smuzhiyun
2693*4882a593Smuzhiyun /*
2694*4882a593Smuzhiyun * Deal with code segment offsets for the various execution modes:
2695*4882a593Smuzhiyun *
2696*4882a593Smuzhiyun * VM86 - the good olde 16 bit days, where the linear address is
2697*4882a593Smuzhiyun * 20 bits and we use regs->ip + 0x10 * regs->cs.
2698*4882a593Smuzhiyun *
2699*4882a593Smuzhiyun * IA32 - Where we need to look at GDT/LDT segment descriptor tables
2700*4882a593Smuzhiyun * to figure out what the 32bit base address is.
2701*4882a593Smuzhiyun *
2702*4882a593Smuzhiyun * X32 - has TIF_X32 set, but is running in x86_64 mode
2703*4882a593Smuzhiyun *
2704*4882a593Smuzhiyun * X86_64 - CS,DS,SS,ES are all zero based.
2705*4882a593Smuzhiyun */
2706*4882a593Smuzhiyun static unsigned long code_segment_base(struct pt_regs *regs)
2707*4882a593Smuzhiyun {
2708*4882a593Smuzhiyun /*
2709*4882a593Smuzhiyun * For IA32 we look at the GDT/LDT segment base to convert the
2710*4882a593Smuzhiyun * effective IP to a linear address.
2711*4882a593Smuzhiyun */
2712*4882a593Smuzhiyun
2713*4882a593Smuzhiyun #ifdef CONFIG_X86_32
2714*4882a593Smuzhiyun /*
2715*4882a593Smuzhiyun * If we are in VM86 mode, add the segment offset to convert to a
2716*4882a593Smuzhiyun * linear address.
2717*4882a593Smuzhiyun */
2718*4882a593Smuzhiyun if (regs->flags & X86_VM_MASK)
2719*4882a593Smuzhiyun return 0x10 * regs->cs;
2720*4882a593Smuzhiyun
2721*4882a593Smuzhiyun if (user_mode(regs) && regs->cs != __USER_CS)
2722*4882a593Smuzhiyun return get_segment_base(regs->cs);
2723*4882a593Smuzhiyun #else
2724*4882a593Smuzhiyun if (user_mode(regs) && !user_64bit_mode(regs) &&
2725*4882a593Smuzhiyun regs->cs != __USER32_CS)
2726*4882a593Smuzhiyun return get_segment_base(regs->cs);
2727*4882a593Smuzhiyun #endif
2728*4882a593Smuzhiyun return 0;
2729*4882a593Smuzhiyun }
2730*4882a593Smuzhiyun
2731*4882a593Smuzhiyun unsigned long perf_instruction_pointer(struct pt_regs *regs)
2732*4882a593Smuzhiyun {
2733*4882a593Smuzhiyun struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
2734*4882a593Smuzhiyun
2735*4882a593Smuzhiyun if (guest_cbs && guest_cbs->is_in_guest())
2736*4882a593Smuzhiyun return guest_cbs->get_guest_ip();
2737*4882a593Smuzhiyun
2738*4882a593Smuzhiyun return regs->ip + code_segment_base(regs);
2739*4882a593Smuzhiyun }
2740*4882a593Smuzhiyun
2741*4882a593Smuzhiyun unsigned long perf_misc_flags(struct pt_regs *regs)
2742*4882a593Smuzhiyun {
2743*4882a593Smuzhiyun struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
2744*4882a593Smuzhiyun int misc = 0;
2745*4882a593Smuzhiyun
2746*4882a593Smuzhiyun if (guest_cbs && guest_cbs->is_in_guest()) {
2747*4882a593Smuzhiyun if (guest_cbs->is_user_mode())
2748*4882a593Smuzhiyun misc |= PERF_RECORD_MISC_GUEST_USER;
2749*4882a593Smuzhiyun else
2750*4882a593Smuzhiyun misc |= PERF_RECORD_MISC_GUEST_KERNEL;
2751*4882a593Smuzhiyun } else {
2752*4882a593Smuzhiyun if (user_mode(regs))
2753*4882a593Smuzhiyun misc |= PERF_RECORD_MISC_USER;
2754*4882a593Smuzhiyun else
2755*4882a593Smuzhiyun misc |= PERF_RECORD_MISC_KERNEL;
2756*4882a593Smuzhiyun }
2757*4882a593Smuzhiyun
2758*4882a593Smuzhiyun if (regs->flags & PERF_EFLAGS_EXACT)
2759*4882a593Smuzhiyun misc |= PERF_RECORD_MISC_EXACT_IP;
2760*4882a593Smuzhiyun
2761*4882a593Smuzhiyun return misc;
2762*4882a593Smuzhiyun }
2763*4882a593Smuzhiyun
2764*4882a593Smuzhiyun void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
2765*4882a593Smuzhiyun {
2766*4882a593Smuzhiyun cap->version = x86_pmu.version;
2767*4882a593Smuzhiyun cap->num_counters_gp = x86_pmu.num_counters;
2768*4882a593Smuzhiyun cap->num_counters_fixed = x86_pmu.num_counters_fixed;
2769*4882a593Smuzhiyun cap->bit_width_gp = x86_pmu.cntval_bits;
2770*4882a593Smuzhiyun cap->bit_width_fixed = x86_pmu.cntval_bits;
2771*4882a593Smuzhiyun cap->events_mask = (unsigned int)x86_pmu.events_maskl;
2772*4882a593Smuzhiyun cap->events_mask_len = x86_pmu.events_mask_len;
2773*4882a593Smuzhiyun }
2774*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(perf_get_x86_pmu_capability);
2775