1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0
2*4882a593Smuzhiyun /* Driver for Intel Xeon Phi "Knights Corner" PMU */
3*4882a593Smuzhiyun
4*4882a593Smuzhiyun #include <linux/perf_event.h>
5*4882a593Smuzhiyun #include <linux/types.h>
6*4882a593Smuzhiyun
7*4882a593Smuzhiyun #include <asm/hardirq.h>
8*4882a593Smuzhiyun
9*4882a593Smuzhiyun #include "../perf_event.h"
10*4882a593Smuzhiyun
11*4882a593Smuzhiyun static const u64 knc_perfmon_event_map[] =
12*4882a593Smuzhiyun {
13*4882a593Smuzhiyun [PERF_COUNT_HW_CPU_CYCLES] = 0x002a,
14*4882a593Smuzhiyun [PERF_COUNT_HW_INSTRUCTIONS] = 0x0016,
15*4882a593Smuzhiyun [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0028,
16*4882a593Smuzhiyun [PERF_COUNT_HW_CACHE_MISSES] = 0x0029,
17*4882a593Smuzhiyun [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x0012,
18*4882a593Smuzhiyun [PERF_COUNT_HW_BRANCH_MISSES] = 0x002b,
19*4882a593Smuzhiyun };
20*4882a593Smuzhiyun
21*4882a593Smuzhiyun static const u64 __initconst knc_hw_cache_event_ids
22*4882a593Smuzhiyun [PERF_COUNT_HW_CACHE_MAX]
23*4882a593Smuzhiyun [PERF_COUNT_HW_CACHE_OP_MAX]
24*4882a593Smuzhiyun [PERF_COUNT_HW_CACHE_RESULT_MAX] =
25*4882a593Smuzhiyun {
26*4882a593Smuzhiyun [ C(L1D) ] = {
27*4882a593Smuzhiyun [ C(OP_READ) ] = {
28*4882a593Smuzhiyun /* On Xeon Phi event "0" is a valid DATA_READ */
29*4882a593Smuzhiyun /* (L1 Data Cache Reads) Instruction. */
30*4882a593Smuzhiyun /* We code this as ARCH_PERFMON_EVENTSEL_INT as this */
31*4882a593Smuzhiyun /* bit will always be set in x86_pmu_hw_config(). */
32*4882a593Smuzhiyun [ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT,
33*4882a593Smuzhiyun /* DATA_READ */
34*4882a593Smuzhiyun [ C(RESULT_MISS) ] = 0x0003, /* DATA_READ_MISS */
35*4882a593Smuzhiyun },
36*4882a593Smuzhiyun [ C(OP_WRITE) ] = {
37*4882a593Smuzhiyun [ C(RESULT_ACCESS) ] = 0x0001, /* DATA_WRITE */
38*4882a593Smuzhiyun [ C(RESULT_MISS) ] = 0x0004, /* DATA_WRITE_MISS */
39*4882a593Smuzhiyun },
40*4882a593Smuzhiyun [ C(OP_PREFETCH) ] = {
41*4882a593Smuzhiyun [ C(RESULT_ACCESS) ] = 0x0011, /* L1_DATA_PF1 */
42*4882a593Smuzhiyun [ C(RESULT_MISS) ] = 0x001c, /* L1_DATA_PF1_MISS */
43*4882a593Smuzhiyun },
44*4882a593Smuzhiyun },
45*4882a593Smuzhiyun [ C(L1I ) ] = {
46*4882a593Smuzhiyun [ C(OP_READ) ] = {
47*4882a593Smuzhiyun [ C(RESULT_ACCESS) ] = 0x000c, /* CODE_READ */
48*4882a593Smuzhiyun [ C(RESULT_MISS) ] = 0x000e, /* CODE_CACHE_MISS */
49*4882a593Smuzhiyun },
50*4882a593Smuzhiyun [ C(OP_WRITE) ] = {
51*4882a593Smuzhiyun [ C(RESULT_ACCESS) ] = -1,
52*4882a593Smuzhiyun [ C(RESULT_MISS) ] = -1,
53*4882a593Smuzhiyun },
54*4882a593Smuzhiyun [ C(OP_PREFETCH) ] = {
55*4882a593Smuzhiyun [ C(RESULT_ACCESS) ] = 0x0,
56*4882a593Smuzhiyun [ C(RESULT_MISS) ] = 0x0,
57*4882a593Smuzhiyun },
58*4882a593Smuzhiyun },
59*4882a593Smuzhiyun [ C(LL ) ] = {
60*4882a593Smuzhiyun [ C(OP_READ) ] = {
61*4882a593Smuzhiyun [ C(RESULT_ACCESS) ] = 0,
62*4882a593Smuzhiyun [ C(RESULT_MISS) ] = 0x10cb, /* L2_READ_MISS */
63*4882a593Smuzhiyun },
64*4882a593Smuzhiyun [ C(OP_WRITE) ] = {
65*4882a593Smuzhiyun [ C(RESULT_ACCESS) ] = 0x10cc, /* L2_WRITE_HIT */
66*4882a593Smuzhiyun [ C(RESULT_MISS) ] = 0,
67*4882a593Smuzhiyun },
68*4882a593Smuzhiyun [ C(OP_PREFETCH) ] = {
69*4882a593Smuzhiyun [ C(RESULT_ACCESS) ] = 0x10fc, /* L2_DATA_PF2 */
70*4882a593Smuzhiyun [ C(RESULT_MISS) ] = 0x10fe, /* L2_DATA_PF2_MISS */
71*4882a593Smuzhiyun },
72*4882a593Smuzhiyun },
73*4882a593Smuzhiyun [ C(DTLB) ] = {
74*4882a593Smuzhiyun [ C(OP_READ) ] = {
75*4882a593Smuzhiyun [ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT,
76*4882a593Smuzhiyun /* DATA_READ */
77*4882a593Smuzhiyun /* see note on L1 OP_READ */
78*4882a593Smuzhiyun [ C(RESULT_MISS) ] = 0x0002, /* DATA_PAGE_WALK */
79*4882a593Smuzhiyun },
80*4882a593Smuzhiyun [ C(OP_WRITE) ] = {
81*4882a593Smuzhiyun [ C(RESULT_ACCESS) ] = 0x0001, /* DATA_WRITE */
82*4882a593Smuzhiyun [ C(RESULT_MISS) ] = 0x0002, /* DATA_PAGE_WALK */
83*4882a593Smuzhiyun },
84*4882a593Smuzhiyun [ C(OP_PREFETCH) ] = {
85*4882a593Smuzhiyun [ C(RESULT_ACCESS) ] = 0x0,
86*4882a593Smuzhiyun [ C(RESULT_MISS) ] = 0x0,
87*4882a593Smuzhiyun },
88*4882a593Smuzhiyun },
89*4882a593Smuzhiyun [ C(ITLB) ] = {
90*4882a593Smuzhiyun [ C(OP_READ) ] = {
91*4882a593Smuzhiyun [ C(RESULT_ACCESS) ] = 0x000c, /* CODE_READ */
92*4882a593Smuzhiyun [ C(RESULT_MISS) ] = 0x000d, /* CODE_PAGE_WALK */
93*4882a593Smuzhiyun },
94*4882a593Smuzhiyun [ C(OP_WRITE) ] = {
95*4882a593Smuzhiyun [ C(RESULT_ACCESS) ] = -1,
96*4882a593Smuzhiyun [ C(RESULT_MISS) ] = -1,
97*4882a593Smuzhiyun },
98*4882a593Smuzhiyun [ C(OP_PREFETCH) ] = {
99*4882a593Smuzhiyun [ C(RESULT_ACCESS) ] = -1,
100*4882a593Smuzhiyun [ C(RESULT_MISS) ] = -1,
101*4882a593Smuzhiyun },
102*4882a593Smuzhiyun },
103*4882a593Smuzhiyun [ C(BPU ) ] = {
104*4882a593Smuzhiyun [ C(OP_READ) ] = {
105*4882a593Smuzhiyun [ C(RESULT_ACCESS) ] = 0x0012, /* BRANCHES */
106*4882a593Smuzhiyun [ C(RESULT_MISS) ] = 0x002b, /* BRANCHES_MISPREDICTED */
107*4882a593Smuzhiyun },
108*4882a593Smuzhiyun [ C(OP_WRITE) ] = {
109*4882a593Smuzhiyun [ C(RESULT_ACCESS) ] = -1,
110*4882a593Smuzhiyun [ C(RESULT_MISS) ] = -1,
111*4882a593Smuzhiyun },
112*4882a593Smuzhiyun [ C(OP_PREFETCH) ] = {
113*4882a593Smuzhiyun [ C(RESULT_ACCESS) ] = -1,
114*4882a593Smuzhiyun [ C(RESULT_MISS) ] = -1,
115*4882a593Smuzhiyun },
116*4882a593Smuzhiyun },
117*4882a593Smuzhiyun };
118*4882a593Smuzhiyun
119*4882a593Smuzhiyun
knc_pmu_event_map(int hw_event)120*4882a593Smuzhiyun static u64 knc_pmu_event_map(int hw_event)
121*4882a593Smuzhiyun {
122*4882a593Smuzhiyun return knc_perfmon_event_map[hw_event];
123*4882a593Smuzhiyun }
124*4882a593Smuzhiyun
125*4882a593Smuzhiyun static struct event_constraint knc_event_constraints[] =
126*4882a593Smuzhiyun {
127*4882a593Smuzhiyun INTEL_EVENT_CONSTRAINT(0xc3, 0x1), /* HWP_L2HIT */
128*4882a593Smuzhiyun INTEL_EVENT_CONSTRAINT(0xc4, 0x1), /* HWP_L2MISS */
129*4882a593Smuzhiyun INTEL_EVENT_CONSTRAINT(0xc8, 0x1), /* L2_READ_HIT_E */
130*4882a593Smuzhiyun INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* L2_READ_HIT_M */
131*4882a593Smuzhiyun INTEL_EVENT_CONSTRAINT(0xca, 0x1), /* L2_READ_HIT_S */
132*4882a593Smuzhiyun INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* L2_READ_MISS */
133*4882a593Smuzhiyun INTEL_EVENT_CONSTRAINT(0xcc, 0x1), /* L2_WRITE_HIT */
134*4882a593Smuzhiyun INTEL_EVENT_CONSTRAINT(0xce, 0x1), /* L2_STRONGLY_ORDERED_STREAMING_VSTORES_MISS */
135*4882a593Smuzhiyun INTEL_EVENT_CONSTRAINT(0xcf, 0x1), /* L2_WEAKLY_ORDERED_STREAMING_VSTORE_MISS */
136*4882a593Smuzhiyun INTEL_EVENT_CONSTRAINT(0xd7, 0x1), /* L2_VICTIM_REQ_WITH_DATA */
137*4882a593Smuzhiyun INTEL_EVENT_CONSTRAINT(0xe3, 0x1), /* SNP_HITM_BUNIT */
138*4882a593Smuzhiyun INTEL_EVENT_CONSTRAINT(0xe6, 0x1), /* SNP_HIT_L2 */
139*4882a593Smuzhiyun INTEL_EVENT_CONSTRAINT(0xe7, 0x1), /* SNP_HITM_L2 */
140*4882a593Smuzhiyun INTEL_EVENT_CONSTRAINT(0xf1, 0x1), /* L2_DATA_READ_MISS_CACHE_FILL */
141*4882a593Smuzhiyun INTEL_EVENT_CONSTRAINT(0xf2, 0x1), /* L2_DATA_WRITE_MISS_CACHE_FILL */
142*4882a593Smuzhiyun INTEL_EVENT_CONSTRAINT(0xf6, 0x1), /* L2_DATA_READ_MISS_MEM_FILL */
143*4882a593Smuzhiyun INTEL_EVENT_CONSTRAINT(0xf7, 0x1), /* L2_DATA_WRITE_MISS_MEM_FILL */
144*4882a593Smuzhiyun INTEL_EVENT_CONSTRAINT(0xfc, 0x1), /* L2_DATA_PF2 */
145*4882a593Smuzhiyun INTEL_EVENT_CONSTRAINT(0xfd, 0x1), /* L2_DATA_PF2_DROP */
146*4882a593Smuzhiyun INTEL_EVENT_CONSTRAINT(0xfe, 0x1), /* L2_DATA_PF2_MISS */
147*4882a593Smuzhiyun INTEL_EVENT_CONSTRAINT(0xff, 0x1), /* L2_DATA_HIT_INFLIGHT_PF2 */
148*4882a593Smuzhiyun EVENT_CONSTRAINT_END
149*4882a593Smuzhiyun };
150*4882a593Smuzhiyun
/* KNC global performance-monitoring MSR numbers used by this driver. */
#define MSR_KNC_IA32_PERF_GLOBAL_STATUS		0x0000002d
#define MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL	0x0000002e
#define MSR_KNC_IA32_PERF_GLOBAL_CTRL		0x0000002f

/* Per-counter enable bits set/cleared in MSR_KNC_IA32_PERF_GLOBAL_CTRL. */
#define KNC_ENABLE_COUNTER0			0x00000001
#define KNC_ENABLE_COUNTER1			0x00000002
157*4882a593Smuzhiyun
knc_pmu_disable_all(void)158*4882a593Smuzhiyun static void knc_pmu_disable_all(void)
159*4882a593Smuzhiyun {
160*4882a593Smuzhiyun u64 val;
161*4882a593Smuzhiyun
162*4882a593Smuzhiyun rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
163*4882a593Smuzhiyun val &= ~(KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1);
164*4882a593Smuzhiyun wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
165*4882a593Smuzhiyun }
166*4882a593Smuzhiyun
knc_pmu_enable_all(int added)167*4882a593Smuzhiyun static void knc_pmu_enable_all(int added)
168*4882a593Smuzhiyun {
169*4882a593Smuzhiyun u64 val;
170*4882a593Smuzhiyun
171*4882a593Smuzhiyun rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
172*4882a593Smuzhiyun val |= (KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1);
173*4882a593Smuzhiyun wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
174*4882a593Smuzhiyun }
175*4882a593Smuzhiyun
176*4882a593Smuzhiyun static inline void
knc_pmu_disable_event(struct perf_event * event)177*4882a593Smuzhiyun knc_pmu_disable_event(struct perf_event *event)
178*4882a593Smuzhiyun {
179*4882a593Smuzhiyun struct hw_perf_event *hwc = &event->hw;
180*4882a593Smuzhiyun u64 val;
181*4882a593Smuzhiyun
182*4882a593Smuzhiyun val = hwc->config;
183*4882a593Smuzhiyun val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
184*4882a593Smuzhiyun
185*4882a593Smuzhiyun (void)wrmsrl_safe(hwc->config_base + hwc->idx, val);
186*4882a593Smuzhiyun }
187*4882a593Smuzhiyun
knc_pmu_enable_event(struct perf_event * event)188*4882a593Smuzhiyun static void knc_pmu_enable_event(struct perf_event *event)
189*4882a593Smuzhiyun {
190*4882a593Smuzhiyun struct hw_perf_event *hwc = &event->hw;
191*4882a593Smuzhiyun u64 val;
192*4882a593Smuzhiyun
193*4882a593Smuzhiyun val = hwc->config;
194*4882a593Smuzhiyun val |= ARCH_PERFMON_EVENTSEL_ENABLE;
195*4882a593Smuzhiyun
196*4882a593Smuzhiyun (void)wrmsrl_safe(hwc->config_base + hwc->idx, val);
197*4882a593Smuzhiyun }
198*4882a593Smuzhiyun
knc_pmu_get_status(void)199*4882a593Smuzhiyun static inline u64 knc_pmu_get_status(void)
200*4882a593Smuzhiyun {
201*4882a593Smuzhiyun u64 status;
202*4882a593Smuzhiyun
203*4882a593Smuzhiyun rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_STATUS, status);
204*4882a593Smuzhiyun
205*4882a593Smuzhiyun return status;
206*4882a593Smuzhiyun }
207*4882a593Smuzhiyun
/*
 * Acknowledge status bits by writing @ack to the global overflow-control
 * MSR.
 */
static inline void knc_pmu_ack_status(u64 ack)
{
	const u64 bits = ack;

	wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL, bits);
}
212*4882a593Smuzhiyun
knc_pmu_handle_irq(struct pt_regs * regs)213*4882a593Smuzhiyun static int knc_pmu_handle_irq(struct pt_regs *regs)
214*4882a593Smuzhiyun {
215*4882a593Smuzhiyun struct perf_sample_data data;
216*4882a593Smuzhiyun struct cpu_hw_events *cpuc;
217*4882a593Smuzhiyun int handled = 0;
218*4882a593Smuzhiyun int bit, loops;
219*4882a593Smuzhiyun u64 status;
220*4882a593Smuzhiyun
221*4882a593Smuzhiyun cpuc = this_cpu_ptr(&cpu_hw_events);
222*4882a593Smuzhiyun
223*4882a593Smuzhiyun knc_pmu_disable_all();
224*4882a593Smuzhiyun
225*4882a593Smuzhiyun status = knc_pmu_get_status();
226*4882a593Smuzhiyun if (!status) {
227*4882a593Smuzhiyun knc_pmu_enable_all(0);
228*4882a593Smuzhiyun return handled;
229*4882a593Smuzhiyun }
230*4882a593Smuzhiyun
231*4882a593Smuzhiyun loops = 0;
232*4882a593Smuzhiyun again:
233*4882a593Smuzhiyun knc_pmu_ack_status(status);
234*4882a593Smuzhiyun if (++loops > 100) {
235*4882a593Smuzhiyun WARN_ONCE(1, "perf: irq loop stuck!\n");
236*4882a593Smuzhiyun perf_event_print_debug();
237*4882a593Smuzhiyun goto done;
238*4882a593Smuzhiyun }
239*4882a593Smuzhiyun
240*4882a593Smuzhiyun inc_irq_stat(apic_perf_irqs);
241*4882a593Smuzhiyun
242*4882a593Smuzhiyun for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
243*4882a593Smuzhiyun struct perf_event *event = cpuc->events[bit];
244*4882a593Smuzhiyun
245*4882a593Smuzhiyun handled++;
246*4882a593Smuzhiyun
247*4882a593Smuzhiyun if (!test_bit(bit, cpuc->active_mask))
248*4882a593Smuzhiyun continue;
249*4882a593Smuzhiyun
250*4882a593Smuzhiyun if (!intel_pmu_save_and_restart(event))
251*4882a593Smuzhiyun continue;
252*4882a593Smuzhiyun
253*4882a593Smuzhiyun perf_sample_data_init(&data, 0, event->hw.last_period);
254*4882a593Smuzhiyun
255*4882a593Smuzhiyun if (perf_event_overflow(event, &data, regs))
256*4882a593Smuzhiyun x86_pmu_stop(event, 0);
257*4882a593Smuzhiyun }
258*4882a593Smuzhiyun
259*4882a593Smuzhiyun /*
260*4882a593Smuzhiyun * Repeat if there is more work to be done:
261*4882a593Smuzhiyun */
262*4882a593Smuzhiyun status = knc_pmu_get_status();
263*4882a593Smuzhiyun if (status)
264*4882a593Smuzhiyun goto again;
265*4882a593Smuzhiyun
266*4882a593Smuzhiyun done:
267*4882a593Smuzhiyun /* Only restore PMU state when it's active. See x86_pmu_disable(). */
268*4882a593Smuzhiyun if (cpuc->enabled)
269*4882a593Smuzhiyun knc_pmu_enable_all(0);
270*4882a593Smuzhiyun
271*4882a593Smuzhiyun return handled;
272*4882a593Smuzhiyun }
273*4882a593Smuzhiyun
274*4882a593Smuzhiyun
275*4882a593Smuzhiyun PMU_FORMAT_ATTR(event, "config:0-7" );
276*4882a593Smuzhiyun PMU_FORMAT_ATTR(umask, "config:8-15" );
277*4882a593Smuzhiyun PMU_FORMAT_ATTR(edge, "config:18" );
278*4882a593Smuzhiyun PMU_FORMAT_ATTR(inv, "config:23" );
279*4882a593Smuzhiyun PMU_FORMAT_ATTR(cmask, "config:24-31" );
280*4882a593Smuzhiyun
281*4882a593Smuzhiyun static struct attribute *intel_knc_formats_attr[] = {
282*4882a593Smuzhiyun &format_attr_event.attr,
283*4882a593Smuzhiyun &format_attr_umask.attr,
284*4882a593Smuzhiyun &format_attr_edge.attr,
285*4882a593Smuzhiyun &format_attr_inv.attr,
286*4882a593Smuzhiyun &format_attr_cmask.attr,
287*4882a593Smuzhiyun NULL,
288*4882a593Smuzhiyun };
289*4882a593Smuzhiyun
290*4882a593Smuzhiyun static const struct x86_pmu knc_pmu __initconst = {
291*4882a593Smuzhiyun .name = "knc",
292*4882a593Smuzhiyun .handle_irq = knc_pmu_handle_irq,
293*4882a593Smuzhiyun .disable_all = knc_pmu_disable_all,
294*4882a593Smuzhiyun .enable_all = knc_pmu_enable_all,
295*4882a593Smuzhiyun .enable = knc_pmu_enable_event,
296*4882a593Smuzhiyun .disable = knc_pmu_disable_event,
297*4882a593Smuzhiyun .hw_config = x86_pmu_hw_config,
298*4882a593Smuzhiyun .schedule_events = x86_schedule_events,
299*4882a593Smuzhiyun .eventsel = MSR_KNC_EVNTSEL0,
300*4882a593Smuzhiyun .perfctr = MSR_KNC_PERFCTR0,
301*4882a593Smuzhiyun .event_map = knc_pmu_event_map,
302*4882a593Smuzhiyun .max_events = ARRAY_SIZE(knc_perfmon_event_map),
303*4882a593Smuzhiyun .apic = 1,
304*4882a593Smuzhiyun .max_period = (1ULL << 39) - 1,
305*4882a593Smuzhiyun .version = 0,
306*4882a593Smuzhiyun .num_counters = 2,
307*4882a593Smuzhiyun .cntval_bits = 40,
308*4882a593Smuzhiyun .cntval_mask = (1ULL << 40) - 1,
309*4882a593Smuzhiyun .get_event_constraints = x86_get_event_constraints,
310*4882a593Smuzhiyun .event_constraints = knc_event_constraints,
311*4882a593Smuzhiyun .format_attrs = intel_knc_formats_attr,
312*4882a593Smuzhiyun };
313*4882a593Smuzhiyun
knc_pmu_init(void)314*4882a593Smuzhiyun __init int knc_pmu_init(void)
315*4882a593Smuzhiyun {
316*4882a593Smuzhiyun x86_pmu = knc_pmu;
317*4882a593Smuzhiyun
318*4882a593Smuzhiyun memcpy(hw_cache_event_ids, knc_hw_cache_event_ids,
319*4882a593Smuzhiyun sizeof(hw_cache_event_ids));
320*4882a593Smuzhiyun
321*4882a593Smuzhiyun return 0;
322*4882a593Smuzhiyun }
323