1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-or-later
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun * Performance counter support for e500 family processors.
4*4882a593Smuzhiyun *
5*4882a593Smuzhiyun * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
6*4882a593Smuzhiyun * Copyright 2010 Freescale Semiconductor, Inc.
7*4882a593Smuzhiyun */
8*4882a593Smuzhiyun #include <linux/string.h>
9*4882a593Smuzhiyun #include <linux/perf_event.h>
10*4882a593Smuzhiyun #include <asm/reg.h>
11*4882a593Smuzhiyun #include <asm/cputable.h>
12*4882a593Smuzhiyun
13*4882a593Smuzhiyun /*
14*4882a593Smuzhiyun * Map of generic hardware event types to hardware events
15*4882a593Smuzhiyun * Zero if unsupported
16*4882a593Smuzhiyun */
17*4882a593Smuzhiyun static int e500_generic_events[] = {
18*4882a593Smuzhiyun [PERF_COUNT_HW_CPU_CYCLES] = 1,
19*4882a593Smuzhiyun [PERF_COUNT_HW_INSTRUCTIONS] = 2,
20*4882a593Smuzhiyun [PERF_COUNT_HW_CACHE_MISSES] = 41, /* Data L1 cache reloads */
21*4882a593Smuzhiyun [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 12,
22*4882a593Smuzhiyun [PERF_COUNT_HW_BRANCH_MISSES] = 15,
23*4882a593Smuzhiyun [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 18,
24*4882a593Smuzhiyun [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 19,
25*4882a593Smuzhiyun };
26*4882a593Smuzhiyun
27*4882a593Smuzhiyun #define C(x) PERF_COUNT_HW_CACHE_##x
28*4882a593Smuzhiyun
29*4882a593Smuzhiyun /*
30*4882a593Smuzhiyun * Table of generalized cache-related events.
31*4882a593Smuzhiyun * 0 means not supported, -1 means nonsensical, other values
32*4882a593Smuzhiyun * are event codes.
33*4882a593Smuzhiyun */
34*4882a593Smuzhiyun static int e500_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
35*4882a593Smuzhiyun /*
36*4882a593Smuzhiyun * D-cache misses are not split into read/write/prefetch;
37*4882a593Smuzhiyun * use raw event 41.
38*4882a593Smuzhiyun */
39*4882a593Smuzhiyun [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
40*4882a593Smuzhiyun [C(OP_READ)] = { 27, 0 },
41*4882a593Smuzhiyun [C(OP_WRITE)] = { 28, 0 },
42*4882a593Smuzhiyun [C(OP_PREFETCH)] = { 29, 0 },
43*4882a593Smuzhiyun },
44*4882a593Smuzhiyun [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */
45*4882a593Smuzhiyun [C(OP_READ)] = { 2, 60 },
46*4882a593Smuzhiyun [C(OP_WRITE)] = { -1, -1 },
47*4882a593Smuzhiyun [C(OP_PREFETCH)] = { 0, 0 },
48*4882a593Smuzhiyun },
49*4882a593Smuzhiyun /*
50*4882a593Smuzhiyun * Assuming LL means L2, it's not a good match for this model.
51*4882a593Smuzhiyun * It allocates only on L1 castout or explicit prefetch, and
52*4882a593Smuzhiyun * does not have separate read/write events (but it does have
53*4882a593Smuzhiyun * separate instruction/data events).
54*4882a593Smuzhiyun */
55*4882a593Smuzhiyun [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */
56*4882a593Smuzhiyun [C(OP_READ)] = { 0, 0 },
57*4882a593Smuzhiyun [C(OP_WRITE)] = { 0, 0 },
58*4882a593Smuzhiyun [C(OP_PREFETCH)] = { 0, 0 },
59*4882a593Smuzhiyun },
60*4882a593Smuzhiyun /*
61*4882a593Smuzhiyun * There are data/instruction MMU misses, but that's a miss on
62*4882a593Smuzhiyun * the chip's internal level-one TLB which is probably not
63*4882a593Smuzhiyun * what the user wants. Instead, unified level-two TLB misses
64*4882a593Smuzhiyun * are reported here.
65*4882a593Smuzhiyun */
66*4882a593Smuzhiyun [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */
67*4882a593Smuzhiyun [C(OP_READ)] = { 26, 66 },
68*4882a593Smuzhiyun [C(OP_WRITE)] = { -1, -1 },
69*4882a593Smuzhiyun [C(OP_PREFETCH)] = { -1, -1 },
70*4882a593Smuzhiyun },
71*4882a593Smuzhiyun [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */
72*4882a593Smuzhiyun [C(OP_READ)] = { 12, 15 },
73*4882a593Smuzhiyun [C(OP_WRITE)] = { -1, -1 },
74*4882a593Smuzhiyun [C(OP_PREFETCH)] = { -1, -1 },
75*4882a593Smuzhiyun },
76*4882a593Smuzhiyun [C(NODE)] = { /* RESULT_ACCESS RESULT_MISS */
77*4882a593Smuzhiyun [C(OP_READ)] = { -1, -1 },
78*4882a593Smuzhiyun [C(OP_WRITE)] = { -1, -1 },
79*4882a593Smuzhiyun [C(OP_PREFETCH)] = { -1, -1 },
80*4882a593Smuzhiyun },
81*4882a593Smuzhiyun };
82*4882a593Smuzhiyun
83*4882a593Smuzhiyun static int num_events = 128;
84*4882a593Smuzhiyun
85*4882a593Smuzhiyun /* Upper half of event id is PMLCb, for threshold events */
e500_xlate_event(u64 event_id)86*4882a593Smuzhiyun static u64 e500_xlate_event(u64 event_id)
87*4882a593Smuzhiyun {
88*4882a593Smuzhiyun u32 event_low = (u32)event_id;
89*4882a593Smuzhiyun u64 ret;
90*4882a593Smuzhiyun
91*4882a593Smuzhiyun if (event_low >= num_events)
92*4882a593Smuzhiyun return 0;
93*4882a593Smuzhiyun
94*4882a593Smuzhiyun ret = FSL_EMB_EVENT_VALID;
95*4882a593Smuzhiyun
96*4882a593Smuzhiyun if (event_low >= 76 && event_low <= 81) {
97*4882a593Smuzhiyun ret |= FSL_EMB_EVENT_RESTRICTED;
98*4882a593Smuzhiyun ret |= event_id &
99*4882a593Smuzhiyun (FSL_EMB_EVENT_THRESHMUL | FSL_EMB_EVENT_THRESH);
100*4882a593Smuzhiyun } else if (event_id &
101*4882a593Smuzhiyun (FSL_EMB_EVENT_THRESHMUL | FSL_EMB_EVENT_THRESH)) {
102*4882a593Smuzhiyun /* Threshold requested on non-threshold event */
103*4882a593Smuzhiyun return 0;
104*4882a593Smuzhiyun }
105*4882a593Smuzhiyun
106*4882a593Smuzhiyun return ret;
107*4882a593Smuzhiyun }
108*4882a593Smuzhiyun
109*4882a593Smuzhiyun static struct fsl_emb_pmu e500_pmu = {
110*4882a593Smuzhiyun .name = "e500 family",
111*4882a593Smuzhiyun .n_counter = 4,
112*4882a593Smuzhiyun .n_restricted = 2,
113*4882a593Smuzhiyun .xlate_event = e500_xlate_event,
114*4882a593Smuzhiyun .n_generic = ARRAY_SIZE(e500_generic_events),
115*4882a593Smuzhiyun .generic_events = e500_generic_events,
116*4882a593Smuzhiyun .cache_events = &e500_cache_events,
117*4882a593Smuzhiyun };
118*4882a593Smuzhiyun
init_e500_pmu(void)119*4882a593Smuzhiyun static int init_e500_pmu(void)
120*4882a593Smuzhiyun {
121*4882a593Smuzhiyun if (!cur_cpu_spec->oprofile_cpu_type)
122*4882a593Smuzhiyun return -ENODEV;
123*4882a593Smuzhiyun
124*4882a593Smuzhiyun if (!strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/e500mc"))
125*4882a593Smuzhiyun num_events = 256;
126*4882a593Smuzhiyun else if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/e500"))
127*4882a593Smuzhiyun return -ENODEV;
128*4882a593Smuzhiyun
129*4882a593Smuzhiyun return register_fsl_emb_pmu(&e500_pmu);
130*4882a593Smuzhiyun }
131*4882a593Smuzhiyun
132*4882a593Smuzhiyun early_initcall(init_e500_pmu);
133