xref: /OK3568_Linux_fs/kernel/arch/arc/include/asm/perf_event.h (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun /* SPDX-License-Identifier: GPL-2.0-only */
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun  * Linux performance counter support for ARC
4*4882a593Smuzhiyun  *
5*4882a593Smuzhiyun  * Copyright (C) 2014-2015 Synopsys, Inc. (www.synopsys.com)
6*4882a593Smuzhiyun  * Copyright (C) 2011-2013 Synopsys, Inc. (www.synopsys.com)
7*4882a593Smuzhiyun  */
8*4882a593Smuzhiyun 
9*4882a593Smuzhiyun #ifndef __ASM_PERF_EVENT_H
10*4882a593Smuzhiyun #define __ASM_PERF_EVENT_H
11*4882a593Smuzhiyun 
/* Max number of counters that PCT block may ever have */
#define ARC_PERF_MAX_COUNTERS	32

/*
 * CC_* register numbers (build, index, name words 0 and 1).
 * NOTE(review): presumably auxiliary-register addresses of the block that
 * reports h/w condition names -- confirm against the ARC PRM.
 */
#define ARC_REG_CC_BUILD	0xF6
#define ARC_REG_CC_INDEX	0x240
#define ARC_REG_CC_NAME0	0x241
#define ARC_REG_CC_NAME1	0x242

/*
 * PCT_* register numbers: build, count low/high, snapshot low/high, config,
 * control, index, and the INT_* group.
 * NOTE(review): presumably auxiliary-register addresses of the performance
 * counter block -- confirm against the ARC PRM.
 */
#define ARC_REG_PCT_BUILD	0xF5
#define ARC_REG_PCT_COUNTL	0x250
#define ARC_REG_PCT_COUNTH	0x251
#define ARC_REG_PCT_SNAPL	0x252
#define ARC_REG_PCT_SNAPH	0x253
#define ARC_REG_PCT_CONFIG	0x254
#define ARC_REG_PCT_CONTROL	0x255
#define ARC_REG_PCT_INDEX	0x256
#define ARC_REG_PCT_INT_CNTL	0x25C
#define ARC_REG_PCT_INT_CNTH	0x25D
#define ARC_REG_PCT_INT_CTRL	0x25E
#define ARC_REG_PCT_INT_ACT	0x25F

/* Bit flags for the value written to ARC_REG_PCT_CONFIG */
#define ARC_REG_PCT_CONFIG_USER	(1 << 18)	/* count in user mode */
#define ARC_REG_PCT_CONFIG_KERN	(1 << 19)	/* count in kernel mode */

/* Bit flags for the value written to ARC_REG_PCT_CONTROL */
#define ARC_REG_PCT_CONTROL_CC	(1 << 16)	/* clear counts */
#define ARC_REG_PCT_CONTROL_SN	(1 << 17)	/* snapshot */
38*4882a593Smuzhiyun 
/*
 * Bit-field view of one 32-bit word (8 + 2 + 1 + 5 + 8 + 8 bits); going by
 * the name, the value read from ARC_REG_PCT_BUILD.
 *
 * The field order is mirrored under CONFIG_CPU_BIG_ENDIAN so that,
 * presumably, every field overlays the same register bits on either
 * endianness.
 *
 * NOTE(review): this header does not state what the fields mean; presumably
 * v = block version, c = number of counters, r = reserved -- confirm
 * v/s/i/r/c/m against the ARC PRM.
 */
struct arc_reg_pct_build {
#ifdef CONFIG_CPU_BIG_ENDIAN
	unsigned int m:8;
	unsigned int c:8;
	unsigned int r:5;
	unsigned int i:1;
	unsigned int s:2;
	unsigned int v:8;
#else
	unsigned int v:8;
	unsigned int s:2;
	unsigned int i:1;
	unsigned int r:5;
	unsigned int c:8;
	unsigned int m:8;
#endif
};
46*4882a593Smuzhiyun 
/*
 * Bit-field view of one 32-bit word (8 + 8 + 16 bits); going by the name,
 * the value read from ARC_REG_CC_BUILD.
 *
 * The field order is mirrored under CONFIG_CPU_BIG_ENDIAN so that,
 * presumably, every field overlays the same register bits on either
 * endianness.
 *
 * NOTE(review): this header does not state what the fields mean; presumably
 * v = block version, r = reserved, c = number of h/w conditions -- confirm
 * against the ARC PRM.
 */
struct arc_reg_cc_build {
#ifdef CONFIG_CPU_BIG_ENDIAN
	unsigned int c:16;
	unsigned int r:8;
	unsigned int v:8;
#else
	unsigned int v:8;
	unsigned int r:8;
	unsigned int c:16;
#endif
};
54*4882a593Smuzhiyun 
/*
 * ARC-specific event ids, numbered consecutively starting at
 * PERF_COUNT_HW_MAX so that they follow the generic perf hardware ids.
 */
#define PERF_COUNT_ARC_DCLM	(PERF_COUNT_HW_MAX + 0)	/* D-cache Load Miss */
#define PERF_COUNT_ARC_DCSM	(PERF_COUNT_HW_MAX + 1)	/* D-cache Store Miss */
#define PERF_COUNT_ARC_ICM	(PERF_COUNT_HW_MAX + 2)	/* I-cache Miss */
#define PERF_COUNT_ARC_BPOK	(PERF_COUNT_HW_MAX + 3)	/* h/w condition "bpok" */
#define PERF_COUNT_ARC_EDTLB	(PERF_COUNT_HW_MAX + 4)	/* D-TLB Miss */
#define PERF_COUNT_ARC_EITLB	(PERF_COUNT_HW_MAX + 5)	/* I-TLB Miss */
#define PERF_COUNT_ARC_LDC	(PERF_COUNT_HW_MAX + 6)	/* Instr: mem read cached */
#define PERF_COUNT_ARC_STC	(PERF_COUNT_HW_MAX + 7)	/* Instr: mem write cached */

/* One past the last ARC-specific id; keep in step with the list above */
#define PERF_COUNT_ARC_HW_MAX	(PERF_COUNT_HW_MAX + 8)
65*4882a593Smuzhiyun 
/*
 * Some ARC pct quirks:
 *
 * PERF_COUNT_HW_STALLED_CYCLES_BACKEND
 * PERF_COUNT_HW_STALLED_CYCLES_FRONTEND
 *	The ARC 700 can either measure stalls per pipeline stage, or all stalls
 *	combined; for now we assign all stalls to STALLED_CYCLES_BACKEND
 *	and all pipeline flushes (e.g. caused by mispredicts, etc.) to
 *	STALLED_CYCLES_FRONTEND.
 *
 *	We could start multiple performance counters and combine everything
 *	afterwards, but that would complicate things.
 *
 *	Note that I-cache (I$) misses aren't counted by either of the two!
 */
81*4882a593Smuzhiyun 
82*4882a593Smuzhiyun /*
83*4882a593Smuzhiyun  * ARC PCT has hardware conditions with fixed "names" but variable "indexes"
84*4882a593Smuzhiyun  * (based on a specific RTL build)
85*4882a593Smuzhiyun  * Below is the static map between perf generic/arc specific event_id and
86*4882a593Smuzhiyun  * h/w condition names.
87*4882a593Smuzhiyun  * At the time of probe, we loop thru each index and find it's name to
88*4882a593Smuzhiyun  * complete the mapping of perf event_id to h/w index as latter is needed
89*4882a593Smuzhiyun  * to program the counter really
90*4882a593Smuzhiyun  */
91*4882a593Smuzhiyun static const char * const arc_pmu_ev_hw_map[] = {
92*4882a593Smuzhiyun 	/* count cycles */
93*4882a593Smuzhiyun 	[PERF_COUNT_HW_CPU_CYCLES] = "crun",
94*4882a593Smuzhiyun 	[PERF_COUNT_HW_REF_CPU_CYCLES] = "crun",
95*4882a593Smuzhiyun 	[PERF_COUNT_HW_BUS_CYCLES] = "crun",
96*4882a593Smuzhiyun 
97*4882a593Smuzhiyun 	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = "bflush",
98*4882a593Smuzhiyun 	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = "bstall",
99*4882a593Smuzhiyun 
100*4882a593Smuzhiyun 	/* counts condition */
101*4882a593Smuzhiyun 	[PERF_COUNT_HW_INSTRUCTIONS] = "iall",
102*4882a593Smuzhiyun 	/* All jump instructions that are taken */
103*4882a593Smuzhiyun 	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmptak",
104*4882a593Smuzhiyun #ifdef CONFIG_ISA_ARCV2
105*4882a593Smuzhiyun 	[PERF_COUNT_HW_BRANCH_MISSES] = "bpmp",
106*4882a593Smuzhiyun #else
107*4882a593Smuzhiyun 	[PERF_COUNT_ARC_BPOK]         = "bpok",	  /* NP-NT, PT-T, PNT-NT */
108*4882a593Smuzhiyun 	[PERF_COUNT_HW_BRANCH_MISSES] = "bpfail", /* NP-T, PT-NT, PNT-T */
109*4882a593Smuzhiyun #endif
110*4882a593Smuzhiyun 	[PERF_COUNT_ARC_LDC] = "imemrdc",	/* Instr: mem read cached */
111*4882a593Smuzhiyun 	[PERF_COUNT_ARC_STC] = "imemwrc",	/* Instr: mem write cached */
112*4882a593Smuzhiyun 
113*4882a593Smuzhiyun 	[PERF_COUNT_ARC_DCLM] = "dclm",		/* D-cache Load Miss */
114*4882a593Smuzhiyun 	[PERF_COUNT_ARC_DCSM] = "dcsm",		/* D-cache Store Miss */
115*4882a593Smuzhiyun 	[PERF_COUNT_ARC_ICM] = "icm",		/* I-cache Miss */
116*4882a593Smuzhiyun 	[PERF_COUNT_ARC_EDTLB] = "edtlb",	/* D-TLB Miss */
117*4882a593Smuzhiyun 	[PERF_COUNT_ARC_EITLB] = "eitlb",	/* I-TLB Miss */
118*4882a593Smuzhiyun 
119*4882a593Smuzhiyun 	[PERF_COUNT_HW_CACHE_REFERENCES] = "imemrdc",	/* Instr: mem read cached */
120*4882a593Smuzhiyun 	[PERF_COUNT_HW_CACHE_MISSES] = "dclm",		/* D-cache Load Miss */
121*4882a593Smuzhiyun };
122*4882a593Smuzhiyun 
123*4882a593Smuzhiyun #define C(_x)			PERF_COUNT_HW_CACHE_##_x
124*4882a593Smuzhiyun #define CACHE_OP_UNSUPPORTED	0xffff
125*4882a593Smuzhiyun 
126*4882a593Smuzhiyun static const unsigned arc_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
127*4882a593Smuzhiyun 	[C(L1D)] = {
128*4882a593Smuzhiyun 		[C(OP_READ)] = {
129*4882a593Smuzhiyun 			[C(RESULT_ACCESS)]	= PERF_COUNT_ARC_LDC,
130*4882a593Smuzhiyun 			[C(RESULT_MISS)]	= PERF_COUNT_ARC_DCLM,
131*4882a593Smuzhiyun 		},
132*4882a593Smuzhiyun 		[C(OP_WRITE)] = {
133*4882a593Smuzhiyun 			[C(RESULT_ACCESS)]	= PERF_COUNT_ARC_STC,
134*4882a593Smuzhiyun 			[C(RESULT_MISS)]	= PERF_COUNT_ARC_DCSM,
135*4882a593Smuzhiyun 		},
136*4882a593Smuzhiyun 		[C(OP_PREFETCH)] = {
137*4882a593Smuzhiyun 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
138*4882a593Smuzhiyun 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
139*4882a593Smuzhiyun 		},
140*4882a593Smuzhiyun 	},
141*4882a593Smuzhiyun 	[C(L1I)] = {
142*4882a593Smuzhiyun 		[C(OP_READ)] = {
143*4882a593Smuzhiyun 			[C(RESULT_ACCESS)]	= PERF_COUNT_HW_INSTRUCTIONS,
144*4882a593Smuzhiyun 			[C(RESULT_MISS)]	= PERF_COUNT_ARC_ICM,
145*4882a593Smuzhiyun 		},
146*4882a593Smuzhiyun 		[C(OP_WRITE)] = {
147*4882a593Smuzhiyun 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
148*4882a593Smuzhiyun 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
149*4882a593Smuzhiyun 		},
150*4882a593Smuzhiyun 		[C(OP_PREFETCH)] = {
151*4882a593Smuzhiyun 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
152*4882a593Smuzhiyun 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
153*4882a593Smuzhiyun 		},
154*4882a593Smuzhiyun 	},
155*4882a593Smuzhiyun 	[C(LL)] = {
156*4882a593Smuzhiyun 		[C(OP_READ)] = {
157*4882a593Smuzhiyun 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
158*4882a593Smuzhiyun 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
159*4882a593Smuzhiyun 		},
160*4882a593Smuzhiyun 		[C(OP_WRITE)] = {
161*4882a593Smuzhiyun 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
162*4882a593Smuzhiyun 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
163*4882a593Smuzhiyun 		},
164*4882a593Smuzhiyun 		[C(OP_PREFETCH)] = {
165*4882a593Smuzhiyun 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
166*4882a593Smuzhiyun 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
167*4882a593Smuzhiyun 		},
168*4882a593Smuzhiyun 	},
169*4882a593Smuzhiyun 	[C(DTLB)] = {
170*4882a593Smuzhiyun 		[C(OP_READ)] = {
171*4882a593Smuzhiyun 			[C(RESULT_ACCESS)]	= PERF_COUNT_ARC_LDC,
172*4882a593Smuzhiyun 			[C(RESULT_MISS)]	= PERF_COUNT_ARC_EDTLB,
173*4882a593Smuzhiyun 		},
174*4882a593Smuzhiyun 			/* DTLB LD/ST Miss not segregated by h/w*/
175*4882a593Smuzhiyun 		[C(OP_WRITE)] = {
176*4882a593Smuzhiyun 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
177*4882a593Smuzhiyun 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
178*4882a593Smuzhiyun 		},
179*4882a593Smuzhiyun 		[C(OP_PREFETCH)] = {
180*4882a593Smuzhiyun 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
181*4882a593Smuzhiyun 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
182*4882a593Smuzhiyun 		},
183*4882a593Smuzhiyun 	},
184*4882a593Smuzhiyun 	[C(ITLB)] = {
185*4882a593Smuzhiyun 		[C(OP_READ)] = {
186*4882a593Smuzhiyun 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
187*4882a593Smuzhiyun 			[C(RESULT_MISS)]	= PERF_COUNT_ARC_EITLB,
188*4882a593Smuzhiyun 		},
189*4882a593Smuzhiyun 		[C(OP_WRITE)] = {
190*4882a593Smuzhiyun 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
191*4882a593Smuzhiyun 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
192*4882a593Smuzhiyun 		},
193*4882a593Smuzhiyun 		[C(OP_PREFETCH)] = {
194*4882a593Smuzhiyun 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
195*4882a593Smuzhiyun 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
196*4882a593Smuzhiyun 		},
197*4882a593Smuzhiyun 	},
198*4882a593Smuzhiyun 	[C(BPU)] = {
199*4882a593Smuzhiyun 		[C(OP_READ)] = {
200*4882a593Smuzhiyun 			[C(RESULT_ACCESS)] = PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
201*4882a593Smuzhiyun 			[C(RESULT_MISS)]	= PERF_COUNT_HW_BRANCH_MISSES,
202*4882a593Smuzhiyun 		},
203*4882a593Smuzhiyun 		[C(OP_WRITE)] = {
204*4882a593Smuzhiyun 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
205*4882a593Smuzhiyun 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
206*4882a593Smuzhiyun 		},
207*4882a593Smuzhiyun 		[C(OP_PREFETCH)] = {
208*4882a593Smuzhiyun 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
209*4882a593Smuzhiyun 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
210*4882a593Smuzhiyun 		},
211*4882a593Smuzhiyun 	},
212*4882a593Smuzhiyun 	[C(NODE)] = {
213*4882a593Smuzhiyun 		[C(OP_READ)] = {
214*4882a593Smuzhiyun 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
215*4882a593Smuzhiyun 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
216*4882a593Smuzhiyun 		},
217*4882a593Smuzhiyun 		[C(OP_WRITE)] = {
218*4882a593Smuzhiyun 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
219*4882a593Smuzhiyun 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
220*4882a593Smuzhiyun 		},
221*4882a593Smuzhiyun 		[C(OP_PREFETCH)] = {
222*4882a593Smuzhiyun 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
223*4882a593Smuzhiyun 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
224*4882a593Smuzhiyun 		},
225*4882a593Smuzhiyun 	},
226*4882a593Smuzhiyun };
227*4882a593Smuzhiyun 
228*4882a593Smuzhiyun #endif /* __ASM_PERF_EVENT_H */
229