xref: /OK3568_Linux_fs/kernel/samples/bpf/cpustat_kern.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0
2*4882a593Smuzhiyun 
3*4882a593Smuzhiyun #include <linux/version.h>
4*4882a593Smuzhiyun #include <linux/ptrace.h>
5*4882a593Smuzhiyun #include <uapi/linux/bpf.h>
6*4882a593Smuzhiyun #include <bpf/bpf_helpers.h>
7*4882a593Smuzhiyun 
/*
 * The CPU number, cstate number and pstate number are based
 * on the 96boards Hikey with octa-core CA53 CPUs.
 *
 * Every CPU has three idle states for cstate:
 *   WFI, CPU_OFF, CLUSTER_OFF
 *
 * Every CPU has 5 operating points:
 *   208MHz, 432MHz, 729MHz, 960MHz, 1200MHz
 *
 * This code is based on these assumptions; other platforms
 * need to adjust these definitions.
 */
/* Number of CPUs tracked; every map reserves one group of slots per CPU. */
#define MAX_CPU			8
/* Number of operating points accounted per CPU. */
#define MAX_PSTATE_ENTRIES	5
/* Number of idle states accounted per CPU. */
#define MAX_CSTATE_ENTRIES	3

/*
 * Known operating-point frequencies. The position of a frequency in this
 * table is used as its pstate index.
 */
static int cpu_opps[] = {
	208000,
	432000,
	729000,
	960000,
	1200000,
};
26*4882a593Smuzhiyun 
/*
 * The my_map structure records the cstate and pstate index and
 * timestamp (Idx, Ts). When a new event arrives, the pair is updated
 * with the new state index and timestamp (Idx`, Ts`).
 *
 * Based on (Idx, Ts) and (Idx`, Ts`) we can calculate the time
 * interval for the previous state: Duration(Idx) = Ts` - Ts.
 *
 * Every CPU has one array, laid out as below, for recording the state
 * index and timestamp; cstate and pstate are recorded separately:
 *
 * +--------------------------+
 * | cstate timestamp         |
 * +--------------------------+
 * | cstate index             |
 * +--------------------------+
 * | pstate timestamp         |
 * +--------------------------+
 * | pstate index             |
 * +--------------------------+
 */
48*4882a593Smuzhiyun #define MAP_OFF_CSTATE_TIME	0
49*4882a593Smuzhiyun #define MAP_OFF_CSTATE_IDX	1
50*4882a593Smuzhiyun #define MAP_OFF_PSTATE_TIME	2
51*4882a593Smuzhiyun #define MAP_OFF_PSTATE_IDX	3
52*4882a593Smuzhiyun #define MAP_OFF_NUM		4
53*4882a593Smuzhiyun 
54*4882a593Smuzhiyun struct {
55*4882a593Smuzhiyun 	__uint(type, BPF_MAP_TYPE_ARRAY);
56*4882a593Smuzhiyun 	__type(key, u32);
57*4882a593Smuzhiyun 	__type(value, u64);
58*4882a593Smuzhiyun 	__uint(max_entries, MAX_CPU * MAP_OFF_NUM);
59*4882a593Smuzhiyun } my_map SEC(".maps");
60*4882a593Smuzhiyun 
61*4882a593Smuzhiyun /* cstate_duration records duration time for every idle state per CPU */
62*4882a593Smuzhiyun struct {
63*4882a593Smuzhiyun 	__uint(type, BPF_MAP_TYPE_ARRAY);
64*4882a593Smuzhiyun 	__type(key, u32);
65*4882a593Smuzhiyun 	__type(value, u64);
66*4882a593Smuzhiyun 	__uint(max_entries, MAX_CPU * MAX_CSTATE_ENTRIES);
67*4882a593Smuzhiyun } cstate_duration SEC(".maps");
68*4882a593Smuzhiyun 
69*4882a593Smuzhiyun /* pstate_duration records duration time for every operating point per CPU */
70*4882a593Smuzhiyun struct {
71*4882a593Smuzhiyun 	__uint(type, BPF_MAP_TYPE_ARRAY);
72*4882a593Smuzhiyun 	__type(key, u32);
73*4882a593Smuzhiyun 	__type(value, u64);
74*4882a593Smuzhiyun 	__uint(max_entries, MAX_CPU * MAX_PSTATE_ENTRIES);
75*4882a593Smuzhiyun } pstate_duration SEC(".maps");
76*4882a593Smuzhiyun 
77*4882a593Smuzhiyun /*
78*4882a593Smuzhiyun  * The trace events for cpu_idle and cpu_frequency are taken from:
79*4882a593Smuzhiyun  * /sys/kernel/debug/tracing/events/power/cpu_idle/format
80*4882a593Smuzhiyun  * /sys/kernel/debug/tracing/events/power/cpu_frequency/format
81*4882a593Smuzhiyun  *
82*4882a593Smuzhiyun  * These two events have same format, so define one common structure.
83*4882a593Smuzhiyun  */
84*4882a593Smuzhiyun struct cpu_args {
85*4882a593Smuzhiyun 	u64 pad;
86*4882a593Smuzhiyun 	u32 state;
87*4882a593Smuzhiyun 	u32 cpu_id;
88*4882a593Smuzhiyun };
89*4882a593Smuzhiyun 
90*4882a593Smuzhiyun /* calculate pstate index, returns MAX_PSTATE_ENTRIES for failure */
find_cpu_pstate_idx(u32 frequency)91*4882a593Smuzhiyun static u32 find_cpu_pstate_idx(u32 frequency)
92*4882a593Smuzhiyun {
93*4882a593Smuzhiyun 	u32 i;
94*4882a593Smuzhiyun 
95*4882a593Smuzhiyun 	for (i = 0; i < sizeof(cpu_opps) / sizeof(u32); i++) {
96*4882a593Smuzhiyun 		if (frequency == cpu_opps[i])
97*4882a593Smuzhiyun 			return i;
98*4882a593Smuzhiyun 	}
99*4882a593Smuzhiyun 
100*4882a593Smuzhiyun 	return i;
101*4882a593Smuzhiyun }
102*4882a593Smuzhiyun 
103*4882a593Smuzhiyun SEC("tracepoint/power/cpu_idle")
bpf_prog1(struct cpu_args * ctx)104*4882a593Smuzhiyun int bpf_prog1(struct cpu_args *ctx)
105*4882a593Smuzhiyun {
106*4882a593Smuzhiyun 	u64 *cts, *pts, *cstate, *pstate, prev_state, cur_ts, delta;
107*4882a593Smuzhiyun 	u32 key, cpu, pstate_idx;
108*4882a593Smuzhiyun 	u64 *val;
109*4882a593Smuzhiyun 
110*4882a593Smuzhiyun 	if (ctx->cpu_id > MAX_CPU)
111*4882a593Smuzhiyun 		return 0;
112*4882a593Smuzhiyun 
113*4882a593Smuzhiyun 	cpu = ctx->cpu_id;
114*4882a593Smuzhiyun 
115*4882a593Smuzhiyun 	key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_TIME;
116*4882a593Smuzhiyun 	cts = bpf_map_lookup_elem(&my_map, &key);
117*4882a593Smuzhiyun 	if (!cts)
118*4882a593Smuzhiyun 		return 0;
119*4882a593Smuzhiyun 
120*4882a593Smuzhiyun 	key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_IDX;
121*4882a593Smuzhiyun 	cstate = bpf_map_lookup_elem(&my_map, &key);
122*4882a593Smuzhiyun 	if (!cstate)
123*4882a593Smuzhiyun 		return 0;
124*4882a593Smuzhiyun 
125*4882a593Smuzhiyun 	key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_TIME;
126*4882a593Smuzhiyun 	pts = bpf_map_lookup_elem(&my_map, &key);
127*4882a593Smuzhiyun 	if (!pts)
128*4882a593Smuzhiyun 		return 0;
129*4882a593Smuzhiyun 
130*4882a593Smuzhiyun 	key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_IDX;
131*4882a593Smuzhiyun 	pstate = bpf_map_lookup_elem(&my_map, &key);
132*4882a593Smuzhiyun 	if (!pstate)
133*4882a593Smuzhiyun 		return 0;
134*4882a593Smuzhiyun 
135*4882a593Smuzhiyun 	prev_state = *cstate;
136*4882a593Smuzhiyun 	*cstate = ctx->state;
137*4882a593Smuzhiyun 
138*4882a593Smuzhiyun 	if (!*cts) {
139*4882a593Smuzhiyun 		*cts = bpf_ktime_get_ns();
140*4882a593Smuzhiyun 		return 0;
141*4882a593Smuzhiyun 	}
142*4882a593Smuzhiyun 
143*4882a593Smuzhiyun 	cur_ts = bpf_ktime_get_ns();
144*4882a593Smuzhiyun 	delta = cur_ts - *cts;
145*4882a593Smuzhiyun 	*cts = cur_ts;
146*4882a593Smuzhiyun 
147*4882a593Smuzhiyun 	/*
148*4882a593Smuzhiyun 	 * When state doesn't equal to (u32)-1, the cpu will enter
149*4882a593Smuzhiyun 	 * one idle state; for this case we need to record interval
150*4882a593Smuzhiyun 	 * for the pstate.
151*4882a593Smuzhiyun 	 *
152*4882a593Smuzhiyun 	 *                 OPP2
153*4882a593Smuzhiyun 	 *            +---------------------+
154*4882a593Smuzhiyun 	 *     OPP1   |                     |
155*4882a593Smuzhiyun 	 *   ---------+                     |
156*4882a593Smuzhiyun 	 *                                  |  Idle state
157*4882a593Smuzhiyun 	 *                                  +---------------
158*4882a593Smuzhiyun 	 *
159*4882a593Smuzhiyun 	 *            |<- pstate duration ->|
160*4882a593Smuzhiyun 	 *            ^                     ^
161*4882a593Smuzhiyun 	 *           pts                  cur_ts
162*4882a593Smuzhiyun 	 */
163*4882a593Smuzhiyun 	if (ctx->state != (u32)-1) {
164*4882a593Smuzhiyun 
165*4882a593Smuzhiyun 		/* record pstate after have first cpu_frequency event */
166*4882a593Smuzhiyun 		if (!*pts)
167*4882a593Smuzhiyun 			return 0;
168*4882a593Smuzhiyun 
169*4882a593Smuzhiyun 		delta = cur_ts - *pts;
170*4882a593Smuzhiyun 
171*4882a593Smuzhiyun 		pstate_idx = find_cpu_pstate_idx(*pstate);
172*4882a593Smuzhiyun 		if (pstate_idx >= MAX_PSTATE_ENTRIES)
173*4882a593Smuzhiyun 			return 0;
174*4882a593Smuzhiyun 
175*4882a593Smuzhiyun 		key = cpu * MAX_PSTATE_ENTRIES + pstate_idx;
176*4882a593Smuzhiyun 		val = bpf_map_lookup_elem(&pstate_duration, &key);
177*4882a593Smuzhiyun 		if (val)
178*4882a593Smuzhiyun 			__sync_fetch_and_add((long *)val, delta);
179*4882a593Smuzhiyun 
180*4882a593Smuzhiyun 	/*
181*4882a593Smuzhiyun 	 * When state equal to (u32)-1, the cpu just exits from one
182*4882a593Smuzhiyun 	 * specific idle state; for this case we need to record
183*4882a593Smuzhiyun 	 * interval for the pstate.
184*4882a593Smuzhiyun 	 *
185*4882a593Smuzhiyun 	 *       OPP2
186*4882a593Smuzhiyun 	 *   -----------+
187*4882a593Smuzhiyun 	 *              |                          OPP1
188*4882a593Smuzhiyun 	 *              |                     +-----------
189*4882a593Smuzhiyun 	 *              |     Idle state      |
190*4882a593Smuzhiyun 	 *              +---------------------+
191*4882a593Smuzhiyun 	 *
192*4882a593Smuzhiyun 	 *              |<- cstate duration ->|
193*4882a593Smuzhiyun 	 *              ^                     ^
194*4882a593Smuzhiyun 	 *             cts                  cur_ts
195*4882a593Smuzhiyun 	 */
196*4882a593Smuzhiyun 	} else {
197*4882a593Smuzhiyun 
198*4882a593Smuzhiyun 		key = cpu * MAX_CSTATE_ENTRIES + prev_state;
199*4882a593Smuzhiyun 		val = bpf_map_lookup_elem(&cstate_duration, &key);
200*4882a593Smuzhiyun 		if (val)
201*4882a593Smuzhiyun 			__sync_fetch_and_add((long *)val, delta);
202*4882a593Smuzhiyun 	}
203*4882a593Smuzhiyun 
204*4882a593Smuzhiyun 	/* Update timestamp for pstate as new start time */
205*4882a593Smuzhiyun 	if (*pts)
206*4882a593Smuzhiyun 		*pts = cur_ts;
207*4882a593Smuzhiyun 
208*4882a593Smuzhiyun 	return 0;
209*4882a593Smuzhiyun }
210*4882a593Smuzhiyun 
211*4882a593Smuzhiyun SEC("tracepoint/power/cpu_frequency")
bpf_prog2(struct cpu_args * ctx)212*4882a593Smuzhiyun int bpf_prog2(struct cpu_args *ctx)
213*4882a593Smuzhiyun {
214*4882a593Smuzhiyun 	u64 *pts, *cstate, *pstate, prev_state, cur_ts, delta;
215*4882a593Smuzhiyun 	u32 key, cpu, pstate_idx;
216*4882a593Smuzhiyun 	u64 *val;
217*4882a593Smuzhiyun 
218*4882a593Smuzhiyun 	cpu = ctx->cpu_id;
219*4882a593Smuzhiyun 
220*4882a593Smuzhiyun 	key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_TIME;
221*4882a593Smuzhiyun 	pts = bpf_map_lookup_elem(&my_map, &key);
222*4882a593Smuzhiyun 	if (!pts)
223*4882a593Smuzhiyun 		return 0;
224*4882a593Smuzhiyun 
225*4882a593Smuzhiyun 	key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_IDX;
226*4882a593Smuzhiyun 	pstate = bpf_map_lookup_elem(&my_map, &key);
227*4882a593Smuzhiyun 	if (!pstate)
228*4882a593Smuzhiyun 		return 0;
229*4882a593Smuzhiyun 
230*4882a593Smuzhiyun 	key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_IDX;
231*4882a593Smuzhiyun 	cstate = bpf_map_lookup_elem(&my_map, &key);
232*4882a593Smuzhiyun 	if (!cstate)
233*4882a593Smuzhiyun 		return 0;
234*4882a593Smuzhiyun 
235*4882a593Smuzhiyun 	prev_state = *pstate;
236*4882a593Smuzhiyun 	*pstate = ctx->state;
237*4882a593Smuzhiyun 
238*4882a593Smuzhiyun 	if (!*pts) {
239*4882a593Smuzhiyun 		*pts = bpf_ktime_get_ns();
240*4882a593Smuzhiyun 		return 0;
241*4882a593Smuzhiyun 	}
242*4882a593Smuzhiyun 
243*4882a593Smuzhiyun 	cur_ts = bpf_ktime_get_ns();
244*4882a593Smuzhiyun 	delta = cur_ts - *pts;
245*4882a593Smuzhiyun 	*pts = cur_ts;
246*4882a593Smuzhiyun 
247*4882a593Smuzhiyun 	/* When CPU is in idle, bail out to skip pstate statistics */
248*4882a593Smuzhiyun 	if (*cstate != (u32)(-1))
249*4882a593Smuzhiyun 		return 0;
250*4882a593Smuzhiyun 
251*4882a593Smuzhiyun 	/*
252*4882a593Smuzhiyun 	 * The cpu changes to another different OPP (in below diagram
253*4882a593Smuzhiyun 	 * change frequency from OPP3 to OPP1), need recording interval
254*4882a593Smuzhiyun 	 * for previous frequency OPP3 and update timestamp as start
255*4882a593Smuzhiyun 	 * time for new frequency OPP1.
256*4882a593Smuzhiyun 	 *
257*4882a593Smuzhiyun 	 *                 OPP3
258*4882a593Smuzhiyun 	 *            +---------------------+
259*4882a593Smuzhiyun 	 *     OPP2   |                     |
260*4882a593Smuzhiyun 	 *   ---------+                     |
261*4882a593Smuzhiyun 	 *                                  |    OPP1
262*4882a593Smuzhiyun 	 *                                  +---------------
263*4882a593Smuzhiyun 	 *
264*4882a593Smuzhiyun 	 *            |<- pstate duration ->|
265*4882a593Smuzhiyun 	 *            ^                     ^
266*4882a593Smuzhiyun 	 *           pts                  cur_ts
267*4882a593Smuzhiyun 	 */
268*4882a593Smuzhiyun 	pstate_idx = find_cpu_pstate_idx(*pstate);
269*4882a593Smuzhiyun 	if (pstate_idx >= MAX_PSTATE_ENTRIES)
270*4882a593Smuzhiyun 		return 0;
271*4882a593Smuzhiyun 
272*4882a593Smuzhiyun 	key = cpu * MAX_PSTATE_ENTRIES + pstate_idx;
273*4882a593Smuzhiyun 	val = bpf_map_lookup_elem(&pstate_duration, &key);
274*4882a593Smuzhiyun 	if (val)
275*4882a593Smuzhiyun 		__sync_fetch_and_add((long *)val, delta);
276*4882a593Smuzhiyun 
277*4882a593Smuzhiyun 	return 0;
278*4882a593Smuzhiyun }
279*4882a593Smuzhiyun 
280*4882a593Smuzhiyun char _license[] SEC("license") = "GPL";
281*4882a593Smuzhiyun u32 _version SEC("version") = LINUX_VERSION_CODE;
282