1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0
2*4882a593Smuzhiyun #include <linux/compiler.h>
3*4882a593Smuzhiyun #include <string.h>
4*4882a593Smuzhiyun #include <perf/cpumap.h>
5*4882a593Smuzhiyun #include <perf/evlist.h>
6*4882a593Smuzhiyun #include "metricgroup.h"
7*4882a593Smuzhiyun #include "tests.h"
8*4882a593Smuzhiyun #include "pmu-events/pmu-events.h"
9*4882a593Smuzhiyun #include "evlist.h"
10*4882a593Smuzhiyun #include "rblist.h"
11*4882a593Smuzhiyun #include "debug.h"
12*4882a593Smuzhiyun #include "expr.h"
13*4882a593Smuzhiyun #include "stat.h"
14*4882a593Smuzhiyun
15*4882a593Smuzhiyun static struct pmu_event pme_test[] = {
16*4882a593Smuzhiyun {
17*4882a593Smuzhiyun .metric_expr = "inst_retired.any / cpu_clk_unhalted.thread",
18*4882a593Smuzhiyun .metric_name = "IPC",
19*4882a593Smuzhiyun .metric_group = "group1",
20*4882a593Smuzhiyun },
21*4882a593Smuzhiyun {
22*4882a593Smuzhiyun .metric_expr = "idq_uops_not_delivered.core / (4 * (( ( cpu_clk_unhalted.thread / 2 ) * "
23*4882a593Smuzhiyun "( 1 + cpu_clk_unhalted.one_thread_active / cpu_clk_unhalted.ref_xclk ) )))",
24*4882a593Smuzhiyun .metric_name = "Frontend_Bound_SMT",
25*4882a593Smuzhiyun },
26*4882a593Smuzhiyun {
27*4882a593Smuzhiyun .metric_expr = "l1d\\-loads\\-misses / inst_retired.any",
28*4882a593Smuzhiyun .metric_name = "dcache_miss_cpi",
29*4882a593Smuzhiyun },
30*4882a593Smuzhiyun {
31*4882a593Smuzhiyun .metric_expr = "l1i\\-loads\\-misses / inst_retired.any",
32*4882a593Smuzhiyun .metric_name = "icache_miss_cycles",
33*4882a593Smuzhiyun },
34*4882a593Smuzhiyun {
35*4882a593Smuzhiyun .metric_expr = "(dcache_miss_cpi + icache_miss_cycles)",
36*4882a593Smuzhiyun .metric_name = "cache_miss_cycles",
37*4882a593Smuzhiyun .metric_group = "group1",
38*4882a593Smuzhiyun },
39*4882a593Smuzhiyun {
40*4882a593Smuzhiyun .metric_expr = "l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hit",
41*4882a593Smuzhiyun .metric_name = "DCache_L2_All_Hits",
42*4882a593Smuzhiyun },
43*4882a593Smuzhiyun {
44*4882a593Smuzhiyun .metric_expr = "max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) + "
45*4882a593Smuzhiyun "l2_rqsts.pf_miss + l2_rqsts.rfo_miss",
46*4882a593Smuzhiyun .metric_name = "DCache_L2_All_Miss",
47*4882a593Smuzhiyun },
48*4882a593Smuzhiyun {
49*4882a593Smuzhiyun .metric_expr = "dcache_l2_all_hits + dcache_l2_all_miss",
50*4882a593Smuzhiyun .metric_name = "DCache_L2_All",
51*4882a593Smuzhiyun },
52*4882a593Smuzhiyun {
53*4882a593Smuzhiyun .metric_expr = "d_ratio(dcache_l2_all_hits, dcache_l2_all)",
54*4882a593Smuzhiyun .metric_name = "DCache_L2_Hits",
55*4882a593Smuzhiyun },
56*4882a593Smuzhiyun {
57*4882a593Smuzhiyun .metric_expr = "d_ratio(dcache_l2_all_miss, dcache_l2_all)",
58*4882a593Smuzhiyun .metric_name = "DCache_L2_Misses",
59*4882a593Smuzhiyun },
60*4882a593Smuzhiyun {
61*4882a593Smuzhiyun .metric_expr = "ipc + m2",
62*4882a593Smuzhiyun .metric_name = "M1",
63*4882a593Smuzhiyun },
64*4882a593Smuzhiyun {
65*4882a593Smuzhiyun .metric_expr = "ipc + m1",
66*4882a593Smuzhiyun .metric_name = "M2",
67*4882a593Smuzhiyun },
68*4882a593Smuzhiyun {
69*4882a593Smuzhiyun .metric_expr = "1/m3",
70*4882a593Smuzhiyun .metric_name = "M3",
71*4882a593Smuzhiyun },
72*4882a593Smuzhiyun {
73*4882a593Smuzhiyun .name = NULL,
74*4882a593Smuzhiyun }
75*4882a593Smuzhiyun };
76*4882a593Smuzhiyun
77*4882a593Smuzhiyun static struct pmu_events_map map = {
78*4882a593Smuzhiyun .cpuid = "test",
79*4882a593Smuzhiyun .version = "1",
80*4882a593Smuzhiyun .type = "core",
81*4882a593Smuzhiyun .table = pme_test,
82*4882a593Smuzhiyun };
83*4882a593Smuzhiyun
84*4882a593Smuzhiyun struct value {
85*4882a593Smuzhiyun const char *event;
86*4882a593Smuzhiyun u64 val;
87*4882a593Smuzhiyun };
88*4882a593Smuzhiyun
find_value(const char * name,struct value * values)89*4882a593Smuzhiyun static u64 find_value(const char *name, struct value *values)
90*4882a593Smuzhiyun {
91*4882a593Smuzhiyun struct value *v = values;
92*4882a593Smuzhiyun
93*4882a593Smuzhiyun while (v->event) {
94*4882a593Smuzhiyun if (!strcmp(name, v->event))
95*4882a593Smuzhiyun return v->val;
96*4882a593Smuzhiyun v++;
97*4882a593Smuzhiyun };
98*4882a593Smuzhiyun return 0;
99*4882a593Smuzhiyun }
100*4882a593Smuzhiyun
load_runtime_stat(struct runtime_stat * st,struct evlist * evlist,struct value * vals)101*4882a593Smuzhiyun static void load_runtime_stat(struct runtime_stat *st, struct evlist *evlist,
102*4882a593Smuzhiyun struct value *vals)
103*4882a593Smuzhiyun {
104*4882a593Smuzhiyun struct evsel *evsel;
105*4882a593Smuzhiyun u64 count;
106*4882a593Smuzhiyun
107*4882a593Smuzhiyun evlist__for_each_entry(evlist, evsel) {
108*4882a593Smuzhiyun count = find_value(evsel->name, vals);
109*4882a593Smuzhiyun perf_stat__update_shadow_stats(evsel, count, 0, st);
110*4882a593Smuzhiyun }
111*4882a593Smuzhiyun }
112*4882a593Smuzhiyun
compute_single(struct rblist * metric_events,struct evlist * evlist,struct runtime_stat * st,const char * name)113*4882a593Smuzhiyun static double compute_single(struct rblist *metric_events, struct evlist *evlist,
114*4882a593Smuzhiyun struct runtime_stat *st, const char *name)
115*4882a593Smuzhiyun {
116*4882a593Smuzhiyun struct metric_expr *mexp;
117*4882a593Smuzhiyun struct metric_event *me;
118*4882a593Smuzhiyun struct evsel *evsel;
119*4882a593Smuzhiyun
120*4882a593Smuzhiyun evlist__for_each_entry(evlist, evsel) {
121*4882a593Smuzhiyun me = metricgroup__lookup(metric_events, evsel, false);
122*4882a593Smuzhiyun if (me != NULL) {
123*4882a593Smuzhiyun list_for_each_entry (mexp, &me->head, nd) {
124*4882a593Smuzhiyun if (strcmp(mexp->metric_name, name))
125*4882a593Smuzhiyun continue;
126*4882a593Smuzhiyun return test_generic_metric(mexp, 0, st);
127*4882a593Smuzhiyun }
128*4882a593Smuzhiyun }
129*4882a593Smuzhiyun }
130*4882a593Smuzhiyun return 0.;
131*4882a593Smuzhiyun }
132*4882a593Smuzhiyun
__compute_metric(const char * name,struct value * vals,const char * name1,double * ratio1,const char * name2,double * ratio2)133*4882a593Smuzhiyun static int __compute_metric(const char *name, struct value *vals,
134*4882a593Smuzhiyun const char *name1, double *ratio1,
135*4882a593Smuzhiyun const char *name2, double *ratio2)
136*4882a593Smuzhiyun {
137*4882a593Smuzhiyun struct rblist metric_events = {
138*4882a593Smuzhiyun .nr_entries = 0,
139*4882a593Smuzhiyun };
140*4882a593Smuzhiyun struct perf_cpu_map *cpus;
141*4882a593Smuzhiyun struct runtime_stat st;
142*4882a593Smuzhiyun struct evlist *evlist;
143*4882a593Smuzhiyun int err;
144*4882a593Smuzhiyun
145*4882a593Smuzhiyun /*
146*4882a593Smuzhiyun * We need to prepare evlist for stat mode running on CPU 0
147*4882a593Smuzhiyun * because that's where all the stats are going to be created.
148*4882a593Smuzhiyun */
149*4882a593Smuzhiyun evlist = evlist__new();
150*4882a593Smuzhiyun if (!evlist)
151*4882a593Smuzhiyun return -ENOMEM;
152*4882a593Smuzhiyun
153*4882a593Smuzhiyun cpus = perf_cpu_map__new("0");
154*4882a593Smuzhiyun if (!cpus) {
155*4882a593Smuzhiyun evlist__delete(evlist);
156*4882a593Smuzhiyun return -ENOMEM;
157*4882a593Smuzhiyun }
158*4882a593Smuzhiyun
159*4882a593Smuzhiyun perf_evlist__set_maps(&evlist->core, cpus, NULL);
160*4882a593Smuzhiyun runtime_stat__init(&st);
161*4882a593Smuzhiyun
162*4882a593Smuzhiyun /* Parse the metric into metric_events list. */
163*4882a593Smuzhiyun err = metricgroup__parse_groups_test(evlist, &map, name,
164*4882a593Smuzhiyun false, false,
165*4882a593Smuzhiyun &metric_events);
166*4882a593Smuzhiyun if (err)
167*4882a593Smuzhiyun goto out;
168*4882a593Smuzhiyun
169*4882a593Smuzhiyun err = perf_evlist__alloc_stats(evlist, false);
170*4882a593Smuzhiyun if (err)
171*4882a593Smuzhiyun goto out;
172*4882a593Smuzhiyun
173*4882a593Smuzhiyun /* Load the runtime stats with given numbers for events. */
174*4882a593Smuzhiyun load_runtime_stat(&st, evlist, vals);
175*4882a593Smuzhiyun
176*4882a593Smuzhiyun /* And execute the metric */
177*4882a593Smuzhiyun if (name1 && ratio1)
178*4882a593Smuzhiyun *ratio1 = compute_single(&metric_events, evlist, &st, name1);
179*4882a593Smuzhiyun if (name2 && ratio2)
180*4882a593Smuzhiyun *ratio2 = compute_single(&metric_events, evlist, &st, name2);
181*4882a593Smuzhiyun
182*4882a593Smuzhiyun out:
183*4882a593Smuzhiyun /* ... clenup. */
184*4882a593Smuzhiyun metricgroup__rblist_exit(&metric_events);
185*4882a593Smuzhiyun runtime_stat__exit(&st);
186*4882a593Smuzhiyun perf_evlist__free_stats(evlist);
187*4882a593Smuzhiyun perf_cpu_map__put(cpus);
188*4882a593Smuzhiyun evlist__delete(evlist);
189*4882a593Smuzhiyun return err;
190*4882a593Smuzhiyun }
191*4882a593Smuzhiyun
/*
 * Parse and evaluate the single metric @name using the counts in @vals.
 * The result is stored in *@ratio; returns whatever __compute_metric()
 * returns (0 on success).
 */
static int compute_metric(const char *name, struct value *vals, double *ratio)
{
	/* The parsed name and the evaluated name are the same metric. */
	return __compute_metric(name, vals, name, ratio, NULL, NULL);
}
196*4882a593Smuzhiyun
/*
 * Parse the metric group @name and evaluate two of its metrics, @name1
 * into *@ratio1 and @name2 into *@ratio2, using the counts in @vals.
 * Returns whatever __compute_metric() returns (0 on success).
 */
static int compute_metric_group(const char *name, struct value *vals,
				const char *name1, double *ratio1,
				const char *name2, double *ratio2)
{
	return __compute_metric(name, vals, name1, ratio1, name2, ratio2);
}
203*4882a593Smuzhiyun
test_ipc(void)204*4882a593Smuzhiyun static int test_ipc(void)
205*4882a593Smuzhiyun {
206*4882a593Smuzhiyun double ratio;
207*4882a593Smuzhiyun struct value vals[] = {
208*4882a593Smuzhiyun { .event = "inst_retired.any", .val = 300 },
209*4882a593Smuzhiyun { .event = "cpu_clk_unhalted.thread", .val = 200 },
210*4882a593Smuzhiyun { .event = NULL, },
211*4882a593Smuzhiyun };
212*4882a593Smuzhiyun
213*4882a593Smuzhiyun TEST_ASSERT_VAL("failed to compute metric",
214*4882a593Smuzhiyun compute_metric("IPC", vals, &ratio) == 0);
215*4882a593Smuzhiyun
216*4882a593Smuzhiyun TEST_ASSERT_VAL("IPC failed, wrong ratio",
217*4882a593Smuzhiyun ratio == 1.5);
218*4882a593Smuzhiyun return 0;
219*4882a593Smuzhiyun }
220*4882a593Smuzhiyun
test_frontend(void)221*4882a593Smuzhiyun static int test_frontend(void)
222*4882a593Smuzhiyun {
223*4882a593Smuzhiyun double ratio;
224*4882a593Smuzhiyun struct value vals[] = {
225*4882a593Smuzhiyun { .event = "idq_uops_not_delivered.core", .val = 300 },
226*4882a593Smuzhiyun { .event = "cpu_clk_unhalted.thread", .val = 200 },
227*4882a593Smuzhiyun { .event = "cpu_clk_unhalted.one_thread_active", .val = 400 },
228*4882a593Smuzhiyun { .event = "cpu_clk_unhalted.ref_xclk", .val = 600 },
229*4882a593Smuzhiyun { .event = NULL, },
230*4882a593Smuzhiyun };
231*4882a593Smuzhiyun
232*4882a593Smuzhiyun TEST_ASSERT_VAL("failed to compute metric",
233*4882a593Smuzhiyun compute_metric("Frontend_Bound_SMT", vals, &ratio) == 0);
234*4882a593Smuzhiyun
235*4882a593Smuzhiyun TEST_ASSERT_VAL("Frontend_Bound_SMT failed, wrong ratio",
236*4882a593Smuzhiyun ratio == 0.45);
237*4882a593Smuzhiyun return 0;
238*4882a593Smuzhiyun }
239*4882a593Smuzhiyun
test_cache_miss_cycles(void)240*4882a593Smuzhiyun static int test_cache_miss_cycles(void)
241*4882a593Smuzhiyun {
242*4882a593Smuzhiyun double ratio;
243*4882a593Smuzhiyun struct value vals[] = {
244*4882a593Smuzhiyun { .event = "l1d-loads-misses", .val = 300 },
245*4882a593Smuzhiyun { .event = "l1i-loads-misses", .val = 200 },
246*4882a593Smuzhiyun { .event = "inst_retired.any", .val = 400 },
247*4882a593Smuzhiyun { .event = NULL, },
248*4882a593Smuzhiyun };
249*4882a593Smuzhiyun
250*4882a593Smuzhiyun TEST_ASSERT_VAL("failed to compute metric",
251*4882a593Smuzhiyun compute_metric("cache_miss_cycles", vals, &ratio) == 0);
252*4882a593Smuzhiyun
253*4882a593Smuzhiyun TEST_ASSERT_VAL("cache_miss_cycles failed, wrong ratio",
254*4882a593Smuzhiyun ratio == 1.25);
255*4882a593Smuzhiyun return 0;
256*4882a593Smuzhiyun }
257*4882a593Smuzhiyun
258*4882a593Smuzhiyun
/*
 * DCache_L2_All_Hits = l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hit
 * DCache_L2_All_Miss = max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) +
 *                      l2_rqsts.pf_miss + l2_rqsts.rfo_miss
 * DCache_L2_All      = dcache_l2_all_hits + dcache_l2_all_miss
 * DCache_L2_Hits     = d_ratio(dcache_l2_all_hits, dcache_l2_all)
 * DCache_L2_Misses   = d_ratio(dcache_l2_all_miss, dcache_l2_all)
 *
 * l2_rqsts.demand_data_rd_hit = 100
 * l2_rqsts.pf_hit             = 200
 * l2_rqsts.rfo_hit            = 300
 * l2_rqsts.all_demand_data_rd = 400
 * l2_rqsts.pf_miss            = 500
 * l2_rqsts.rfo_miss           = 600
 *
 * DCache_L2_All_Hits = 100 + 200 + 300 = 600
 * DCache_L2_All_Miss = MAX(400 - 100, 0) + 500 + 600 = 1400
 * DCache_L2_All      = 600 + 1400  = 2000
 * DCache_L2_Hits     = 600 / 2000  = 0.3
 * DCache_L2_Misses   = 1400 / 2000 = 0.7
 */
test_dcache_l2(void)280*4882a593Smuzhiyun static int test_dcache_l2(void)
281*4882a593Smuzhiyun {
282*4882a593Smuzhiyun double ratio;
283*4882a593Smuzhiyun struct value vals[] = {
284*4882a593Smuzhiyun { .event = "l2_rqsts.demand_data_rd_hit", .val = 100 },
285*4882a593Smuzhiyun { .event = "l2_rqsts.pf_hit", .val = 200 },
286*4882a593Smuzhiyun { .event = "l2_rqsts.rfo_hit", .val = 300 },
287*4882a593Smuzhiyun { .event = "l2_rqsts.all_demand_data_rd", .val = 400 },
288*4882a593Smuzhiyun { .event = "l2_rqsts.pf_miss", .val = 500 },
289*4882a593Smuzhiyun { .event = "l2_rqsts.rfo_miss", .val = 600 },
290*4882a593Smuzhiyun { .event = NULL, },
291*4882a593Smuzhiyun };
292*4882a593Smuzhiyun
293*4882a593Smuzhiyun TEST_ASSERT_VAL("failed to compute metric",
294*4882a593Smuzhiyun compute_metric("DCache_L2_Hits", vals, &ratio) == 0);
295*4882a593Smuzhiyun
296*4882a593Smuzhiyun TEST_ASSERT_VAL("DCache_L2_Hits failed, wrong ratio",
297*4882a593Smuzhiyun ratio == 0.3);
298*4882a593Smuzhiyun
299*4882a593Smuzhiyun TEST_ASSERT_VAL("failed to compute metric",
300*4882a593Smuzhiyun compute_metric("DCache_L2_Misses", vals, &ratio) == 0);
301*4882a593Smuzhiyun
302*4882a593Smuzhiyun TEST_ASSERT_VAL("DCache_L2_Misses failed, wrong ratio",
303*4882a593Smuzhiyun ratio == 0.7);
304*4882a593Smuzhiyun return 0;
305*4882a593Smuzhiyun }
306*4882a593Smuzhiyun
test_recursion_fail(void)307*4882a593Smuzhiyun static int test_recursion_fail(void)
308*4882a593Smuzhiyun {
309*4882a593Smuzhiyun double ratio;
310*4882a593Smuzhiyun struct value vals[] = {
311*4882a593Smuzhiyun { .event = "inst_retired.any", .val = 300 },
312*4882a593Smuzhiyun { .event = "cpu_clk_unhalted.thread", .val = 200 },
313*4882a593Smuzhiyun { .event = NULL, },
314*4882a593Smuzhiyun };
315*4882a593Smuzhiyun
316*4882a593Smuzhiyun TEST_ASSERT_VAL("failed to find recursion",
317*4882a593Smuzhiyun compute_metric("M1", vals, &ratio) == -1);
318*4882a593Smuzhiyun
319*4882a593Smuzhiyun TEST_ASSERT_VAL("failed to find recursion",
320*4882a593Smuzhiyun compute_metric("M3", vals, &ratio) == -1);
321*4882a593Smuzhiyun return 0;
322*4882a593Smuzhiyun }
323*4882a593Smuzhiyun
test_metric_group(void)324*4882a593Smuzhiyun static int test_metric_group(void)
325*4882a593Smuzhiyun {
326*4882a593Smuzhiyun double ratio1, ratio2;
327*4882a593Smuzhiyun struct value vals[] = {
328*4882a593Smuzhiyun { .event = "cpu_clk_unhalted.thread", .val = 200 },
329*4882a593Smuzhiyun { .event = "l1d-loads-misses", .val = 300 },
330*4882a593Smuzhiyun { .event = "l1i-loads-misses", .val = 200 },
331*4882a593Smuzhiyun { .event = "inst_retired.any", .val = 400 },
332*4882a593Smuzhiyun { .event = NULL, },
333*4882a593Smuzhiyun };
334*4882a593Smuzhiyun
335*4882a593Smuzhiyun TEST_ASSERT_VAL("failed to find recursion",
336*4882a593Smuzhiyun compute_metric_group("group1", vals,
337*4882a593Smuzhiyun "IPC", &ratio1,
338*4882a593Smuzhiyun "cache_miss_cycles", &ratio2) == 0);
339*4882a593Smuzhiyun
340*4882a593Smuzhiyun TEST_ASSERT_VAL("group IPC failed, wrong ratio",
341*4882a593Smuzhiyun ratio1 == 2.0);
342*4882a593Smuzhiyun
343*4882a593Smuzhiyun TEST_ASSERT_VAL("group cache_miss_cycles failed, wrong ratio",
344*4882a593Smuzhiyun ratio2 == 1.25);
345*4882a593Smuzhiyun return 0;
346*4882a593Smuzhiyun }
347*4882a593Smuzhiyun
/*
 * Entry point of the "parse metric" test.
 *
 * Runs the sub-tests in a fixed order and stops at the first one that does
 * not return 0.  Both parameters are unused.  Returns 0 when every
 * sub-test passes.
 */
int test__parse_metric(struct test *test __maybe_unused, int subtest __maybe_unused)
{
	/* Plain metrics. */
	TEST_ASSERT_VAL("IPC failed", test_ipc() == 0);
	TEST_ASSERT_VAL("frontend failed", test_frontend() == 0);

	/* Metrics built on top of other metrics. */
	TEST_ASSERT_VAL("cache_miss_cycles failed", test_cache_miss_cycles() == 0);
	TEST_ASSERT_VAL("DCache_L2 failed", test_dcache_l2() == 0);

	/* Recursive definitions must be rejected. */
	TEST_ASSERT_VAL("recursion fail failed", test_recursion_fail() == 0);

	/* Whole metric group. */
	TEST_ASSERT_VAL("test metric group", test_metric_group() == 0);
	return 0;
}
358