xref: /OK3568_Linux_fs/kernel/tools/perf/bench/synthesize.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun  * Benchmark synthesis of perf events such as at the start of a 'perf
4*4882a593Smuzhiyun  * record'. Synthesis is done on the current process and the 'dummy' event
5*4882a593Smuzhiyun  * handlers are invoked that support dump_trace but otherwise do nothing.
6*4882a593Smuzhiyun  *
7*4882a593Smuzhiyun  * Copyright 2019 Google LLC.
8*4882a593Smuzhiyun  */
9*4882a593Smuzhiyun #include <stdio.h>
10*4882a593Smuzhiyun #include "bench.h"
11*4882a593Smuzhiyun #include "../util/debug.h"
12*4882a593Smuzhiyun #include "../util/session.h"
13*4882a593Smuzhiyun #include "../util/stat.h"
14*4882a593Smuzhiyun #include "../util/synthetic-events.h"
15*4882a593Smuzhiyun #include "../util/target.h"
16*4882a593Smuzhiyun #include "../util/thread_map.h"
17*4882a593Smuzhiyun #include "../util/tool.h"
18*4882a593Smuzhiyun #include "../util/util.h"
19*4882a593Smuzhiyun #include <linux/atomic.h>
20*4882a593Smuzhiyun #include <linux/err.h>
21*4882a593Smuzhiyun #include <linux/time64.h>
22*4882a593Smuzhiyun #include <subcmd/parse-options.h>
23*4882a593Smuzhiyun 
/* Which benchmark variants were requested on the command line. */
static bool run_st;
static bool run_mt;

/* Number of timed repetitions averaged by each benchmark variant. */
static unsigned int single_iterations = 10000;
static unsigned int multi_iterations = 10;

/*
 * Inclusive range of synthesis thread counts swept by the multi-threaded
 * benchmark. A max_threads of UINT_MAX is replaced by the number of online
 * CPUs in run_multi_threaded().
 */
static unsigned int min_threads = 1;
static unsigned int max_threads = UINT_MAX;
30*4882a593Smuzhiyun 
31*4882a593Smuzhiyun static const struct option options[] = {
32*4882a593Smuzhiyun 	OPT_BOOLEAN('s', "st", &run_st, "Run single threaded benchmark"),
33*4882a593Smuzhiyun 	OPT_BOOLEAN('t', "mt", &run_mt, "Run multi-threaded benchmark"),
34*4882a593Smuzhiyun 	OPT_UINTEGER('m', "min-threads", &min_threads,
35*4882a593Smuzhiyun 		"Minimum number of threads in multithreaded bench"),
36*4882a593Smuzhiyun 	OPT_UINTEGER('M', "max-threads", &max_threads,
37*4882a593Smuzhiyun 		"Maximum number of threads in multithreaded bench"),
38*4882a593Smuzhiyun 	OPT_UINTEGER('i', "single-iterations", &single_iterations,
39*4882a593Smuzhiyun 		"Number of iterations used to compute single-threaded average"),
40*4882a593Smuzhiyun 	OPT_UINTEGER('I', "multi-iterations", &multi_iterations,
41*4882a593Smuzhiyun 		"Number of iterations used to compute multi-threaded average"),
42*4882a593Smuzhiyun 	OPT_END()
43*4882a593Smuzhiyun };
44*4882a593Smuzhiyun 
/* NULL-terminated usage strings shown when option parsing fails. */
static const char *const bench_usage[] = {
	"perf bench internals synthesize <options>",
	NULL,
};
49*4882a593Smuzhiyun 
50*4882a593Smuzhiyun static atomic_t event_count;
51*4882a593Smuzhiyun 
/*
 * Event callback handed to __machine__synthesize_threads().
 *
 * All arguments are ignored; the only effect is to bump the global
 * event_count so the benchmark can report how many events were synthesized.
 *
 * Returns 0 unconditionally.
 */
static int
process_synthesized_event(struct perf_tool *tool __maybe_unused,
			  union perf_event *event __maybe_unused,
			  struct perf_sample *sample __maybe_unused,
			  struct machine *machine __maybe_unused)
{
	atomic_inc(&event_count);

	return 0;
}
60*4882a593Smuzhiyun 
do_run_single_threaded(struct perf_session * session,struct perf_thread_map * threads,struct target * target,bool data_mmap)61*4882a593Smuzhiyun static int do_run_single_threaded(struct perf_session *session,
62*4882a593Smuzhiyun 				struct perf_thread_map *threads,
63*4882a593Smuzhiyun 				struct target *target, bool data_mmap)
64*4882a593Smuzhiyun {
65*4882a593Smuzhiyun 	const unsigned int nr_threads_synthesize = 1;
66*4882a593Smuzhiyun 	struct timeval start, end, diff;
67*4882a593Smuzhiyun 	u64 runtime_us;
68*4882a593Smuzhiyun 	unsigned int i;
69*4882a593Smuzhiyun 	double time_average, time_stddev, event_average, event_stddev;
70*4882a593Smuzhiyun 	int err;
71*4882a593Smuzhiyun 	struct stats time_stats, event_stats;
72*4882a593Smuzhiyun 
73*4882a593Smuzhiyun 	init_stats(&time_stats);
74*4882a593Smuzhiyun 	init_stats(&event_stats);
75*4882a593Smuzhiyun 
76*4882a593Smuzhiyun 	for (i = 0; i < single_iterations; i++) {
77*4882a593Smuzhiyun 		atomic_set(&event_count, 0);
78*4882a593Smuzhiyun 		gettimeofday(&start, NULL);
79*4882a593Smuzhiyun 		err = __machine__synthesize_threads(&session->machines.host,
80*4882a593Smuzhiyun 						NULL,
81*4882a593Smuzhiyun 						target, threads,
82*4882a593Smuzhiyun 						process_synthesized_event,
83*4882a593Smuzhiyun 						data_mmap,
84*4882a593Smuzhiyun 						nr_threads_synthesize);
85*4882a593Smuzhiyun 		if (err)
86*4882a593Smuzhiyun 			return err;
87*4882a593Smuzhiyun 
88*4882a593Smuzhiyun 		gettimeofday(&end, NULL);
89*4882a593Smuzhiyun 		timersub(&end, &start, &diff);
90*4882a593Smuzhiyun 		runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
91*4882a593Smuzhiyun 		update_stats(&time_stats, runtime_us);
92*4882a593Smuzhiyun 		update_stats(&event_stats, atomic_read(&event_count));
93*4882a593Smuzhiyun 	}
94*4882a593Smuzhiyun 
95*4882a593Smuzhiyun 	time_average = avg_stats(&time_stats);
96*4882a593Smuzhiyun 	time_stddev = stddev_stats(&time_stats);
97*4882a593Smuzhiyun 	printf("  Average %ssynthesis took: %.3f usec (+- %.3f usec)\n",
98*4882a593Smuzhiyun 		data_mmap ? "data " : "", time_average, time_stddev);
99*4882a593Smuzhiyun 
100*4882a593Smuzhiyun 	event_average = avg_stats(&event_stats);
101*4882a593Smuzhiyun 	event_stddev = stddev_stats(&event_stats);
102*4882a593Smuzhiyun 	printf("  Average num. events: %.3f (+- %.3f)\n",
103*4882a593Smuzhiyun 		event_average, event_stddev);
104*4882a593Smuzhiyun 
105*4882a593Smuzhiyun 	printf("  Average time per event %.3f usec\n",
106*4882a593Smuzhiyun 		time_average / event_average);
107*4882a593Smuzhiyun 	return 0;
108*4882a593Smuzhiyun }
109*4882a593Smuzhiyun 
run_single_threaded(void)110*4882a593Smuzhiyun static int run_single_threaded(void)
111*4882a593Smuzhiyun {
112*4882a593Smuzhiyun 	struct perf_session *session;
113*4882a593Smuzhiyun 	struct target target = {
114*4882a593Smuzhiyun 		.pid = "self",
115*4882a593Smuzhiyun 	};
116*4882a593Smuzhiyun 	struct perf_thread_map *threads;
117*4882a593Smuzhiyun 	int err;
118*4882a593Smuzhiyun 
119*4882a593Smuzhiyun 	perf_set_singlethreaded();
120*4882a593Smuzhiyun 	session = perf_session__new(NULL, false, NULL);
121*4882a593Smuzhiyun 	if (IS_ERR(session)) {
122*4882a593Smuzhiyun 		pr_err("Session creation failed.\n");
123*4882a593Smuzhiyun 		return PTR_ERR(session);
124*4882a593Smuzhiyun 	}
125*4882a593Smuzhiyun 	threads = thread_map__new_by_pid(getpid());
126*4882a593Smuzhiyun 	if (!threads) {
127*4882a593Smuzhiyun 		pr_err("Thread map creation failed.\n");
128*4882a593Smuzhiyun 		err = -ENOMEM;
129*4882a593Smuzhiyun 		goto err_out;
130*4882a593Smuzhiyun 	}
131*4882a593Smuzhiyun 
132*4882a593Smuzhiyun 	puts(
133*4882a593Smuzhiyun "Computing performance of single threaded perf event synthesis by\n"
134*4882a593Smuzhiyun "synthesizing events on the perf process itself:");
135*4882a593Smuzhiyun 
136*4882a593Smuzhiyun 	err = do_run_single_threaded(session, threads, &target, false);
137*4882a593Smuzhiyun 	if (err)
138*4882a593Smuzhiyun 		goto err_out;
139*4882a593Smuzhiyun 
140*4882a593Smuzhiyun 	err = do_run_single_threaded(session, threads, &target, true);
141*4882a593Smuzhiyun 
142*4882a593Smuzhiyun err_out:
143*4882a593Smuzhiyun 	if (threads)
144*4882a593Smuzhiyun 		perf_thread_map__put(threads);
145*4882a593Smuzhiyun 
146*4882a593Smuzhiyun 	perf_session__delete(session);
147*4882a593Smuzhiyun 	return err;
148*4882a593Smuzhiyun }
149*4882a593Smuzhiyun 
do_run_multi_threaded(struct target * target,unsigned int nr_threads_synthesize)150*4882a593Smuzhiyun static int do_run_multi_threaded(struct target *target,
151*4882a593Smuzhiyun 				unsigned int nr_threads_synthesize)
152*4882a593Smuzhiyun {
153*4882a593Smuzhiyun 	struct timeval start, end, diff;
154*4882a593Smuzhiyun 	u64 runtime_us;
155*4882a593Smuzhiyun 	unsigned int i;
156*4882a593Smuzhiyun 	double time_average, time_stddev, event_average, event_stddev;
157*4882a593Smuzhiyun 	int err;
158*4882a593Smuzhiyun 	struct stats time_stats, event_stats;
159*4882a593Smuzhiyun 	struct perf_session *session;
160*4882a593Smuzhiyun 
161*4882a593Smuzhiyun 	init_stats(&time_stats);
162*4882a593Smuzhiyun 	init_stats(&event_stats);
163*4882a593Smuzhiyun 	for (i = 0; i < multi_iterations; i++) {
164*4882a593Smuzhiyun 		session = perf_session__new(NULL, false, NULL);
165*4882a593Smuzhiyun 		if (IS_ERR(session))
166*4882a593Smuzhiyun 			return PTR_ERR(session);
167*4882a593Smuzhiyun 
168*4882a593Smuzhiyun 		atomic_set(&event_count, 0);
169*4882a593Smuzhiyun 		gettimeofday(&start, NULL);
170*4882a593Smuzhiyun 		err = __machine__synthesize_threads(&session->machines.host,
171*4882a593Smuzhiyun 						NULL,
172*4882a593Smuzhiyun 						target, NULL,
173*4882a593Smuzhiyun 						process_synthesized_event,
174*4882a593Smuzhiyun 						false,
175*4882a593Smuzhiyun 						nr_threads_synthesize);
176*4882a593Smuzhiyun 		if (err) {
177*4882a593Smuzhiyun 			perf_session__delete(session);
178*4882a593Smuzhiyun 			return err;
179*4882a593Smuzhiyun 		}
180*4882a593Smuzhiyun 
181*4882a593Smuzhiyun 		gettimeofday(&end, NULL);
182*4882a593Smuzhiyun 		timersub(&end, &start, &diff);
183*4882a593Smuzhiyun 		runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
184*4882a593Smuzhiyun 		update_stats(&time_stats, runtime_us);
185*4882a593Smuzhiyun 		update_stats(&event_stats, atomic_read(&event_count));
186*4882a593Smuzhiyun 		perf_session__delete(session);
187*4882a593Smuzhiyun 	}
188*4882a593Smuzhiyun 
189*4882a593Smuzhiyun 	time_average = avg_stats(&time_stats);
190*4882a593Smuzhiyun 	time_stddev = stddev_stats(&time_stats);
191*4882a593Smuzhiyun 	printf("    Average synthesis took: %.3f usec (+- %.3f usec)\n",
192*4882a593Smuzhiyun 		time_average, time_stddev);
193*4882a593Smuzhiyun 
194*4882a593Smuzhiyun 	event_average = avg_stats(&event_stats);
195*4882a593Smuzhiyun 	event_stddev = stddev_stats(&event_stats);
196*4882a593Smuzhiyun 	printf("    Average num. events: %.3f (+- %.3f)\n",
197*4882a593Smuzhiyun 		event_average, event_stddev);
198*4882a593Smuzhiyun 
199*4882a593Smuzhiyun 	printf("    Average time per event %.3f usec\n",
200*4882a593Smuzhiyun 		time_average / event_average);
201*4882a593Smuzhiyun 	return 0;
202*4882a593Smuzhiyun }
203*4882a593Smuzhiyun 
run_multi_threaded(void)204*4882a593Smuzhiyun static int run_multi_threaded(void)
205*4882a593Smuzhiyun {
206*4882a593Smuzhiyun 	struct target target = {
207*4882a593Smuzhiyun 		.cpu_list = "0"
208*4882a593Smuzhiyun 	};
209*4882a593Smuzhiyun 	unsigned int nr_threads_synthesize;
210*4882a593Smuzhiyun 	int err;
211*4882a593Smuzhiyun 
212*4882a593Smuzhiyun 	if (max_threads == UINT_MAX)
213*4882a593Smuzhiyun 		max_threads = sysconf(_SC_NPROCESSORS_ONLN);
214*4882a593Smuzhiyun 
215*4882a593Smuzhiyun 	puts(
216*4882a593Smuzhiyun "Computing performance of multi threaded perf event synthesis by\n"
217*4882a593Smuzhiyun "synthesizing events on CPU 0:");
218*4882a593Smuzhiyun 
219*4882a593Smuzhiyun 	for (nr_threads_synthesize = min_threads;
220*4882a593Smuzhiyun 	     nr_threads_synthesize <= max_threads;
221*4882a593Smuzhiyun 	     nr_threads_synthesize++) {
222*4882a593Smuzhiyun 		if (nr_threads_synthesize == 1)
223*4882a593Smuzhiyun 			perf_set_singlethreaded();
224*4882a593Smuzhiyun 		else
225*4882a593Smuzhiyun 			perf_set_multithreaded();
226*4882a593Smuzhiyun 
227*4882a593Smuzhiyun 		printf("  Number of synthesis threads: %u\n",
228*4882a593Smuzhiyun 			nr_threads_synthesize);
229*4882a593Smuzhiyun 
230*4882a593Smuzhiyun 		err = do_run_multi_threaded(&target, nr_threads_synthesize);
231*4882a593Smuzhiyun 		if (err)
232*4882a593Smuzhiyun 			return err;
233*4882a593Smuzhiyun 	}
234*4882a593Smuzhiyun 	perf_set_singlethreaded();
235*4882a593Smuzhiyun 	return 0;
236*4882a593Smuzhiyun }
237*4882a593Smuzhiyun 
bench_synthesize(int argc,const char ** argv)238*4882a593Smuzhiyun int bench_synthesize(int argc, const char **argv)
239*4882a593Smuzhiyun {
240*4882a593Smuzhiyun 	int err = 0;
241*4882a593Smuzhiyun 
242*4882a593Smuzhiyun 	argc = parse_options(argc, argv, options, bench_usage, 0);
243*4882a593Smuzhiyun 	if (argc) {
244*4882a593Smuzhiyun 		usage_with_options(bench_usage, options);
245*4882a593Smuzhiyun 		exit(EXIT_FAILURE);
246*4882a593Smuzhiyun 	}
247*4882a593Smuzhiyun 
248*4882a593Smuzhiyun 	/*
249*4882a593Smuzhiyun 	 * If neither single threaded or multi-threaded are specified, default
250*4882a593Smuzhiyun 	 * to running just single threaded.
251*4882a593Smuzhiyun 	 */
252*4882a593Smuzhiyun 	if (!run_st && !run_mt)
253*4882a593Smuzhiyun 		run_st = true;
254*4882a593Smuzhiyun 
255*4882a593Smuzhiyun 	if (run_st)
256*4882a593Smuzhiyun 		err = run_single_threaded();
257*4882a593Smuzhiyun 
258*4882a593Smuzhiyun 	if (!err && run_mt)
259*4882a593Smuzhiyun 		err = run_multi_threaded();
260*4882a593Smuzhiyun 
261*4882a593Smuzhiyun 	return err;
262*4882a593Smuzhiyun }
263