xref: /OK3568_Linux_fs/kernel/tools/perf/bench/find-bit-bench.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun  * Benchmark find_next_bit and related bit operations.
4*4882a593Smuzhiyun  *
5*4882a593Smuzhiyun  * Copyright 2020 Google LLC.
6*4882a593Smuzhiyun  */
7*4882a593Smuzhiyun #include <stdlib.h>
8*4882a593Smuzhiyun #include "bench.h"
9*4882a593Smuzhiyun #include "../util/stat.h"
10*4882a593Smuzhiyun #include <linux/bitmap.h>
11*4882a593Smuzhiyun #include <linux/bitops.h>
12*4882a593Smuzhiyun #include <linux/time64.h>
13*4882a593Smuzhiyun #include <subcmd/parse-options.h>
14*4882a593Smuzhiyun 
15*4882a593Smuzhiyun static unsigned int outer_iterations = 5;
16*4882a593Smuzhiyun static unsigned int inner_iterations = 100000;
17*4882a593Smuzhiyun 
18*4882a593Smuzhiyun static const struct option options[] = {
19*4882a593Smuzhiyun 	OPT_UINTEGER('i', "outer-iterations", &outer_iterations,
20*4882a593Smuzhiyun 		"Number of outer iterations used"),
21*4882a593Smuzhiyun 	OPT_UINTEGER('j', "inner-iterations", &inner_iterations,
22*4882a593Smuzhiyun 		"Number of inner iterations used"),
23*4882a593Smuzhiyun 	OPT_END()
24*4882a593Smuzhiyun };
25*4882a593Smuzhiyun 
26*4882a593Smuzhiyun static const char *const bench_usage[] = {
27*4882a593Smuzhiyun 	"perf bench mem find_bit <options>",
28*4882a593Smuzhiyun 	NULL
29*4882a593Smuzhiyun };
30*4882a593Smuzhiyun 
31*4882a593Smuzhiyun static unsigned int accumulator;
32*4882a593Smuzhiyun static unsigned int use_of_val;
33*4882a593Smuzhiyun 
workload(int val)34*4882a593Smuzhiyun static noinline void workload(int val)
35*4882a593Smuzhiyun {
36*4882a593Smuzhiyun 	use_of_val += val;
37*4882a593Smuzhiyun 	accumulator++;
38*4882a593Smuzhiyun }
39*4882a593Smuzhiyun 
40*4882a593Smuzhiyun #if (defined(__i386__) || defined(__x86_64__)) && defined(__GCC_ASM_FLAG_OUTPUTS__)
asm_test_bit(long nr,const unsigned long * addr)41*4882a593Smuzhiyun static bool asm_test_bit(long nr, const unsigned long *addr)
42*4882a593Smuzhiyun {
43*4882a593Smuzhiyun 	bool oldbit;
44*4882a593Smuzhiyun 
45*4882a593Smuzhiyun 	asm volatile("bt %2,%1"
46*4882a593Smuzhiyun 		     : "=@ccc" (oldbit)
47*4882a593Smuzhiyun 		     : "m" (*(unsigned long *)addr), "Ir" (nr) : "memory");
48*4882a593Smuzhiyun 
49*4882a593Smuzhiyun 	return oldbit;
50*4882a593Smuzhiyun }
51*4882a593Smuzhiyun #else
52*4882a593Smuzhiyun #define asm_test_bit test_bit
53*4882a593Smuzhiyun #endif
54*4882a593Smuzhiyun 
do_for_each_set_bit(unsigned int num_bits)55*4882a593Smuzhiyun static int do_for_each_set_bit(unsigned int num_bits)
56*4882a593Smuzhiyun {
57*4882a593Smuzhiyun 	unsigned long *to_test = bitmap_alloc(num_bits);
58*4882a593Smuzhiyun 	struct timeval start, end, diff;
59*4882a593Smuzhiyun 	u64 runtime_us;
60*4882a593Smuzhiyun 	struct stats fb_time_stats, tb_time_stats;
61*4882a593Smuzhiyun 	double time_average, time_stddev;
62*4882a593Smuzhiyun 	unsigned int bit, i, j;
63*4882a593Smuzhiyun 	unsigned int set_bits, skip;
64*4882a593Smuzhiyun 	unsigned int old;
65*4882a593Smuzhiyun 
66*4882a593Smuzhiyun 	init_stats(&fb_time_stats);
67*4882a593Smuzhiyun 	init_stats(&tb_time_stats);
68*4882a593Smuzhiyun 
69*4882a593Smuzhiyun 	for (set_bits = 1; set_bits <= num_bits; set_bits <<= 1) {
70*4882a593Smuzhiyun 		bitmap_zero(to_test, num_bits);
71*4882a593Smuzhiyun 		skip = num_bits / set_bits;
72*4882a593Smuzhiyun 		for (i = 0; i < num_bits; i += skip)
73*4882a593Smuzhiyun 			set_bit(i, to_test);
74*4882a593Smuzhiyun 
75*4882a593Smuzhiyun 		for (i = 0; i < outer_iterations; i++) {
76*4882a593Smuzhiyun 			old = accumulator;
77*4882a593Smuzhiyun 			gettimeofday(&start, NULL);
78*4882a593Smuzhiyun 			for (j = 0; j < inner_iterations; j++) {
79*4882a593Smuzhiyun 				for_each_set_bit(bit, to_test, num_bits)
80*4882a593Smuzhiyun 					workload(bit);
81*4882a593Smuzhiyun 			}
82*4882a593Smuzhiyun 			gettimeofday(&end, NULL);
83*4882a593Smuzhiyun 			assert(old + (inner_iterations * set_bits) == accumulator);
84*4882a593Smuzhiyun 			timersub(&end, &start, &diff);
85*4882a593Smuzhiyun 			runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
86*4882a593Smuzhiyun 			update_stats(&fb_time_stats, runtime_us);
87*4882a593Smuzhiyun 
88*4882a593Smuzhiyun 			old = accumulator;
89*4882a593Smuzhiyun 			gettimeofday(&start, NULL);
90*4882a593Smuzhiyun 			for (j = 0; j < inner_iterations; j++) {
91*4882a593Smuzhiyun 				for (bit = 0; bit < num_bits; bit++) {
92*4882a593Smuzhiyun 					if (asm_test_bit(bit, to_test))
93*4882a593Smuzhiyun 						workload(bit);
94*4882a593Smuzhiyun 				}
95*4882a593Smuzhiyun 			}
96*4882a593Smuzhiyun 			gettimeofday(&end, NULL);
97*4882a593Smuzhiyun 			assert(old + (inner_iterations * set_bits) == accumulator);
98*4882a593Smuzhiyun 			timersub(&end, &start, &diff);
99*4882a593Smuzhiyun 			runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
100*4882a593Smuzhiyun 			update_stats(&tb_time_stats, runtime_us);
101*4882a593Smuzhiyun 		}
102*4882a593Smuzhiyun 
103*4882a593Smuzhiyun 		printf("%d operations %d bits set of %d bits\n",
104*4882a593Smuzhiyun 			inner_iterations, set_bits, num_bits);
105*4882a593Smuzhiyun 		time_average = avg_stats(&fb_time_stats);
106*4882a593Smuzhiyun 		time_stddev = stddev_stats(&fb_time_stats);
107*4882a593Smuzhiyun 		printf("  Average for_each_set_bit took: %.3f usec (+- %.3f usec)\n",
108*4882a593Smuzhiyun 			time_average, time_stddev);
109*4882a593Smuzhiyun 		time_average = avg_stats(&tb_time_stats);
110*4882a593Smuzhiyun 		time_stddev = stddev_stats(&tb_time_stats);
111*4882a593Smuzhiyun 		printf("  Average test_bit loop took:    %.3f usec (+- %.3f usec)\n",
112*4882a593Smuzhiyun 			time_average, time_stddev);
113*4882a593Smuzhiyun 
114*4882a593Smuzhiyun 		if (use_of_val == accumulator)  /* Try to avoid compiler tricks. */
115*4882a593Smuzhiyun 			printf("\n");
116*4882a593Smuzhiyun 	}
117*4882a593Smuzhiyun 	bitmap_free(to_test);
118*4882a593Smuzhiyun 	return 0;
119*4882a593Smuzhiyun }
120*4882a593Smuzhiyun 
bench_mem_find_bit(int argc,const char ** argv)121*4882a593Smuzhiyun int bench_mem_find_bit(int argc, const char **argv)
122*4882a593Smuzhiyun {
123*4882a593Smuzhiyun 	int err = 0, i;
124*4882a593Smuzhiyun 
125*4882a593Smuzhiyun 	argc = parse_options(argc, argv, options, bench_usage, 0);
126*4882a593Smuzhiyun 	if (argc) {
127*4882a593Smuzhiyun 		usage_with_options(bench_usage, options);
128*4882a593Smuzhiyun 		exit(EXIT_FAILURE);
129*4882a593Smuzhiyun 	}
130*4882a593Smuzhiyun 
131*4882a593Smuzhiyun 	for (i = 1; i <= 2048; i <<= 1)
132*4882a593Smuzhiyun 		do_for_each_set_bit(i);
133*4882a593Smuzhiyun 
134*4882a593Smuzhiyun 	return err;
135*4882a593Smuzhiyun }
136