xref: /OK3568_Linux_fs/kernel/tools/lib/perf/Documentation/libperf-sampling.txt (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyunlibperf-sampling(7)
2*4882a593Smuzhiyun===================
3*4882a593Smuzhiyun
4*4882a593SmuzhiyunNAME
5*4882a593Smuzhiyun----
6*4882a593Smuzhiyunlibperf-sampling - sampling interface
7*4882a593Smuzhiyun
8*4882a593Smuzhiyun
9*4882a593SmuzhiyunDESCRIPTION
10*4882a593Smuzhiyun-----------
11*4882a593SmuzhiyunThe sampling interface provides an API to sample specific perf events and read the recorded samples.
12*4882a593Smuzhiyun
13*4882a593SmuzhiyunThe following text tries to explain sampling using the `sampling.c` example.
14*4882a593Smuzhiyun
15*4882a593SmuzhiyunIt is by no means a complete guide to sampling, but it shows the basic libperf API for sampling.
16*4882a593Smuzhiyun
17*4882a593SmuzhiyunThe `sampling.c` example comes with the libperf package and can be compiled and run like:
18*4882a593Smuzhiyun
19*4882a593Smuzhiyun[source,bash]
20*4882a593Smuzhiyun--
21*4882a593Smuzhiyun$ gcc -o sampling sampling.c -lperf
22*4882a593Smuzhiyun$ sudo ./sampling
23*4882a593Smuzhiyuncpu   0, pid      0, tid      0, ip     ffffffffad06c4e6, period                    1
24*4882a593Smuzhiyuncpu   0, pid   4465, tid   4469, ip     ffffffffad118748, period             18322959
25*4882a593Smuzhiyuncpu   0, pid      0, tid      0, ip     ffffffffad115722, period             33544846
26*4882a593Smuzhiyuncpu   0, pid   4465, tid   4470, ip         7f84fe0cdad6, period             23687474
27*4882a593Smuzhiyuncpu   0, pid      0, tid      0, ip     ffffffffad9e0349, period             34255790
28*4882a593Smuzhiyuncpu   0, pid   4465, tid   4469, ip     ffffffffad136581, period             38664069
29*4882a593Smuzhiyuncpu   0, pid      0, tid      0, ip     ffffffffad9e55e2, period             21922384
30*4882a593Smuzhiyuncpu   0, pid   4465, tid   4470, ip         7f84fe0ebebf, period             17655175
31*4882a593Smuzhiyun...
32*4882a593Smuzhiyun--
33*4882a593Smuzhiyun
34*4882a593SmuzhiyunIt requires root access, because it uses the hardware cycles event.
35*4882a593Smuzhiyun
36*4882a593SmuzhiyunThe `sampling.c` example profiles/samples all CPUs with hardware cycles. In a
37*4882a593Smuzhiyunnutshell, it:
38*4882a593Smuzhiyun
39*4882a593Smuzhiyun- creates events
40*4882a593Smuzhiyun- adds them to the event list
41*4882a593Smuzhiyun- opens and enables events through the event list
42*4882a593Smuzhiyun- sleeps for 3 seconds
43*4882a593Smuzhiyun- disables events
44*4882a593Smuzhiyun- reads and displays recorded samples
45*4882a593Smuzhiyun- destroys the event list
46*4882a593Smuzhiyun
47*4882a593SmuzhiyunThe first thing you need to do before using libperf is to call the init function:
48*4882a593Smuzhiyun
49*4882a593Smuzhiyun[source,c]
50*4882a593Smuzhiyun--
51*4882a593Smuzhiyun 12 static int libperf_print(enum libperf_print_level level,
52*4882a593Smuzhiyun 13                          const char *fmt, va_list ap)
53*4882a593Smuzhiyun 14 {
54*4882a593Smuzhiyun 15         return vfprintf(stderr, fmt, ap);
55*4882a593Smuzhiyun 16 }
56*4882a593Smuzhiyun
57*4882a593Smuzhiyun 23 int main(int argc, char **argv)
58*4882a593Smuzhiyun 24 {
59*4882a593Smuzhiyun ...
60*4882a593Smuzhiyun 40         libperf_init(libperf_print);
61*4882a593Smuzhiyun--
62*4882a593Smuzhiyun
63*4882a593SmuzhiyunIt sets up the library and sets the function used for debug output from the library.
64*4882a593Smuzhiyun
65*4882a593SmuzhiyunThe `libperf_print` callback will receive any message with its debug level,
66*4882a593Smuzhiyundefined as:
67*4882a593Smuzhiyun
68*4882a593Smuzhiyun[source,c]
69*4882a593Smuzhiyun--
70*4882a593Smuzhiyunenum libperf_print_level {
71*4882a593Smuzhiyun        LIBPERF_ERR,
72*4882a593Smuzhiyun        LIBPERF_WARN,
73*4882a593Smuzhiyun        LIBPERF_INFO,
74*4882a593Smuzhiyun        LIBPERF_DEBUG,
75*4882a593Smuzhiyun        LIBPERF_DEBUG2,
76*4882a593Smuzhiyun        LIBPERF_DEBUG3,
77*4882a593Smuzhiyun};
78*4882a593Smuzhiyun--
79*4882a593Smuzhiyun
80*4882a593SmuzhiyunOnce the setup is complete, we start by defining the cycles event using the `struct perf_event_attr`:
81*4882a593Smuzhiyun
82*4882a593Smuzhiyun[source,c]
83*4882a593Smuzhiyun--
84*4882a593Smuzhiyun 29         struct perf_event_attr attr = {
85*4882a593Smuzhiyun 30                 .type        = PERF_TYPE_HARDWARE,
86*4882a593Smuzhiyun 31                 .config      = PERF_COUNT_HW_CPU_CYCLES,
87*4882a593Smuzhiyun 32                 .disabled    = 1,
88*4882a593Smuzhiyun 33                 .freq        = 1,
89*4882a593Smuzhiyun 34                 .sample_freq = 10,
90*4882a593Smuzhiyun 35                 .sample_type = PERF_SAMPLE_IP|PERF_SAMPLE_TID|PERF_SAMPLE_CPU|PERF_SAMPLE_PERIOD,
91*4882a593Smuzhiyun 36         };
92*4882a593Smuzhiyun--
93*4882a593Smuzhiyun
94*4882a593SmuzhiyunThe next step is to prepare the CPUs map.
95*4882a593Smuzhiyun
96*4882a593SmuzhiyunIn this case we will monitor all the available CPUs:
97*4882a593Smuzhiyun
98*4882a593Smuzhiyun[source,c]
99*4882a593Smuzhiyun--
100*4882a593Smuzhiyun 42         cpus = perf_cpu_map__new(NULL);
101*4882a593Smuzhiyun 43         if (!cpus) {
102*4882a593Smuzhiyun 44                 fprintf(stderr, "failed to create cpus\n");
103*4882a593Smuzhiyun 45                 return -1;
104*4882a593Smuzhiyun 46         }
105*4882a593Smuzhiyun--
106*4882a593Smuzhiyun
107*4882a593SmuzhiyunNow we create libperf's event list, which will serve as a holder for the cycles event:
108*4882a593Smuzhiyun
109*4882a593Smuzhiyun[source,c]
110*4882a593Smuzhiyun--
111*4882a593Smuzhiyun 48         evlist = perf_evlist__new();
112*4882a593Smuzhiyun 49         if (!evlist) {
113*4882a593Smuzhiyun 50                 fprintf(stderr, "failed to create evlist\n");
114*4882a593Smuzhiyun 51                 goto out_cpus;
115*4882a593Smuzhiyun 52         }
116*4882a593Smuzhiyun--
117*4882a593Smuzhiyun
118*4882a593SmuzhiyunWe create libperf's event for the cycles attribute we defined earlier and add it to the list:
119*4882a593Smuzhiyun
120*4882a593Smuzhiyun[source,c]
121*4882a593Smuzhiyun--
122*4882a593Smuzhiyun 54         evsel = perf_evsel__new(&attr);
123*4882a593Smuzhiyun 55         if (!evsel) {
124*4882a593Smuzhiyun 56                 fprintf(stderr, "failed to create cycles\n");
125*4882a593Smuzhiyun 57                 goto out_cpus;
126*4882a593Smuzhiyun 58         }
127*4882a593Smuzhiyun 59
128*4882a593Smuzhiyun 60         perf_evlist__add(evlist, evsel);
129*4882a593Smuzhiyun--
130*4882a593Smuzhiyun
131*4882a593SmuzhiyunConfigure the event list with the cpus map and open the event:
132*4882a593Smuzhiyun
133*4882a593Smuzhiyun[source,c]
134*4882a593Smuzhiyun--
135*4882a593Smuzhiyun 62         perf_evlist__set_maps(evlist, cpus, NULL);
136*4882a593Smuzhiyun 63
137*4882a593Smuzhiyun 64         err = perf_evlist__open(evlist);
138*4882a593Smuzhiyun 65         if (err) {
139*4882a593Smuzhiyun 66                 fprintf(stderr, "failed to open evlist\n");
140*4882a593Smuzhiyun 67                 goto out_evlist;
141*4882a593Smuzhiyun 68         }
142*4882a593Smuzhiyun--
143*4882a593Smuzhiyun
144*4882a593SmuzhiyunOnce the events list is open, we can create memory maps AKA perf ring buffers:
145*4882a593Smuzhiyun
146*4882a593Smuzhiyun[source,c]
147*4882a593Smuzhiyun--
148*4882a593Smuzhiyun 70         err = perf_evlist__mmap(evlist, 4);
149*4882a593Smuzhiyun 71         if (err) {
150*4882a593Smuzhiyun 72                 fprintf(stderr, "failed to mmap evlist\n");
151*4882a593Smuzhiyun 73                 goto out_evlist;
152*4882a593Smuzhiyun 74         }
153*4882a593Smuzhiyun--
154*4882a593Smuzhiyun
155*4882a593SmuzhiyunThe event is created as disabled (note the `disabled = 1` assignment above),
156*4882a593Smuzhiyunso we need to enable the events list explicitly.
157*4882a593Smuzhiyun
158*4882a593SmuzhiyunFrom this moment the cycles event is sampling.
159*4882a593Smuzhiyun
160*4882a593SmuzhiyunWe will sleep for 3 seconds while the ring buffers get data from all CPUs, then we disable the events list.
161*4882a593Smuzhiyun
162*4882a593Smuzhiyun[source,c]
163*4882a593Smuzhiyun--
164*4882a593Smuzhiyun 76         perf_evlist__enable(evlist);
165*4882a593Smuzhiyun 77         sleep(3);
166*4882a593Smuzhiyun 78         perf_evlist__disable(evlist);
167*4882a593Smuzhiyun--
168*4882a593Smuzhiyun
169*4882a593SmuzhiyunThe following code walks through the ring buffers and reads stored events/samples:
170*4882a593Smuzhiyun
171*4882a593Smuzhiyun[source,c]
172*4882a593Smuzhiyun--
173*4882a593Smuzhiyun 80         perf_evlist__for_each_mmap(evlist, map, false) {
174*4882a593Smuzhiyun 81                 if (perf_mmap__read_init(map) < 0)
175*4882a593Smuzhiyun 82                         continue;
176*4882a593Smuzhiyun 83
177*4882a593Smuzhiyun 84                 while ((event = perf_mmap__read_event(map)) != NULL) {
178*4882a593Smuzhiyun
179*4882a593Smuzhiyun                            /* process event */
180*4882a593Smuzhiyun
181*4882a593Smuzhiyun108                         perf_mmap__consume(map);
182*4882a593Smuzhiyun109                 }
183*4882a593Smuzhiyun110                 perf_mmap__read_done(map);
184*4882a593Smuzhiyun111         }
185*4882a593Smuzhiyun
186*4882a593Smuzhiyun--
187*4882a593Smuzhiyun
188*4882a593SmuzhiyunEach sample needs to be parsed:
189*4882a593Smuzhiyun
190*4882a593Smuzhiyun[source,c]
191*4882a593Smuzhiyun--
192*4882a593Smuzhiyun 85                         int cpu, pid, tid;
193*4882a593Smuzhiyun 86                         __u64 ip, period, *array;
194*4882a593Smuzhiyun 87                         union u64_swap u;
195*4882a593Smuzhiyun 88
196*4882a593Smuzhiyun 89                         array = event->sample.array;
197*4882a593Smuzhiyun 90
198*4882a593Smuzhiyun 91                         ip = *array;
199*4882a593Smuzhiyun 92                         array++;
200*4882a593Smuzhiyun 93
201*4882a593Smuzhiyun 94                         u.val64 = *array;
202*4882a593Smuzhiyun 95                         pid = u.val32[0];
203*4882a593Smuzhiyun 96                         tid = u.val32[1];
204*4882a593Smuzhiyun 97                         array++;
205*4882a593Smuzhiyun 98
206*4882a593Smuzhiyun 99                         u.val64 = *array;
207*4882a593Smuzhiyun100                         cpu = u.val32[0];
208*4882a593Smuzhiyun101                         array++;
209*4882a593Smuzhiyun102
210*4882a593Smuzhiyun103                         period = *array;
211*4882a593Smuzhiyun104
212*4882a593Smuzhiyun105                         fprintf(stdout, "cpu %3d, pid %6d, tid %6d, ip %20llx, period %20llu\n",
213*4882a593Smuzhiyun106                                 cpu, pid, tid, ip, period);
214*4882a593Smuzhiyun--
215*4882a593Smuzhiyun
216*4882a593SmuzhiyunAnd finally cleanup.
217*4882a593Smuzhiyun
218*4882a593SmuzhiyunWe close the whole events list (which holds the cycles event) and remove it together with the cpus map:
219*4882a593Smuzhiyun
220*4882a593Smuzhiyun[source,c]
221*4882a593Smuzhiyun--
222*4882a593Smuzhiyun113 out_evlist:
223*4882a593Smuzhiyun114         perf_evlist__delete(evlist);
224*4882a593Smuzhiyun115 out_cpus:
225*4882a593Smuzhiyun116         perf_cpu_map__put(cpus);
226*4882a593Smuzhiyun117         return err;
227*4882a593Smuzhiyun118 }
228*4882a593Smuzhiyun--
229*4882a593Smuzhiyun
230*4882a593SmuzhiyunREPORTING BUGS
231*4882a593Smuzhiyun--------------
232*4882a593SmuzhiyunReport bugs to <linux-perf-users@vger.kernel.org>.
233*4882a593Smuzhiyun
234*4882a593SmuzhiyunLICENSE
235*4882a593Smuzhiyun-------
236*4882a593Smuzhiyunlibperf is Free Software licensed under the GNU LGPL 2.1
237*4882a593Smuzhiyun
238*4882a593SmuzhiyunRESOURCES
239*4882a593Smuzhiyun---------
240*4882a593Smuzhiyunhttps://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
241*4882a593Smuzhiyun
242*4882a593SmuzhiyunSEE ALSO
243*4882a593Smuzhiyun--------
244*4882a593Smuzhiyunlibperf(3), libperf-counting(7)
245