1*4882a593Smuzhiyunlibperf-sampling(7) 2*4882a593Smuzhiyun=================== 3*4882a593Smuzhiyun 4*4882a593SmuzhiyunNAME 5*4882a593Smuzhiyun---- 6*4882a593Smuzhiyunlibperf-sampling - sampling interface 7*4882a593Smuzhiyun 8*4882a593Smuzhiyun 9*4882a593SmuzhiyunDESCRIPTION 10*4882a593Smuzhiyun----------- 11*4882a593SmuzhiyunThe sampling interface provides an API to sample specific perf events and read the recorded samples. 12*4882a593Smuzhiyun 13*4882a593SmuzhiyunThe following text tries to explain sampling using the `sampling.c` example. 14*4882a593Smuzhiyun 15*4882a593SmuzhiyunIt is by no means a complete guide to sampling, but it shows the basic libperf API for sampling. 16*4882a593Smuzhiyun 17*4882a593SmuzhiyunThe `sampling.c` example comes with the libperf package and can be compiled and run like this: 18*4882a593Smuzhiyun 19*4882a593Smuzhiyun[source,bash] 20*4882a593Smuzhiyun-- 21*4882a593Smuzhiyun$ gcc -o sampling sampling.c -lperf 22*4882a593Smuzhiyun$ sudo ./sampling 23*4882a593Smuzhiyuncpu 0, pid 0, tid 0, ip ffffffffad06c4e6, period 1 24*4882a593Smuzhiyuncpu 0, pid 4465, tid 4469, ip ffffffffad118748, period 18322959 25*4882a593Smuzhiyuncpu 0, pid 0, tid 0, ip ffffffffad115722, period 33544846 26*4882a593Smuzhiyuncpu 0, pid 4465, tid 4470, ip 7f84fe0cdad6, period 23687474 27*4882a593Smuzhiyuncpu 0, pid 0, tid 0, ip ffffffffad9e0349, period 34255790 28*4882a593Smuzhiyuncpu 0, pid 4465, tid 4469, ip ffffffffad136581, period 38664069 29*4882a593Smuzhiyuncpu 0, pid 0, tid 0, ip ffffffffad9e55e2, period 21922384 30*4882a593Smuzhiyuncpu 0, pid 4465, tid 4470, ip 7f84fe0ebebf, period 17655175 31*4882a593Smuzhiyun... 32*4882a593Smuzhiyun-- 33*4882a593Smuzhiyun 34*4882a593SmuzhiyunIt requires root access, because it uses the hardware cycles event. 
35*4882a593Smuzhiyun 36*4882a593SmuzhiyunThe `sampling.c` example profiles/samples all CPUs with hardware cycles, in a 37*4882a593Smuzhiyunnutshell it: 38*4882a593Smuzhiyun 39*4882a593Smuzhiyun- creates events 40*4882a593Smuzhiyun- adds them to the event list 41*4882a593Smuzhiyun- opens and enables events through the event list 42*4882a593Smuzhiyun- sleeps for 3 seconds 43*4882a593Smuzhiyun- disables events 44*4882a593Smuzhiyun- reads and displays recorded samples 45*4882a593Smuzhiyun- destroys the event list 46*4882a593Smuzhiyun 47*4882a593SmuzhiyunThe first thing you need to do before using libperf is to call the init function: 48*4882a593Smuzhiyun 49*4882a593Smuzhiyun[source,c] 50*4882a593Smuzhiyun-- 51*4882a593Smuzhiyun 12 static int libperf_print(enum libperf_print_level level, 52*4882a593Smuzhiyun 13 const char *fmt, va_list ap) 53*4882a593Smuzhiyun 14 { 54*4882a593Smuzhiyun 15 return vfprintf(stderr, fmt, ap); 55*4882a593Smuzhiyun 16 } 56*4882a593Smuzhiyun 57*4882a593Smuzhiyun 23 int main(int argc, char **argv) 58*4882a593Smuzhiyun 24 { 59*4882a593Smuzhiyun ... 60*4882a593Smuzhiyun 40 libperf_init(libperf_print); 61*4882a593Smuzhiyun-- 62*4882a593Smuzhiyun 63*4882a593SmuzhiyunIt sets up the library and sets the function used for debug output from the library. 
64*4882a593Smuzhiyun 65*4882a593SmuzhiyunThe `libperf_print` callback will receive any message with its debug level, 66*4882a593Smuzhiyundefined as: 67*4882a593Smuzhiyun 68*4882a593Smuzhiyun[source,c] 69*4882a593Smuzhiyun-- 70*4882a593Smuzhiyunenum libperf_print_level { 71*4882a593Smuzhiyun LIBPERF_ERR, 72*4882a593Smuzhiyun LIBPERF_WARN, 73*4882a593Smuzhiyun LIBPERF_INFO, 74*4882a593Smuzhiyun LIBPERF_DEBUG, 75*4882a593Smuzhiyun LIBPERF_DEBUG2, 76*4882a593Smuzhiyun LIBPERF_DEBUG3, 77*4882a593Smuzhiyun}; 78*4882a593Smuzhiyun-- 79*4882a593Smuzhiyun 80*4882a593SmuzhiyunOnce the setup is complete, we start by defining the cycles event using the `struct perf_event_attr`: 81*4882a593Smuzhiyun 82*4882a593Smuzhiyun[source,c] 83*4882a593Smuzhiyun-- 84*4882a593Smuzhiyun 29 struct perf_event_attr attr = { 85*4882a593Smuzhiyun 30 .type = PERF_TYPE_HARDWARE, 86*4882a593Smuzhiyun 31 .config = PERF_COUNT_HW_CPU_CYCLES, 87*4882a593Smuzhiyun 32 .disabled = 1, 88*4882a593Smuzhiyun 33 .freq = 1, 89*4882a593Smuzhiyun 34 .sample_freq = 10, 90*4882a593Smuzhiyun 35 .sample_type = PERF_SAMPLE_IP|PERF_SAMPLE_TID|PERF_SAMPLE_CPU|PERF_SAMPLE_PERIOD, 91*4882a593Smuzhiyun 36 }; 92*4882a593Smuzhiyun-- 93*4882a593Smuzhiyun 94*4882a593SmuzhiyunThe next step is to prepare the CPUs map. 
95*4882a593Smuzhiyun 96*4882a593SmuzhiyunIn this case we will monitor all the available CPUs: 97*4882a593Smuzhiyun 98*4882a593Smuzhiyun[source,c] 99*4882a593Smuzhiyun-- 100*4882a593Smuzhiyun 42 cpus = perf_cpu_map__new(NULL); 101*4882a593Smuzhiyun 43 if (!cpus) { 102*4882a593Smuzhiyun 44 fprintf(stderr, "failed to create cpus\n"); 103*4882a593Smuzhiyun 45 return -1; 104*4882a593Smuzhiyun 46 } 105*4882a593Smuzhiyun-- 106*4882a593Smuzhiyun 107*4882a593SmuzhiyunNow we create libperf's event list, which will serve as holder for the cycles event: 108*4882a593Smuzhiyun 109*4882a593Smuzhiyun[source,c] 110*4882a593Smuzhiyun-- 111*4882a593Smuzhiyun 48 evlist = perf_evlist__new(); 112*4882a593Smuzhiyun 49 if (!evlist) { 113*4882a593Smuzhiyun 50 fprintf(stderr, "failed to create evlist\n"); 114*4882a593Smuzhiyun 51 goto out_cpus; 115*4882a593Smuzhiyun 52 } 116*4882a593Smuzhiyun-- 117*4882a593Smuzhiyun 118*4882a593SmuzhiyunWe create libperf's event for the cycles attribute we defined earlier and add it to the list: 119*4882a593Smuzhiyun 120*4882a593Smuzhiyun[source,c] 121*4882a593Smuzhiyun-- 122*4882a593Smuzhiyun 54 evsel = perf_evsel__new(&attr); 123*4882a593Smuzhiyun 55 if (!evsel) { 124*4882a593Smuzhiyun 56 fprintf(stderr, "failed to create cycles\n"); 125*4882a593Smuzhiyun 57 goto out_cpus; 126*4882a593Smuzhiyun 58 } 127*4882a593Smuzhiyun 59 128*4882a593Smuzhiyun 60 perf_evlist__add(evlist, evsel); 129*4882a593Smuzhiyun-- 130*4882a593Smuzhiyun 131*4882a593SmuzhiyunConfigure event list with the cpus map and open event: 132*4882a593Smuzhiyun 133*4882a593Smuzhiyun[source,c] 134*4882a593Smuzhiyun-- 135*4882a593Smuzhiyun 62 perf_evlist__set_maps(evlist, cpus, NULL); 136*4882a593Smuzhiyun 63 137*4882a593Smuzhiyun 64 err = perf_evlist__open(evlist); 138*4882a593Smuzhiyun 65 if (err) { 139*4882a593Smuzhiyun 66 fprintf(stderr, "failed to open evlist\n"); 140*4882a593Smuzhiyun 67 goto out_evlist; 141*4882a593Smuzhiyun 68 } 142*4882a593Smuzhiyun-- 143*4882a593Smuzhiyun 
144*4882a593SmuzhiyunOnce the events list is open, we can create memory maps AKA perf ring buffers: 145*4882a593Smuzhiyun 146*4882a593Smuzhiyun[source,c] 147*4882a593Smuzhiyun-- 148*4882a593Smuzhiyun 70 err = perf_evlist__mmap(evlist, 4); 149*4882a593Smuzhiyun 71 if (err) { 150*4882a593Smuzhiyun 72 fprintf(stderr, "failed to mmap evlist\n"); 151*4882a593Smuzhiyun 73 goto out_evlist; 152*4882a593Smuzhiyun 74 } 153*4882a593Smuzhiyun-- 154*4882a593Smuzhiyun 155*4882a593SmuzhiyunThe event is created as disabled (note the `disabled = 1` assignment above), 156*4882a593Smuzhiyunso we need to enable the events list explicitly. 157*4882a593Smuzhiyun 158*4882a593SmuzhiyunFrom this moment the cycles event is sampling. 159*4882a593Smuzhiyun 160*4882a593SmuzhiyunWe will sleep for 3 seconds while the ring buffers get data from all CPUs, then we disable the events list. 161*4882a593Smuzhiyun 162*4882a593Smuzhiyun[source,c] 163*4882a593Smuzhiyun-- 164*4882a593Smuzhiyun 76 perf_evlist__enable(evlist); 165*4882a593Smuzhiyun 77 sleep(3); 166*4882a593Smuzhiyun 78 perf_evlist__disable(evlist); 167*4882a593Smuzhiyun-- 168*4882a593Smuzhiyun 169*4882a593SmuzhiyunFollowing code walks through the ring buffers and reads stored events/samples: 170*4882a593Smuzhiyun 171*4882a593Smuzhiyun[source,c] 172*4882a593Smuzhiyun-- 173*4882a593Smuzhiyun 80 perf_evlist__for_each_mmap(evlist, map, false) { 174*4882a593Smuzhiyun 81 if (perf_mmap__read_init(map) < 0) 175*4882a593Smuzhiyun 82 continue; 176*4882a593Smuzhiyun 83 177*4882a593Smuzhiyun 84 while ((event = perf_mmap__read_event(map)) != NULL) { 178*4882a593Smuzhiyun 179*4882a593Smuzhiyun /* process event */ 180*4882a593Smuzhiyun 181*4882a593Smuzhiyun108 perf_mmap__consume(map); 182*4882a593Smuzhiyun109 } 183*4882a593Smuzhiyun110 perf_mmap__read_done(map); 184*4882a593Smuzhiyun111 } 185*4882a593Smuzhiyun 186*4882a593Smuzhiyun-- 187*4882a593Smuzhiyun 188*4882a593SmuzhiyunEach sample needs to get parsed: 189*4882a593Smuzhiyun 
190*4882a593Smuzhiyun[source,c] 191*4882a593Smuzhiyun-- 192*4882a593Smuzhiyun 85 int cpu, pid, tid; 193*4882a593Smuzhiyun 86 __u64 ip, period, *array; 194*4882a593Smuzhiyun 87 union u64_swap u; 195*4882a593Smuzhiyun 88 196*4882a593Smuzhiyun 89 array = event->sample.array; 197*4882a593Smuzhiyun 90 198*4882a593Smuzhiyun 91 ip = *array; 199*4882a593Smuzhiyun 92 array++; 200*4882a593Smuzhiyun 93 201*4882a593Smuzhiyun 94 u.val64 = *array; 202*4882a593Smuzhiyun 95 pid = u.val32[0]; 203*4882a593Smuzhiyun 96 tid = u.val32[1]; 204*4882a593Smuzhiyun 97 array++; 205*4882a593Smuzhiyun 98 206*4882a593Smuzhiyun 99 u.val64 = *array; 207*4882a593Smuzhiyun100 cpu = u.val32[0]; 208*4882a593Smuzhiyun101 array++; 209*4882a593Smuzhiyun102 210*4882a593Smuzhiyun103 period = *array; 211*4882a593Smuzhiyun104 212*4882a593Smuzhiyun105 fprintf(stdout, "cpu %3d, pid %6d, tid %6d, ip %20llx, period %20llu\n", 213*4882a593Smuzhiyun106 cpu, pid, tid, ip, period); 214*4882a593Smuzhiyun-- 215*4882a593Smuzhiyun 216*4882a593SmuzhiyunAnd finally, cleanup. 217*4882a593Smuzhiyun 218*4882a593SmuzhiyunWe delete the whole events list (which holds the cycles event) and release the cpus map: 219*4882a593Smuzhiyun 220*4882a593Smuzhiyun[source,c] 221*4882a593Smuzhiyun-- 222*4882a593Smuzhiyun113 out_evlist: 223*4882a593Smuzhiyun114 perf_evlist__delete(evlist); 224*4882a593Smuzhiyun115 out_cpus: 225*4882a593Smuzhiyun116 perf_cpu_map__put(cpus); 226*4882a593Smuzhiyun117 return err; 227*4882a593Smuzhiyun118 } 228*4882a593Smuzhiyun-- 229*4882a593Smuzhiyun 230*4882a593SmuzhiyunREPORTING BUGS 231*4882a593Smuzhiyun-------------- 232*4882a593SmuzhiyunReport bugs to <linux-perf-users@vger.kernel.org>. 
233*4882a593Smuzhiyun 234*4882a593SmuzhiyunLICENSE 235*4882a593Smuzhiyun------- 236*4882a593Smuzhiyunlibperf is Free Software licensed under the GNU LGPL 2.1 237*4882a593Smuzhiyun 238*4882a593SmuzhiyunRESOURCES 239*4882a593Smuzhiyun--------- 240*4882a593Smuzhiyunhttps://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 241*4882a593Smuzhiyun 242*4882a593SmuzhiyunSEE ALSO 243*4882a593Smuzhiyun-------- 244*4882a593Smuzhiyunlibperf(3), libperf-counting(7) 245