xref: /OK3568_Linux_fs/kernel/tools/perf/Documentation/perf-top.txt (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyunperf-top(1)
2*4882a593Smuzhiyun===========
3*4882a593Smuzhiyun
4*4882a593SmuzhiyunNAME
5*4882a593Smuzhiyun----
6*4882a593Smuzhiyunperf-top - System profiling tool.
7*4882a593Smuzhiyun
8*4882a593SmuzhiyunSYNOPSIS
9*4882a593Smuzhiyun--------
10*4882a593Smuzhiyun[verse]
11*4882a593Smuzhiyun'perf top' [-e <EVENT> | --event=EVENT] [<options>]
12*4882a593Smuzhiyun
13*4882a593SmuzhiyunDESCRIPTION
14*4882a593Smuzhiyun-----------
15*4882a593SmuzhiyunThis command generates and displays a performance counter profile in real time.
16*4882a593Smuzhiyun
17*4882a593Smuzhiyun
18*4882a593SmuzhiyunOPTIONS
19*4882a593Smuzhiyun-------
20*4882a593Smuzhiyun-a::
21*4882a593Smuzhiyun--all-cpus::
22*4882a593Smuzhiyun        System-wide collection.  (default)
23*4882a593Smuzhiyun
24*4882a593Smuzhiyun-c <count>::
25*4882a593Smuzhiyun--count=<count>::
26*4882a593Smuzhiyun	Event period to sample.
27*4882a593Smuzhiyun
28*4882a593Smuzhiyun-C <cpu-list>::
29*4882a593Smuzhiyun--cpu=<cpu>::
30*4882a593SmuzhiyunMonitor only on the list of CPUs provided. Multiple CPUs can be provided as a
31*4882a593Smuzhiyuncomma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
32*4882a593SmuzhiyunDefault is to monitor all CPUs.
33*4882a593Smuzhiyun
34*4882a593Smuzhiyun-d <seconds>::
35*4882a593Smuzhiyun--delay=<seconds>::
36*4882a593Smuzhiyun	Number of seconds to delay between refreshes.
37*4882a593Smuzhiyun
38*4882a593Smuzhiyun-e <event>::
39*4882a593Smuzhiyun--event=<event>::
40*4882a593Smuzhiyun	Select the PMU event. Selection can be a symbolic event name
41*4882a593Smuzhiyun	(use 'perf list' to list all events) or a raw PMU
42*4882a593Smuzhiyun	event (eventsel+umask) in the form of rNNN where NNN is a
43*4882a593Smuzhiyun	hexadecimal event descriptor.
44*4882a593Smuzhiyun
45*4882a593Smuzhiyun-E <entries>::
46*4882a593Smuzhiyun--entries=<entries>::
47*4882a593Smuzhiyun	Display this many functions.
48*4882a593Smuzhiyun
49*4882a593Smuzhiyun-f <count>::
50*4882a593Smuzhiyun--count-filter=<count>::
51*4882a593Smuzhiyun	Only display functions with more events than this.
52*4882a593Smuzhiyun
53*4882a593Smuzhiyun--group::
54*4882a593Smuzhiyun        Put the counters into a counter group.
55*4882a593Smuzhiyun
56*4882a593Smuzhiyun--group-sort-idx::
57*4882a593Smuzhiyun	Sort the output by the event at the index n in group. If n is invalid,
58*4882a593Smuzhiyun	sort by the first event. It can support multiple groups with different
59*4882a593Smuzhiyun	amount of events. WARNING: This should be used on grouped events.
60*4882a593Smuzhiyun
61*4882a593Smuzhiyun-F <freq>::
62*4882a593Smuzhiyun--freq=<freq>::
63*4882a593Smuzhiyun	Profile at this frequency. Use 'max' to use the current maximum
64*4882a593Smuzhiyun	allowed frequency, i.e. the value in the kernel.perf_event_max_sample_rate
65*4882a593Smuzhiyun	sysctl.
66*4882a593Smuzhiyun
67*4882a593Smuzhiyun-i::
68*4882a593Smuzhiyun--no-inherit::
69*4882a593Smuzhiyun	Child tasks do not inherit counters.
70*4882a593Smuzhiyun
71*4882a593Smuzhiyun-k <path>::
72*4882a593Smuzhiyun--vmlinux=<path>::
73*4882a593Smuzhiyun	Path to vmlinux.  Required for annotation functionality.
74*4882a593Smuzhiyun
75*4882a593Smuzhiyun--ignore-vmlinux::
76*4882a593Smuzhiyun	Ignore vmlinux files.
77*4882a593Smuzhiyun
78*4882a593Smuzhiyun--kallsyms=<file>::
79*4882a593Smuzhiyun	kallsyms pathname
80*4882a593Smuzhiyun
81*4882a593Smuzhiyun-m <pages>::
82*4882a593Smuzhiyun--mmap-pages=<pages>::
83*4882a593Smuzhiyun	Number of mmap data pages (must be a power of two) or size
84*4882a593Smuzhiyun	specification with appended unit character - B/K/M/G. The
85*4882a593Smuzhiyun	size is rounded up to the nearest power-of-two number of pages.
86*4882a593Smuzhiyun
87*4882a593Smuzhiyun-p <pid>::
88*4882a593Smuzhiyun--pid=<pid>::
89*4882a593Smuzhiyun	Profile events on existing Process ID (comma separated list).
90*4882a593Smuzhiyun
91*4882a593Smuzhiyun-t <tid>::
92*4882a593Smuzhiyun--tid=<tid>::
93*4882a593Smuzhiyun        Profile events on existing thread ID (comma separated list).
94*4882a593Smuzhiyun
95*4882a593Smuzhiyun-u::
96*4882a593Smuzhiyun--uid=::
97*4882a593Smuzhiyun        Record events in threads owned by uid. Name or number.
98*4882a593Smuzhiyun
99*4882a593Smuzhiyun-r <priority>::
100*4882a593Smuzhiyun--realtime=<priority>::
101*4882a593Smuzhiyun	Collect data with this RT SCHED_FIFO priority.
102*4882a593Smuzhiyun
103*4882a593Smuzhiyun--sym-annotate=<symbol>::
104*4882a593Smuzhiyun        Annotate this symbol.
105*4882a593Smuzhiyun
106*4882a593Smuzhiyun-K::
107*4882a593Smuzhiyun--hide_kernel_symbols::
108*4882a593Smuzhiyun        Hide kernel symbols.
109*4882a593Smuzhiyun
110*4882a593Smuzhiyun-U::
111*4882a593Smuzhiyun--hide_user_symbols::
112*4882a593Smuzhiyun        Hide user symbols.
113*4882a593Smuzhiyun
114*4882a593Smuzhiyun--demangle-kernel::
115*4882a593Smuzhiyun        Demangle kernel symbols.
116*4882a593Smuzhiyun
117*4882a593Smuzhiyun-D::
118*4882a593Smuzhiyun--dump-symtab::
119*4882a593Smuzhiyun        Dump the symbol table used for profiling.
120*4882a593Smuzhiyun
121*4882a593Smuzhiyun-v::
122*4882a593Smuzhiyun--verbose::
123*4882a593Smuzhiyun	Be more verbose (show counter open errors, etc).
124*4882a593Smuzhiyun
125*4882a593Smuzhiyun-z::
126*4882a593Smuzhiyun--zero::
127*4882a593Smuzhiyun	Zero history across display updates.
128*4882a593Smuzhiyun
129*4882a593Smuzhiyun-s::
130*4882a593Smuzhiyun--sort::
131*4882a593Smuzhiyun	Sort by key(s): pid, comm, dso, symbol, parent, srcline, weight,
132*4882a593Smuzhiyun	local_weight, abort, in_tx, transaction, overhead, sample, period.
133*4882a593Smuzhiyun	Please see description of --sort in the perf-report man page.
134*4882a593Smuzhiyun
135*4882a593Smuzhiyun--fields=::
136*4882a593Smuzhiyun	Specify output field - multiple keys can be specified in CSV format.
137*4882a593Smuzhiyun	Following fields are available:
138*4882a593Smuzhiyun	overhead, overhead_sys, overhead_us, overhead_children, sample and period.
139*4882a593Smuzhiyun	Also it can contain any sort key(s).
140*4882a593Smuzhiyun
141*4882a593Smuzhiyun	By default, every sort key not specified in --fields will be appended
142*4882a593Smuzhiyun	automatically.
143*4882a593Smuzhiyun
144*4882a593Smuzhiyun-n::
145*4882a593Smuzhiyun--show-nr-samples::
146*4882a593Smuzhiyun	Show a column with the number of samples.
147*4882a593Smuzhiyun
148*4882a593Smuzhiyun--show-total-period::
149*4882a593Smuzhiyun	Show a column with the sum of periods.
150*4882a593Smuzhiyun
151*4882a593Smuzhiyun--dsos::
152*4882a593Smuzhiyun	Only consider symbols in these dsos.  This option will affect the
153*4882a593Smuzhiyun	percentage of the overhead column.  See --percentage for more info.
154*4882a593Smuzhiyun
155*4882a593Smuzhiyun--comms::
156*4882a593Smuzhiyun	Only consider symbols in these comms.  This option will affect the
157*4882a593Smuzhiyun	percentage of the overhead column.  See --percentage for more info.
158*4882a593Smuzhiyun
159*4882a593Smuzhiyun--symbols::
160*4882a593Smuzhiyun	Only consider these symbols.  This option will affect the
161*4882a593Smuzhiyun	percentage of the overhead column.  See --percentage for more info.
162*4882a593Smuzhiyun
163*4882a593Smuzhiyun-M::
164*4882a593Smuzhiyun--disassembler-style=:: Set disassembler style for objdump.
165*4882a593Smuzhiyun
166*4882a593Smuzhiyun--prefix=PREFIX::
167*4882a593Smuzhiyun--prefix-strip=N::
168*4882a593Smuzhiyun        Remove first N entries from source file path names in executables
169*4882a593Smuzhiyun        and add PREFIX. This allows displaying source code compiled on systems
170*4882a593Smuzhiyun        with a different file system layout.
171*4882a593Smuzhiyun
172*4882a593Smuzhiyun--source::
173*4882a593Smuzhiyun	Interleave source code with assembly code. Enabled by default,
174*4882a593Smuzhiyun	disable with --no-source.
175*4882a593Smuzhiyun
176*4882a593Smuzhiyun--asm-raw::
177*4882a593Smuzhiyun	Show raw instruction encoding of assembly instructions.
178*4882a593Smuzhiyun
179*4882a593Smuzhiyun-g::
180*4882a593Smuzhiyun	Enables call-graph (stack chain/backtrace) recording.
181*4882a593Smuzhiyun
182*4882a593Smuzhiyun--call-graph [mode,type,min[,limit],order[,key][,branch]]::
183*4882a593Smuzhiyun	Setup and enable call-graph (stack chain/backtrace) recording,
184*4882a593Smuzhiyun	implies -g.  See `--call-graph` section in perf-record and
185*4882a593Smuzhiyun	perf-report man pages for details.
186*4882a593Smuzhiyun
187*4882a593Smuzhiyun--children::
188*4882a593Smuzhiyun	Accumulate callchain of children to parent entry so that they can
189*4882a593Smuzhiyun	show up in the output.  The output will have a new "Children" column
190*4882a593Smuzhiyun	and will be sorted on the data.  It requires -g/--call-graph option
191*4882a593Smuzhiyun	enabled.  See the `overhead calculation' section for more details.
192*4882a593Smuzhiyun	Enabled by default, disable with --no-children.
193*4882a593Smuzhiyun
194*4882a593Smuzhiyun--max-stack::
195*4882a593Smuzhiyun	Set the stack depth limit when parsing the callchain, anything
196*4882a593Smuzhiyun	beyond the specified depth will be ignored. This is a trade-off
197*4882a593Smuzhiyun	between information loss and faster processing especially for
198*4882a593Smuzhiyun	workloads that can have a very long callchain stack.
199*4882a593Smuzhiyun
200*4882a593Smuzhiyun	Default: /proc/sys/kernel/perf_event_max_stack when present, 127 otherwise.
201*4882a593Smuzhiyun
202*4882a593Smuzhiyun--ignore-callees=<regex>::
203*4882a593Smuzhiyun        Ignore callees of the function(s) matching the given regex.
204*4882a593Smuzhiyun        This has the effect of collecting the callers of each such
205*4882a593Smuzhiyun        function into one place in the call-graph tree.
206*4882a593Smuzhiyun
207*4882a593Smuzhiyun--percent-limit::
208*4882a593Smuzhiyun	Do not show entries which have an overhead under that percent.
209*4882a593Smuzhiyun	(Default: 0).
210*4882a593Smuzhiyun
211*4882a593Smuzhiyun--percentage::
212*4882a593Smuzhiyun	Determine how to display the overhead percentage of filtered entries.
213*4882a593Smuzhiyun	Filters can be applied by --comms, --dsos and/or --symbols options and
214*4882a593Smuzhiyun	Zoom operations on the TUI (thread, dso, etc).
215*4882a593Smuzhiyun
216*4882a593Smuzhiyun	"relative" means it's relative to filtered entries only so that the
217*4882a593Smuzhiyun	sum of shown entries will be always 100%. "absolute" means it retains
218*4882a593Smuzhiyun	the original value before and after the filter is applied.
219*4882a593Smuzhiyun
220*4882a593Smuzhiyun-w::
221*4882a593Smuzhiyun--column-widths=<width[,width...]>::
222*4882a593Smuzhiyun	Force each column width to the provided list, for large terminal
223*4882a593Smuzhiyun	readability.  0 means no limit (default behavior).
224*4882a593Smuzhiyun
225*4882a593Smuzhiyun--proc-map-timeout::
226*4882a593Smuzhiyun	When processing pre-existing threads /proc/XXX/mmap, it may take
227*4882a593Smuzhiyun	a long time, because the file may be huge. A time out is needed
228*4882a593Smuzhiyun	in such cases.
229*4882a593Smuzhiyun	This option sets the time out limit. The default value is 500 ms.
230*4882a593Smuzhiyun
231*4882a593Smuzhiyun
232*4882a593Smuzhiyun-b::
233*4882a593Smuzhiyun--branch-any::
234*4882a593Smuzhiyun	Enable taken branch stack sampling. Any type of taken branch may be sampled.
235*4882a593Smuzhiyun	This is a shortcut for --branch-filter any. See --branch-filter for more info.
236*4882a593Smuzhiyun
237*4882a593Smuzhiyun-j::
238*4882a593Smuzhiyun--branch-filter::
239*4882a593Smuzhiyun	Enable taken branch stack sampling. Each sample captures a series of consecutive
240*4882a593Smuzhiyun	taken branches. The number of branches captured with each sample depends on the
241*4882a593Smuzhiyun	underlying hardware, the type of branches of interest, and the executed code.
242*4882a593Smuzhiyun	It is possible to select the types of branches captured by enabling filters.
243*4882a593Smuzhiyun	For a full list of modifiers please see the perf record manpage.
244*4882a593Smuzhiyun
245*4882a593Smuzhiyun	The option requires at least one branch type among any, any_call, any_ret, ind_call, cond.
246*4882a593Smuzhiyun	The privilege levels may be omitted, in which case, the privilege levels of the associated
247*4882a593Smuzhiyun	event are applied to the branch filter. Both kernel (k) and hypervisor (hv) privilege
248*4882a593Smuzhiyun	levels are subject to permissions.  When sampling on multiple events, branch stack sampling
249*4882a593Smuzhiyun	is enabled for all the sampling events. The sampled branch type is the same for all events.
250*4882a593Smuzhiyun	The various filters must be specified as a comma separated list: --branch-filter any_ret,u,k
251*4882a593Smuzhiyun	Note that this feature may not be available on all processors.
252*4882a593Smuzhiyun
253*4882a593Smuzhiyun--raw-trace::
254*4882a593Smuzhiyun	When displaying traceevent output, do not use print fmt or plugins.
255*4882a593Smuzhiyun
256*4882a593Smuzhiyun--hierarchy::
257*4882a593Smuzhiyun	Enable hierarchy output.
258*4882a593Smuzhiyun
259*4882a593Smuzhiyun--overwrite::
260*4882a593Smuzhiyun	Enable this to use just the most recent records, which helps in high core count
261*4882a593Smuzhiyun	machines such as Knights Landing/Mill, but right now is disabled by default as
262*4882a593Smuzhiyun	the pausing used in this technique is leading to loss of metadata events such
263*4882a593Smuzhiyun	as PERF_RECORD_MMAP which makes 'perf top' unable to resolve samples, leading
264*4882a593Smuzhiyun	to lots of unknown samples appearing on the UI. Enable this if you are on such
265*4882a593Smuzhiyun	machines and profiling a workload that doesn't create short-lived threads and/or
266*4882a593Smuzhiyun	doesn't use many executable mmap operations. Work is being planned to solve
267*4882a593Smuzhiyun	this situation, till then, this will remain disabled by default.
268*4882a593Smuzhiyun
269*4882a593Smuzhiyun--force::
270*4882a593Smuzhiyun	Don't do ownership validation.
271*4882a593Smuzhiyun
272*4882a593Smuzhiyun--num-thread-synthesize::
273*4882a593Smuzhiyun	The number of threads to run when synthesizing events for existing processes.
274*4882a593Smuzhiyun	By default, the number of threads equals the number of online CPUs.
275*4882a593Smuzhiyun
276*4882a593Smuzhiyun--namespaces::
277*4882a593Smuzhiyun	Record events of type PERF_RECORD_NAMESPACES and display it with the
278*4882a593Smuzhiyun	'cgroup_id' sort key.
279*4882a593Smuzhiyun
280*4882a593Smuzhiyun--all-cgroups::
281*4882a593Smuzhiyun	Record events of type PERF_RECORD_CGROUP and display it with the
282*4882a593Smuzhiyun	'cgroup' sort key.
283*4882a593Smuzhiyun
284*4882a593Smuzhiyun--switch-on EVENT_NAME::
285*4882a593Smuzhiyun	Only consider events after this event is found.
286*4882a593Smuzhiyun
287*4882a593Smuzhiyun	E.g.:
288*4882a593Smuzhiyun
289*4882a593Smuzhiyun           Find out where broadcast packets are handled
290*4882a593Smuzhiyun
291*4882a593Smuzhiyun		perf probe -L icmp_rcv
292*4882a593Smuzhiyun
293*4882a593Smuzhiyun	   Insert a probe there:
294*4882a593Smuzhiyun
295*4882a593Smuzhiyun		perf probe icmp_rcv:59
296*4882a593Smuzhiyun
297*4882a593Smuzhiyun	   Start perf top and ask it to only consider the cycles events when a
298*4882a593Smuzhiyun           broadcast packet arrives. This will show a menu with two entries and
299*4882a593Smuzhiyun           will start counting when a broadcast packet arrives:
300*4882a593Smuzhiyun
301*4882a593Smuzhiyun		perf top -e cycles,probe:icmp_rcv --switch-on=probe:icmp_rcv
302*4882a593Smuzhiyun
303*4882a593Smuzhiyun	   Alternatively one can ask for --group and then two overhead columns
304*4882a593Smuzhiyun           will appear, the first for cycles and the second for the switch-on event.
305*4882a593Smuzhiyun
306*4882a593Smuzhiyun		perf top --group -e cycles,probe:icmp_rcv --switch-on=probe:icmp_rcv
307*4882a593Smuzhiyun
308*4882a593Smuzhiyun	This may be interesting to measure a workload only after some initialization
309*4882a593Smuzhiyun	phase is over, i.e. insert a perf probe at that point and use the above
310*4882a593Smuzhiyun	examples replacing probe:icmp_rcv with the just-after-init probe.
311*4882a593Smuzhiyun
312*4882a593Smuzhiyun--switch-off EVENT_NAME::
313*4882a593Smuzhiyun	Stop considering events after this event is found.
314*4882a593Smuzhiyun
315*4882a593Smuzhiyun--show-on-off-events::
316*4882a593Smuzhiyun	Show the --switch-on/off events too. This has no effect in 'perf top' now
317*4882a593Smuzhiyun	but probably we'll make the default not to show the switch-on/off events
318*4882a593Smuzhiyun        on the --group mode and if there is only one event besides the off/on ones,
319*4882a593Smuzhiyun	go straight to the histogram browser, just like 'perf top' with no events
320*4882a593Smuzhiyun	explicitly specified does.
321*4882a593Smuzhiyun
322*4882a593Smuzhiyun--stitch-lbr::
323*4882a593Smuzhiyun	Show callgraph with stitched LBRs, which may have more complete
324*4882a593Smuzhiyun	callgraph. The option must be used with --call-graph lbr recording.
325*4882a593Smuzhiyun	Disabled by default. In common cases with call stack overflows,
326*4882a593Smuzhiyun	it can recreate better call stacks than the default lbr call stack
327*4882a593Smuzhiyun	output. But this approach is not foolproof. There can be cases
328*4882a593Smuzhiyun	where it creates incorrect call stacks from incorrect matches.
329*4882a593Smuzhiyun	The known limitations include exception handling such as
330*4882a593Smuzhiyun	setjmp/longjmp, where calls/returns will not match.
331*4882a593Smuzhiyun
332*4882a593Smuzhiyunifdef::HAVE_LIBPFM[]
333*4882a593Smuzhiyun--pfm-events events::
334*4882a593SmuzhiyunSelect a PMU event using libpfm4 syntax (see http://perfmon2.sf.net)
335*4882a593Smuzhiyunincluding support for event filters. For example '--pfm-events
336*4882a593Smuzhiyuninst_retired:any_p:u:c=1:i'. More than one event can be passed to the
337*4882a593Smuzhiyunoption using the comma separator. Hardware events and generic hardware
338*4882a593Smuzhiyunevents cannot be mixed together. The latter must be used with the -e
339*4882a593Smuzhiyunoption. The -e option and this one can be mixed and matched.  Events
340*4882a593Smuzhiyuncan be grouped using the {} notation.
341*4882a593Smuzhiyunendif::HAVE_LIBPFM[]
342*4882a593Smuzhiyun
343*4882a593SmuzhiyunINTERACTIVE PROMPTING KEYS
344*4882a593Smuzhiyun--------------------------
345*4882a593Smuzhiyun
346*4882a593Smuzhiyun[d]::
347*4882a593Smuzhiyun	Display refresh delay.
348*4882a593Smuzhiyun
349*4882a593Smuzhiyun[e]::
350*4882a593Smuzhiyun	Number of entries to display.
351*4882a593Smuzhiyun
352*4882a593Smuzhiyun[E]::
353*4882a593Smuzhiyun	Event to display when multiple counters are active.
354*4882a593Smuzhiyun
355*4882a593Smuzhiyun[f]::
356*4882a593Smuzhiyun	Profile display filter (>= hit count).
357*4882a593Smuzhiyun
358*4882a593Smuzhiyun[F]::
359*4882a593Smuzhiyun	Annotation display filter (>= % of total).
360*4882a593Smuzhiyun
361*4882a593Smuzhiyun[s]::
362*4882a593Smuzhiyun	Annotate symbol.
363*4882a593Smuzhiyun
364*4882a593Smuzhiyun[S]::
365*4882a593Smuzhiyun	Stop annotation, return to full profile display.
366*4882a593Smuzhiyun
367*4882a593Smuzhiyun[K]::
368*4882a593Smuzhiyun	Hide kernel symbols.
369*4882a593Smuzhiyun
370*4882a593Smuzhiyun[U]::
371*4882a593Smuzhiyun	Hide user symbols.
372*4882a593Smuzhiyun
373*4882a593Smuzhiyun[z]::
374*4882a593Smuzhiyun	Toggle event count zeroing across display updates.
375*4882a593Smuzhiyun
376*4882a593Smuzhiyun[qQ]::
377*4882a593Smuzhiyun	Quit.
378*4882a593Smuzhiyun
379*4882a593SmuzhiyunPressing any unmapped key displays a menu, and prompts for input.
380*4882a593Smuzhiyun
381*4882a593Smuzhiyuninclude::callchain-overhead-calculation.txt[]
382*4882a593Smuzhiyun
383*4882a593SmuzhiyunSEE ALSO
384*4882a593Smuzhiyun--------
385*4882a593Smuzhiyunlinkperf:perf-stat[1], linkperf:perf-list[1], linkperf:perf-report[1]
386