1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
|
\input{configpres}
\title{perf}
\section{Performance Analysis}
\maketitle
\begin{frame}
\frametitle{What is perf?}
Perf is a profiling tool for Linux 2.6+ based systems.
\begin{itemize}
\item perf\_events interface of the Linux kernel
\item perf userspace tool
\item access to perf events from your own applications
\end{itemize}
Perf is an event counter, not a tracer.
\end{frame}
\begin{frame}
\frametitle{perf events}
\begin{itemize}
\item hardware performance counters (PMU of Host CPU)
\item tracepoint events (from ftrace)
\item dynamic probes (e.g.\ kprobes or uprobes)
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{perf subcommands I}
\begin{description}
\item[list] list available events
\item[stat] measure the total event count for a single program, or for the
whole system over some time
\item[top] top-like dynamic view of hottest functions
\item[record] measure and save sampling data for single program
\item[report] analyze the file generated by perf record;
can generate a flat or a graph profile
\item[diff] read two perf.data files and display the differential profile
\end{description}
\end{frame}
\begin{frame}
\frametitle{perf subcommands II}
\begin{description}
\item[sched] tracing/measuring of scheduler actions and latencies
\item[lock] analyze lock events
\item[timechart] tool to visualize total system behavior during a workload
\end{description}
\end{frame}
\begin{frame}
\frametitle{selecting events}
\begin{description}
\item[-e] use the specified event
\item[-{}-filter] filter events, e.g.\ \texttt{-e irq:irq\_handler\_entry -{}-filter irq==18}
\end{description}
\end{frame}
\begin{frame}[fragile]
\frametitle{Example: counter statistics for a program run}
\begin{lstlisting}
# perf stat chromium
Performance counter stats for 'chromium':
3733.112825 task-clock # 0.716 CPUs utilized
8,696 context-switches # 0.002 M/sec
1,063 cpu-migrations # 0.285 K/sec
134,686 page-faults # 0.036 M/sec
9,102,760,996 cycles # 2.438 GHz
[83.59%]
6,217,623,412 stalled-cycles-frontend # 68.30% frontend cycles idle
[83.32%]
4,755,104,208 stalled-cycles-backend # 52.24% backend cycles idle
[67.20%]
7,262,564,269 instructions # 0.80 insns per cycle
# 0.86 stalled cycles per insn
[83.72%]
1,495,191,394 branches # 400.521 M/sec
[83.38%]
26,037,805 branch-misses # 1.74% of all branches
[83.04%]
5.214978731 seconds time elapsed
\end{lstlisting}
\end{frame}
\begin{frame}[fragile]
\frametitle{Example: live-view of page-fault event}
\begin{lstlisting}
# sudo perf top -e page-faults
Samples: 562 of event 'page-faults', Event count (approx.): 4557
62.32% libc-2.17.so [.] __memcpy_ssse3_back
20.36% libc-2.17.so [.] _int_malloc
3.16% module.so [.] evas_gl_common_context_font_push
2.83% libevas.so.1.7.7 [.] 0x0000000000083237
2.37% libdricore9.0.2.so.1.0.0 [.] 0x0000000000126050
1.80% libxul.so (deleted) [.] 0x000000000182e48f
1.12% [kernel] [k] file_read_actor
0.92% libfontconfig.so.1.6.2 [.] 0x000000000001bd43
0.88% libdrm_intel.so.1.0.0 [.] 0x0000000000008503
0.79% libc-2.17.so [.] __memset_sse2
0.75% libeet.so.1.7.7 [.] eet_data_image_header_decode_cipher
0.61% module.so [.] evas_gl_common_context_image_push
0.42% libfreetype.so.6.10.0 [.] FT_Stream_ReadFields
0.42% libecore_x.so.1.7.7 [.] ecore_x_netwm_icons_set
0.24% libelementary.so.1.7.7 [.] 0x000000000005ae60
\end{lstlisting}
\end{frame}
\begin{frame}[fragile]
\frametitle{Example: count CPU cycles of an ls call}
\begin{lstlisting}
# perf record -e cpu-cycles ls
<output of ls>
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.008 MB perf.data (~334 samples) ]
# perf report
Samples: 24 of event 'cpu-cycles', Event count (approx.): 12333055
21.67% ls [kernel.kallsyms] [k] get_unmapped_area
20.77% ls ld-2.17.so [.] _dl_map_object_deps
17.21% ls [kernel.kallsyms] [k] __slab_free
15.73% ls [kernel.kallsyms] [k] tty_insert_flip_string_fixed_flag
12.65% ls [kernel.kallsyms] [k] unlock_page
6.94% ls libc-2.17.so [.] __strlen_sse2
3.41% ls [kernel.kallsyms] [k] count.isra.17.constprop.28
1.02% ls [kernel.kallsyms] [k] get_pageblock_flags_group
0.41% ls [kernel.kallsyms] [k] perf_event_context_sched_in
0.13% ls [kernel.kallsyms] [k] perf_ctx_unlock
0.07% ls [kernel.kallsyms] [k] native_write_msr_safe
\end{lstlisting}
\end{frame}
\input{tailpres}
|