\input{configpres}
\title{perf}
\section{Performance Analysis}
\maketitle
\begin{frame}
\frametitle{What is perf?}
Perf is a profiling tool for Linux 2.6+ based systems.
\begin{itemize}
\item perf\_events interface of the Linux kernel
\item perf userspace tool
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{perf events}
\begin{itemize}
\item hardware performance counters (PMU of Host CPU)
\item tracepoint events (from ftrace)
\item dynamic probes (e.g.\ kprobes or uprobes)
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{perf subcommands I}
\begin{description}
\item[list] list available events
\item[stat] measure the total event count for a single program, or for the
whole system over some time
\item[top] top-like dynamic view of hottest functions
\item[record] measure and save sampling data for single program
\item[report] analyze the file generated by perf record;
can generate a flat or graph profile
\item[diff] read two perf.data files and display the differential profile
\end{description}
\end{frame}
\begin{frame}
\frametitle{perf subcommands II}
\begin{description}
\item[sched] tracing/measuring of scheduler actions and latencies
\item[lock] analyze lock events
\item[timechart] tool to visualize total system behavior during a workload
\end{description}
\end{frame}
\begin{frame}[fragile]
\frametitle{Example: performance counter stats of a chromium run}
\begin{lstlisting}
# perf stat chromium
Performance counter stats for 'chromium':
3733.112825 task-clock # 0.716 CPUs utilized
8,696 context-switches # 0.002 M/sec
1,063 cpu-migrations # 0.285 K/sec
134,686 page-faults # 0.036 M/sec
9,102,760,996 cycles # 2.438 GHz
[83.59%]
6,217,623,412 stalled-cycles-frontend # 68.30% frontend cycles idle
[83.32%]
4,755,104,208 stalled-cycles-backend # 52.24% backend cycles idle
[67.20%]
7,262,564,269 instructions # 0.80 insns per cycle
# 0.86 stalled cycles per insn
[83.72%]
1,495,191,394 branches # 400.521 M/sec
[83.38%]
26,037,805 branch-misses # 1.74% of all branches
[83.04%]
5.214978731 seconds time elapsed
\end{lstlisting}
\end{frame}
\begin{frame}[fragile]
\frametitle{Example: live-view of page-fault event}
\begin{lstlisting}
# sudo perf top -e page-faults
Samples: 562 of event 'page-faults', Event count (approx.): 4557
62.32% libc-2.17.so [.] __memcpy_ssse3_back
20.36% libc-2.17.so [.] _int_malloc
3.16% module.so [.] evas_gl_common_context_font_push
2.83% libevas.so.1.7.7 [.] 0x0000000000083237
2.37% libdricore9.0.2.so.1.0.0 [.] 0x0000000000126050
1.80% libxul.so (deleted) [.] 0x000000000182e48f
1.12% [kernel] [k] file_read_actor
0.92% libfontconfig.so.1.6.2 [.] 0x000000000001bd43
0.88% libdrm_intel.so.1.0.0 [.] 0x0000000000008503
0.79% libc-2.17.so [.] __memset_sse2
0.75% libeet.so.1.7.7 [.] eet_data_image_header_decode_cipher
0.61% module.so [.] evas_gl_common_context_image_push
0.42% libfreetype.so.6.10.0 [.] FT_Stream_ReadFields
0.42% libecore_x.so.1.7.7 [.] ecore_x_netwm_icons_set
0.24% libelementary.so.1.7.7 [.] 0x000000000005ae60
\end{lstlisting}
\end{frame}
\begin{frame}[fragile]
\frametitle{Example: count CPU cycles of an ls call}
\begin{lstlisting}
# perf record -e cpu-cycles ls
<output of ls>
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.008 MB perf.data (~334 samples) ]
# perf report
Samples: 24 of event 'cpu-cycles', Event count (approx.): 12333055
21.67% ls [kernel.kallsyms] [k] get_unmapped_area
20.77% ls ld-2.17.so [.] _dl_map_object_deps
17.21% ls [kernel.kallsyms] [k] __slab_free
15.73% ls [kernel.kallsyms] [k] tty_insert_flip_string_fixed_flag
12.65% ls [kernel.kallsyms] [k] unlock_page
6.94% ls libc-2.17.so [.] __strlen_sse2
3.41% ls [kernel.kallsyms] [k] count.isra.17.constprop.28
1.02% ls [kernel.kallsyms] [k] get_pageblock_flags_group
0.41% ls [kernel.kallsyms] [k] perf_event_context_sched_in
0.13% ls [kernel.kallsyms] [k] perf_ctx_unlock
0.07% ls [kernel.kallsyms] [k] native_write_msr_safe
\end{lstlisting}
\end{frame}
\input{tailpres}