% kernel-devel/kernel-perf/pres_kernel-perf_en.tex
\input{configpres}

\title{perf}
\section{Performance Analysis}
\maketitle

\begin{frame}
\frametitle{What is perf?}
Perf is a profiling tool for Linux 2.6+ based systems, consisting of:
\begin{itemize}
\item perf\_events interface of the Linux kernel
\item perf userspace tool
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{perf events}
\begin{itemize}
\item hardware performance counters (PMU of Host CPU)
\item tracepoint events (from ftrace)
\item dynamic probes (e.g.\ kprobes or uprobes)
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{perf subcommands I}
\begin{description}
\item[list]     list available events
\item[stat]     measure total event counts for a single program or
                  system-wide over a period of time
\item[top]      top-like dynamic view of hottest functions
\item[record]   measure and save sampling data for a single program
\item[report]   analyze the file generated by perf record;
                  can generate a flat or call-graph profile
\item[diff]     read two perf.data files and display the differential profile
\end{description}
\end{frame}

\begin{frame}
\frametitle{perf subcommands II}
\begin{description}
\item[sched]    tracing/measuring of scheduler actions and latencies
\item[lock]     analyze lock events
\item[timechart] tool to visualize total system behavior during a workload
\end{description}
\end{frame}

\begin{frame}[fragile]
\frametitle{Example: counting events of a program run}
\begin{lstlisting}
# perf stat chromium
 Performance counter stats for 'chromium':

       3733.112825 task-clock                #    0.716 CPUs utilized
             8,696 context-switches          #    0.002 M/sec
             1,063 cpu-migrations            #    0.285 K/sec
           134,686 page-faults               #    0.036 M/sec
     9,102,760,996 cycles                    #    2.438 GHz
[83.59%]
     6,217,623,412 stalled-cycles-frontend   #   68.30% frontend cycles idle
[83.32%]
     4,755,104,208 stalled-cycles-backend    #   52.24% backend  cycles idle
[67.20%]
     7,262,564,269 instructions              #    0.80  insns per cycle
                                             #    0.86  stalled cycles per insn
[83.72%]
     1,495,191,394 branches                  #  400.521 M/sec
[83.38%]
        26,037,805 branch-misses             #    1.74% of all branches
[83.04%]

       5.214978731 seconds time elapsed
\end{lstlisting}
\end{frame}

\begin{frame}[fragile]
\frametitle{Example: live-view of page-fault event}
\begin{lstlisting}
# perf top -e page-faults
Samples: 562  of event 'page-faults', Event count (approx.): 4557
 62.32%  libc-2.17.so              [.] __memcpy_ssse3_back
 20.36%  libc-2.17.so              [.] _int_malloc
  3.16%  module.so                 [.] evas_gl_common_context_font_push
  2.83%  libevas.so.1.7.7          [.] 0x0000000000083237
  2.37%  libdricore9.0.2.so.1.0.0  [.] 0x0000000000126050
  1.80%  libxul.so (deleted)       [.] 0x000000000182e48f
  1.12%  [kernel]                  [k] file_read_actor
  0.92%  libfontconfig.so.1.6.2    [.] 0x000000000001bd43
  0.88%  libdrm_intel.so.1.0.0     [.] 0x0000000000008503
  0.79%  libc-2.17.so              [.] __memset_sse2
  0.75%  libeet.so.1.7.7           [.] eet_data_image_header_decode_cipher
  0.61%  module.so                 [.] evas_gl_common_context_image_push
  0.42%  libfreetype.so.6.10.0     [.] FT_Stream_ReadFields
  0.42%  libecore_x.so.1.7.7       [.] ecore_x_netwm_icons_set
  0.24%  libelementary.so.1.7.7    [.] 0x000000000005ae60
\end{lstlisting}
\end{frame}


\begin{frame}[fragile]
\frametitle{Example: profile CPU cycles of an ls call}
\begin{lstlisting}
# perf record -e cpu-cycles ls
<output of ls>
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.008 MB perf.data (~334 samples) ]
# perf report
Samples: 24  of event 'cpu-cycles', Event count (approx.): 12333055
 21.67%  ls  [kernel.kallsyms]  [k] get_unmapped_area
 20.77%  ls  ld-2.17.so         [.] _dl_map_object_deps
 17.21%  ls  [kernel.kallsyms]  [k] __slab_free
 15.73%  ls  [kernel.kallsyms]  [k] tty_insert_flip_string_fixed_flag
 12.65%  ls  [kernel.kallsyms]  [k] unlock_page
  6.94%  ls  libc-2.17.so       [.] __strlen_sse2
  3.41%  ls  [kernel.kallsyms]  [k] count.isra.17.constprop.28
  1.02%  ls  [kernel.kallsyms]  [k] get_pageblock_flags_group
  0.41%  ls  [kernel.kallsyms]  [k] perf_event_context_sched_in
  0.13%  ls  [kernel.kallsyms]  [k] perf_ctx_unlock
  0.07%  ls  [kernel.kallsyms]  [k] native_write_msr_safe
\end{lstlisting}
\end{frame}


\input{tailpres}