summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorManuel Traut <manut@mecka.net>2013-07-10 17:25:27 +0200
committerManuel Traut <manut@mecka.net>2013-07-10 17:25:27 +0200
commitee774829de8607af65a128e171bdba1f26409ad4 (patch)
treeca533e831952076c6378bb222d372b451399eb30
parentd8498101ecd022f2131bcc7c1bd7febedb0090d6 (diff)
add pres for perf
Signed-off-by: Manuel Traut <manut@mecka.net>
-rw-r--r--kernel-devel/kernel-perf/Makefile9
-rw-r--r--kernel-devel/kernel-perf/pres_kernel-perf_en.tex124
2 files changed, 133 insertions, 0 deletions
diff --git a/kernel-devel/kernel-perf/Makefile b/kernel-devel/kernel-perf/Makefile
new file mode 100644
index 0000000..d641258
--- /dev/null
+++ b/kernel-devel/kernel-perf/Makefile
@@ -0,0 +1,9 @@
+all: # run pdflatex twice per .tex file (second pass resolves .aux/.toc/.nav references); abort on first failure
+ for pdf in `ls -1 *.tex` ; do \
+ TEXINPUTS=`pwd`/../..:.:..:$(TEXINPUTS) pdflatex $$pdf || exit 1; \
+ TEXINPUTS=`pwd`/../..:.:..:$(TEXINPUTS) pdflatex $$pdf || exit 1; \
+ done
+
+clean: # remove the generated PDFs and the auxiliary files pdflatex/beamer leave behind
+ rm -f *.aux *.log *.pdf *.snm *.toc *.vrb *.nav *.out
+
diff --git a/kernel-devel/kernel-perf/pres_kernel-perf_en.tex b/kernel-devel/kernel-perf/pres_kernel-perf_en.tex
new file mode 100644
index 0000000..893aa70
--- /dev/null
+++ b/kernel-devel/kernel-perf/pres_kernel-perf_en.tex
@@ -0,0 +1,124 @@
+\input{configpres}
+
+\title{perf}
+\section{Performance Analysis}
+\maketitle
+
+\begin{frame}
+\frametitle{What is perf?}
+Perf is a profiling tool for Linux 2.6+ based systems.
+\begin{itemize}
+\item perf\_events interface of the Linux kernel
+\item perf userspace tool
+\end{itemize}
+\end{frame}
+
+\begin{frame}
+\frametitle{perf events}
+\begin{itemize}
+\item hardware performance counters (PMU of Host CPU)
+\item tracepoint events (from ftrace)
+\item dynamic probes (e.g. kprobes or uprobes)
+\end{itemize}
+\end{frame}
+
+\begin{frame}
+\frametitle{perf subcommands I}
+\begin{description}
+\item[list] list available events
+\item[stat] measure total event counts for a single program, or
+ system-wide for a given time
+\item[top] top-like dynamic view of hottest functions
+\item[record] measure and save sampling data for a single program
+\item[report] analyze the file generated by perf record;
+ can generate a flat or a graph profile
+\item[diff] read two perf.data files and display the differential profile
+\end{description}
+\end{frame}
+
+\begin{frame}
+\frametitle{perf subcommands II}
+\begin{description}
+\item[sched] tracing/measuring of scheduler actions and latencies
+\item[lock] analyze lock events
+\item[timechart] tool to visualize total system behavior during a workload
+\end{description}
+\end{frame}
+
+\begin{frame}[fragile]
+\frametitle{Example: counter statistics of a program run}
+\begin{lstlisting}
+# perf stat chromium
+ Performance counter stats for 'chromium':
+
+ 3733.112825 task-clock # 0.716 CPUs utilized
+ 8,696 context-switches # 0.002 M/sec
+ 1,063 cpu-migrations # 0.285 K/sec
+ 134,686 page-faults # 0.036 M/sec
+ 9,102,760,996 cycles # 2.438 GHz
+[83.59%]
+ 6,217,623,412 stalled-cycles-frontend # 68.30% frontend cycles idle
+[83.32%]
+ 4,755,104,208 stalled-cycles-backend # 52.24% backend cycles idle
+[67.20%]
+ 7,262,564,269 instructions # 0.80 insns per cycle
+ # 0.86 stalled cycles per insn
+[83.72%]
+ 1,495,191,394 branches # 400.521 M/sec
+[83.38%]
+ 26,037,805 branch-misses # 1.74% of all branches
+[83.04%]
+
+ 5.214978731 seconds time elapsed
+\end{lstlisting}
+\end{frame}
+
+\begin{frame}[fragile]
+\frametitle{Example: live-view of page-fault event}
+\begin{lstlisting}
+# sudo perf top -e page-faults
+Samples: 562 of event 'page-faults', Event count (approx.): 4557
+ 62.32% libc-2.17.so [.] __memcpy_ssse3_back
+ 20.36% libc-2.17.so [.] _int_malloc
+ 3.16% module.so [.] evas_gl_common_context_font_push
+ 2.83% libevas.so.1.7.7 [.] 0x0000000000083237
+ 2.37% libdricore9.0.2.so.1.0.0 [.] 0x0000000000126050
+ 1.80% libxul.so (deleted) [.] 0x000000000182e48f
+ 1.12% [kernel] [k] file_read_actor
+ 0.92% libfontconfig.so.1.6.2 [.] 0x000000000001bd43
+ 0.88% libdrm_intel.so.1.0.0 [.] 0x0000000000008503
+ 0.79% libc-2.17.so [.] __memset_sse2
+ 0.75% libeet.so.1.7.7 [.] eet_data_image_header_decode_cipher
+ 0.61% module.so [.] evas_gl_common_context_image_push
+ 0.42% libfreetype.so.6.10.0 [.] FT_Stream_ReadFields
+ 0.42% libecore_x.so.1.7.7 [.] ecore_x_netwm_icons_set
+ 0.24% libelementary.so.1.7.7 [.] 0x000000000005ae60
+\end{lstlisting}
+\end{frame}
+
+
+\begin{frame}[fragile]
+\frametitle{Example: count CPU cycles of an ls call}
+\begin{lstlisting}
+# perf record -e cpu-cycles ls
+<output of ls>
+[ perf record: Woken up 1 times to write data ]
+[ perf record: Captured and wrote 0.008 MB perf.data (~334 samples) ]
+# perf report
+Samples: 24 of event 'cpu-cycles', Event count (approx.): 12333055
+ 21.67% ls [kernel.kallsyms] [k] get_unmapped_area
+ 20.77% ls ld-2.17.so [.] _dl_map_object_deps
+ 17.21% ls [kernel.kallsyms] [k] __slab_free
+ 15.73% ls [kernel.kallsyms] [k] tty_insert_flip_string_fixed_flag
+ 12.65% ls [kernel.kallsyms] [k] unlock_page
+ 6.94% ls libc-2.17.so [.] __strlen_sse2
+ 3.41% ls [kernel.kallsyms] [k] count.isra.17.constprop.28
+ 1.02% ls [kernel.kallsyms] [k] get_pageblock_flags_group
+ 0.41% ls [kernel.kallsyms] [k] perf_event_context_sched_in
+ 0.13% ls [kernel.kallsyms] [k] perf_ctx_unlock
+ 0.07% ls [kernel.kallsyms] [k] native_write_msr_safe
+\end{lstlisting}
+\end{frame}
+
+
+\input{tailpres}