\input{configpres}

\title{perf}
\section{Performance Analysis}

\maketitle

\begin{frame}
\frametitle{What is perf?}
Perf is a profiling tool for Linux 2.6+ based systems.
\begin{itemize}
\item perf\_events interface of the Linux kernel
\item perf userspace tool
\item access perf events from your own applications
\end{itemize}
Perf is an event counter, not a tracer.
\end{frame}

\begin{frame}
\frametitle{perf events}
\begin{itemize}
\item hardware performance counters (PMU of the host CPU)
\item tracepoint events (from ftrace)
\item dynamic probes (e.g.\ kprobes or uprobes)
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{perf subcommands I}
\begin{description}
\item[list] list available events
\item[stat] measure total event counts for a single program, or system-wide for some time
\item[top] top-like dynamic view of the hottest functions
\item[record] measure and save sampling data for a single program
\item[report] analyze the file generated by perf record; can generate a flat or call-graph profile
\item[diff] read two perf.data files and display the differential profile
\end{description}
\end{frame}

\begin{frame}
\frametitle{perf subcommands II}
\begin{description}
\item[sched] trace and measure scheduler actions and latencies
\item[lock] analyze lock events
\item[timechart] tool to visualize total system behavior during a workload
\end{description}
\end{frame}

\begin{frame}
\frametitle{selecting events}
\begin{description}
\item[-e] use the specified event
\item[-{}-filter] filter events, e.g.
\texttt{-e irq:irq\_handler\_entry -{}-filter irq==18}
\end{description}
\end{frame}

\begin{frame}[fragile]
\frametitle{Example: counter statistics of a single program}
\begin{lstlisting}
# perf stat chromium
 Performance counter stats for 'chromium':
    3733.112825 task-clock              #   0.716 CPUs utilized
          8,696 context-switches        #   0.002 M/sec
          1,063 cpu-migrations          #   0.285 K/sec
        134,686 page-faults             #   0.036 M/sec
  9,102,760,996 cycles                  #   2.438 GHz                     [83.59%]
  6,217,623,412 stalled-cycles-frontend #  68.30% frontend cycles idle    [83.32%]
  4,755,104,208 stalled-cycles-backend  #  52.24% backend cycles idle     [67.20%]
  7,262,564,269 instructions            #    0.80 insns per cycle
                                        #    0.86 stalled cycles per insn [83.72%]
  1,495,191,394 branches                # 400.521 M/sec                   [83.38%]
     26,037,805 branch-misses           #   1.74% of all branches         [83.04%]
    5.214978731 seconds time elapsed
\end{lstlisting}
\end{frame}

\begin{frame}[fragile]
\frametitle{Example: live-view of page-fault event}
\begin{lstlisting}
# sudo perf top -e page-faults
Samples: 562 of event 'page-faults', Event count (approx.): 4557
 62.32%  libc-2.17.so              [.] __memcpy_ssse3_back
 20.36%  libc-2.17.so              [.] _int_malloc
  3.16%  module.so                 [.] evas_gl_common_context_font_push
  2.83%  libevas.so.1.7.7          [.] 0x0000000000083237
  2.37%  libdricore9.0.2.so.1.0.0  [.] 0x0000000000126050
  1.80%  libxul.so (deleted)       [.] 0x000000000182e48f
  1.12%  [kernel]                  [k] file_read_actor
  0.92%  libfontconfig.so.1.6.2    [.] 0x000000000001bd43
  0.88%  libdrm_intel.so.1.0.0     [.] 0x0000000000008503
  0.79%  libc-2.17.so              [.] __memset_sse2
  0.75%  libeet.so.1.7.7           [.] eet_data_image_header_decode_cipher
  0.61%  module.so                 [.] evas_gl_common_context_image_push
  0.42%  libfreetype.so.6.10.0     [.] FT_Stream_ReadFields
  0.42%  libecore_x.so.1.7.7       [.] ecore_x_netwm_icons_set
  0.24%  libelementary.so.1.7.7    [.] 0x000000000005ae60
\end{lstlisting}
\end{frame}

\begin{frame}[fragile]
\frametitle{Example: sample CPU cycles of an ls call}
\begin{lstlisting}
# perf record -e cpu-cycles ls
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.008 MB perf.data (~334 samples) ]
# perf report
Samples: 24 of event 'cpu-cycles', Event count (approx.): 12333055
 21.67%  ls  [kernel.kallsyms]  [k] get_unmapped_area
 20.77%  ls  ld-2.17.so         [.] _dl_map_object_deps
 17.21%  ls  [kernel.kallsyms]  [k] __slab_free
 15.73%  ls  [kernel.kallsyms]  [k] tty_insert_flip_string_fixed_flag
 12.65%  ls  [kernel.kallsyms]  [k] unlock_page
  6.94%  ls  libc-2.17.so       [.] __strlen_sse2
  3.41%  ls  [kernel.kallsyms]  [k] count.isra.17.constprop.28
  1.02%  ls  [kernel.kallsyms]  [k] get_pageblock_flags_group
  0.41%  ls  [kernel.kallsyms]  [k] perf_event_context_sched_in
  0.13%  ls  [kernel.kallsyms]  [k] perf_ctx_unlock
  0.07%  ls  [kernel.kallsyms]  [k] native_write_msr_safe
\end{lstlisting}
\end{frame}

\input{tailpres}