diff options
| author | Manuel Traut <manut@mecka.net> | 2014-03-16 20:25:02 +0100 |
|---|---|---|
| committer | Manuel Traut <manut@mecka.net> | 2014-03-16 20:25:02 +0100 |
| commit | 6f4862dba013c38bfba0109ba9363195b5171c7b (patch) | |
| tree | fe366d5a7ccc01860b047c551bda12324c7029ad /kernel-devel | |
| parent | 5761d1eb8a47437375c4f61bb54d09e60b67c816 (diff) | |
| parent | 2f46ad6a6f7393dc7898672a7ea3337395bafae9 (diff) | |
Merge ../projects/schulung
Diffstat (limited to 'kernel-devel')
9 files changed, 808 insertions, 52 deletions
diff --git a/kernel-devel/kernel-debugging/pres_kernel-debugging_en.tex b/kernel-devel/kernel-debugging/pres_kernel-debugging_en.tex index cc1824f..82dc25b 100644 --- a/kernel-devel/kernel-debugging/pres_kernel-debugging_en.tex +++ b/kernel-devel/kernel-debugging/pres_kernel-debugging_en.tex @@ -2,6 +2,73 @@ \title{Kernel-Debugging} \maketitle + +\section{Kernel Configuration} +\begin{frame} +\frametitle{Kernel hacking / printk and dmesg options} +\begin{itemize} +\item Show timing information on printks +\item Default message log level (1-7) +\item Enable dynamic printk() support +\end{itemize} +\end{frame} + +\begin{frame} +\frametitle{Kernel hacking / Compile-time checks and compiler options} +\begin{itemize} +\item Compile the kernel with debug info +\item Debug Filesystem +\end{itemize} +\end{frame} + +\begin{frame} +\frametitle{Kernel hacking / Memory Debugging} +\begin{itemize} +\item Kernel memory leak detector +\end{itemize} +reports leaks in /sys/kernel/debug/kmemleak + +see also: Documentation/kmemleak.txt +\end{frame} + +\begin{frame} +\frametitle{Kernel hacking / Debug Lockups and Hangs} +\begin{itemize} +\item Debug Lockups and Hangs +\begin{description} +\item[Softlockup] loop in kernel mode +\item[Hardlockup] CPU loop in kernel mode without letting IRQs run +\item[Hung task] task is uninterruptible (D state) +\end{description} +\end{itemize} +stack trace is printed on detection +\end{frame} + +\begin{frame} +\frametitle{Kernel hacking / Lock Debugging} +\begin{itemize} +\item RT Mutex debugging, deadlock detection +\item Lock debugging: prove locking correctness, see Documentation/lockdep-design.txt. +\end{itemize} +\end{frame} + +\begin{frame} +\frametitle{Kernel hacking / Tracing} +\begin{itemize} +\item Kernel Function (Graph) Tracer +\item Scheduling Latency Tracer +\item Enable [k/u]probes-based dynamic events +\item enable/disable function tracing dynamically +\item Ring buffer benchmark stress tester (!!don't use it!!) 
+\end{itemize} +\end{frame} + +\begin{frame} +\frametitle{Kernel hacking} +\begin{description} +\item[Remote debugging over FireWire] Documentation/debugging-via-ohci1394.txt +\end{description} +\end{frame} \section{printk} \begin{frame}[fragile] \frametitle{printk is your friend!!} @@ -38,6 +105,25 @@ $ cat /proc/sys/kernel/printk \end{verbatim} \end{frame} +\section{dynamic printk} +\begin{frame} +\frametitle{dynamic printk} +controlled by debugfs: dynamic\_debug/control + +format: filename:lineno [module]function flags format +\begin{description} +\item[filename] source file of the debug statement +\item[lineno] line number of the debug statement +\item[module] module that contains the debug statement +\item[function] function that contains the debug statement +\item[flags] `=p' means the line is turned `on' for printing +\item[format] the format used for the debug statement +\end{description} +Use pr\_debug() and dev\_dbg() in your code. + +see also: Documentation/dynamic-debug-howto.txt +\end{frame} + \section{Logging messages} \begin{frame}[fragile] \frametitle{Serial Console} diff --git a/kernel-devel/kernel-perf/Makefile b/kernel-devel/kernel-perf/Makefile new file mode 100644 index 0000000..d641258 --- /dev/null +++ b/kernel-devel/kernel-perf/Makefile @@ -0,0 +1,9 @@ +all: + for pdf in `ls -1 *.tex` ; do \ + TEXINPUTS=`pwd`/../..:.:..:$(TEXINPUTS) pdflatex $$pdf; \ + TEXINPUTS=`pwd`/../..:.:..:$(TEXINPUTS) pdflatex $$pdf; \ + done + +clean: + rm -f *.aux *.log *.pdf *.log *.snm *.toc *.vrb *.nav *.out + diff --git a/kernel-devel/kernel-perf/pres_kernel-perf_en.tex b/kernel-devel/kernel-perf/pres_kernel-perf_en.tex new file mode 100644 index 0000000..b6edfc0 --- /dev/null +++ b/kernel-devel/kernel-perf/pres_kernel-perf_en.tex @@ -0,0 +1,135 @@ +\input{configpres} + +\title{perf} +\section{Performance Analysis} +\maketitle + +\begin{frame} +\frametitle{What is perf?} +Perf is a profiler tool for Linux 2.6+ based systems. 
+\begin{itemize} +\item perf\_events interface of the Linux kernel +\item perf userspace tool +\item access perf events from your own applications +\end{itemize} +Perf is an event counter, not a tracer. +\end{frame} + +\begin{frame} +\frametitle{perf events} +\begin{itemize} +\item hardware performance counters (PMU of Host CPU) +\item tracepoint events (from ftrace) +\item dynamic probes (e.g.\ kprobes or uprobes) +\end{itemize} +\end{frame} + +\begin{frame} +\frametitle{perf subcommands I} +\begin{description} +\item[list] list available events +\item[stat] measure total event count for single program or for system, for + some time +\item[top] top-like dynamic view of hottest functions +\item[record] measure and save sampling data for single program +\item[report] analyze file generated by perf record; + can generate flat, or graph profile +\item[diff] Read two perf.data files and display the differential profile +\end{description} +\end{frame} + +\begin{frame} +\frametitle{perf subcommands II} +\begin{description} +\item[sched] tracing/measuring of scheduler actions and latencies +\item[lock] analyze lock events +\item[timechart] tool to visualize total system behavior during a workload +\end{description} +\end{frame} + + +\begin{frame} +\frametitle{selecting events} +\begin{description} +\item[-e] use the specified event +\item[-{}-filter] filter events, e.g.\ 
-e irq:irq\_handler\_entry -{}-filter irq==18 +\end{description} +\end{frame} + +\begin{frame}[fragile] +\frametitle{Example: counting events of a program run} +\begin{lstlisting} +# perf stat chromium + Performance counter stats for 'chromium': + + 3733.112825 task-clock # 0.716 CPUs utilized + 8,696 context-switches # 0.002 M/sec + 1,063 cpu-migrations # 0.285 K/sec + 134,686 page-faults # 0.036 M/sec + 9,102,760,996 cycles # 2.438 GHz +[83.59%] + 6,217,623,412 stalled-cycles-frontend # 68.30% frontend cycles idle +[83.32%] + 4,755,104,208 stalled-cycles-backend # 52.24% backend cycles idle +[67.20%] + 7,262,564,269 instructions # 0.80 insns per cycle + # 0.86 stalled cycles per insn +[83.72%] + 1,495,191,394 branches # 400.521 M/sec +[83.38%] + 26,037,805 branch-misses # 1.74% of all branches +[83.04%] + + 5.214978731 seconds time elapsed +\end{lstlisting} +\end{frame} + +\begin{frame}[fragile] +\frametitle{Example: live-view of page-fault event} +\begin{lstlisting} +# sudo perf top -e page-faults +Samples: 562 of event 'page-faults', Event count (approx.): 4557 + 62.32% libc-2.17.so [.] __memcpy_ssse3_back + 20.36% libc-2.17.so [.] _int_malloc + 3.16% module.so [.] evas_gl_common_context_font_push + 2.83% libevas.so.1.7.7 [.] 0x0000000000083237 + 2.37% libdricore9.0.2.so.1.0.0 [.] 0x0000000000126050 + 1.80% libxul.so (deleted) [.] 0x000000000182e48f + 1.12% [kernel] [k] file_read_actor + 0.92% libfontconfig.so.1.6.2 [.] 0x000000000001bd43 + 0.88% libdrm_intel.so.1.0.0 [.] 0x0000000000008503 + 0.79% libc-2.17.so [.] __memset_sse2 + 0.75% libeet.so.1.7.7 [.] eet_data_image_header_decode_cipher + 0.61% module.so [.] evas_gl_common_context_image_push + 0.42% libfreetype.so.6.10.0 [.] FT_Stream_ReadFields + 0.42% libecore_x.so.1.7.7 [.] ecore_x_netwm_icons_set + 0.24% libelementary.so.1.7.7 [.] 
0x000000000005ae60 +\end{lstlisting} +\end{frame} + + +\begin{frame}[fragile] +\frametitle{Example: count CPU cycles of a ls call} +\begin{lstlisting} +# perf record -e cpu-cycles ls +<output of ls> +[ perf record: Woken up 1 times to write data ] +[ perf record: Captured and wrote 0.008 MB perf.data (~334 samples) ] +# perf report +Samples: 24 of event 'cpu-cycles', Event count (approx.): 12333055 + 21.67% ls [kernel.kallsyms] [k] get_unmapped_area + 20.77% ls ld-2.17.so [.] _dl_map_object_deps + 17.21% ls [kernel.kallsyms] [k] __slab_free + 15.73% ls [kernel.kallsyms] [k] tty_insert_flip_string_fixed_flag + 12.65% ls [kernel.kallsyms] [k] unlock_page + 6.94% ls libc-2.17.so [.] __strlen_sse2 + 3.41% ls [kernel.kallsyms] [k] count.isra.17.constprop.28 + 1.02% ls [kernel.kallsyms] [k] get_pageblock_flags_group + 0.41% ls [kernel.kallsyms] [k] perf_event_context_sched_in + 0.13% ls [kernel.kallsyms] [k] perf_ctx_unlock + 0.07% ls [kernel.kallsyms] [k] native_write_msr_safe +\end{lstlisting} +\end{frame} + + +\input{tailpres} diff --git a/kernel-devel/kernel-tracing/pres_kernel-tracing_en.tex b/kernel-devel/kernel-tracing/pres_kernel-tracing_en.tex index d73a10f..d5a9686 100644 --- a/kernel-devel/kernel-tracing/pres_kernel-tracing_en.tex +++ b/kernel-devel/kernel-tracing/pres_kernel-tracing_en.tex @@ -1,19 +1,33 @@ \input{configpres} \title{Kernel-Tracing} -\section{FTRACE} +\section{Overview} \maketitle + +\begin{frame} +\frametitle{Overview} +\tableofcontents +\end{frame} + \begin{frame}[fragile] \frametitle{Kerneltracing: Overview} \begin{itemize} \item DebugFS interface -\item Different tracers: function, function\_graph, sched\_switch, ... 
+\item Different tracers: function, function\_graph, wakeup, wakeup\_rt, \dots \item Custom trace events \item Graphical frontend (kernelshark) \end{itemize} \end{frame} \begin{frame}[fragile] +\frametitle{Kerneltracing: Overview} +\begin{figure}[h] +\centering +\includegraphics[width=10cm]{images/trace_overview.png} +\end{figure} +\end{frame} + +\begin{frame}[fragile] \frametitle{Kernel-Tracing: DebugFS} \begin{verbatim} $ mount -t debugfs debugfs /sys/kernel/debug @@ -23,27 +37,103 @@ blk function_graph mmiotrace wakeup_rt wakeup \end{verbatim} \end{frame} +\section{Event tracing} +\begin{frame}[fragile] +\frametitle{Event tracing} +current\_tracer can be set to NOP +\begin{verbatim} +$ cd /sys/kernel/debug/tracing +$ ls events/ +[...] +irq +sched +scsi +signal +skb +[...] +\end{verbatim} +\end{frame} \begin{frame}[fragile] -\frametitle{Kerneltracing: Trivial example} +\frametitle{Event tracing} \begin{verbatim} -$ echo function_graph > current_tracer -$ echo 1 > tracing_enabled -$ sleep 1 -$ echo 0 > tracing_enabled -$ less trace -# tracer: function_graph -# CPU DURATION FUNCTION CALLS -# | | | | | | | - 1) | enqueue_entity() { - 1) | update_curr() { - 1) 0.336 us | task_of(); - 1) 1.524 us | } - 1) 0.420 us | place_entity(); +$ cd /sys/kernel/debug/tracing +$ ls -1 events/sched/ +enable +filter +sched_kthread_stop +sched_kthread_stop_ret +sched_migrate_task +sched_pi_setprio +[...] 
+\end{verbatim} +\end{frame} + +\begin{frame}[fragile] +\frametitle{Event tracing: Enable events} +\begin{verbatim} +$ cd /sys/kernel/debug/tracing +# Enable ALL events of the group ''sched'' +$ echo 1 > events/sched/enable +\end{verbatim} +\end{frame} + +\begin{frame}[fragile] +\frametitle{Record a trace} +After enabling the events you want to see, do: +\begin{verbatim} +$ cd /sys/kernel/debug/tracing +# Start recording to the ringbuffer +$ echo 1 > tracing_on +# Stop recording to the ringbuffer +$ echo 0 > tracing_on +\end{verbatim} +\end{frame} + +\begin{frame}[fragile] +\frametitle{Analyze a trace} +You can even do this while recording! +\begin{verbatim} +$ cd /sys/kernel/debug/tracing +# Just print the current content of the ring buffer +$ cat trace +# or: do a consuming read on the ring buffer +$ cat trace_pipe +\end{verbatim} +\end{frame} + +\begin{frame}[fragile] +\frametitle{Trace event format and filters} +Each trace event has a specific format and parameters. +You can put a filter on those parameters for recording a trace: +\begin{verbatim} +$ cat events/sched/sched_switch/format +[...] +field:__u8 prev_comm[15]; +field:pid_t prev_pid; +field:int prev_prio; +field:long prev_state; +[...] 
+$ echo 'next_comm == bash' \ + > events/sched/sched_switch/filter +$ echo 1 > events/sched/sched_switch/enable +$ echo 1 > tracing_on +$ cat trace \end{verbatim} \end{frame} \begin{frame}[fragile] +\frametitle{Tracing on multicore} +\begin{itemize} +\item One ringbuffer per cpu +\item trace contains ALL events +\item the per\_cpu directory contains a trace for each cpu +\item tracing\_cpumask can limit tracing to specific cores +\end{itemize} +\end{frame} + +\section{Tracers} +\begin{frame}[fragile] \frametitle{Tracer: function} \begin{verbatim} # tracer: function @@ -59,56 +149,60 @@ wnck-2022 [003] 5766.659918: kfree <-skb \end{frame} \begin{frame}[fragile] -\frametitle{Tracer: function / latency\_format} +\frametitle{Tracer: function\_graph} \begin{verbatim} -$ echo 1 > options/latency_format +$ echo function_graph > current_tracer +$ echo 1 > tracing_on +$ sleep 1 +$ echo 0 > tracing_on $ less trace -# _------=> CPU# -# / _-----=> irqs-off -# | / _----=> need-resched -# || / _---=> hardirq/softirq -# ||| / _--=> preempt-depth -# |||| /_--=> lock-depth -# |||||/ delay -# cmd pid |||||| time | caller -# \ / |||||| \ | / -wnck-app-2022 3..... 1237702us : skb_relea -wnck-app-2022 3..... 1237703us : sock_wfre +# tracer: function_graph +# CPU DURATION FUNCTION CALLS +# | | | | | | | + 1) | enqueue_entity() { + 1) | update_curr() { + 1) 0.336 us | task_of(); + 1) 1.524 us | } + 1) 0.420 us | place_entity(); \end{verbatim} -\end{frame} +\end{frame} \begin{frame}[fragile] -\frametitle{Tracer: function (custom tracepoint)} +\frametitle{function\_graph: Set a trigger function} +You can set a trigger function for the function\_graph tracer +if you just want to record specific functions and their children: \begin{verbatim} -$ echo 1 > tracing_enabled -$ echo "MARK" > trace_marker -$ echo 0 > tracing_enabled -$ less trace -... 
- bash-4328 [003] 5603.687935: get_slab - bash-4328 [003] 5603.687935: _cond_re - bash-4328 [003] 5603.687936: _cond_re - bash-4328 [003] 5603.687939: 0: MARK - bash-4328 [003] 5603.687939: kfree <- -... +echo do_IRQ > set_graph_function +# Additional triggers can be set with +echo another_function >> set_graph_function \end{verbatim} \end{frame} \begin{frame}[fragile] -\frametitle{Tracer: sched\_switch} +\frametitle{Tracer: function / latency\_format} \tiny \begin{verbatim} -# tracer: sched_switch +$ echo 1 > options/latency_format +# tracer: function # -# TASK-PID CPU# TIMESTAMP FUNCTION -# | | | | | - bash-4328 [003] 78.553966: 4328:120:S + [003] 4328:120:S bash - bash-4328 [003] 78.553979: 4328:120:S ==> [003] 0:120:R <idle> -<idle>-0 [003] 78.553986: 0:120:R + [003] 13:120:R ksoftirqd/3 -<idle>-0 [003] 78.553988: 0:120:R ==> [003] 13:120:R ksoftirqd/3 - +# function latency trace v1.1.5 on 3.9.4-x1-00124-g0bfd8ff +# -------------------------------------------------------------------- +# latency: 0 us, #204955/25306195, CPU#0 | (M:desktop VP:0, KP:0, SP:0 HP:0 #P:4) +# ----------------- +# | task: -0 (uid:0 nice:0 policy:0 rt_prio:0) +# ----------------- +# +# _------=> CPU# +# / _-----=> irqs-off +# | / _----=> need-resched +# || / _---=> hardirq/softirq +# ||| / _--=> preempt-depth +# |||| / delay +# cmd pid ||||| time | caller +# \ / ||||| \ | / +terminol-11964 1.... 11639243us : ep_read_events_proc <-ep_scan_ready_list.isra.8 \end{verbatim} -\end{frame} +\end{frame} \begin{frame}[fragile] \frametitle{Tracer: wakeup\_rt} @@ -131,6 +225,44 @@ ls-4579 3dN... 5us : _raw_spin_unlock_irqrestore <-try_to_wake_up \end{verbatim} \end{frame} +\section{Kernel function profiler} + +\begin{frame}[fragile] +\frametitle{Kernel function profiler} +\begin{verbatim} +$ echo 1 > function_profile_enabled +$ echo 1 > tracing_on +# then do something... 
+$ echo 0 > tracing_on +$ less trace_stat/function0 +Function Hit Time Avg +-------- --- ---- --- +__schedule 7064 1958976725 us 277318.3 us +schedule 6961 1958965845 us 281420.1 us +[...] +\end{verbatim} +\end{frame} + +\section{trace\_marker} +\begin{frame}[fragile] +\frametitle{Custom application tracepoints: ''simple method''} +\begin{verbatim} +$ echo 1 > tracing_on +$ echo "MARK" > trace_marker +$ echo 0 > tracing_on +$ less trace +... + bash-4328 [003] 5603.687935: get_slab + bash-4328 [003] 5603.687935: _cond_re + bash-4328 [003] 5603.687936: _cond_re + bash-4328 [003] 5603.687939: 0: MARK + bash-4328 [003] 5603.687939: kfree <- +... +\end{verbatim} +\end{frame} + +\section{trace\_printk} + \begin{frame}[fragile] \frametitle{trace\_printk()} \begin{itemize} @@ -158,6 +290,10 @@ Only trace specific functions. ftrace_notrace= \end{verbatim} Don't trace specific functions. +\begin{verbatim} +trace_event= +\end{verbatim} +Just enable trace events (comma separated list) \end{frame} \begin{frame}[fragile] @@ -170,6 +306,20 @@ echo z > /proc/sysrq-trigger \end{verbatim} \end{frame} +\begin{frame}[fragile] +\frametitle{Trace instances} +You can have separate trace instances with their own +buffers and events: +\begin{verbatim} +$ cd /sys/kernel/debug/tracing +$ mkdir instances/my_inst1 +$ cd instances/my_inst1 +$ echo 1 > events/sched/enable +$ cat trace +[...] +\end{verbatim} +\end{frame} + \section{trace-cmd} \begin{frame} @@ -326,9 +476,200 @@ $ kernelshark -i mytrace.dat \includegraphics[width=10cm]{images/kernelshark_zoom.png} \end{figure} \end{frame} + +\section{Useful things} + +\begin{frame}[fragile] +\frametitle{Control a trace from your kernel code} +\begin{verbatim} +void my_kernel_function(void) +{ + tracing_on(); + do_some_stuff_i_wanna_trace(); + tracing_off(); +} +\end{verbatim} +\end{frame} + +\begin{frame}[fragile] +\frametitle{Matching PIDs and process names of your trace(s)} +\begin{verbatim} +$ cat saved_cmdlines +[...] 
+5112 bash +5223 ARTHUR_DENT +5546 kworker/0:2 +8465 kworker/0:0 +[...] +\end{verbatim} +\end{frame} + +\begin{frame}[fragile] +\frametitle{Where's my thread in the trace?!} +To ``find'' a specific thread in your trace, you can use the +following function to set the thread's name: +\begin{verbatim} +#define _GNU_SOURCE +#include <pthread.h> + +pthread_setname_np(pthread_t thread, + const char *name); +\end{verbatim} +Available since glibc version 2.12 +\end{frame} + +\begin{frame}[fragile] +\frametitle{Thread names: Example} +\begin{verbatim} +/* pthread_example */ +pthread_t test_thread; +[...] +pthread_create(&test_thread, NULL, + my_test_thread, NULL); +[...] +pthread_setname_np(test_thread, "ARTHUR_DENT"); +\end{verbatim} +Check with the ps command: +\begin{verbatim} +$ ps H -C pthread_example -o 'pid tid cmd comm' + PID TID CMD COMMAND +4515 4515 ./pthread_example pthread_example +4515 4516 ./pthread_example ARTHUR_DENT +\end{verbatim} +\end{frame} + \begin{frame} +\frametitle{Thread name in kernelshark} +\begin{figure}[h] +\centering +\includegraphics[width=10cm]{images/pthread_kernelshark.png} +\end{figure} +\end{frame} + +\section{kprobes} +\begin{frame}[fragile] +\frametitle{Dynamic kernel tracepoints: KPROBES} +\begin{itemize} +\item Similar to Tracepoints +\item Can be added / removed dynamically +\end{itemize} +\end{frame} + +\begin{frame}[fragile] +\frametitle{Dynamic kernel tracepoints: KPROBES} +\begin{verbatim} +$ echo 'p:my_k_event do_IRQ' > kprobe_events +$ echo 1 > events/kprobes/my_k_event/enable +$ echo 1 > tracing_on +$ cat trace +<idle>-0 [000] d... 545.173709: my_k_event: (do_IRQ+0x0/0xc0) +<idle>-0 [000] d... 545.331051: my_k_event: (do_IRQ+0x0/0xc0) +<idle>-0 [000] d... 545.331490: my_k_event: (do_IRQ+0x0/0xc0) +<idle>-0 [000] d... 
545.490730: my_k_event: (do_IRQ+0x0/0xc0) +\end{verbatim} +\end{frame} + +\begin{frame}[fragile] +\frametitle{Dynamic kernel tracepoints: KPROBES for custom modules} +Let's assume we want to have a tracepoint for the function +hello\_init in the module hello.ko +\begin{verbatim} +# Note: >> will append a new event +$ echo 'p:my_mod_event hello:hello_init' \ + >> kprobe_events +$ echo 1 > events/kprobes/my_mod_event/enable +$ insmod hello.ko +$ cat trace +insmod-9586 [000] d... 13278.003468: my_mod_event: (0xf878d080) +\end{verbatim} +\end{frame} + +\begin{frame}[fragile] +\frametitle{Dynamic kernel tracepoints: Question / Exercise} +What happens, if we add the following event. What's different? +\begin{verbatim} +# Note >> will append a new event +$ echo 'r:my_exercise_event hello:hello_init' \ + >> kprobe_events +\end{verbatim} +\end{frame} + +\begin{frame}[fragile] +\frametitle{KPROBES statistics} +\begin{verbatim} +$ cat kprobe_profile +my_mod_event_ret 2 0 +my_mod_event 2 0 +\end{verbatim} +\end{frame} + +\section{uprobes} +\begin{frame}[fragile] +\frametitle{Dynamic Userspace Tracepoints: uprobes} +\begin{itemize} +\item Similar to kprobes +\item For userspace applications +\item A uprobe event is set on a specific offset in a userland process +\item Powerful method to correlate your kernel and userland events! +\end{itemize} +\end{frame} + +\begin{frame}[fragile] +\frametitle{Dynamic Userspace Tracepoints: uprobes} +\begin{verbatim} +$ gcc -Wall -g -o pthread_example \ + pthread_example.c -lpthread +$ objdump -F -D -S pthread_example | less +\end{verbatim} +\begin{verbatim} +08048594 <my_test_thread> (File Offset: 0x594): +[...] +void *my_test_thread(void *x_void_ptr) +[...] 
+ for (i = 0; i < 10; i++) { +80485a1: c7 45 f4 00 00 00 00 movl $0x0,-0xc(%ebp) +80485a8: eb 1c jmp 80485c6 <my_test_thread+0x32> (File Offset: 0x5c6) + printf("The answer is 42!\n"); +80485aa: c7 04 24 50 87 04 08 movl $0x8048750,(%esp) +\end{verbatim} +So, the file offset for the printf call is 0x5aa! +\end{frame} + +\begin{frame}[fragile] +\frametitle{Dynamic Userspace Tracepoints: uprobes II} +\begin{verbatim} +echo \ +'p:my_ev /home/devel/pthread/pthread_example:0x5aa' \ + > /sys/kernel/debug/tracing/uprobe_events +\end{verbatim} +\end{frame} + +\begin{frame}[fragile] +\frametitle{Dynamic Userspace Tracepoints: uprobes III} +\begin{verbatim} +$ cd /sys/kernel/debug/tracing/ +$ echo 1 > events/uprobes/my_ev/enable +$ echo 1 > tracing_on +$ /home/devel/pthread/pthread_example +$ echo 0 > tracing_on +$ less trace +# TASK-PID CPU# |||| TIMESTAMP FUNCTION +# | | | |||| | | +ARTHUR_DENT-5223 [000] d... 5653.154822: my_ev: (0x80485aa) +ARTHUR_DENT-5223 [000] d... 5654.155723: my_ev: (0x80485aa) +\end{verbatim} +\end{frame} + +\begin{frame}[fragile] +\frametitle{uprobes: statistics} +\begin{verbatim} +$ cat uprobe_profile +/home/devel/pthread/pthread_example my_ev 10 +\end{verbatim} +\end{frame} \section{sources} +\begin{frame} \frametitle{sources} \begin{thebibliography}{1} \bibitem{trace1} http://lwn.net/Articles/365835/ diff --git a/kernel-devel/kexec-and-crash-kernel/Makefile b/kernel-devel/kexec-and-crash-kernel/Makefile new file mode 100644 index 0000000..d641258 --- /dev/null +++ b/kernel-devel/kexec-and-crash-kernel/Makefile @@ -0,0 +1,9 @@ +all: + for pdf in `ls -1 *.tex` ; do \ + TEXINPUTS=`pwd`/../..:.:..:$(TEXINPUTS) pdflatex $$pdf; \ + TEXINPUTS=`pwd`/../..:.:..:$(TEXINPUTS) pdflatex $$pdf; \ + done + +clean: + rm -f *.aux *.log *.pdf *.log *.snm *.toc *.vrb *.nav *.out + diff --git a/kernel-devel/kexec-and-crash-kernel/crashkernel_patches/crashkernel.diff b/kernel-devel/kexec-and-crash-kernel/crashkernel_patches/crashkernel.diff new file mode 
100644 index 0000000..1d3e082 --- /dev/null +++ b/kernel-devel/kexec-and-crash-kernel/crashkernel_patches/crashkernel.diff @@ -0,0 +1,65 @@ +From: Magnus Damm <damm at opensource.se> + +Update the copy_oldmem_page() function to ioremap() only +when accessing memory that is outside the regular range +of system memory that is managed by the kernel. + +Without this patch a warning is triggered in the ARM-specific +ioremap implementation, see WARN_ON(pfn_valid()) in ioremap.c + +The copy_oldmem_page() function is used by the secondary crash +kernel to access memory using the /proc/vmcore code implemented +in fs/proc/vmcore.c. To pass information from the first kernel +to the secondary crash kernel a kernel command line option is +used to point out where the elf core hdr is located. + +The crash kernel is loaded through kexec-tools which also contains +code that reserves memory for the elfcorehdr= option. This memory +block is reserved _inside_ the main system memory of the secondary +kernel. The /proc/vmcore code in the secondary kernel is however +using copy_oldmem_page() to access both this elfcorehdr area and +the rest of the memory used by the the first kernel. + +So the copy_oldmem_page() function is used to access data that +may be located in system memory, or it may be outside. Always +using ioremap will not work, so this patch makes it conditional +based on pfn_valid(). 
+ +For more details please look at the sh7372-based example here: +http://permalink.gmane.org/gmane.linux.ports.sh.devel/11502 + +Signed-off-by: Magnus Damm <damm at opensource.se> +--- + + arch/arm/kernel/crash_dump.c | 14 ++++++++++---- + 1 file changed, 10 insertions(+), 4 deletions(-) + +--- 0001/arch/arm/kernel/crash_dump.c ++++ work/arch/arm/kernel/crash_dump.c 2011-06-18 20:59:49.000000000 +0900 +@@ -39,9 +39,13 @@ ssize_t copy_oldmem_page(unsigned long p + if (!csize) + return 0; + +- vaddr = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE); +- if (!vaddr) +- return -ENOMEM; ++ if (pfn_valid(pfn)) { ++ vaddr = phys_to_virt(pfn << PAGE_SHIFT); ++ } else { ++ vaddr = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE); ++ if (!vaddr) ++ return -ENOMEM; ++ } + + if (userbuf) { + if (copy_to_user(buf, vaddr + offset, csize)) { +@@ -52,6 +56,8 @@ ssize_t copy_oldmem_page(unsigned long p + memcpy(buf, vaddr + offset, csize); + } + +- iounmap(vaddr); ++ if (!pfn_valid(pfn)) ++ iounmap(vaddr); ++ + return csize; + } diff --git a/kernel-devel/kexec-and-crash-kernel/crashkernel_patches/kexec_fix_arm_braindamage.diff b/kernel-devel/kexec-and-crash-kernel/crashkernel_patches/kexec_fix_arm_braindamage.diff new file mode 100644 index 0000000..877f5ea --- /dev/null +++ b/kernel-devel/kexec-and-crash-kernel/crashkernel_patches/kexec_fix_arm_braindamage.diff @@ -0,0 +1,28 @@ +Index: kexec-tools-2.0.3/kexec/arch/arm/crashdump-arm.c +=================================================================== +--- kexec-tools-2.0.3.orig/kexec/arch/arm/crashdump-arm.c 2011-10-03 00:56:38.000000000 +0200 ++++ kexec-tools-2.0.3/kexec/arch/arm/crashdump-arm.c 2013-07-06 17:26:13.410309437 +0200 +@@ -204,12 +204,12 @@ + * @cmdline. Note that @cmdline must be at least %COMMAND_LINE_SIZE bytes long + * (including %NUL). 
+ */ +-static void cmdline_add_mem(char *cmdline, unsigned long size) ++static void cmdline_add_mem(char *cmdline, unsigned long size, unsigned long offset) + { + char buf[COMMAND_LINE_SIZE]; + int buflen; + +- buflen = snprintf(buf, sizeof(buf), "%s mem=%ldK", cmdline, size >> 10); ++ buflen = snprintf(buf, sizeof(buf), "%s mem=%ldK@0x%X", cmdline, size >> 10, offset); + if (buflen < 0) + die("Failed to construct mem= command line parameter\n"); + if (buflen >= sizeof(buf)) +@@ -301,7 +301,7 @@ + * prevents the dump capture kernel from using any other memory regions + * which belong to the primary kernel. + */ +- cmdline_add_mem(mod_cmdline, elfcorehdr - crash_reserved_mem.start); ++ cmdline_add_mem(mod_cmdline, elfcorehdr - crash_reserved_mem.start, crash_reserved_mem.start); + + dump_memory_ranges(); + dbgprintf("kernel command line: \"%s\"\n", mod_cmdline); diff --git a/kernel-devel/kexec-and-crash-kernel/crashkernel_patches/zreladdr.diff b/kernel-devel/kexec-and-crash-kernel/crashkernel_patches/zreladdr.diff new file mode 100644 index 0000000..867e13a --- /dev/null +++ b/kernel-devel/kexec-and-crash-kernel/crashkernel_patches/zreladdr.diff @@ -0,0 +1,13 @@ +Index: linux-3.2/arch/arm/mach-omap2/Makefile.boot +=================================================================== +--- linux-3.2.orig/arch/arm/mach-omap2/Makefile.boot 2012-01-05 00:55:44.000000000 +0100 ++++ linux-3.2/arch/arm/mach-omap2/Makefile.boot 2013-07-06 17:00:18.718516198 +0200 +@@ -1,3 +1,8 @@ ++ifeq ($(CONFIG_CRASH_DUMP),y) ++ zreladdr-y += 0x84008000 ++params_phys-y := 0x84000100 ++else + zreladdr-y += 0x80008000 + params_phys-y := 0x80000100 ++endif + initrd_phys-y := 0x80800000 diff --git a/kernel-devel/kexec-and-crash-kernel/pres_kexec_and_crashkernel_en.tex b/kernel-devel/kexec-and-crash-kernel/pres_kexec_and_crashkernel_en.tex new file mode 100644 index 0000000..1791267 --- /dev/null +++ b/kernel-devel/kexec-and-crash-kernel/pres_kexec_and_crashkernel_en.tex @@ -0,0 +1,70 @@ 
+\input{configpres} + +\title{Kexec and Crashkernels} +\maketitle +\begin{frame} +\frametitle{What is kexec?} +Kexec is a mechanism to boot Linux from within Linux, +without going through the BIOS / the Bootloader. +\end{frame} + +\begin{frame} +\frametitle{kexec-tools} +http://horms.net/projects/kexec/ +\end{frame} + +\begin{frame}[fragile] +\frametitle{Using kexec: Kernel configuration} +\begin{verbatim} +Boot options --> +[*] Kexec system call +\end{verbatim} +\end{frame} + +\begin{frame}[fragile] +\frametitle{Using kexec: Booting the new kernel} +\begin{verbatim} +# Load the kernel image and set the commandline +$ kexec -l uImage --append="$(cat /proc/cmdline)" +# Start the new kernel +$ kexec -e +\end{verbatim} +\end{frame} + +\begin{frame}[fragile] +\frametitle{Using kexec: Crashkernel} +\begin{itemize} +\item Build production kernel (enable kexec system call!) +\item Build ``crash kernel'' (enable kexec system call and crash dump kernel): +\begin{verbatim} +Boot options --> +[*] Kexec system call +[*] Build kdump crash kernel +\end{verbatim} +\item The crash kernel option should automatically select /proc/vmcore +\end{itemize} +\end{frame} + + +\begin{frame}[fragile] +\frametitle{Using kexec: Crashkernel} +\begin{itemize} +\item Boot production kernel (Commandline: crashkernel=256M@0x84000000) +\item Check if crashkernel memory was reserved: +\begin{verbatim} +cat /proc/iomem | grep Crash + 84000000-93ffffff : Crash kernel +\end{verbatim} +\item Load crash kernel and let the system crash ;-) +\begin{verbatim} +kexec -p uImage-crash \ + --append="$(cat /proc/cmdline | \ + sed 's/crashkernel/bla/')" +echo c > /proc/sysrq-trigger +\end{verbatim} +\item After crashing, the crash kernel should boot up! +\item Within the crashkernel, the core file for the production kernel is available in /proc/vmcore +\end{itemize} +\end{frame} + +\input{tailpres} |
