\input{configpres} \title{\lq (Embedded) Linux Application Development\rq} \maketitle \subsection{Toolchains} \begin{frame} \frametitle{What is a toolchain?} \pause \begin{enumerate} \item compiler (gcc) \item debugger (gdb) \item binutils (as, ld, objdump, ...) \item sysroot (ld-linux-x86-64.so.2, libc.so.6, ...) \item sysroot-dev (libc.so, stdio.h, ...) \end{enumerate} \end{frame} \subsection{The GNU Compiler} \begin{frame}[fragile] \frametitle{Build a Test Program} Create a test program: Hello, world! \begin{verbatim} /* hello.c */ #include <stdio.h> int main(void) { printf("Hello, world!\n"); return 0; } \end{verbatim} Build the test program. \begin{verbatim} gcc -ohello hello.c \end{verbatim} \end{frame} \begin{frame} \frametitle{Stages of a Build} \begin{enumerate} \item pre-processor (evaluate macros) \item compiler (convert C code to assembly) \item assembler (convert assembly to machine code) \item linker (add information for external symbols) \end{enumerate} \end{frame} \begin{frame}[fragile] \frametitle{Stages of a Build} pre-process code (evaluate macros) \begin{verbatim} gcc -E -ohello_pre.c hello.c \end{verbatim} compile code (C to assembly) \begin{verbatim} gcc -S -ohello.S hello_pre.c \end{verbatim} assemble code (assembly to machine) \begin{verbatim} as -ohello.o hello.S \end{verbatim} link objects (objects to executable) \begin{verbatim} ld -I/lib64/ld-linux-x86-64.so.2 -ohello hello.o \ /usr/lib/x86_64-linux-gnu/crt[1in].o -lc \end{verbatim} \end{frame} \begin{frame}[fragile] \frametitle{Stages of a Build} stop after pre-processor stage \begin{verbatim} gcc -E hello.c \end{verbatim} stop after compile stage \begin{verbatim} gcc -S -ohello.S hello.c \end{verbatim} stop after assemble stage \begin{verbatim} gcc -c -ohello.o hello.c \end{verbatim} \end{frame} \begin{frame}[fragile] \frametitle{Important gcc Options} compile with optimization level 3 \begin{verbatim} gcc -O3 -ohello hello.c \end{verbatim} compile without optimization and with debug symbols 
\begin{verbatim} gcc -O0 -g -ohello hello.c \end{verbatim} \end{frame} \begin{frame}[fragile] \frametitle{Important gcc Options} link against additional libraries (librt) \begin{verbatim} gcc -ohello hello.c -lrt \end{verbatim} add extra search paths for libraries \begin{verbatim} gcc -L/mypath -ohello hello.c -lrt \end{verbatim} add extra search paths for headers \begin{verbatim} gcc -I/mypath -ohello hello.c \end{verbatim} \end{frame} \begin{frame}[fragile] \frametitle{Important gcc Options} turn on (almost) all warnings \begin{verbatim} gcc -Wall -ohello hello.c \end{verbatim} treat warnings as errors \begin{verbatim} gcc -Werror -ohello hello.c \end{verbatim} \end{frame} \begin{frame}[containsverbatim] \frametitle{gcc: Useful Tips} stop after pre-processing stage and include comments \begin{verbatim} gcc -E -C hello.c \end{verbatim} show pre-defined/internal macros \begin{verbatim} $ gcc -E -dM - < /dev/null | sort [...] #define __SIZEOF_DOUBLE__ 8 #define __SIZEOF_FLOAT__ 4 #define __SIZEOF_INT__ 4 #define __SIZEOF_LONG__ 8 [...] \end{verbatim} \end{frame} \begin{frame}[containsverbatim] \frametitle{gcc: Useful Tips} Where does the symbol printf come from? \begin{verbatim} $ gcc -Wl,-y,printf -ohello hello.c /lib/libc.so.6: definition of printf \end{verbatim} Source and further useful tips: http://elinux.org/GCC\_Tips \end{frame} \subsection{The Binutils} \begin{frame} \frametitle{What are the binutils?} A collection of programs to create and modify binaries. These programs are not responsible for understanding source code. 
The most important tools are: \begin{itemize} \item \textbf{as}: the GNU assembler \item \textbf{ld}: the GNU linker \end{itemize} \end{frame} \begin{frame} \frametitle{Other binutils Programs} \begin{itemize} \item \textbf{addr2line}: matches binary addresses to source code line numbers \item \textbf{gprof}: an execution profiler \item \textbf{nm}: lists symbols within object files \item \textbf{objcopy}: copies and converts object files \item \textbf{objdump}: lists/decodes information within object files \end{itemize} \end{frame} \begin{frame} \frametitle{Other binutils Programs} \begin{itemize} \item \textbf{ar}: create, modify, extract archives \item \textbf{ranlib}: generates the index for an archive file \item \textbf{readelf}: displays information within ELF files \item \textbf{size}: lists sizes of ELF file sections \item \textbf{strip}: removes symbols \end{itemize} \end{frame} \begin{frame}[containsverbatim] \frametitle{Investigating Binaries with objdump} \begin{verbatim} $ objdump -x /bin/ls /bin/ls: file format elf64-x86-64 /bin/ls architecture: i386:x86-64, flags 0x00000112: EXEC_P, HAS_SYMS, D_PAGED start address 0x0000000000402490 Program Header: PHDR off 0x0000000000000040 vaddr [...] filesz 0x00000000000001f8 memsz [...] [...] Dynamic Section: NEEDED libselinux.so.1 [...] \end{verbatim} \end{frame} \begin{frame}[containsverbatim] \frametitle{Investigating Binaries with objdump} \begin{verbatim} [...] Version References: required from librt.so.1: 0x09691a75 0x00 07 GLIBC_2.2.5 [...] Sections: Idx Name Size VMA [...] 0 .interp 0000001c 000000000040 [...] CONTENTS, ALLOC, LOAD, [...] 1 .note.ABI-tag 00000020 000000000040 [...] CONTENTS, ALLOC, LOAD, [...] [...] 
\end{verbatim} \end{frame} \begin{frame}[containsverbatim] \frametitle{Identifying Library Dependencies with objdump} list library dependencies \begin{verbatim} $ objdump -x /bin/ls | grep NEEDED NEEDED libselinux.so.1 NEEDED libacl.so.1 NEEDED libc.so.6 \end{verbatim} \end{frame} \begin{frame}[fragile] \frametitle{Modifying Binaries with objcopy} convert 64-bit ELF to SREC format \begin{verbatim} objcopy -I elf64-x86-64 -O srec hello hello.srec \end{verbatim} convert SREC to 64-bit ELF format \begin{verbatim} objcopy -I srec -O elf64-x86-64 hello.srec hello \end{verbatim} \end{frame} \begin{frame}[containsverbatim] \frametitle{Identifying Source Code Lines with addr2line} disassemble program to see addresses \begin{verbatim} $ objdump -D hello | less [...] 000000000040050c
<main>: 40050c: 55 push %rbp 40050d: 48 89 e5 mov %rsp,%rbp [...] \end{verbatim} identify source line number for an address \begin{verbatim} $ addr2line -e hello 40050c /home/devel/work/hello.c:4 \end{verbatim} \end{frame} \subsection{Static Libraries} \begin{frame}[fragile] \frametitle{Using Static Libraries} create static library \begin{verbatim} gcc -c file1.c file2.c ar cr libhello.a file1.o file2.o ranlib libhello.a \end{verbatim} link against static library \begin{verbatim} gcc -ohello hello.o libhello.a \end{verbatim} \end{frame} \begin{frame}[fragile] \frametitle{Static Builds} create a static executable \begin{verbatim} gcc -static -ohello hello.o -lhello \end{verbatim} the linker searches for a file libhello.a \end{frame} \subsection{Dynamic Libraries} \begin{frame} \frametitle{\textbf{P}osition \textbf{I}ndependent \textbf{C}ode} The -fPIC compiler option tells the compiler to only generate instructions that are position independent (i.e. relative). This means the generated instructions can be run from any virtual address. \end{frame} \begin{frame}[fragile] \frametitle{Using Dynamic (i.e. Shared) Libraries} create shared library \begin{verbatim} gcc -c -fPIC file1.c file2.c gcc -shared -olibhello.so.0.0.1 file1.o file2.o \end{verbatim} link against shared library \begin{verbatim} gcc -ohello hello.c libhello.so.0.0.1 \end{verbatim} Try to start the program. What happens? \end{frame} \begin{frame} \frametitle{What is the Dynamic Loader?} The dynamic loader (ld-linux) loads the shared libraries needed by a program and then starts that program. It is part of the C library. \end{frame} \begin{frame}[fragile] \frametitle{Running Programs} \begin{verbatim} ./hello \end{verbatim} or \begin{verbatim} /lib64/ld-linux-x86-64.so.2 ./hello \end{verbatim} \end{frame} \begin{frame}[fragile] \frametitle{Investigating Which Dynamic Loader} The path to the dynamic loader is stored in the contents of the ELF .interp section. 
\begin{verbatim} $ objdump -s hello | less hello: file format elf64-x86-64 Contents of section .interp: 400200 2f6c6962 36342f6c 642d6c69 6e75782d /lib64/ld-linux- 400210 7838362d 36342e73 6f2e3200 x86-64.so.2. \end{verbatim} \end{frame} \begin{frame}[containsverbatim] \frametitle{The Dynamic Loader: Environment Variables} \begin{tabular}{|l|p{4cm}|} \hline \textbf{Environment Variable} & \textbf{Description} \\ \hline LD\_LIBRARY\_PATH & extra search path for libraries \\ \hline LD\_PRELOAD & list of libraries to be loaded before all others \\ \hline LD\_DEBUG & debug output \\ \hline LD\_TRACE\_LOADED\_OBJECTS & list shared libraries and their load addresses \\ \hline LD\_TRACE\_PRELINKING & list all load addresses \\ \hline \end{tabular} \end{frame} \begin{frame}[fragile] \frametitle{The Dynamic Loader: Environment Variables} \begin{verbatim} $ LD_DEBUG=help ./hello Valid options for the LD_DEBUG environment variable are: libs display library search paths reloc display relocation processing files display progress for input file symbols display symbol table processing bindings display information about symbol binding versions display version dependencies all all previous options combined statistics display relocation statistics unused determined unused DSOs help display this help message and exit \end{verbatim} \end{frame} \begin{frame} \frametitle{The Dynamic Loader: Search Order} \begin{enumerate} \item DT\_RPATH (ELF dynamic section) \item LD\_LIBRARY\_PATH (environment variable) \item DT\_RUNPATH (ELF dynamic section) \item ld.so.cache (local file) \item generic fallbacks: /lib, /usr/lib, etc. 
\end{enumerate} \end{frame} \begin{frame} \frametitle{Loader Terminology} \begin{itemize} \item \textbf{real name}: the filename of the shared library \item \textbf{shared object name}: identifies the shared library to load (runtime) \item \textbf{linker name}: points to the library to link against (build time, -l) \end{itemize} \end{frame} \begin{frame}[fragile] \frametitle{The Shared Object Name (soname)} The -soname linker option specifies to the linker the shared object name to record in the shared library. It is stored as SONAME in the ELF dynamic section. \begin{verbatim} gcc -c -fPIC file1.c file2.c gcc -shared -Wl,-soname,libhello.so.0 \ -olibhello.so.0.0.1 file1.o file2.o \end{verbatim} \end{frame} \begin{frame}[fragile] \frametitle{The Shared Object Name (soname)} A symbolic link with the shared object name pointing to the shared library (real name) must exist. This is what the dynamic loader is looking for. \begin{verbatim} libhello.so.0 -> libhello.so.0.0.1 \end{verbatim} The symbolic link should point to the desired library version. \end{frame} \begin{frame}[fragile] \frametitle{The Shared Object Name (soname)} originally \begin{verbatim} libhello.so.0 -> libhello.so.0.0.1 \end{verbatim} after minor version update \begin{verbatim} libhello.so.0 -> libhello.so.0.1.0 \end{verbatim} after major version update \begin{verbatim} libhello.so.1 -> libhello.so.1.0.0 \end{verbatim} The real name version is typically of the form: major, minor, build. \end{frame} \begin{frame}[containsverbatim] \frametitle{The Shared Object Name (soname)} Library dependencies are stored as NEEDED in the ELF dynamic section. \begin{verbatim} $ objdump -x hello | grep NEEDED NEEDED libhello.so.0 NEEDED libc.so.6 \end{verbatim} \end{frame} \begin{frame}[fragile] \frametitle{The Linker Name} A symbolic link without any version number is usually created that points to the latest major release. 
\begin{verbatim} libhello.so -> libhello.so.1 \end{verbatim} This is used by the linker (build time) to locate the correct library specified with -l. \begin{verbatim} gcc -ohello hello.c -lhello \end{verbatim} \end{frame} \begin{frame} \frametitle{Loader Terminology} \begin{itemize} \item \textbf{real name}: libhello.so.0.0.1 \item \textbf{shared object name}: libhello.so.0 \item \textbf{linker name}: libhello.so \end{itemize} \end{frame} \begin{frame}[fragile] \frametitle{DT\_RUNPATH and DT\_RPATH} The -rpath and -{}-enable-new-dtags linker options together specify to the linker a search path to record in the executable binary. It is stored as DT\_RUNPATH in the ELF dynamic section. \begin{verbatim} gcc -Wl,--enable-new-dtags -Wl,-rpath,/mypath \ -ohello hello.c -lhello \end{verbatim} \end{frame} \begin{frame}[fragile] \frametitle{DT\_RUNPATH and DT\_RPATH} The -rpath and -{}-disable-new-dtags linker options together specify to the linker a search path to record in the executable binary. It is stored as DT\_RPATH in the ELF dynamic section. \begin{verbatim} gcc -Wl,--disable-new-dtags -Wl,-rpath,/mypath \ -ohello2 hello.c -lhello \end{verbatim} \end{frame} \begin{frame}[containsverbatim] \frametitle{Identifying Hard-Coded Search Paths} Hard-coded library search paths are stored as DT\_RUNPATH or DT\_RPATH in the ELF dynamic section (depending on the linker options used). \begin{verbatim} $ objdump -x hello | grep PATH RUNPATH /mypath $ objdump -x hello2 | grep PATH RPATH /mypath \end{verbatim} \end{frame} \begin{frame}[containsverbatim] \frametitle{The Dynamic Loader Cache} The paths used to fill the ld.so.cache are listed in /etc/ld.so.conf and within .conf files in the /etc/ld.so.conf.d/ directory. 
\begin{verbatim} $ find /etc/ld.so.conf* -type f /etc/ld.so.conf /etc/ld.so.conf.d/fakeroot-x86_64-linux-gnu.conf /etc/ld.so.conf.d/libc.conf /etc/ld.so.conf.d/x86_64-linux-gnu.conf \end{verbatim} view the dynamic loader cache \begin{verbatim} /sbin/ldconfig -p \end{verbatim} update the dynamic loader cache \begin{verbatim} sudo /sbin/ldconfig \end{verbatim} \end{frame} \begin{frame} \frametitle{The Dynamic Loader} Awareness and understanding of the dynamic loader leads to: \begin{enumerate} \item control where files are located \item control which libraries are loaded \item faster program loading \item cleaner system integration \end{enumerate} \end{frame} \subsection{Position Independent Executables} \begin{frame}[fragile] \frametitle{\textbf{P}osition \textbf{I}ndependent \textbf{E}xecutables} The -fPIE compiler option is like the -fPIC compiler option (only generate instructions that are position independent), but the compiled objects should be used for executables instead of dynamic libraries. \begin{verbatim} gcc -c -fPIE hello.c \end{verbatim} The -pie linker option tells the linker to create a position independent executable. Objects must be compiled with -fPIE for predictable results. \begin{verbatim} gcc -pie -ohello hello.o \end{verbatim} The -no-pie linker option tells the linker \textbf{not} to create a position independent executable. \end{frame} \begin{frame}[fragile] \frametitle{\textbf{A}ddress \textbf{S}pace \textbf{L}ayout \textbf{R}andomization} With PIE, the executable is loaded to a randomized address on each run. \begin{verbatim} $ LD_TRACE_PRELINKING=1 ./hello | grep '=>' ./hello => ./hello (0x000055f721cde000, 0x000055f721cde000) [...] $ LD_TRACE_PRELINKING=1 ./hello | grep '=>' ./hello => ./hello (0x00005624239d2000, 0x00005624239d2000) [...] \end{verbatim} \end{frame} \begin{frame}[fragile] \frametitle{\textbf{A}ddress \textbf{S}pace \textbf{L}ayout \textbf{R}andomization} ASLR can be disabled. 
\begin{itemize} \item \textbf{setarch -R}: program environment (local) \item \textbf{norandmaps}: boot argument (global) \item \textbf{kernel.randomize\_va\_space=0}: sysctl data (global) \end{itemize} \end{frame} \begin{frame}[fragile] \frametitle{\textbf{A}ddress \textbf{S}pace \textbf{L}ayout \textbf{R}andomization} disable ASLR for a single program \begin{verbatim} $ setarch `uname -m` -R ./hello Hello, world! \end{verbatim} verify ASLR is disabled \begin{verbatim} $ setarch `uname -m` -R env LD_TRACE_PRELINKING=1 ./hello | grep '=>' ./hello => ./hello (0x0000555555554000, 0x0000555555554000) [...] $ setarch `uname -m` -R env LD_TRACE_PRELINKING=1 ./hello | grep '=>' ./hello => ./hello (0x0000555555554000, 0x0000555555554000) [...] \end{verbatim} \end{frame} \begin{frame}[containsverbatim] \frametitle{Identifying Source Code Lines with addr2line} disassemble program to see \textbf{relative} addresses \begin{verbatim} $ objdump -D hello | less [...] 00000000000006b0
<main>: 6b0: 55 push %rbp 6b1: 48 89 e5 mov %rsp,%rbp [...] \end{verbatim} identify source line number for a \textbf{relative} address \begin{verbatim} $ addr2line -e hello 6b0 /home/devel/work/hello.c:4 \end{verbatim} \end{frame} \subsection{Toolchains (cont.)} \begin{frame} \frametitle{What is a cross-toolchain?} \pause A toolchain where the architecture of the build machine is different from the architecture of the target machine. \end{frame} \subsection{Automating the Build Process} \begin{frame} \frametitle{GNU make} \begin{alertblock}{What is GNU make?} GNU make automates and controls build processes. \end{alertblock} \end{frame} \begin{frame}[containsverbatim] \frametitle{Simple Example} \begin{verbatim} # Makefile hello: hello.o gcc -o$@ $< hello.o: hello.c gcc -c -o$@ $< clean: rm -f hello hello.o .PHONY: clean \end{verbatim} \end{frame} \begin{frame}[containsverbatim] \frametitle{Rules with Patterns} \begin{verbatim} hello: hello.o gcc -o$@ $< %.o: %.c gcc -c -o$@ $< clean: rm -f hello hello.o .PHONY: clean \end{verbatim} \end{frame} \begin{frame}[containsverbatim] \frametitle{Variables} \begin{verbatim} EXE = hello OBJ = $(EXE).o $(EXE): $(OBJ) gcc -o$@ $< %.o: %.c gcc -c -o$@ $< clean: rm -f $(EXE) $(OBJ) .PHONY: clean \end{verbatim} \end{frame} \begin{frame}[containsverbatim] \frametitle{Pattern Substitution} \begin{verbatim} EXE = hello SRC = hello.c file1.c file2.c OBJS = $(SRC:%.c=%.o) $(EXE): $(OBJS) gcc -o$@ $< %.o: %.c gcc -c -o$@ $< clean: rm -f $(EXE) $(OBJS) .PHONY: clean \end{verbatim} \end{frame} \input{tailpres}