\input{configpres} \section{Basics} \title{Linux Fastboot} \maketitle \subsection{Motivation} \begin{frame} \frametitle{Motivation} \begin{itemize} \item ever-increasing requirements for energy saving functions \item runtime of multimedia devices \item automotive applications \pause \item \textbf{Solution:} power off when idle \pause \item \textbf{But:} users do not like ``waiting'' \end{itemize} \end{frame} \subsection{Theoretical Basics} \begin{frame} \frametitle{The First Step: Requirements Definition} \begin{itemize} \item What is the maximum allowed boot time? \item Which functionality must be available after this time? \item speed vs.\ flexibility \end{itemize} \end{frame} \begin{frame} \frametitle{Boot Process} \includegraphics[height=0.7\textheight]{images/boot_overview.png} \end{frame} \begin{frame} \frametitle{Actors in the Boot Process} \begin{itemize} \item hardware reset \item bootloader \item operating system (load drivers, mount filesystems) \item start scripts / applications \end{itemize} \end{frame} \begin{frame} \frametitle{Critical Hardware Components} \begin{itemize} \item power supply \item reset logic \item boot logic / boot order \item connection to the boot medium \item connection to required peripherals \end{itemize} \textbf{Important: The hardware is an essential part of a fastboot concept!!} \end{frame} \begin{frame} \frametitle{Bootloader} \begin{itemize} \item ``base configuration'' of the CPU \item modifies the device tree and/or sets up the ``ATAGs structure'' \item flushes the caches \item disables the MMU \end{itemize} \end{frame} \begin{frame} \frametitle{The Linux Kernel} \begin{itemize} \item many features for boot time optimization \item very flexible \item compression \item possible to initialize in parallel \item approx. 
150ms--250ms to mount a filesystem \end{itemize} \end{frame} \begin{frame} \frametitle{The Application} \begin{itemize} \item this is usually where a system has the most potential for optimization \item start scripts \item linking \end{itemize} \end{frame} \section{Optimization} \subsection{Bootloader} \begin{frame}[fragile] \frametitle{Bootloader Optimization (U-Boot) 1} remove features that are not needed in the field: \begin{lstlisting} /* include/configs/boardname.h */ [...] #include #undef CONFIG_CMD_NET [...] \end{lstlisting} \end{frame} \begin{frame}[fragile] \frametitle{Bootloader Optimization (U-Boot) 2} disable verification of the kernel image: \begin{verbatim} setenv verify n \end{verbatim} disable output on the U-Boot console: \begin{verbatim} setenv silent 1 \end{verbatim} do not wait for user input: \begin{verbatim} setenv bootdelay 0 \end{verbatim} \end{frame} \begin{frame} \frametitle{Bootloader Optimization (IPL)} \begin{itemize} \item replace a ``general purpose'' bootloader with an optimized IPL \item IPL also as a basis for update concepts and ``rescue kernels'' \end{itemize} \end{frame} \subsection{Kernel} \begin{frame}[fragile] \frametitle{Kernel Optimization: Components} \begin{itemize} \item configuration and build \item compression \item boot parameters \item driver initialization \item root filesystem \end{itemize} \end{frame} \begin{frame}[fragile] \frametitle{Kernel Optimization: Configuration} \begin{verbatim} General setup ---> Kernel compression mode --> \end{verbatim} \begin{itemize} \item LZO is very interesting for embedded systems \item copy vs. 
decompress \item for storage media with directly addressable read access, ``execute in place (XIP)'' is available \end{itemize} \end{frame} \begin{frame}[fragile] \frametitle{Kernel Optimization: Command Line Parameters} \begin{itemize} \item delay loop calibration: ``lpj=''; for ARM9 systems, this can save $>$100ms \item parameters available for runtime analysis of the boot process: ``initcall\_debug'', ``printk.time=1'' \end{itemize} \end{frame} \begin{frame}[fragile] \frametitle{Kernel Optimization: Delay Loop} \begin{verbatim} ... Calibrating delay loop... 99.12 BogoMIPS (lpj=495616) ... \end{verbatim} \end{frame} \begin{frame}[fragile] \frametitle{Kernel Optimization: initcall\_debug} \begin{verbatim} calling populate_rootfs+0x0/0x1f8 @ 1 initcall populate_rootfs+0x0/0x1f8 returned 0 after 281 usecs calling timer_init_sysfs+0x0/0x38 @ 1 initcall timer_init_sysfs+0x0/0x38 returned 0 after 590 usecs calling leds_init+0x0/0x3c @ 1 initcall leds_init+0x0/0x3c returned 0 after 488 usecs calling leds_init+0x0/0x50 @ 1 initcall leds_init+0x0/0x50 returned -19 after 1 usecs calling fpe_init+0x0/0x78 @ 1 \end{verbatim} \end{frame} \begin{frame}[fragile] \frametitle{Kernel Optimization: printk.time} \begin{verbatim} [ 0.800000] mice: PS/2 mouse device [ 0.810000] rtc-at91sam9 at91_rtt.0: [ 0.820000] rtc-at91sam9 at91_rtt.0: [ 0.830000] TCP cubic registered [ 0.830000] NET: Registered protocol [ 0.830000] rtc-at91sam9 at91_rtt.0: [ 2.610000] VFS: Mounted root (jffs2 [ 2.610000] Freeing init memory: 116K \end{verbatim} \end{frame} \subsection{File Systems} \begin{frame}[fragile] \frametitle{Initramfs} \begin{verbatim} dir /dev 755 0 0 nod /dev/console 644 0 0 c 5 1 nod /dev/loop0 644 0 0 b 7 0 dir /bin 755 1000 1000 slink /bin/sh busybox 777 0 0 file /bin/busybox initfs/busybox 755 0 0 [...] 
dir /proc 755 0 0 dir /sys 755 0 0 dir /mnt 755 0 0 \end{verbatim} \end{frame} \begin{frame} \frametitle{UBIFS} \begin{itemize} \item for Linux systems, this is \textbf{the} flash filesystem to choose \item fast mount times \item power-fail safe \item \dots \end{itemize} \end{frame} \subsection{Application} \begin{frame} \frametitle{Optimizing the Application} \begin{itemize} \item analyze the start processes with bootchartd \item possibly start the application directly with init= \item dynamic vs. static linking \item prelink for dynamically linked applications \end{itemize} \end{frame} \begin{frame}[fragile] \frametitle{Start Script Tasks in the Application} \begin{verbatim} ret = mount("sysfs", "/sys", "sysfs", 0, NULL); if(ret < 0) perror("Can't mount sysfs\n"); \end{verbatim} \end{frame} \begin{frame}[fragile] \frametitle{Dynamic Linking} \begin{enumerate} \item DT\_RPATH section in ELF executables \item paths that are specified in LD\_LIBRARY\_PATH \item DT\_RUNPATH section in ELF executables \item dynamic loader cache: /etc/ld.so.cache \item default paths /lib, /usr/lib, \dots \end{enumerate} \end{frame} \begin{frame}[fragile] \frametitle{Dynamic Linking: Analysis} \begin{verbatim} $ LD_DEBUG=libs ls 3082: find library=librt.so.1 [0]; searching 3082: search cache=/etc/ld.so.cache 3082: trying file=/lib/librt.so.1 \end{verbatim} \end{frame} \section{Example} \subsection{Analysis of a Test System} \begin{frame} \frametitle{Test System} \begin{itemize} \item ARM9 CPU from the Atmel AT91 series \item starting point: busybox image from the Angstrom distribution \item storage medium: NAND flash \item test application: toggling a GPIO via the sysfs interface \end{itemize} \end{frame} \begin{frame} \frametitle{AT91 Controller Boot Process} \includegraphics[height=0.7\textheight]{images/boot_at91_overview.png} \end{frame} \begin{frame} \frametitle{The AT91 Family Boot Modes} \begin{itemize} \item RomBOOT: boot via an internal boot logic \item boot via chip-select 0 
(CS0) of the external bus interface \end{itemize} \end{frame} \begin{frame} \frametitle{AT91 RomBOOT} \includegraphics[height=0.7\textheight]{images/boot_at91_romboot.png} \end{frame} \begin{frame} \frametitle{Power-On Behavior / Power Supply} \includegraphics[height=0.7\textheight]{images/at91_netzteil.png} \end{frame} \begin{frame} \frametitle{Reset Behavior} \includegraphics[height=0.7\textheight]{images/at91_reset.png} \end{frame} \begin{frame} \frametitle{RomBOOT} \includegraphics[height=0.7\textheight]{images/at91_romboot_measure.png} \end{frame} \begin{frame} \frametitle{Summary / Hardware Optimization} \begin{itemize} \item use the internal oscillator for slowclock: $>$1s savings \item boot from CS0: $\sim$100ms--150ms savings \end{itemize} \end{frame} \subsection{Optimizing the Test System} \begin{frame} \frametitle{Boot Time Measurement via GPIO} \includegraphics[width=10cm]{images/gpio_measure.png} \end{frame} \begin{frame} \frametitle{Parts of the Boot Time Measurement} \begin{itemize} \item from bootstrap until U-Boot \item from U-Boot until early boot code in the kernel (also includes copying the kernel into RAM and unpacking the kernel) \item from kernel until application (also includes mounting the filesystem) \end{itemize} \end{frame} \begin{frame} \frametitle{Initial Boot Times} \begin{table}[h] \centering \begin{tabular}{ | c | c | } \hline Boot Part & Time \\ \hline bootstrap - U-Boot & --- \\ \hline U-Boot - kernel & 6.5s \\ \hline kernel - application & 4.5s \\ \hline \textbf{Total} & \textbf{11s} \\ \hline \end{tabular} \end{table} \end{frame} \begin{frame} \frametitle{U-Boot without Network Support} \begin{table}[h] \centering \begin{tabular}{ | c | c | } \hline Boot Part & Time \\ \hline bootstrap - U-Boot & --- \\ \hline U-Boot - kernel & 4.25s \\ \hline kernel - application & 4.5s \\ \hline \textbf{Total} & \textbf{8.75s} \\ \hline \end{tabular} \end{table} \end{frame} \begin{frame} \frametitle{U-Boot verify=n} \begin{table}[h] \centering 
\begin{tabular}{ | c | c | } \hline Boot Part & Time \\ \hline bootstrap - U-Boot & --- \\ \hline U-Boot - kernel & 3.89s \\ \hline kernel - application & 4.5s \\ \hline \textbf{Total} & \textbf{8.39s} \\ \hline \end{tabular} \end{table} \end{frame} \begin{frame} \frametitle{Kernel ``Trimming''} \begin{table}[h] \centering \begin{tabular}{ | c | c | } \hline Boot Part & Time \\ \hline bootstrap - U-Boot & --- \\ \hline U-Boot - kernel & 3.77s \\ \hline kernel - application & 4.33s \\ \hline \textbf{Total} & \textbf{8.1s} \\ \hline \end{tabular} \end{table} \end{frame} \begin{frame} \frametitle{Analyze the Start Scripts: bootchartd} \includegraphics[height=0.85\textheight]{images/bootchart.png} \end{frame} \begin{frame} \frametitle{Optimizing the Start Scripts} \begin{table}[h] \centering \begin{tabular}{ | c | c | } \hline Boot Part & Time \\ \hline bootstrap - U-Boot & --- \\ \hline U-Boot - kernel & 3.77s \\ \hline kernel - application & 3.61s \\ \hline \textbf{Total} & \textbf{7.38s} \\ \hline \end{tabular} \end{table} \end{frame} \begin{frame} \frametitle{Initramfs Compressed with LZO} The application is started directly with the init= parameter. \begin{table}[h] \centering \begin{tabular}{ | c | c | } \hline Boot Part & Time \\ \hline bootstrap - U-Boot & --- \\ \hline U-Boot - kernel & 3.79s \\ \hline kernel - application & 0.372s \\ \hline \textbf{Total} & \textbf{4.162s} \\ \hline \end{tabular} \end{table} \end{frame} \begin{frame} \frametitle{Modified AT91 Bootstrap} AT91 bootstrap starts Linux directly, i.e.\ the system boots without U-Boot. 
\begin{table}[h] \centering \begin{tabular}{ | c | c | } \hline Boot Part & Time \\ \hline bootstrap - kernel & 676ms \\ \hline kernel - application & 584ms \\ \hline \textbf{Total} & \textbf{1.260s} \\ \hline \end{tabular} \end{table} \end{frame} \begin{frame} \frametitle{lpj=} \begin{table}[h] \centering \begin{tabular}{ | c | c | } \hline Boot Part & Time \\ \hline bootstrap - kernel & 676ms \\ \hline kernel - application & 384ms \\ \hline \textbf{Total} & \textbf{1.060s} \\ \hline \end{tabular} \end{table} \end{frame} \begin{frame} \frametitle{No Output on the Serial Interface (quiet)} \begin{table}[!h] \centering \begin{tabular}{ | c | c | } \hline Boot Part & Time \\ \hline bootstrap - kernel & 524ms \\ \hline kernel - application & 212ms \\ \hline \textbf{Total} & \textbf{736ms} \\ \hline \end{tabular} \end{table} \end{frame} \begin{frame} \frametitle{Kernel Image Compressed with LZO} \begin{table}[h] \centering \begin{tabular}{ | c | c | } \hline Boot Part & Time \\ \hline bootstrap - kernel & 444ms \\ \hline kernel - application & 212ms \\ \hline \textbf{Total} & \textbf{656ms} \\ \hline \end{tabular} \end{table} \end{frame} \begin{frame} \frametitle{Boot Times After the Optimizations} \includegraphics[height=0.7\textheight]{images/at91_ipl_quiet_lpj_lzo.png} \end{frame} \begin{frame} \frametitle{Summary} \begin{itemize} \item Linux provides an optimal platform for anyone who wants a modern operating system, yet also needs to boot in under 1s \item even the simple optimizations alone can save multiple seconds \item the hardware is an essential part of a fastboot concept \item the fastboot concept is mostly platform independent \end{itemize} \end{frame} \input{tailpres}