\input{configpres} \date{18 October 2018} \title{Building Real-Time Applications for Linux} \maketitle \begin{frame} \tableofcontents[currentsection] \end{frame} \begin{frame} \frametitle{What is real-time?} \begin{itemize} \item correctness also means executing at the correct time \item failing to meet timing restrictions leads to an error \end{itemize} \end{frame} \begin{frame} \frametitle{A Visual Aid} \begin{overprint} \onslide<1> \begin{alertblock}{Remember...} \bigskip Failing to meet timing restrictions leads to an error! \end{alertblock} \onslide<2|handout:0> \begin{figure}[h] \centering \includegraphics[height=0.5\textwidth]{images/nuclear.png} \end{figure} \end{overprint} \end{frame} \begin{frame} \frametitle{Requirements} \begin{itemize} \item deterministic time behavior \item interruptible \item avoid priority inversion \\ (priority inheritance / priority ceiling) \end{itemize} \end{frame} \begin{frame} \frametitle{Priority Inversion} \begin{figure}[h] \centering \includegraphics[width=0.8\textwidth]{images/prio_inv.png} \end{figure} Task 3 is holding a lock that Task 1 wants. But Task 3 never has a chance to release the lock because Task 2 is running unbounded. 
\end{frame}

% ---------------------------------------------------------------------------
% Section: evaluating a real-time Linux system (test tools, load scenarios).
% ---------------------------------------------------------------------------
\section{Evaluating a Real-Time Linux System}

\begin{frame}
\tableofcontents[currentsection]
\end{frame}

\begin{frame}
\frametitle{Testing Preempt RT Systems}
RT Tests:
\bigskip
\begin{itemize}
  \item cyclictest
  \item hwlatdetect
  \item pi\_stress
  \item signaltest
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{cyclictest}
\bigskip
\begin{itemize}
  \item originally developed by Thomas Gleixner for Preempt RT testing
  \item high resolution timer test software
  \item creates any number of cyclic real-time tasks with varying priorities and varying intervals
  \item provides lots of debugging possibilities
  \item yields very significant results to determine the real-time behavior of a platform
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Load Scenarios}
Suitable load scenarios in order to create worst-case situations:
\bigskip
\begin{itemize}
  \item CPU Load: ``hackbench'', originally written for scheduler benchmarking
  \item Interrupt Load: flood pinging (``ping -f'')
  \item Serial/Network Load: ``top -d 0'' via console and network shell
  \item Various Load Scenarios: ``stress-ng''
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{hackbench}
\begin{figure}[h]
  \centering
  \includegraphics[height=3.5cm]{images/hackbench.png}
  \label{img:hackbench}
\end{figure}
\begin{itemize}
  \item starts groups, each with 20 clients and 20 servers
  \item every client sends 100 messages via socket to every server
\end{itemize}
\end{frame}

% Fragile frame (contains verbatim): beamer requires \end{frame} to stand
% alone on its own line, and the verbatim content needs real line breaks.
\begin{frame}[fragile]
\frametitle{Pitfall!}
\begin{verbatim}
cat /proc/sys/kernel/sched_rt_runtime_us
\end{verbatim}
\begin{figure}
  \centering
  \includegraphics[height=0.4\textwidth]{images/pitfall.png}
\end{figure}
\end{frame}

% ---------------------------------------------------------------------------
% Section: application development with Preempt RT.
% ---------------------------------------------------------------------------
\section{Application Development}

\begin{frame}
\tableofcontents[currentsection]
\end{frame}

\begin{frame}
\frametitle{Real-Time Development with Preempt RT}
\begin{figure}
  \includegraphics[height=0.4\textwidth]{images/thumb.png}
\end{figure}
POSIX!
\end{frame}

\subsubsection{Priorities}

\begin{frame}
\frametitle{Real-Time Scheduling Policies}
\begin{itemize}
  \item SCHED\_FIFO: static priority
  \item SCHED\_RR: priority based, round robin scheduling per priority
  \item SCHED\_DEADLINE: dynamic priority based upon deadlines
\end{itemize}
\bigbreak
SCHED\_FIFO and SCHED\_RR scheduling policies accept priorities from 1 to 99,
where 99 is the highest priority.
(But never use 99! It is for special critical kernel tasks!)
\bigbreak
The SCHED\_DEADLINE policy calculates priorities dynamically.
\end{frame}

% Fragile frame (contains verbatim): \end{frame} must stand alone on its line.
% NOTE(review): the <...> placeholders of the chrt usage lines were missing in
% the source (only "chrt [opts]" and "[ ...]" survived); they are reconstructed
% here from chrt's usage text -- confirm against the intended chrt version.
\begin{frame}[fragile]
\frametitle{Setting the Scheduling Policy}
The scheduling policy can be set using the ``chrt'' command:
\bigskip
\begin{verbatim}
Set policy:
  chrt [opts] <policy> <prio> <pid>
  chrt [opts] <policy> <prio> <cmd> [<arg> ...]

Scheduling policies:
  -d, --deadline    set policy to SCHED_DEADLINE
  -f, --fifo        set policy to SCHED_FIFO
  -r, --rr          set policy to SCHED_RR (default)
\end{verbatim}
\end{frame}

% C example for sched_setscheduler(). Repairs relative to the garbled source:
%  - header names restored on the #include lines (sched.h for
%    sched_setscheduler, stdio.h for perror, stdlib.h for exit),
%  - the mis-encoded pilcrow restored to the address-of expression &param,
%  - the closing parenthesis of perror() moved out of the string literal.
\begin{frame}[fragile]
\frametitle{Setting the Scheduling Policy}
...or in code:
\bigskip
\begin{verbatim}
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>

struct sched_param param;

param.sched_priority = 80;
if (sched_setscheduler(0, SCHED_FIFO, &param) == -1) {
    perror("sched_setscheduler() failed");
    exit(1);
}
\end{verbatim}
\end{frame}

\subsubsection{CPU Affinity}

% Fragile frame: the taskset usage text is verbatim, one usage form per line.
\begin{frame}[fragile]
\frametitle{Setting the CPU Affinity}
The CPU affinity can be set using the ``taskset'' command:
\bigbreak
\begin{verbatim}
taskset [options] mask command [arg]...
taskset [options] -p [mask] pid
\end{verbatim}
\end{frame}

% C example for sched_setaffinity(). The second argument is the size of the
% CPU set in BYTES, so sizeof(set) is passed; CPU_SETSIZE is the number of
% CPUs a cpu_set_t can represent and is not a byte count.
% Header names on the #include lines were missing in the source and are
% restored (sched.h for the CPU_* macros and sched_setaffinity, stdio.h for
% perror, stdlib.h for exit).
% Fragile frame: \end{frame} must stand alone on its own line.
\begin{frame}[fragile]
\frametitle{Setting the CPU Affinity}
...or in code:
\bigskip
\begin{verbatim}
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>

cpu_set_t set;

CPU_ZERO(&set);
CPU_SET(0, &set);
CPU_SET(1, &set);
if (sched_setaffinity(pid, sizeof(set), &set) == -1) {
    perror("sched_setaffinity() failed");
    exit(1);
}
\end{verbatim}
\end{frame}

\begin{frame}[fragile]
\frametitle{CPU Affinity on Boot}
Kernel Parameters:
\bigbreak
\begin{itemize}
  \item maxcpus=\textit{n}: limits the kernel to bring up \textit{n} processors
  \item isolcpus=\textit{cpulist}: specify CPUs to isolate from disturbances
  \item threadirqs: force threading of interrupt handlers
\end{itemize}
\end{frame}

% Shell transcript: each command and its output on separate verbatim lines.
\begin{frame}[fragile]
\frametitle{Interrupt Routing}
\begin{verbatim}
$ ls /proc/irq/
0 1 10 11 12 13 14 15 17 18 19 ... default_smp_affinity
$ cat /proc/irq/default_smp_affinity
3
\end{verbatim}
Set default IRQ affinity to CPU0
\begin{verbatim}
echo 1 > /proc/irq/default_smp_affinity
\end{verbatim}
Set affinity for IRQ19 to CPU1
\begin{verbatim}
echo 2 > /proc/irq/19/smp_affinity
\end{verbatim}
\end{frame}

\subsubsection{Memory Management}

\begin{frame}
\frametitle{Memory Over-Committing}
Comparable to those low-cost airlines ;)
\pause
\bigbreak
\begin{itemize}
  \item ...selling more tickets than available seats
  \item ...hoping not everyone will come ;)
\end{itemize}
\end{frame}

\begin{frame}[fragile]
\frametitle{Memory Over-Commit Settings}
\begin{verbatim}
/proc/sys/vm/overcommit_memory
\end{verbatim}
\bigbreak
Possible settings are:
\begin{itemize}
  \item 0: heuristic overcommit handling (default)
  \item 1: always overcommit
  \item 2: do not overcommit
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Virtual Address Memory Mapping}
By default, physical memory pages are mapped to the virtual address space \emph{on-demand}.
This is how over-commitment works and it affects \emph{all} virtual memory of a process:
\bigbreak
\begin{itemize}
  \item text segment
  \item initialized data segment
  \item uninitialized data segment
  \item stack(s)
  \item heap
\end{itemize}
\end{frame}

% The C snippets below are in fragile frames: \end{frame} must stand alone on
% its own line and the verbatim code needs real line breaks.
% The header names on the #include lines were missing in the source; they are
% restored to the headers that declare the functions each snippet calls
% (sys/mman.h: mlockall, unistd.h: sysconf, malloc.h: mallopt,
% stdlib.h: malloc/free/exit, stdio.h: perror).
\begin{frame}[fragile]
\frametitle{Locking The Memory}
\begin{verbatim}
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>

if (mlockall(MCL_CURRENT | MCL_FUTURE) == -1) {
    perror("mlockall() failed");
    exit(1);
}
\end{verbatim}
\end{frame}

% Touches one byte per page of a MAX_SAFE_STACK-sized stack buffer.
\begin{frame}[fragile]
\frametitle{Stack Prefaulting}
\begin{verbatim}
#include <unistd.h>

#define MAX_SAFE_STACK (512 * 1024)

void prefault_stack(void)
{
    unsigned char dummy[MAX_SAFE_STACK];
    int i;

    for (i = 0; i < MAX_SAFE_STACK; i += sysconf(_SC_PAGESIZE))
        dummy[i] = i;
}
\end{verbatim}
\end{frame}

\begin{frame}
\frametitle{Dynamic Allocations from Real-Time Context}
\bigbreak
\begin{itemize}
  \item if possible, avoid memory allocations from real-time context
  \item try to use a pre-allocated buffer instead
\end{itemize}
\end{frame}

\begin{frame}[fragile]
\frametitle{Disable malloc() Trimming and mmap() Usage}
\begin{verbatim}
#include <malloc.h>
#include <stdio.h>
#include <stdlib.h>

if (!mallopt(M_TRIM_THRESHOLD, -1)) {
    perror("mallopt(M_TRIM_THRESHOLD) failed");
    exit(1);
}
if (!mallopt(M_MMAP_MAX, 0)) {
    perror("mallopt(M_MMAP_MAX) failed");
    exit(1);
}
\end{verbatim}
\end{frame}

% Allocates `size` bytes, touches one byte per page, then frees the buffer.
\begin{frame}[fragile]
\frametitle{Heap Prefaulting}
\begin{verbatim}
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

void prefault_heap(int size)
{
    char *dummy;
    int i;

    dummy = malloc(size);
    if (!dummy) {
        perror("malloc() failed");
        exit(1);
    }
    for (i = 0; i < size; i += sysconf(_SC_PAGESIZE))
        dummy[i] = i;
    free(dummy);
}
\end{verbatim}
\end{frame}

\subsubsection{Clocks}

\begin{frame}
\frametitle{Time and Sleeping}
Functions:
\begin{itemize}
  \item Use POSIX!
  \item clock\_getres()
  \item clock\_gettime()
  \item clock\_settime()
  \item clock\_nanosleep()
\end{itemize}
\bigbreak
Clocks:
\begin{itemize}
  \item CLOCK\_MONOTONIC: A clock that cannot be set and represents monotonic time since some unspecified starting point.
\item CLOCK\_REALTIME: System-wide real time clock. Can be set (by NTP, user, ...)!
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Cyclic Tasks}
\begin{itemize}
  \item Use clock\_nanosleep()!
  \item Do not use signals!
\end{itemize}
\end{frame}

% Cyclic task example: sleeps until an absolute CLOCK_MONOTONIC deadline that
% is advanced by CYCLE_TIME_NS every iteration.
% norm_ts() must carry into tv_sec when tv_nsec is EQUAL to NSEC_PER_SEC as
% well (valid tv_nsec values are 0..999999999), hence ">=" instead of ">".
% Fragile frame: \end{frame} must stand alone on its own line.
\begin{frame}[fragile]
\frametitle{Cyclic Example}
\begin{verbatim}
#define CYCLE_TIME_NS (100 * 1000 * 1000)
#define NSEC_PER_SEC (1000 * 1000 * 1000)

static void norm_ts(struct timespec *tv)
{
    while (tv->tv_nsec >= NSEC_PER_SEC) {
        tv->tv_sec++;
        tv->tv_nsec -= NSEC_PER_SEC;
    }
}

int main(void)
{
    struct timespec tv;

    clock_gettime(CLOCK_MONOTONIC, &tv);
    do {
        /* do the work */

        /* wait for next cycle */
        tv.tv_nsec += CYCLE_TIME_NS;
        norm_ts(&tv);
        clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME, &tv, NULL);
    } while(1);
}
\end{verbatim}
\end{frame}

\subsubsection{Locking}

\begin{frame}
\frametitle{Synchronization}
\begin{itemize}
  \item use pthread\_mutexes
  \item activate priority inheritance
  \item activate shared and robustness (if multi-process access)
\end{itemize}
\end{frame}

% Mutex example: the protocol attribute is set to PTHREAD_PRIO_INHERIT before
% the mutex is initialized with it.
\begin{frame}[fragile]
\frametitle{Locking Example}
\begin{verbatim}
pthread_mutex_t master_lock;
pthread_mutexattr_t mattr;

pthread_mutexattr_init(&mattr);
pthread_mutexattr_setprotocol(&mattr, PTHREAD_PRIO_INHERIT);
pthread_mutex_init(&master_lock, &mattr);

pthread_mutex_lock(&master_lock);
/* do critical work */
pthread_mutex_unlock(&master_lock);

pthread_mutex_destroy(&master_lock);
\end{verbatim}
\end{frame}

\subsubsection{Signalling}

\begin{frame}
\frametitle{Signalling Mechanisms}
\begin{itemize}
  \item Do not use signals!
  \item Use pthread\_cond\_vars.
\end{itemize}
\end{frame}

% Condition variable examples (fragile frames: \end{frame} must stand alone
% on its own line).
% A condition wait must be guarded by a predicate that is re-checked in a
% loop: pthread_cond_wait() may return spuriously, and a broadcast sent
% before the receiver starts waiting would otherwise be lost. The shared
% flag `ready` (protected by `mutex`) serves as that predicate.
\begin{frame}[fragile]
\frametitle{pthread\_cond\_var: Initialization}
\begin{verbatim}
pthread_mutexattr_t mattr;
pthread_mutex_t mutex;
pthread_condattr_t cattr;
pthread_cond_t cond;
int ready = 0; /* predicate, protected by mutex */

pthread_mutexattr_init(&mattr);
pthread_mutexattr_setprotocol(&mattr, PTHREAD_PRIO_INHERIT);
pthread_mutex_init(&mutex, &mattr);

pthread_condattr_init(&cattr);
pthread_cond_init(&cond, &cattr);
\end{verbatim}
\end{frame}

\begin{frame}[fragile]
\frametitle{pthread\_cond\_var: Send Signal}
Sender:
\begin{verbatim}
pthread_mutex_lock(&mutex);
/* do the work */
ready = 1;
pthread_cond_broadcast(&cond);
pthread_mutex_unlock(&mutex);
\end{verbatim}
\bigbreak
Receiver:
\begin{verbatim}
pthread_mutex_lock(&mutex);
while (!ready)
    pthread_cond_wait(&cond, &mutex);
/* we have been signaled */
pthread_mutex_unlock(&mutex);
\end{verbatim}
\end{frame}

% ---------------------------------------------------------------------------
% Section: debugging and verifying applications (kernel tracing).
% ---------------------------------------------------------------------------
\section{Debugging and Verifying Applications}

\begin{frame}
\tableofcontents[currentsection]
\end{frame}

\begin{frame}[fragile]
\frametitle{Kernel Tracing: Overview}
\begin{itemize}
  \item debugfs interface
  \item static and dynamic trace events
  \item various heuristics (tracers): function, function\_graph, wakeup, wakeup\_rt, \dots
  \item custom trace events
  \item graphical frontend (kernelshark)
\end{itemize}
\end{frame}

\begin{frame}[fragile]
\frametitle{Kernel Tracing: Overview}
\begin{figure}[h]
  \centering
  \includegraphics[width=10cm]{images/trace_overview.png}
\end{figure}
\end{frame}

\begin{frame}
\frametitle{Kernelshark}
\begin{figure}[h]
  \centering
  \includegraphics[width=10cm]{images/kernelshark.png}
\end{figure}
\end{frame}

\begin{frame}
\frametitle{Kernelshark}
\begin{figure}[h]
  \centering
  \includegraphics[width=10cm]{images/kernelshark_zoom.png}
\end{figure}
\end{frame}

% Shell transcript: the trailing backslashes are shell line continuations, so
% every continuation must sit on its own verbatim line.
\begin{frame}[fragile]
\frametitle{Wakeup Example}
\begin{verbatim}
$ sudo trace-cmd record -e irq_vectors:local_timer_entry \
                        -e sched:sched_wakeup \
                        -e sched:sched_switch \
                        -e syscalls:sys_exit_nanosleep \
                        chrt -f 98 /bin/sleep 1
$ kernelshark
\end{verbatim}
\bigbreak
\begin{figure}[h]
  \centering
  \includegraphics[width=10cm]{images/kernelshark_sleep.png}
\end{figure}
\end{frame}

% IPC signaling example (fragile frame: \end{frame} must stand alone on its
% own line; the backslash-continued shell command needs real line breaks).
% The explanatory sentence is aligned with the commands shown below it:
% ./recv is started with "chrt -f 80" and ./send with "chrt -f 70".
\begin{frame}[fragile]
\frametitle{IPC Signaling Example (with Priority Inheritance)}
Set custom events ``sending'' and ``received'' in the userspace applications.
\begin{verbatim}
$ sudo perf probe -x ./send sending=send.c:35
$ sudo perf probe -x ./recv received=recv.c:43
\end{verbatim}
Pin receiver (real-time priority 80) and sender (real-time priority 70) to the same CPU to force priority inheritance.
\begin{verbatim}
$ sudo taskset 1 chrt -f 80 ./recv &
$ sudo trace-cmd record -e sched:sched_switch \
                        -e sched:sched_wakeup \
                        -e sched:sched_pi_setprio \
                        -e probe_send:sending \
                        -e probe_recv:received \
                        -e syscalls \
                        taskset 1 chrt -f 70 ./send
\end{verbatim}
\end{frame}

\begin{frame}[fragile]
\frametitle{IPC Signaling Example (with Priority Inheritance)}
\begin{verbatim}
$ kernelshark
\end{verbatim}
\begin{figure}[h]
  \centering
  \includegraphics[width=10cm]{images/kernelshark_ipcshm1.png}
\end{figure}
\begin{figure}[h]
  \centering
  \includegraphics[width=10cm]{images/kernelshark_ipcshm2.png}
\end{figure}
\end{frame}

% ---------------------------------------------------------------------------
% Section: real-time in the kernel.
% ---------------------------------------------------------------------------
\section{Real-Time in the Kernel}

\begin{frame}
\tableofcontents[currentsection]
\end{frame}

\begin{frame}
\frametitle{General Purpose vs.
Real-Time} \begin{figure}[h] \centering \includegraphics[height=0.5\textwidth]{images/gpos_vs_rt.png} \end{figure} \end{frame} \begin{frame} \frametitle{Single Kernel} \begin{figure}[h] \centering \includegraphics[height=0.5\textwidth]{images/single_kernel.png} \end{figure} \end{frame} \begin{frame} \frametitle{PREEMPT\_RT: Real-Time for Linux} \begin{itemize} \item Thomas Gleixner, Ingo Molnar \item in-kernel approach \item large development community \item many features already integrated in ``mainline'' Linux \item POSIX real-time \item highly accepted, agreement in 2006 for complete Linux integration \end{itemize} \end{frame} \begin{frame} \frametitle{Goals of Preempt RT} \begin{itemize} \item full Linux kernel hardware support \item standard API (POSIX) \item no special user ABI \item full availability using existing tools \item scalable! \end{itemize} \end{frame} \begin{frame} \frametitle{Influence of Preempt RT on ``Mainline'' Linux} \begin{itemize} \item generic interrupt subsystem \item generic timekeeping \item generic timer handling \item high resolution timers \item the NOHZ infrastructure \item consolidation of the locking infrastructure \item tracing! \item ... and much more \end{itemize} \end{frame} \begin{frame} \frametitle{Preempt RT and Mainline} \textit{``Controlling a laser with Linux is crazy, but everyone in this room is crazy in his own way. So if you want to use Linux to control an industrial welding laser, I have no problem with your using Preempt RT''} \\ --- Linus Torvalds at the Kernel Summit 2006 \end{frame} \begin{frame} \frametitle{How does Preempt RT make Linux real-time capable?} \begin{overprint} \onslide<1|handout:0> \begin{alertblock}{Remember...} \bigskip Interruptibility is a main requirement of a real-time system. \end{alertblock} \onslide<2> \begin{itemize} \item Locking primitives: Spinlocks are replaced with RT mutexes that can sleep. Raw spinlocks are introduced to provide the classic spinlock functionality. 
\item By default, each interrupt handler runs as its own kernel thread. \end{itemize} \end{overprint} \end{frame} \begin{frame} \frametitle{Preempt RT} \begin{figure}[h] \centering \includegraphics[height=0.5\textwidth]{images/RT_preempt_kernel_approach.jpg} \end{figure} \end{frame} \section{Results: What is possible using this approach?} \begin{frame} \tableofcontents[currentsection] \end{frame} \begin{frame} \frametitle{Measurements on the Cortex A9 Platform} \begin{itemize} \item ARM Cortex A9 SoC \item Load Scenario: 100\% CPU Load using ``hackbench'' \item IRQ measurements at 10 kHz with the OSADL Latency Box \item Test Duration: 12h \end{itemize} \end{frame} \begin{frame} \frametitle{What was measured?} Latency and Jitter \begin{figure}[h] \centering \includegraphics[width=10cm]{images/latency.png} \end{figure} \end{frame} \begin{frame} \frametitle{Latency Userspace Task: Most Important Use Case} The most important index is the reaction time for a userspace application. It is quite common that an application is required to react to an external event! \end{frame} \begin{frame} \frametitle{PREEMPT\_RT Latency Userspace Task} \includegraphics[width=8cm]{images/10k-rt-usr-noisol.png} \end{frame} \begin{frame} \frametitle{PREEMPT\_RT Latency Userspace Task (isolated CPU)} \includegraphics[width=8cm]{images/10k-rt-usr-isol.png} \end{frame} \begin{frame} \frametitle{Latency in the Kernel} ...or how to compare apples and oranges!! 
;-)
\end{frame}

\begin{frame}
\frametitle{PREEMPT\_RT Latency Kernel}
\includegraphics[width=8cm]{images/10k-rt-irq-noisol.png}
\end{frame}

\begin{frame}
\frametitle{PREEMPT\_RT Latency Kernel (isolated CPU)}
\includegraphics[width=8cm]{images/10k-rt-irq-isol.png}
\end{frame}

\begin{frame}
\frametitle{PREEMPT\_RT Latency Kernel (FIQ / fast interrupt)}
\includegraphics[width=8cm]{images/10k-fiq-irq-noisol.png}
\end{frame}

% ---------------------------------------------------------------------------
% Section: checklist for real-time applications.
% ---------------------------------------------------------------------------
\section{Checklist for Real-Time Applications}

\begin{frame}
\tableofcontents[currentsection]
\end{frame}

% Two-column summary slide. The frame is declared fragile, so beamer requires
% its \end{frame} to stand alone on its own line (it must not share a line
% with the following \input).
\begin{frame}[fragile]
\frametitle{Checklist}
\begin{columns}[T]
  % Left column: priority, affinity, memory, time.
  \begin{column}{5cm}
    {Real-Time Priority
    \begin{itemize}
      \item SCHED\_FIFO, SCHED\_RR
    \end{itemize}
    \medbreak
    CPU Affinity
    \begin{itemize}
      \item applications
      \item interrupt handlers
      \item interrupt routing
    \end{itemize}
    \medbreak
    Memory Management
    \begin{itemize}
      \item avoid mmap() with malloc()
      \item lock memory
      \item prefault memory
    \end{itemize}
    \medbreak
    Time and Sleeping
    \begin{itemize}
      \item use monotonic clock
      \item use absolute time
    \end{itemize}
    }
  \end{column}
  % Right column: signals, priority inversion, NMIs, verification.
  \begin{column}{5cm}
    {Avoid Signals
    \begin{itemize}
      \item such as POSIX timers
      \item such as kill()
    \end{itemize}
    \medbreak
    Avoid Priority Inversion
    \begin{itemize}
      \item use pthread\_mutex \\ (and set attributes!)
      \item use pthread\_condvar \\ (and set attributes!)
    \end{itemize}
    \medbreak
    Be aware of NMIs
    \bigbreak
    Verify Results
    \begin{itemize}
      \item trace scheduling
      \item trace page faults
      \item monitor traces
    \end{itemize}
    }
  \end{column}
\end{columns}
\end{frame}

\input{tailpres}