\input{configpres} \section{Xilinx Zynq} \title{Xilinx Zynq} \maketitle \subsection{Zynq FPGA/ARM Design} \begin{frame} \frametitle{Zynq FPGA/ARM Design} \begin{alertblock}{Processing System (PS)} \begin{itemize} \item dual-core ARM Cortex-A9 MPCore \item each core has its own 32KB L1 cache \item all cores share a single 512KB L2 cache \item snoop control unit (SCU) to maintain L2 cache coherency between cores \end{itemize} \end{alertblock} \end{frame} \begin{frame} \frametitle{Zynq FPGA/ARM Design} \begin{alertblock}{Asymmetric Multi Processing (AMP)} \begin{itemize} \item private peripheral interrupts (PPI) \item separate memory management units (MMU) \item private timers \end{itemize} \end{alertblock} \end{frame} \begin{frame} \frametitle{Zynq FPGA/ARM Design} \begin{alertblock}{Programmable Logic (PL)} \begin{itemize} \item PL configured at boot or at a later time \item supports complete or partial reconfiguration \item PL configuration data called ``bitstream'' \end{itemize} \end{alertblock} \end{frame} \begin{frame} \frametitle{Zynq FPGA/ARM Design} \begin{alertblock}{Power Domains} \begin{itemize} \item PS and PL have separate power domains \item PS and/or PL can be shut down if not needed \end{itemize} \end{alertblock} \end{frame} \subsection{PS Peripherals} \begin{frame} \frametitle{PS Peripherals} \begin{itemize} \item 2x UART \item 2x USB (host/gadget/OTG) \item 2x I2C \item 2x SPI \item 2x CAN \item 2x Ethernet \item Quad-SPI \item SDIO \item watchdog \item static memory controller (SMC) (NAND/SRAM/NOR) \item GPIO (54 via MIO, 64 inputs (PL$\rightarrow$PS) via EMIO, 128 outputs (PS$\rightarrow$PL) via EMIO) \item PCI Express (only on some devices) \item 256KB on-chip memory (OCM) \end{itemize} \end{frame} \subsection{PS-PL Interfaces} \begin{frame} \frametitle{PS-PL Interfaces} \begin{alertblock}{Accelerator Coherency Port (ACP)} \begin{itemize} \item 64-bit interface to allow PL (master) to access OCM or L2 \item low-latency \item connected directly to SCU, as if it were the
CPU \item cache coherent \end{itemize} \end{alertblock} \end{frame} \begin{frame} \frametitle{PS-PL Interfaces} \begin{alertblock}{High Performance (HP)} \begin{itemize} \item 4x master ports for PL \item for DDR and OCM \item FIFO buffering \item configurable to maximize performance/throughput \item must be non-cached or flush/invalidate cache \end{itemize} \end{alertblock} \end{frame} \begin{frame} \frametitle{PS-PL Interfaces} \begin{alertblock}{General Purpose (GP)} \begin{itemize} \item 2x master ports \item 2x slave ports \item directly mapped \item low performance \end{itemize} \end{alertblock} \end{frame} \subsection{Zynq SoC} \begin{frame} \frametitle{Zynq SoC} \begin{figure}[h] \centering \includegraphics[width=8.5cm]{images/zynq_soc.png} \end{figure} \end{frame} \subsection{Data Movement Comparison} \begin{frame} \frametitle{Data Movement Comparison} \begin{figure}[h] \centering \includegraphics[width=10cm]{images/data_movement_comparison.png} \end{figure} \end{frame} \subsection{Zynq Boot Process} \begin{frame} \frametitle{Zynq Boot Process} \begin{alertblock}{Boot Devices} \begin{itemize} \item SD \item NAND \item QSPI (with optional execute-in-place) \item NOR (with optional execute-in-place) \end{itemize} \end{alertblock} \end{frame} \begin{frame} \frametitle{Zynq Boot Process} \begin{alertblock}{1. Boot ROM} \begin{itemize} \item determine boot source \item load and execute first stage boot loader (FSBL) \end{itemize} \end{alertblock} \end{frame} \begin{frame} \frametitle{Zynq Boot Process} \begin{alertblock}{2. FSBL} \begin{itemize} \item set up pin configuration \item initialize clocks/RAM \item optionally perform any security/validation checks \item optionally program FPGA \item optionally load data \item load and execute boot loader \end{itemize} \end{alertblock} \end{frame} \begin{frame} \frametitle{Zynq Boot Process} \begin{alertblock}{3.
Bootloader} \begin{itemize} \item optionally program FPGA \item optionally load data \item load and execute kernel \end{itemize} \end{alertblock} \end{frame} \begin{frame} \frametitle{Zynq Boot Process} \begin{alertblock}{4. Kernel} \begin{itemize} \item start up 2nd core \item set up memory/process management \item initialize hardware \item execute userspace environment \end{itemize} \end{alertblock} \end{frame} \begin{frame} \frametitle{Zynq Boot Process} \begin{alertblock}{5. Userspace} \begin{itemize} \item optionally program FPGA \item optionally trigger more hardware initialization \item do something useful \end{itemize} \end{alertblock} \end{frame} \begin{frame} \frametitle{Zynq Boot Process} When should what be loaded? \begin{alertblock}{Considerations} \begin{itemize} \item boot time (delayed initialization) \item boot device (raw flash management) \item modularity (updates) \item flexibility (rescue/maintenance/test systems) \item complexity (KISS) \end{itemize} \end{alertblock} \end{frame} \subsection{Zynq Development Tools} \begin{frame} \frametitle{Zynq Development Tools/Source} \begin{alertblock}{Vivado / Vivado HLS / SDK} \begin{itemize} \item \url{www.xilinx.com} \item Support - Downloads - Vivado HLx 2016.1 \item All OS Installer Single-File Download (11.16 GB) \end{itemize} \end{alertblock} \begin{alertblock}{Source} \begin{itemize} \item \url{git://git.yoctoproject.org/meta-xilinx} \item \url{https://github.com/Xilinx/u-boot-xlnx.git} \item \url{https://github.com/Xilinx/linux-xlnx.git} \item \url{git://github.com/Xilinx/device-tree-xlnx.git} \end{itemize} \end{alertblock} \end{frame} \begin{frame} \frametitle{Zynq Development Tools - Vivado} \begin{alertblock}{Demo} \begin{itemize} \item set up ZedBoard project \item set up AXI GPIO IP for PL \item export bitstream \end{itemize} \end{alertblock} \end{frame} \begin{frame} \frametitle{Zynq Development Tools - SDK} \begin{alertblock}{Demo} \begin{itemize} \item program FPGA via JTAG \end{itemize} \end{alertblock} \end{frame}
\subsection{FPGA Programming} \begin{frame} \frametitle{FPGA Programming} The FPGA can be (re)programmed from nearly all boot stages. \begin{itemize} \item FSBL \item U-Boot \item Linux Userspace \end{itemize} \end{frame} \subsection{Accessing FPGA from ARM} \begin{frame} \frametitle{Accessing FPGA from ARM} For bare metal applications, Xilinx provides a rich set of APIs and examples for many IP blocks. \begin{alertblock}{Demo} \begin{itemize} \item implement bare metal application that uses AXI GPIO IP in PL \item test the application via JTAG \end{itemize} \end{alertblock} \end{frame}
\begin{frame}[containsverbatim] \frametitle{Accessing FPGA from ARM} Bare metal applications can be loaded and started directly from the FSBL. \begin{alertblock}{Demo} \begin{itemize} \item build FSBL \item configure FSBL to program the FPGA and run the test application \end{itemize} \end{alertblock} \begin{alertblock}{Boot Image File (BIF)}
\begin{verbatim}
the_ROM_image:
{
    [bootloader]fsbl.elf
    design_gpio_wrapper.bit
    bare.elf
}
\end{verbatim}
\end{alertblock}
\end{frame}
\begin{frame} \frametitle{Accessing FPGA from ARM} For Linux applications, IP blocks are typically available through the appropriate Linux subsystem APIs. (Assuming the IP block driver is implemented as a Linux driver.) \begin{itemize} \item PL modules mapped to memory space \item Linux is not even aware that it is programmable hardware \end{itemize} \begin{alertblock}{Demo} \begin{itemize} \item configure FSBL to run U-Boot \item load/run Linux via U-Boot \item access AXI GPIO IP in PL via sysfs \item lightshow \end{itemize} \end{alertblock} \end{frame} \subsection{Asymmetric Multi Processing} \begin{frame} \frametitle{Asymmetric Multi Processing} The FSBL can be configured to load 2 bare metal applications, one on each core.
\begin{alertblock}{Demo} \begin{itemize} \item split bare metal test application into 2 applications \item application 1 on core 1, application 2 on core 2 \item modify application 1 to start application 2 \item configure FSBL to load both applications \end{itemize} \end{alertblock} \end{frame} \begin{frame} \frametitle{Asymmetric Multi Processing} The FSBL can be configured to load U-Boot on one core and a bare metal application on the other. \begin{alertblock}{Demo} \begin{itemize} \item modify U-Boot/Linux to restrict memory \item modify bare metal test application to restrict memory \item configure FSBL to run U-Boot and load bare metal application \item activate the bare metal application in U-Boot \item start the bare metal application from Linux \end{itemize} \end{alertblock} \end{frame} \begin{frame} \frametitle{Asymmetric Multi Processing} Instead of relying on U-Boot to load the kernel, the FSBL can load all components into memory. \begin{alertblock}{Demo} \begin{itemize} \item configure FSBL to load device tree and kernel \end{itemize} \end{alertblock} \end{frame} \subsection{Bare Metal vs. Process Affinity} \begin{frame} \frametitle{Bare Metal vs. Process Affinity} \begin{alertblock}{bare metal advantages} \begin{itemize} \item hardware separation \item faster startup time \item less reliance on 3rd party software \end{itemize} \end{alertblock} \begin{alertblock}{Linux application advantages} \begin{itemize} \item full operating system features available \item rich hardware API available \item simplified development/implementation \item synchronized shared hardware resources \end{itemize} \end{alertblock} \end{frame} \input{tailpres}