% path: root/misc/pres_zynq_en.tex
% blob: d1ba5d2b5d069f23f7a70bb4923ad218e0f044a8 (plain)
\input{configpres}

\section{Xilinx Zynq}

\title{Xilinx Zynq}
\maketitle

\subsection{Zynq FPGA/ARM Design}

\begin{frame}
\frametitle{Zynq FPGA/ARM Design}
\begin{alertblock}{Processing System (PS)}
\begin{itemize}
\item dual-core ARM Cortex-A9 MPCore
\item each core has its own 32KB L1 cache
\item all cores share a single 512KB L2 cache
\item snoop control unit (SCU) to maintain L1 cache coherency between cores
\end{itemize}
\end{alertblock}
\end{frame}

\begin{frame}
\frametitle{Zynq FPGA/ARM Design}
\begin{alertblock}{Asymmetric Multi Processing (AMP)}
\begin{itemize}
\item private peripheral interrupts (PPI)
\item separate memory management units (MMU)
\item private timers
\end{itemize}
\end{alertblock}
\end{frame}

\begin{frame}
\frametitle{Zynq FPGA/ARM Design}
\begin{alertblock}{Programmable Logic (PL)}
\begin{itemize}
\item PL configured at boot or at a later time
\item supports complete or partial reconfiguration
\item PL configuration data called ``bitstream''
\end{itemize}
\end{alertblock}
\end{frame}

\begin{frame}
\frametitle{Zynq FPGA/ARM Design}
\begin{alertblock}{Power Domains}
\begin{itemize}
\item PS and PL have separate power domains
\item PS and/or PL can be shut down if not needed
\end{itemize}
\end{alertblock}
\end{frame}

\subsection{PS Peripherals}

\begin{frame}
\frametitle{PS Peripherals}
\begin{itemize}
\item 2x UART
\item 2x USB (host/gadget/OTG)
\item 2x I2C
\item 2x SPI
\item 2x CAN
\item 2x Ethernet
\item Quad-SPI
\item SDIO
\item watchdog
\item static memory controller (SMC) (NAND/SRAM/NOR)
\item GPIO (54 via MIO, 64 inputs (PL$\to$PS) via EMIO, 128 outputs (PS$\to$PL) via EMIO)
\item PCI Express (only on some devices)
\item 256KB on-chip memory (OCM)
\end{itemize}
\end{frame}

\subsection{PS-PL Interfaces}

\begin{frame}
\frametitle{PS-PL Interfaces}
\begin{alertblock}{Accelerator Coherency Port (ACP)}
\begin{itemize}
\item 64-bit interface to allow PL (master) to access OCM or L2
\item low-latency
\item connected directly to SCU, as if it were the CPU
\item cache coherent
\end{itemize}
\end{alertblock}
\end{frame}

\begin{frame}
\frametitle{PS-PL Interfaces}
\begin{alertblock}{High Performance (HP)}
\begin{itemize}
\item 4x master ports for PL
\item for DDR and OCM
\item FIFO buffering
\item configurable to maximize performance/throughput
\item must be non-cached or flush/invalidate cache
\end{itemize}
\end{alertblock}
\end{frame}

\begin{frame}
\frametitle{PS-PL Interfaces}
\begin{alertblock}{General Purpose (GP)}
\begin{itemize}
\item 2x master ports
\item 2x slave ports
\item directly mapped
\item low performance
\end{itemize}
\end{alertblock}
\end{frame}

\subsection{Zynq SoC}

\begin{frame}
\frametitle{Zynq SoC}
\begin{figure}[h]
\centering
\includegraphics[width=8.5cm]{images/zynq_soc.png}
\end{figure}
\end{frame}

\subsection{Data Movement Comparison}

\begin{frame}
\frametitle{Data Movement Comparison}
\begin{figure}[h]
\centering
\includegraphics[width=10cm]{images/data_movement_comparison.png}
\end{figure}
\end{frame}

\subsection{Zynq Boot Process}

\begin{frame}
\frametitle{Zynq Boot Process}
\begin{alertblock}{Boot Devices}
\begin{itemize}
\item SD
\item NAND
\item QSPI (with optional execute-in-place)
\item NOR (with optional execute-in-place)
\end{itemize}
\end{alertblock}
\end{frame}

\begin{frame}
\frametitle{Zynq Boot Process}
\begin{alertblock}{1. Boot ROM}
\begin{itemize}
\item determine boot source
\item load and execute first stage boot loader (FSBL)
\end{itemize}
\end{alertblock}
\end{frame}

\begin{frame}
\frametitle{Zynq Boot Process}
\begin{alertblock}{2. FSBL}
\begin{itemize}
\item set up pin configuration
\item initialize clocks/RAM
\item optionally perform any security/validation checks
\item optionally program FPGA
\item optionally load data
\item load and execute boot loader
\end{itemize}
\end{alertblock}
\end{frame}

\begin{frame}
\frametitle{Zynq Boot Process}
\begin{alertblock}{3. Bootloader}
\begin{itemize}
\item optionally program FPGA
\item optionally load data
\item load and execute kernel
\end{itemize}
\end{alertblock}
\end{frame}

\begin{frame}
\frametitle{Zynq Boot Process}
\begin{alertblock}{4. Kernel}
\begin{itemize}
\item start up 2nd core
\item set up memory/process management
\item initialize hardware
\item execute userspace environment
\end{itemize}
\end{alertblock}
\end{frame}

\begin{frame}
\frametitle{Zynq Boot Process}
\begin{alertblock}{5. Userspace}
\begin{itemize}
\item optionally program FPGA
\item optionally trigger more hardware initialization
\item do something useful
\end{itemize}
\end{alertblock}
\end{frame}

\begin{frame}
\frametitle{Zynq Boot Process}
When should what be loaded?
\begin{alertblock}{Considerations}
\begin{itemize}
\item boot time (delayed initialization)
\item boot device (raw flash management)
\item modularity (updates)
\item flexibility (rescue/maintenance/test systems)
\item complexity (KISS)
\end{itemize}
\end{alertblock}
\end{frame}

\subsection{Zynq Development Tools}

\begin{frame}
\frametitle{Zynq Development Tools/Source}
\begin{alertblock}{Vivado / Vivado HLS / SDK}
\begin{itemize}
\item www.xilinx.com
\item Support - Downloads - Vivado HLx 2016.1
\item All OS Installer Single-File Download (11.16 GB)
\end{itemize}
\end{alertblock}
\begin{alertblock}{Source}
\begin{itemize}
\item git://git.yoctoproject.org/meta-xilinx
\item https://github.com/Xilinx/u-boot-xlnx.git
\item https://github.com/Xilinx/linux-xlnx.git
\item git://github.com/Xilinx/device-tree-xlnx.git
\end{itemize}
\end{alertblock}
\end{frame}

\begin{frame}
\frametitle{Zynq Development Tools - Vivado}
\begin{alertblock}{Demo}
\begin{itemize}
\item set up ZedBoard project
\item set up AXI GPIO IP for PL
\item export bitstream
\end{itemize}
\end{alertblock}
\end{frame}

\begin{frame}
\frametitle{Zynq Development Tools - SDK}
\begin{alertblock}{Demo}
\begin{itemize}
\item program FPGA via JTAG
\end{itemize}
\end{alertblock}
\end{frame}

\subsection{FPGA Programming}

\begin{frame}
\frametitle{FPGA Programming}
The FPGA can be (re)programmed from nearly all boot stages.
\begin{itemize}
\item FSBL
\item U-Boot
\item Linux Userspace
\end{itemize}
\end{frame}

\subsection{Accessing FPGA from ARM}

\begin{frame}
\frametitle{Accessing FPGA from ARM}
For bare metal applications, Xilinx provides a rich set of APIs and examples for many IP blocks.
\begin{alertblock}{Demo}
\begin{itemize}
\item implement bare metal application that uses AXI GPIO IP in PL
\item test the application via JTAG
\end{itemize}
\end{alertblock}
\end{frame}

\begin{frame}[containsverbatim]
\frametitle{Accessing FPGA from ARM}
Bare metal applications can be loaded and started directly from the FSBL.
\begin{alertblock}{Demo}
\begin{itemize}
\item build FSBL
\item configure FSBL to program the FPGA and run the test application
\end{itemize}
\end{alertblock}
\begin{alertblock}{Boot Image File (BIF)}
\begin{verbatim}
the_ROM_image: {
    [bootloader]fsbl.elf
    design_gpio_wrapper.bit
    bare.elf
}
\end{verbatim}
\end{alertblock}
\end{frame}

\begin{frame}
\frametitle{Accessing FPGA from ARM}
For Linux applications, IP blocks are typically available through the appropriate Linux subsystem APIs (assuming the IP block driver is implemented as a Linux driver).
\begin{itemize}
\item PL modules mapped to memory space
\item Linux is not even aware that it is programmable hardware
\end{itemize}
\begin{alertblock}{Demo}
\begin{itemize}
\item configure FSBL to run U-Boot
\item load/run Linux via U-Boot
\item access AXI GPIO IP in PL via sysfs
\item lightshow
\end{itemize}
\end{alertblock}
\end{frame}

\subsection{Asymmetric Multi Processing}

\begin{frame}
\frametitle{Asymmetric Multi Processing}
The FSBL can be configured to load 2 bare metal applications, one on each core.
\begin{alertblock}{Demo}
\begin{itemize}
\item split bare metal test application into 2 applications
\item application 1 on core 1, application 2 on core 2
\item modify application 1 to start application 2
\item configure FSBL to load both applications
\end{itemize}
\end{alertblock}
\end{frame}

\begin{frame}
\frametitle{Asymmetric Multi Processing}
The FSBL can be configured to load U-Boot on one core and a bare metal application on the other.
\begin{alertblock}{Demo}
\begin{itemize}
\item modify U-Boot/Linux to restrict memory
\item modify bare metal test application to restrict memory
\item configure FSBL to run U-Boot and load bare metal application
\item activate the bare metal application in U-Boot
\item start the bare metal application from Linux
\end{itemize}
\end{alertblock}
\end{frame}

\begin{frame}
\frametitle{Asymmetric Multi Processing}
Instead of relying on U-Boot to load the kernel, the FSBL can load all components into memory.
\begin{alertblock}{Demo}
\begin{itemize}
\item configure FSBL to load device tree and kernel
\end{itemize}
\end{alertblock}
\end{frame}

\subsection{Bare Metal vs. Process Affinity}

\begin{frame}
\frametitle{Bare Metal vs. Process Affinity}
\begin{alertblock}{bare metal advantages}
\begin{itemize}
\item hardware separation
\item faster startup time
\item less reliance on third-party software
\end{itemize}
\end{alertblock}
\begin{alertblock}{Linux application advantages}
\begin{itemize}
\item full operating system features available
\item rich hardware API available
\item simplified development/implementation
\item synchronized shared hardware resources
\end{itemize}
\end{alertblock}
\end{frame}

\input{tailpres}