From 494b16d6fbd565225e4f9ddaf2b813b520271753 Mon Sep 17 00:00:00 2001 From: Holger Dengler Date: Mon, 22 Oct 2012 17:05:03 +0200 Subject: Misc/xml: Add new XML fasttrack Add new miscellaneous section with an XML fasttrack. Coding samples are located in misc/samples/. Signed-off-by: Holger Dengler --- misc/pres_xml-fasttrack_en.tex | 426 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 426 insertions(+) create mode 100644 misc/pres_xml-fasttrack_en.tex (limited to 'misc/pres_xml-fasttrack_en.tex') diff --git a/misc/pres_xml-fasttrack_en.tex b/misc/pres_xml-fasttrack_en.tex new file mode 100644 index 0000000..35027ee --- /dev/null +++ b/misc/pres_xml-fasttrack_en.tex @@ -0,0 +1,426 @@ +\def\lximg{/usr/share/lx/icons/fueller.png} + +\input{configpres} + +% ------------------------ +\section{Programming in C/C++} + +% ------------------------ +\subsection{XML Processing} + +\title{XML Processing in C/C++} +\maketitle + +\def\lximg{none} + +\begin{frame} +\frametitle{Contents} +\tableofcontents +\end{frame} + +% ------------------------ +\subsubsection{Basics} + +\begin{frame}[fragile] +\frametitle{What is XML?} +XML is +\begin{itemize} +\item a Markup Language to describe structured data +\item extensible by defining new data type definitions +\item human readable as well as machine readable +\end{itemize} + +{ \tiny +\begin{verbatim} + + + + + The Lord Of The Rings + J.R.R. Tolkien + + + The Wall + Pink Floyd + Another Brick in the Wall + Mother + + +\end{verbatim} +} +\end{frame} + +% ------------------------ +\subsubsection{Processing} + +\begin{frame}[fragile] +\frametitle{Own parser vs. 
using a common library} +Using a common library is the preferred solution +\begin{itemize} +\item XML can get very complex +\item own parsers might struggle with some exotic but allowed constructs +\item common libraries are (mostly) well tested +\item rapid prototyping with common libraries +\end{itemize} +\end{frame} + +\begin{frame}[fragile] +\frametitle{GNOME XML library} +\begin{itemize} +\item libxml2 is the most common XML library for Linux and C/C++ +\item provides a large toolset for nearly all XML problems +\item can be used for small XML chunks as well as for huge XML files or streams +\item supports Document Object Model (DOM) as well as Simple API for XML (SAX) +\item supports parsing, generating and validating +\end{itemize} +\end{frame} + +\begin{frame}[fragile] +\frametitle{Document Object Model (DOM)} +\begin{itemize} +\item DOM Parser creates a tree representation of the XML chunk in main memory +\item Pros: + \begin{itemize} + \item navigation support + \item model can be modified + \item easy export of an existing model + \end{itemize} +\item Cons: + \begin{itemize} + \item large XML chunks need a lot of main memory + \item no stream handling support + \item slow for large XML files + \end{itemize} +\end{itemize} +\end{frame} + +\begin{frame}[fragile] +\frametitle{Simple API for XML (SAX)} +\begin{itemize} +\item DOM tree parser uses SAX2 internally! +\item SAX uses callbacks for parsing events (start element, end element, etc.) 
+\item Pros: + \begin{itemize} + \item lean and fast + \item large-file and stream handling support + \item memory footprint depends on callback implementation + \end{itemize} +\item Cons: + \begin{itemize} + \item no navigation support + \item no XML generation support + \end{itemize} +\end{itemize} +\end{frame} + +\begin{frame}[fragile] +\frametitle{Recommendation} +\begin{itemize} +\item choose the right parser for the right use case +\item DOM: + \begin{itemize} + \item small files + \item navigate/modify/generate + \item data model validation (DTD) + \end{itemize} +\item SAX2: + \begin{itemize} + \item streams or large files + \item preselect XML chunks + \end{itemize} +\item combine SAX2 and DOM if necessary +\end{itemize} +\end{frame} + +% ------------------------ +\subsubsection{Examples} + +\begin{frame}[fragile] +\frametitle{DOM main()} +{ \tiny +\begin{verbatim} +#include <stdio.h> +#include <libxml/parser.h> +#include <libxml/tree.h> + +static void print_element_names(xmlNode *); + +int main(int argc, char **argv) { + xmlDocPtr my_doc = NULL; + xmlNode *root_element = NULL; + + my_doc = xmlReadFile(argv[1], NULL, 0); // parse XML file + + root_element = xmlDocGetRootElement(my_doc); + + print_element_names(root_element); // output + + xmlFreeDoc(my_doc); + xmlCleanupParser(); + return 0; +} +\end{verbatim} +} +\end{frame} + +\begin{frame}[fragile] +\frametitle{DOM tree navigation and output} +{ \tiny +\begin{verbatim} +static void print_element_names(xmlNode * a_node) +{ + xmlNode *cur_node = NULL; + + for (cur_node = a_node; cur_node; cur_node = cur_node->next) { + if (cur_node->type == XML_ELEMENT_NODE) { + printf("node type: Element, name: %s\n", cur_node->name); + } + + print_element_names(cur_node->children); + } +} +\end{verbatim} +} +\end{frame} + +\begin{frame}[fragile] +\frametitle{DOM: Input / Output} +\begin{columns} +\begin{column}{5cm} +{ \tiny +\begin{verbatim} + + + + +The Lord Of The Rings +J.R.R. 
Tolkien + + +The Wall +Pink Floyd + + Another Brick in the Wall +Mother + + +\end{verbatim} +} +\end{column} +\begin{column}{5cm} +{ \tiny +\begin{verbatim} + + +node type: Element, name: store + +node type: Element, name: book +node type: Element, name: title +node type: Element, name: author + +node type: Element, name: cd +node type: Element, name: title +node type: Element, name: artist +node type: Element, name: track + +node type: Element, name: track + + +\end{verbatim} +} +\end{column} +\end{columns} +\end{frame} + +\begin{frame}[fragile] +\frametitle{SAX2 main()} +{ \tiny +\begin{verbatim} +/* + * Simple SAX example + */ + +#include +#include +#include + +static int cb_total = 0; + +int main(int argc, char **argv) { + cb_total = 0; + + xmlSAXUserParseFile(debugSAXHandler, NULL, argv[1]); + + fprintf(stdout, "\ncallback calls: %d\n", cb_total); + + xmlCleanupParser(); + xmlMemoryDump(); + + return 0; +} +\end{verbatim} +} +\end{frame} + +\begin{frame}[fragile] +\frametitle{SAX2 callback implementations} +{ \tiny +\begin{verbatim} +static void cb_start_document(void *ctx ATTRIBUTE_UNUSED) +{ + cb_total++; + fprintf(stdout, "SAX.startDocument()\n"); +} + +static void cb_end_document(void *ctx ATTRIBUTE_UNUSED) +{ + cb_total++; + fprintf(stdout, "SAX.endDocument()\n"); +} + +static void cb_characters(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len) +{ + char output[40]; + int i; + + cb_total++; + for (i = 0;(i + + + + + + +[...] + + +\end{verbatim} +} +\end{column} +\begin{column}{5cm} +{ \tiny +\begin{verbatim} +SAX.startDocument() +SAX.startElement(store) +SAX.characters( +, 1) +SAX.comment( Sample Bookstore ) +SAX.characters( +, 1) +[...] +SAX.endElement(store) +SAX.endDocument() +\end{verbatim} +} +\end{column} +\end{columns} +\end{frame} + +\begin{frame}[fragile] +\frametitle{SAX: Input / Output - Element} +\begin{columns} +\begin{column}{5cm} +{ \tiny +\begin{verbatim} + + + +The Lord Of The Rings + + + + +J.R.R. 
Tolkien + + + + + +\end{verbatim} +} +\end{column} +\begin{column}{5cm} +{ \tiny +\begin{verbatim} +SAX.startElement(book, isbn='10000001') +SAX.characters( +, 1) +SAX.startElement(title) +SAX.characters(The Lord Of The Rings, 21) +SAX.endElement(title) +SAX.characters( +, 1) +SAX.startElement(author) +SAX.characters(J.R.R. Tolkien, 14) +SAX.endElement(author) +SAX.characters( +, 1) +SAX.endElement(book) +\end{verbatim} +} +\end{column} +\end{columns} +\end{frame} + +% ------------------------ +\subsection{} +\begin{frame} +\frametitle{References} + +XML on W3C: +\begin{itemize} +\item \url{http://www.w3.org/XML/} +\item \url{http://www.w3.org/TR/REC-xml/} +\end{itemize} + +GNOME XML Library: +\begin{itemize} +\item \url{http://www.xmlsoft.org/} +\end{itemize} + +Wikipedia: +\begin{itemize} +\item \url{http://en.wikipedia.org/wiki/XML} +\end{itemize} +\end{frame} + +\input{tailpres} -- cgit v1.2.3