\def\lximg{/usr/share/lx/icons/fueller.png} \input{configpres} % ------------------------ \section{Programming in C/C++} % ------------------------ \subsection{XML Processing} \title{XML Processing in C/C++} \maketitle \def\lximg{none} \begin{frame} \frametitle{Contents} \tableofcontents \end{frame} % ------------------------ \subsubsection{Basics} \begin{frame}[fragile] \frametitle{What is XML?} XML is \begin{itemize} \item a Markup Language to describe structured data \item extensible by defining new data type definitions \item human readable as well as machine readable \end{itemize} { \tiny \begin{verbatim} The Lord Of The Rings J.R.R. Tolkien The Wall Pink Floyd Another Brick in the Wall Mother \end{verbatim} } \end{frame} % ------------------------ \subsubsection{Processing} \begin{frame}[fragile] \frametitle{Own parser vs. using a common library} Using a common library is the preferred solution \begin{itemize} \item XML can get very complex \item own parsers might struggle with some exotic but allowed constructs \item common libraries are (mostly) well tested \item rapid prototyping with common libraries \end{itemize} \end{frame} \begin{frame}[fragile] \frametitle{GNOME XML library} \begin{itemize} \item libxml2 is the most common XML library for Linux and C/C++ \item provides a large toolset for nearly all XML problems \item can be used for small XML chunks as well as for huge XML files or streams \item supports the Document Object Model (DOM) as well as the Simple API for XML (SAX) \item supports parsing, generating and validating \end{itemize} \end{frame} \begin{frame}[fragile] \frametitle{Document Object Model (DOM)} \begin{itemize} \item DOM Parser creates a tree representation of the XML chunk in main memory \item Pros: \begin{itemize} \item navigation support \item model can be modified \item easy export of an existing model \end{itemize} \item Cons: \begin{itemize} \item large XML chunks need a lot of main memory \item no stream handling support \item slow for large XML 
files \end{itemize} \end{itemize} \end{frame} \begin{frame}[fragile] \frametitle{Simple API for XML (SAX)} \begin{itemize} \item DOM tree parser uses SAX2 internally! \item SAX uses callbacks for parsing events (start element, end element etc.) \item Pros: \begin{itemize} \item lean and fast \item large-file and stream handling support \item memory footprint depends on callback implementation \end{itemize} \item Cons: \begin{itemize} \item no navigation support \item no XML generation support \end{itemize} \end{itemize} \end{frame} \begin{frame}[fragile] \frametitle{Recommendation} \begin{itemize} \item choose the right parser for the right use case \item DOM: \begin{itemize} \item small files \item navigate/modify/generate \item data model validation (DTD) \end{itemize} \item SAX2: \begin{itemize} \item streams or large files \item preselect XML chunks \end{itemize} \item combine SAX2 and DOM if necessary \end{itemize} \end{frame} % ------------------------ \subsubsection{Examples} \begin{frame}[fragile] \frametitle{DOM main()} { \tiny \begin{verbatim} #include <stdio.h> #include <libxml/parser.h> #include <libxml/tree.h> static void print_element_names(xmlNode *); int main(int argc, char **argv) { xmlDocPtr my_doc = NULL; xmlNode *root_element = NULL; my_doc = xmlReadFile(argv[1], NULL, 0); // parse XML file root_element = xmlDocGetRootElement(my_doc); print_element_names(root_element); // output xmlFreeDoc(my_doc); xmlCleanupParser(); return 0; } \end{verbatim} } \end{frame} \begin{frame}[fragile] \frametitle{DOM tree navigation and output} { \tiny \begin{verbatim} static void print_element_names(xmlNode * a_node) { xmlNode *cur_node = NULL; for (cur_node = a_node; cur_node; cur_node = cur_node->next) { if (cur_node->type == XML_ELEMENT_NODE) { printf("node type: Element, name: %s\n", cur_node->name); } print_element_names(cur_node->children); } } \end{verbatim} } \end{frame} \begin{frame}[fragile] \frametitle{DOM: Input / Output} \begin{columns} \begin{column}{5cm} { \tiny \begin{verbatim} The Lord Of The 
Rings J.R.R. Tolkien The Wall Pink Floyd Another Brick in the Wall Mother \end{verbatim} } \end{column} \begin{column}{5cm} { \tiny \begin{verbatim} node type: Element, name: store node type: Element, name: book node type: Element, name: title node type: Element, name: author node type: Element, name: cd node type: Element, name: title node type: Element, name: artist node type: Element, name: track node type: Element, name: track \end{verbatim} } \end{column} \end{columns} \end{frame} \begin{frame}[fragile] \frametitle{SAX2 main()} { \tiny \begin{verbatim} /* * Simple SAX example */ #include #include #include static int cb_total = 0; int main(int argc, char **argv) { cb_total = 0; xmlSAXUserParseFile(debugSAXHandler, NULL, argv[1]); fprintf(stdout, "\ncallback calls: %d\n", cb_total); xmlCleanupParser(); xmlMemoryDump(); return 0; } \end{verbatim} } \end{frame} \begin{frame}[fragile] \frametitle{SAX2 callback implementations} { \tiny \begin{verbatim} static void cb_start_document(void *ctx ATTRIBUTE_UNUSED) { cb_total++; fprintf(stdout, "SAX.startDocument()\n"); } static void cb_end_document(void *ctx ATTRIBUTE_UNUSED) { cb_total++; fprintf(stdout, "SAX.endDocument()\n"); } static void cb_characters(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len) { char output[40]; int i; cb_total++; for (i = 0;(i [...] \end{verbatim} } \end{column} \begin{column}{5cm} { \tiny \begin{verbatim} SAX.startDocument() SAX.startElement(store) SAX.characters( , 1) SAX.comment( Sample Bookstore ) SAX.characters( , 1) [...] SAX.endElement(store) SAX.endDocument() \end{verbatim} } \end{column} \end{columns} \end{frame} \begin{frame}[fragile] \frametitle{SAX: Input / Output - Element} \begin{columns} \begin{column}{5cm} { \tiny \begin{verbatim} The Lord Of The Rings J.R.R. 
Tolkien \end{verbatim} } \end{column} \begin{column}{5cm} { \tiny \begin{verbatim} SAX.startElement(book, isbn='10000001') SAX.characters( , 1) SAX.startElement(title) SAX.characters(The Lord Of The Rings, 21) SAX.endElement(title) SAX.characters( , 1) SAX.startElement(author) SAX.characters(J.R.R. Tolkien, 13) SAX.endElement(author) SAX.characters( , 1) SAX.endElement(book) \end{verbatim} } \end{column} \end{columns} \end{frame} % ------------------------ \subsection{} \begin{frame} \frametitle{References} XML on W3C: \begin{itemize} \item \url{http://www.w3.org/XML/} \item \url{http://www.w3.org/TR/REC-xml/} \end{itemize} GNOME XML Library: \begin{itemize} \item \url{http://www.xmlsoft.org/} \end{itemize} Wikipedia: \begin{itemize} \item \url{http://en.wikipedia.org/wiki/XML} \end{itemize} \end{frame} \input{tailpres}