/ libxml2 / doc / example.html
example.html
  1  <?xml version="1.0" encoding="UTF-8"?>
  2  <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
  3  <html xmlns="http://www.w3.org/1999/xhtml"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8" /><link rel="SHORTCUT ICON" href="/favicon.ico" /><style type="text/css">
  4  TD {font-family: Verdana,Arial,Helvetica}
  5  BODY {font-family: Verdana,Arial,Helvetica; margin-top: 2em; margin-left: 0em; margin-right: 0em}
  6  H1 {font-family: Verdana,Arial,Helvetica}
  7  H2 {font-family: Verdana,Arial,Helvetica}
  8  H3 {font-family: Verdana,Arial,Helvetica}
  9  A:link, A:visited, A:active { text-decoration: underline }
 10  </style><title>A real example</title></head><body bgcolor="#8b7765" text="#000000" link="#a06060" vlink="#000000"><table border="0" width="100%" cellpadding="5" cellspacing="0" align="center"><tr><td width="120"><a href="http://swpat.ffii.org/"><img src="epatents.png" alt="Action against software patents" /></a></td><td width="180"><a href="http://www.gnome.org/"><img src="gnome2.png" alt="Gnome2 Logo" /></a><a href="http://www.w3.org/Status"><img src="w3c.png" alt="W3C Logo" /></a><a href="http://www.redhat.com/"><img src="redhat.gif" alt="Red Hat Logo" /></a><div align="left"><a href="http://xmlsoft.org/"><img src="Libxml2-Logo-180x168.gif" alt="Made with Libxml2 Logo" /></a></div></td><td><table border="0" width="90%" cellpadding="2" cellspacing="0" align="center" bgcolor="#000000"><tr><td><table width="100%" border="0" cellspacing="1" cellpadding="3" bgcolor="#fffacd"><tr><td align="center"><h1>The XML C parser and toolkit of Gnome</h1><h2>A real example</h2></td></tr></table></td></tr></table></td></tr></table><table border="0" cellpadding="4" cellspacing="0" width="100%" align="center"><tr><td bgcolor="#8b7765"><table border="0" cellspacing="0" cellpadding="2" width="100%"><tr><td valign="top" width="200" bgcolor="#8b7765"><table border="0" cellspacing="0" cellpadding="1" width="100%" bgcolor="#000000"><tr><td><table width="100%" border="0" cellspacing="1" cellpadding="3"><tr><td colspan="1" bgcolor="#eecfa1" align="center"><center><b>Developer Menu</b></center></td></tr><tr><td bgcolor="#fffacd"><form action="search.php" enctype="application/x-www-form-urlencoded" method="get"><input name="query" type="text" size="20" value="" /><input name="submit" type="submit" value="Search ..." 
/></form><ul><li><a href="index.html" style="font-weight:bold">Main Menu</a></li><li><a href="html/index.html" style="font-weight:bold">Reference Manual</a></li><li><a href="examples/index.html" style="font-weight:bold">Code Examples</a></li><li><a href="guidelines.html">XML Guidelines</a></li><li><a href="tutorial/index.html">Tutorial</a></li><li><a href="xmlreader.html">The Reader Interface</a></li><li><a href="ChangeLog.html">ChangeLog</a></li><li><a href="XSLT.html">XSLT</a></li><li><a href="python.html">Python and bindings</a></li><li><a href="architecture.html">libxml2 architecture</a></li><li><a href="tree.html">The tree output</a></li><li><a href="interface.html">The SAX interface</a></li><li><a href="xmlmem.html">Memory Management</a></li><li><a href="xmlio.html">I/O Interfaces</a></li><li><a href="library.html">The parser interfaces</a></li><li><a href="entities.html">Entities or no entities</a></li><li><a href="namespaces.html">Namespaces</a></li><li><a href="upgrade.html">Upgrading 1.x code</a></li><li><a href="threads.html">Thread safety</a></li><li><a href="DOM.html">DOM Principles</a></li><li><a href="example.html">A real example</a></li><li><a href="xml.html">flat page</a>, <a href="site.xsl">stylesheet</a></li></ul></td></tr></table><table width="100%" border="0" cellspacing="1" cellpadding="3"><tr><td colspan="1" bgcolor="#eecfa1" align="center"><center><b>API Indexes</b></center></td></tr><tr><td bgcolor="#fffacd"><ul><li><a href="APIchunk0.html">Alphabetic</a></li><li><a href="APIconstructors.html">Constructors</a></li><li><a href="APIfunctions.html">Functions/Types</a></li><li><a href="APIfiles.html">Modules</a></li><li><a href="APIsymbols.html">Symbols</a></li></ul></td></tr></table><table width="100%" border="0" cellspacing="1" cellpadding="3"><tr><td colspan="1" bgcolor="#eecfa1" align="center"><center><b>Related links</b></center></td></tr><tr><td bgcolor="#fffacd"><ul><li><a href="http://mail.gnome.org/archives/xml/">Mail 
archive</a></li><li><a href="http://xmlsoft.org/XSLT/">XSLT libxslt</a></li><li><a href="http://phd.cs.unibo.it/gdome2/">DOM gdome2</a></li><li><a href="http://www.aleksey.com/xmlsec/">XML-DSig xmlsec</a></li><li><a href="ftp://xmlsoft.org/">FTP</a></li><li><a href="http://www.zlatkovic.com/projects/libxml/">Windows binaries</a></li><li><a href="http://opencsw.org/packages/libxml2">Solaris binaries</a></li><li><a href="http://www.explain.com.au/oss/libxml2xslt.html">MacOsX binaries</a></li><li><a href="http://lxml.de/">lxml Python bindings</a></li><li><a href="http://cpan.uwinnipeg.ca/dist/XML-LibXML">Perl bindings</a></li><li><a href="http://libxmlplusplus.sourceforge.net/">C++ bindings</a></li><li><a href="http://www.zend.com/php5/articles/php5-xmlphp.php#Heading4">PHP bindings</a></li><li><a href="http://sourceforge.net/projects/libxml2-pas/">Pascal bindings</a></li><li><a href="http://libxml.rubyforge.org/">Ruby bindings</a></li><li><a href="http://tclxml.sourceforge.net/">Tcl bindings</a></li><li><a href="http://bugzilla.gnome.org/buglist.cgi?product=libxml2">Bug Tracker</a></li></ul></td></tr></table></td></tr></table></td><td valign="top" bgcolor="#8b7765"><table border="0" cellspacing="0" cellpadding="1" width="100%"><tr><td><table border="0" cellspacing="0" cellpadding="1" width="100%" bgcolor="#000000"><tr><td><table border="0" cellpadding="3" cellspacing="1" width="100%"><tr><td bgcolor="#fffacd"><p>Here is a real size example, where the actual content of the application
 11  data is not kept in the DOM tree but uses internal structures. It is based on
 12  a proposal to keep a database of jobs related to Gnome, with an XML based
 13  storage structure. Here is an <a href="gjobs.xml">XML encoded jobs
 14  base</a>:</p><pre>&lt;?xml version="1.0"?&gt;
 15  &lt;gjob:Helping xmlns:gjob="http://www.gnome.org/some-location"&gt;
 16    &lt;gjob:Jobs&gt;
 17  
 18      &lt;gjob:Job&gt;
 19        &lt;gjob:Project ID="3"/&gt;
 20        &lt;gjob:Application&gt;GBackup&lt;/gjob:Application&gt;
 21        &lt;gjob:Category&gt;Development&lt;/gjob:Category&gt;
 22  
 23        &lt;gjob:Update&gt;
 24          &lt;gjob:Status&gt;Open&lt;/gjob:Status&gt;
 25          &lt;gjob:Modified&gt;Mon, 07 Jun 1999 20:27:45 -0400 MET DST&lt;/gjob:Modified&gt;
 26          &lt;gjob:Salary&gt;USD 0.00&lt;/gjob:Salary&gt;
 27        &lt;/gjob:Update&gt;
 28  
 29        &lt;gjob:Developers&gt;
 30          &lt;gjob:Developer&gt;
 31          &lt;/gjob:Developer&gt;
 32        &lt;/gjob:Developers&gt;
 33  
 34        &lt;gjob:Contact&gt;
 35          &lt;gjob:Person&gt;Nathan Clemons&lt;/gjob:Person&gt;
 36          &lt;gjob:Email&gt;nathan@windsofstorm.net&lt;/gjob:Email&gt;
 37          &lt;gjob:Company&gt;
 38          &lt;/gjob:Company&gt;
 39          &lt;gjob:Organisation&gt;
 40          &lt;/gjob:Organisation&gt;
 41          &lt;gjob:Webpage&gt;
 42          &lt;/gjob:Webpage&gt;
 43          &lt;gjob:Snailmail&gt;
 44          &lt;/gjob:Snailmail&gt;
 45          &lt;gjob:Phone&gt;
 46          &lt;/gjob:Phone&gt;
 47        &lt;/gjob:Contact&gt;
 48  
 49        &lt;gjob:Requirements&gt;
 50        The program should be released as free software, under the GPL.
 51        &lt;/gjob:Requirements&gt;
 52  
 53        &lt;gjob:Skills&gt;
 54        &lt;/gjob:Skills&gt;
 55  
 56        &lt;gjob:Details&gt;
 57        A GNOME based system that will allow a superuser to configure 
 58        compressed and uncompressed files and/or file systems to be backed 
 59        up with a supported media in the system.  This should be able to 
 60        perform via find commands generating a list of files that are passed 
 61        to tar, dd, cpio, cp, gzip, etc., to be directed to the tape machine 
 62        or via operations performed on the filesystem itself. Email 
 63        notification and GUI status display very important.
 64        &lt;/gjob:Details&gt;
 65  
 66      &lt;/gjob:Job&gt;
 67  
 68    &lt;/gjob:Jobs&gt;
 69  &lt;/gjob:Helping&gt;</pre><p>While loading the XML file into an internal DOM tree is a matter of
 70  calling only a couple of functions, browsing the tree to gather the data and
 71  generate the internal structures is harder, and more error prone.</p><p>The suggested principle is to be tolerant with respect to the input
 72  structure. For example, the ordering of the attributes is not significant;
 73  the XML specification is clear about it. It's also usually a good idea not to
 74  depend on the order of the children of a given node, unless it really makes
 75  things harder. Here is some code to parse the information for a person:</p><pre>/*
 76   * A person record
 77   */
 78  typedef struct person {
 79      char *name;
 80      char *email;
 81      char *company;
 82      char *organisation;
 83      char *smail;
 84      char *webPage;
 85      char *phone;
 86  } person, *personPtr;
 87  
 88  /*
 89   * And the code needed to parse it
 90   */
 91  personPtr parsePerson(xmlDocPtr doc, xmlNsPtr ns, xmlNodePtr cur) {
 92      personPtr ret = NULL;
 93  
 94  DEBUG("parsePerson\n");
 95      /*
 96       * allocate the struct
 97       */
 98      ret = (personPtr) malloc(sizeof(person));
 99      if (ret == NULL) {
100          fprintf(stderr,"out of memory\n");
101          return(NULL);
102      }
103      memset(ret, 0, sizeof(person));
104  
105      /* We don't care what the top level element name is */
106      cur = cur-&gt;xmlChildrenNode;
107      while (cur != NULL) {
108          if ((!strcmp(cur-&gt;name, "Person")) &amp;&amp; (cur-&gt;ns == ns))
109              ret-&gt;name = xmlNodeListGetString(doc, cur-&gt;xmlChildrenNode, 1);
110          if ((!strcmp(cur-&gt;name, "Email")) &amp;&amp; (cur-&gt;ns == ns))
111              ret-&gt;email = xmlNodeListGetString(doc, cur-&gt;xmlChildrenNode, 1);
112          cur = cur-&gt;next;
113      }
114  
115      return(ret);
116  }</pre><p>Here are a couple of things to notice:</p><ul>
117    <li>Usually a recursive parsing style is the more convenient one: XML data
118      is by nature subject to repetitive constructs and usually exhibits highly
119      structured patterns.</li>
120    <li>The two arguments of type <em>xmlDocPtr</em> and <em>xmlNsPtr</em>
121      are the pointer to the global XML document and the namespace reserved to
122      the application. Document-wide information is needed, for example, to
123      decode entities, and it's good coding practice to define a namespace for
124      your application's set of data and test that the elements and attributes
125      you're analyzing actually pertain to your application space. This is
126      done by a simple equality test (cur-&gt;ns == ns).</li>
127    <li>To retrieve text and attribute values, you can use the function
128      <em>xmlNodeListGetString</em> to gather all the text and entity reference
129      nodes generated by the DOM output and produce a single text string.</li>
130  </ul><p>Here is another piece of code used to parse another level of the
131  structure:</p><pre>#include &lt;libxml/tree.h&gt;
132  /*
133   * a Description for a Job
134   */
135  typedef struct job {
136      char *projectID;
137      char *application;
138      char *category;
139      personPtr contact;
140      int nbDevelopers;
141      personPtr developers[100]; /* using dynamic alloc is left as an exercise */
142  } job, *jobPtr;
143  
144  /*
145   * And the code needed to parse it
146   */
147  jobPtr parseJob(xmlDocPtr doc, xmlNsPtr ns, xmlNodePtr cur) {
148      jobPtr ret = NULL;
149  
150  DEBUG("parseJob\n");
151      /*
152       * allocate the struct
153       */
154      ret = (jobPtr) malloc(sizeof(job));
155      if (ret == NULL) {
156          fprintf(stderr,"out of memory\n");
157          return(NULL);
158      }
159      memset(ret, 0, sizeof(job));
160  
161      /* We don't care what the top level element name is */
162      cur = cur-&gt;xmlChildrenNode;
163      while (cur != NULL) {
164          
165          if ((!strcmp(cur-&gt;name, "Project")) &amp;&amp; (cur-&gt;ns == ns)) {
166              ret-&gt;projectID = xmlGetProp(cur, "ID");
167              if (ret-&gt;projectID == NULL) {
168                  fprintf(stderr, "Project has no ID\n");
169              }
170          }
171          if ((!strcmp(cur-&gt;name, "Application")) &amp;&amp; (cur-&gt;ns == ns))
172              ret-&gt;application = xmlNodeListGetString(doc, cur-&gt;xmlChildrenNode, 1);
173          if ((!strcmp(cur-&gt;name, "Category")) &amp;&amp; (cur-&gt;ns == ns))
174              ret-&gt;category = xmlNodeListGetString(doc, cur-&gt;xmlChildrenNode, 1);
175          if ((!strcmp(cur-&gt;name, "Contact")) &amp;&amp; (cur-&gt;ns == ns))
176              ret-&gt;contact = parsePerson(doc, ns, cur);
177          cur = cur-&gt;next;
178      }
179  
180      return(ret);
181  }</pre><p>Once you are used to it, writing this kind of code is quite simple, but
182  boring. Ultimately, it could be possible to write stubbers taking either C
183  data structure definitions, a set of XML examples or an XML DTD and produce
184  the code needed to import and export the content between C data and XML
185  storage. This is left as an exercise to the reader :-)</p><p>Feel free to use <a href="example/gjobread.c">the code for the full C
186  parsing example</a> as a template; it is also available with a Makefile in the
187  Gnome SVN base under libxml2/example.</p><p><a href="bugs.html">Daniel Veillard</a></p></td></tr></table></td></tr></table></td></tr></table></td></tr></table></td></tr></table></body></html>