/ libxml2 / testHTML.c
testHTML.c
  1  /*
  2   * testHTML.c : a small tester program for HTML input.
  3   *
  4   * See Copyright for the status of this software.
  5   *
  6   * daniel@veillard.com
  7   */
  8  
  9  #include "libxml.h"
 10  
 11  #ifdef LIBXML_HTML_ENABLED
 12  
 13  #include <string.h>
 14  #include <stdarg.h>
 15  
 16  
 17  #ifdef HAVE_SYS_TYPES_H
 18  #include <sys/types.h>
 19  #endif
 20  #ifdef HAVE_SYS_STAT_H
 21  #include <sys/stat.h>
 22  #endif
 23  #ifdef HAVE_FCNTL_H
 24  #include <fcntl.h>
 25  #endif
 26  #ifdef HAVE_UNISTD_H
 27  #include <unistd.h>
 28  #endif
 29  #ifdef HAVE_STDLIB_H
 30  #include <stdlib.h>
 31  #endif
 32  
 33  #include <libxml/xmlmemory.h>
 34  #include <libxml/HTMLparser.h>
 35  #include <libxml/HTMLtree.h>
 36  #include <libxml/debugXML.h>
 37  #include <libxml/xmlerror.h>
 38  #include <libxml/globals.h>
 39  
/*
 * Command-line state. The integer flags are incremented by main() each
 * time the matching option is seen and are only tested for non-zero,
 * except "repeat" which is also used as a multiplier.
 */
#ifdef LIBXML_DEBUG_ENABLED
static int debug = 0;	/* --debug: dump with xmlDebugDumpDocument() */
#endif
static int copy = 0;	/* --copy: run the tree through xmlCopyDoc() */
static int sax = 0;	/* --sax: use parseSAXFile() instead of building a tree */
static int repeat = 0;	/* --repeat: each file is processed 100 * repeat times */
static int noout = 0;	/* --noout: skip the output / debug SAX pass */
#ifdef LIBXML_PUSH_ENABLED
static int push = 0;	/* --push: feed the file through the push parser */
#endif /* LIBXML_PUSH_ENABLED */
static char *encoding = NULL;	/* --encode argument, handed to htmlSaveFileEnc() */
static int options = 0;	/* passed to htmlReadFile(); never modified in this file */
 52  
/*
 * SAX handler with every callback left NULL. parseSAXFile() uses it for a
 * first parsing pass that produces no callback output.
 */
static xmlSAXHandler emptySAXHandlerStruct = {
    NULL, /* internalSubset */
    NULL, /* isStandalone */
    NULL, /* hasInternalSubset */
    NULL, /* hasExternalSubset */
    NULL, /* resolveEntity */
    NULL, /* getEntity */
    NULL, /* entityDecl */
    NULL, /* notationDecl */
    NULL, /* attributeDecl */
    NULL, /* elementDecl */
    NULL, /* unparsedEntityDecl */
    NULL, /* setDocumentLocator */
    NULL, /* startDocument */
    NULL, /* endDocument */
    NULL, /* startElement */
    NULL, /* endElement */
    NULL, /* reference */
    NULL, /* characters */
    NULL, /* ignorableWhitespace */
    NULL, /* processingInstruction */
    NULL, /* comment */
    NULL, /* warning */
    NULL, /* error */
    NULL, /* fatalError */
    NULL, /* getParameterEntity */
    NULL, /* cdataBlock */
    NULL, /* externalSubset */
    1,    /* initialized */
    NULL, /* private */
    NULL, /* startElementNsSAX2Func */
    NULL, /* endElementNsSAX2Func */
    NULL  /* xmlStructuredErrorFunc */
};

/* Pointer form of the empty handler, as expected by the parser entry points. */
static xmlSAXHandlerPtr emptySAXHandler = &emptySAXHandlerStruct;
/* Defined further down, after the debug callbacks it refers to. */
extern xmlSAXHandlerPtr debugSAXHandler;
 90  
 91  /************************************************************************
 92   *									*
 93   *				Debug Handlers				*
 94   *									*
 95   ************************************************************************/
 96  
 97  /**
 98   * isStandaloneDebug:
 99   * @ctxt:  An XML parser context
100   *
101   * Is this document tagged standalone ?
102   *
103   * Returns 1 if true
104   */
105  static int
106  isStandaloneDebug(void *ctx ATTRIBUTE_UNUSED)
107  {
108      fprintf(stdout, "SAX.isStandalone()\n");
109      return(0);
110  }
111  
112  /**
113   * hasInternalSubsetDebug:
114   * @ctxt:  An XML parser context
115   *
116   * Does this document has an internal subset
117   *
118   * Returns 1 if true
119   */
120  static int
121  hasInternalSubsetDebug(void *ctx ATTRIBUTE_UNUSED)
122  {
123      fprintf(stdout, "SAX.hasInternalSubset()\n");
124      return(0);
125  }
126  
127  /**
128   * hasExternalSubsetDebug:
129   * @ctxt:  An XML parser context
130   *
131   * Does this document has an external subset
132   *
133   * Returns 1 if true
134   */
135  static int
136  hasExternalSubsetDebug(void *ctx ATTRIBUTE_UNUSED)
137  {
138      fprintf(stdout, "SAX.hasExternalSubset()\n");
139      return(0);
140  }
141  
142  /**
143   * hasInternalSubsetDebug:
144   * @ctxt:  An XML parser context
145   *
146   * Does this document has an internal subset
147   */
148  static void
149  internalSubsetDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name,
150  	       const xmlChar *ExternalID, const xmlChar *SystemID)
151  {
152      fprintf(stdout, "SAX.internalSubset(%s,", name);
153      if (ExternalID == NULL)
154  	fprintf(stdout, " ,");
155      else
156  	fprintf(stdout, " %s,", ExternalID);
157      if (SystemID == NULL)
158  	fprintf(stdout, " )\n");
159      else
160  	fprintf(stdout, " %s)\n", SystemID);
161  }
162  
163  /**
164   * resolveEntityDebug:
165   * @ctxt:  An XML parser context
166   * @publicId: The public ID of the entity
167   * @systemId: The system ID of the entity
168   *
169   * Special entity resolver, better left to the parser, it has
170   * more context than the application layer.
171   * The default behaviour is to NOT resolve the entities, in that case
172   * the ENTITY_REF nodes are built in the structure (and the parameter
173   * values).
174   *
175   * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
176   */
177  static xmlParserInputPtr
178  resolveEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *publicId, const xmlChar *systemId)
179  {
180      /* xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; */
181  
182  
183      fprintf(stdout, "SAX.resolveEntity(");
184      if (publicId != NULL)
185  	fprintf(stdout, "%s", (char *)publicId);
186      else
187  	fprintf(stdout, " ");
188      if (systemId != NULL)
189  	fprintf(stdout, ", %s)\n", (char *)systemId);
190      else
191  	fprintf(stdout, ", )\n");
192  /*********
193      if (systemId != NULL) {
194          return(xmlNewInputFromFile(ctxt, (char *) systemId));
195      }
196   *********/
197      return(NULL);
198  }
199  
200  /**
201   * getEntityDebug:
202   * @ctxt:  An XML parser context
203   * @name: The entity name
204   *
205   * Get an entity by name
206   *
207   * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
208   */
209  static xmlEntityPtr
210  getEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
211  {
212      fprintf(stdout, "SAX.getEntity(%s)\n", name);
213      return(NULL);
214  }
215  
216  /**
217   * getParameterEntityDebug:
218   * @ctxt:  An XML parser context
219   * @name: The entity name
220   *
221   * Get a parameter entity by name
222   *
223   * Returns the xmlParserInputPtr
224   */
225  static xmlEntityPtr
226  getParameterEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
227  {
228      fprintf(stdout, "SAX.getParameterEntity(%s)\n", name);
229      return(NULL);
230  }
231  
232  
233  /**
234   * entityDeclDebug:
235   * @ctxt:  An XML parser context
236   * @name:  the entity name
237   * @type:  the entity type
238   * @publicId: The public ID of the entity
239   * @systemId: The system ID of the entity
240   * @content: the entity value (without processing).
241   *
242   * An entity definition has been parsed
243   */
244  static void
245  entityDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, int type,
246            const xmlChar *publicId, const xmlChar *systemId, xmlChar *content)
247  {
248      fprintf(stdout, "SAX.entityDecl(%s, %d, %s, %s, %s)\n",
249              name, type, publicId, systemId, content);
250  }
251  
252  /**
253   * attributeDeclDebug:
254   * @ctxt:  An XML parser context
255   * @name:  the attribute name
256   * @type:  the attribute type
257   *
258   * An attribute definition has been parsed
259   */
260  static void
261  attributeDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *elem, const xmlChar *name,
262                int type, int def, const xmlChar *defaultValue,
263  	      xmlEnumerationPtr tree ATTRIBUTE_UNUSED)
264  {
265      fprintf(stdout, "SAX.attributeDecl(%s, %s, %d, %d, %s, ...)\n",
266              elem, name, type, def, defaultValue);
267  }
268  
269  /**
270   * elementDeclDebug:
271   * @ctxt:  An XML parser context
272   * @name:  the element name
273   * @type:  the element type
274   * @content: the element value (without processing).
275   *
276   * An element definition has been parsed
277   */
278  static void
279  elementDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, int type,
280  	    xmlElementContentPtr content ATTRIBUTE_UNUSED)
281  {
282      fprintf(stdout, "SAX.elementDecl(%s, %d, ...)\n",
283              name, type);
284  }
285  
286  /**
287   * notationDeclDebug:
288   * @ctxt:  An XML parser context
289   * @name: The name of the notation
290   * @publicId: The public ID of the entity
291   * @systemId: The system ID of the entity
292   *
293   * What to do when a notation declaration has been parsed.
294   */
295  static void
296  notationDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name,
297  	     const xmlChar *publicId, const xmlChar *systemId)
298  {
299      fprintf(stdout, "SAX.notationDecl(%s, %s, %s)\n",
300              (char *) name, (char *) publicId, (char *) systemId);
301  }
302  
303  /**
304   * unparsedEntityDeclDebug:
305   * @ctxt:  An XML parser context
306   * @name: The name of the entity
307   * @publicId: The public ID of the entity
308   * @systemId: The system ID of the entity
309   * @notationName: the name of the notation
310   *
311   * What to do when an unparsed entity declaration is parsed
312   */
313  static void
314  unparsedEntityDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name,
315  		   const xmlChar *publicId, const xmlChar *systemId,
316  		   const xmlChar *notationName)
317  {
318      fprintf(stdout, "SAX.unparsedEntityDecl(%s, %s, %s, %s)\n",
319              (char *) name, (char *) publicId, (char *) systemId,
320  	    (char *) notationName);
321  }
322  
323  /**
324   * setDocumentLocatorDebug:
325   * @ctxt:  An XML parser context
326   * @loc: A SAX Locator
327   *
328   * Receive the document locator at startup, actually xmlDefaultSAXLocator
329   * Everything is available on the context, so this is useless in our case.
330   */
331  static void
332  setDocumentLocatorDebug(void *ctx ATTRIBUTE_UNUSED, xmlSAXLocatorPtr loc ATTRIBUTE_UNUSED)
333  {
334      fprintf(stdout, "SAX.setDocumentLocator()\n");
335  }
336  
337  /**
338   * startDocumentDebug:
339   * @ctxt:  An XML parser context
340   *
341   * called when the document start being processed.
342   */
343  static void
344  startDocumentDebug(void *ctx ATTRIBUTE_UNUSED)
345  {
346      fprintf(stdout, "SAX.startDocument()\n");
347  }
348  
349  /**
350   * endDocumentDebug:
351   * @ctxt:  An XML parser context
352   *
353   * called when the document end has been detected.
354   */
355  static void
356  endDocumentDebug(void *ctx ATTRIBUTE_UNUSED)
357  {
358      fprintf(stdout, "SAX.endDocument()\n");
359  }
360  
361  /**
362   * startElementDebug:
363   * @ctxt:  An XML parser context
364   * @name:  The element name
365   *
366   * called when an opening tag has been processed.
367   */
368  static void
369  startElementDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, const xmlChar **atts)
370  {
371      int i;
372  
373      fprintf(stdout, "SAX.startElement(%s", (char *) name);
374      if (atts != NULL) {
375          for (i = 0;(atts[i] != NULL);i++) {
376  	    fprintf(stdout, ", %s", atts[i++]);
377  	    if (atts[i] != NULL) {
378  		unsigned char output[40];
379  		const unsigned char *att = atts[i];
380  		int outlen, attlen;
381  	        fprintf(stdout, "='");
382  		while ((attlen = strlen((char*)att)) > 0) {
383  		    outlen = sizeof output - 1;
384  		    htmlEncodeEntities(output, &outlen, att, &attlen, '\'');
385  		    output[outlen] = 0;
386  		    fprintf(stdout, "%s", (char *) output);
387  		    att += attlen;
388  		}
389  		fprintf(stdout, "'");
390  	    }
391  	}
392      }
393      fprintf(stdout, ")\n");
394  }
395  
396  /**
397   * endElementDebug:
398   * @ctxt:  An XML parser context
399   * @name:  The element name
400   *
401   * called when the end of an element has been detected.
402   */
403  static void
404  endElementDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
405  {
406      fprintf(stdout, "SAX.endElement(%s)\n", (char *) name);
407  }
408  
409  /**
410   * charactersDebug:
411   * @ctxt:  An XML parser context
412   * @ch:  a xmlChar string
413   * @len: the number of xmlChar
414   *
415   * receiving some chars from the parser.
416   * Question: how much at a time ???
417   */
418  static void
419  charactersDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len)
420  {
421      unsigned char output[40];
422      int inlen = len, outlen = 30;
423  
424      htmlEncodeEntities(output, &outlen, ch, &inlen, 0);
425      output[outlen] = 0;
426  
427      fprintf(stdout, "SAX.characters(%s, %d)\n", output, len);
428  }
429  
430  /**
431   * cdataDebug:
432   * @ctxt:  An XML parser context
433   * @ch:  a xmlChar string
434   * @len: the number of xmlChar
435   *
436   * receiving some cdata chars from the parser.
437   * Question: how much at a time ???
438   */
439  static void
440  cdataDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len)
441  {
442      unsigned char output[40];
443      int inlen = len, outlen = 30;
444  
445      htmlEncodeEntities(output, &outlen, ch, &inlen, 0);
446      output[outlen] = 0;
447  
448      fprintf(stdout, "SAX.cdata(%s, %d)\n", output, len);
449  }
450  
451  /**
452   * referenceDebug:
453   * @ctxt:  An XML parser context
454   * @name:  The entity name
455   *
456   * called when an entity reference is detected.
457   */
458  static void
459  referenceDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
460  {
461      fprintf(stdout, "SAX.reference(%s)\n", name);
462  }
463  
464  /**
465   * ignorableWhitespaceDebug:
466   * @ctxt:  An XML parser context
467   * @ch:  a xmlChar string
468   * @start: the first char in the string
469   * @len: the number of xmlChar
470   *
471   * receiving some ignorable whitespaces from the parser.
472   * Question: how much at a time ???
473   */
474  static void
475  ignorableWhitespaceDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len)
476  {
477      char output[40];
478      int i;
479  
480      for (i = 0;(i<len) && (i < 30);i++)
481  	output[i] = ch[i];
482      output[i] = 0;
483  
484      fprintf(stdout, "SAX.ignorableWhitespace(%s, %d)\n", output, len);
485  }
486  
487  /**
488   * processingInstructionDebug:
489   * @ctxt:  An XML parser context
490   * @target:  the target name
491   * @data: the PI data's
492   * @len: the number of xmlChar
493   *
494   * A processing instruction has been parsed.
495   */
496  static void
497  processingInstructionDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *target,
498                        const xmlChar *data)
499  {
500      fprintf(stdout, "SAX.processingInstruction(%s, %s)\n",
501              (char *) target, (char *) data);
502  }
503  
504  /**
505   * commentDebug:
506   * @ctxt:  An XML parser context
507   * @value:  the comment content
508   *
509   * A comment has been parsed.
510   */
511  static void
512  commentDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *value)
513  {
514      fprintf(stdout, "SAX.comment(%s)\n", value);
515  }
516  
517  /**
518   * warningDebug:
519   * @ctxt:  An XML parser context
520   * @msg:  the message to display/transmit
521   * @...:  extra parameters for the message display
522   *
523   * Display and format a warning messages, gives file, line, position and
524   * extra parameters.
525   */
526  static void XMLCDECL
527  warningDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...)
528  {
529      va_list args;
530  
531      va_start(args, msg);
532      fprintf(stdout, "SAX.warning: ");
533      vfprintf(stdout, msg, args);
534      va_end(args);
535  }
536  
537  /**
538   * errorDebug:
539   * @ctxt:  An XML parser context
540   * @msg:  the message to display/transmit
541   * @...:  extra parameters for the message display
542   *
543   * Display and format a error messages, gives file, line, position and
544   * extra parameters.
545   */
546  static void XMLCDECL
547  errorDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...)
548  {
549      va_list args;
550  
551      va_start(args, msg);
552      fprintf(stdout, "SAX.error: ");
553      vfprintf(stdout, msg, args);
554      va_end(args);
555  }
556  
557  /**
558   * fatalErrorDebug:
559   * @ctxt:  An XML parser context
560   * @msg:  the message to display/transmit
561   * @...:  extra parameters for the message display
562   *
563   * Display and format a fatalError messages, gives file, line, position and
564   * extra parameters.
565   */
566  static void XMLCDECL
567  fatalErrorDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...)
568  {
569      va_list args;
570  
571      va_start(args, msg);
572      fprintf(stdout, "SAX.fatalError: ");
573      vfprintf(stdout, msg, args);
574      va_end(args);
575  }
576  
/*
 * SAX handler wiring every callback to its tracing *Debug counterpart
 * above. Slots are positional and follow the same order as
 * emptySAXHandlerStruct.
 */
static xmlSAXHandler debugSAXHandlerStruct = {
    internalSubsetDebug,	/* internalSubset */
    isStandaloneDebug,		/* isStandalone */
    hasInternalSubsetDebug,	/* hasInternalSubset */
    hasExternalSubsetDebug,	/* hasExternalSubset */
    resolveEntityDebug,		/* resolveEntity */
    getEntityDebug,		/* getEntity */
    entityDeclDebug,		/* entityDecl */
    notationDeclDebug,		/* notationDecl */
    attributeDeclDebug,		/* attributeDecl */
    elementDeclDebug,		/* elementDecl */
    unparsedEntityDeclDebug,	/* unparsedEntityDecl */
    setDocumentLocatorDebug,	/* setDocumentLocator */
    startDocumentDebug,		/* startDocument */
    endDocumentDebug,		/* endDocument */
    startElementDebug,		/* startElement */
    endElementDebug,		/* endElement */
    referenceDebug,		/* reference */
    charactersDebug,		/* characters */
    ignorableWhitespaceDebug,	/* ignorableWhitespace */
    processingInstructionDebug,	/* processingInstruction */
    commentDebug,		/* comment */
    warningDebug,		/* warning */
    errorDebug,			/* error */
    fatalErrorDebug,		/* fatalError */
    getParameterEntityDebug,	/* getParameterEntity */
    cdataDebug,			/* cdataBlock */
    NULL,			/* externalSubset */
    1,				/* initialized */
    NULL,			/* private */
    NULL,			/* startElementNsSAX2Func */
    NULL,			/* endElementNsSAX2Func */
    NULL			/* xmlStructuredErrorFunc */
};

/* Definition matching the extern declaration near the top of the file. */
xmlSAXHandlerPtr debugSAXHandler = &debugSAXHandlerStruct;
613  /************************************************************************
614   *									*
615   *				Debug					*
616   *									*
617   ************************************************************************/
618  
619  static void
620  parseSAXFile(char *filename) {
621      htmlDocPtr doc = NULL;
622  
623      /*
624       * Empty callbacks for checking
625       */
626  #ifdef LIBXML_PUSH_ENABLED
627      if (push) {
628  	FILE *f;
629  
630  #if defined(_WIN32) || defined (__DJGPP__) && !defined (__CYGWIN__)
631  	f = fopen(filename, "rb");
632  #else
633  	f = fopen(filename, "r");
634  #endif
635  	if (f != NULL) {
636  	    int res, size = 3;
637  	    char chars[4096];
638  	    htmlParserCtxtPtr ctxt;
639  
640  	    /* if (repeat) */
641  		size = 4096;
642  	    res = fread(chars, 1, 4, f);
643  	    if (res > 0) {
644  		ctxt = htmlCreatePushParserCtxt(emptySAXHandler, NULL,
645  			    chars, res, filename, XML_CHAR_ENCODING_NONE);
646  		while ((res = fread(chars, 1, size, f)) > 0) {
647  		    htmlParseChunk(ctxt, chars, res, 0);
648  		}
649  		htmlParseChunk(ctxt, chars, 0, 1);
650  		doc = ctxt->myDoc;
651  		htmlFreeParserCtxt(ctxt);
652  	    }
653  	    if (doc != NULL) {
654  		fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
655  		xmlFreeDoc(doc);
656  	    }
657  	    fclose(f);
658  	}
659  	if (!noout) {
660  #if defined(_WIN32) || defined (__DJGPP__) && !defined (__CYGWIN__)
661  		f = fopen(filename, "rb");
662  #else
663  		f = fopen(filename, "r");
664  #endif
665  	    if (f != NULL) {
666  		int res, size = 3;
667  		char chars[4096];
668  		htmlParserCtxtPtr ctxt;
669  
670  		/* if (repeat) */
671  		    size = 4096;
672  		res = fread(chars, 1, 4, f);
673  		if (res > 0) {
674  		    ctxt = htmlCreatePushParserCtxt(debugSAXHandler, NULL,
675  				chars, res, filename, XML_CHAR_ENCODING_NONE);
676  		    while ((res = fread(chars, 1, size, f)) > 0) {
677  			htmlParseChunk(ctxt, chars, res, 0);
678  		    }
679  		    htmlParseChunk(ctxt, chars, 0, 1);
680  		    doc = ctxt->myDoc;
681  		    htmlFreeParserCtxt(ctxt);
682  		}
683  		if (doc != NULL) {
684  		    fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
685  		    xmlFreeDoc(doc);
686  		}
687  		fclose(f);
688  	    }
689  	}
690      } else {
691  #endif /* LIBXML_PUSH_ENABLED */
692  	doc = htmlSAXParseFile(filename, NULL, emptySAXHandler, NULL);
693  	if (doc != NULL) {
694  	    fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
695  	    xmlFreeDoc(doc);
696  	}
697  
698  	if (!noout) {
699  	    /*
700  	     * Debug callback
701  	     */
702  	    doc = htmlSAXParseFile(filename, NULL, debugSAXHandler, NULL);
703  	    if (doc != NULL) {
704  		fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
705  		xmlFreeDoc(doc);
706  	    }
707  	}
708  #ifdef LIBXML_PUSH_ENABLED
709      }
710  #endif /* LIBXML_PUSH_ENABLED */
711  }
712  
713  static void
714  parseAndPrintFile(char *filename) {
715      htmlDocPtr doc = NULL;
716  
717      /*
718       * build an HTML tree from a string;
719       */
720  #ifdef LIBXML_PUSH_ENABLED
721      if (push) {
722  	FILE *f;
723  
724  #if defined(_WIN32) || defined (__DJGPP__) && !defined (__CYGWIN__)
725  	f = fopen(filename, "rb");
726  #else
727  	f = fopen(filename, "r");
728  #endif
729  	if (f != NULL) {
730  	    int res, size = 3;
731  	    char chars[4096];
732  	    htmlParserCtxtPtr ctxt;
733  
734  	    /* if (repeat) */
735  		size = 4096;
736  	    res = fread(chars, 1, 4, f);
737  	    if (res > 0) {
738  		ctxt = htmlCreatePushParserCtxt(NULL, NULL,
739  			    chars, res, filename, XML_CHAR_ENCODING_NONE);
740  		while ((res = fread(chars, 1, size, f)) > 0) {
741  		    htmlParseChunk(ctxt, chars, res, 0);
742  		}
743  		htmlParseChunk(ctxt, chars, 0, 1);
744  		doc = ctxt->myDoc;
745  		htmlFreeParserCtxt(ctxt);
746  	    }
747  	    fclose(f);
748  	}
749      } else {
750  	doc = htmlReadFile(filename, NULL, options);
751      }
752  #else
753  	doc = htmlReadFile(filename,NULL,options);
754  #endif
755      if (doc == NULL) {
756          xmlGenericError(xmlGenericErrorContext,
757  		"Could not parse %s\n", filename);
758      }
759  
760  #ifdef LIBXML_TREE_ENABLED
761      /*
762       * test intermediate copy if needed.
763       */
764      if (copy) {
765          htmlDocPtr tmp;
766  
767          tmp = doc;
768  	doc = xmlCopyDoc(doc, 1);
769  	xmlFreeDoc(tmp);
770      }
771  #endif
772  
773  #ifdef LIBXML_OUTPUT_ENABLED
774      /*
775       * print it.
776       */
777      if (!noout) {
778  #ifdef LIBXML_DEBUG_ENABLED
779  	if (!debug) {
780  	    if (encoding)
781  		htmlSaveFileEnc("-", doc, encoding);
782  	    else
783  		htmlDocDump(stdout, doc);
784  	} else
785  	    xmlDebugDumpDocument(stdout, doc);
786  #else
787  	if (encoding)
788  	    htmlSaveFileEnc("-", doc, encoding);
789  	else
790  	    htmlDocDump(stdout, doc);
791  #endif
792      }
793  #endif /* LIBXML_OUTPUT_ENABLED */
794  
795      /*
796       * free it.
797       */
798      xmlFreeDoc(doc);
799  }
800  
801  int main(int argc, char **argv) {
802      int i, count;
803      int files = 0;
804  
805      for (i = 1; i < argc ; i++) {
806  #ifdef LIBXML_DEBUG_ENABLED
807  	if ((!strcmp(argv[i], "-debug")) || (!strcmp(argv[i], "--debug")))
808  	    debug++;
809  	else
810  #endif
811  	    if ((!strcmp(argv[i], "-copy")) || (!strcmp(argv[i], "--copy")))
812  	    copy++;
813  #ifdef LIBXML_PUSH_ENABLED
814  	else if ((!strcmp(argv[i], "-push")) || (!strcmp(argv[i], "--push")))
815  	    push++;
816  #endif /* LIBXML_PUSH_ENABLED */
817  	else if ((!strcmp(argv[i], "-sax")) || (!strcmp(argv[i], "--sax")))
818  	    sax++;
819  	else if ((!strcmp(argv[i], "-noout")) || (!strcmp(argv[i], "--noout")))
820  	    noout++;
821  	else if ((!strcmp(argv[i], "-repeat")) ||
822  	         (!strcmp(argv[i], "--repeat")))
823  	    repeat++;
824  	else if ((!strcmp(argv[i], "-encode")) ||
825  	         (!strcmp(argv[i], "--encode"))) {
826  	    i++;
827  	    encoding = argv[i];
828          }
829      }
830      for (i = 1; i < argc ; i++) {
831  	if ((!strcmp(argv[i], "-encode")) ||
832  	         (!strcmp(argv[i], "--encode"))) {
833  	    i++;
834  	    continue;
835          }
836  	if (argv[i][0] != '-') {
837  	    if (repeat) {
838  		for (count = 0;count < 100 * repeat;count++) {
839  		    if (sax)
840  			parseSAXFile(argv[i]);
841  		    else
842  			parseAndPrintFile(argv[i]);
843  		}
844  	    } else {
845  		if (sax)
846  		    parseSAXFile(argv[i]);
847  		else
848  		    parseAndPrintFile(argv[i]);
849  	    }
850  	    files ++;
851  	}
852      }
853      if (files == 0) {
854  	printf("Usage : %s [--debug] [--copy] [--copy] HTMLfiles ...\n",
855  	       argv[0]);
856  	printf("\tParse the HTML files and output the result of the parsing\n");
857  #ifdef LIBXML_DEBUG_ENABLED
858  	printf("\t--debug : dump a debug tree of the in-memory document\n");
859  #endif
860  	printf("\t--copy : used to test the internal copy implementation\n");
861  	printf("\t--sax : debug the sequence of SAX callbacks\n");
862  	printf("\t--repeat : parse the file 100 times, for timing\n");
863  	printf("\t--noout : do not print the result\n");
864  #ifdef LIBXML_PUSH_ENABLED
865  	printf("\t--push : use the push mode parser\n");
866  #endif /* LIBXML_PUSH_ENABLED */
867  	printf("\t--encode encoding : output in the given encoding\n");
868      }
869      xmlCleanupParser();
870      xmlMemoryDump();
871  
872      return(0);
873  }
874  #else /* !LIBXML_HTML_ENABLED */
875  #include <stdio.h>
876  int main(int argc ATTRIBUTE_UNUSED, char **argv ATTRIBUTE_UNUSED) {
877      printf("%s : HTML support not compiled in\n", argv[0]);
878      return(0);
879  }
880  #endif