/ libxml2 / parserInternals.c
parserInternals.c
   1  /*
   2   * parserInternals.c : Internal routines (and obsolete ones) needed for the
   3   *                     XML and HTML parsers.
   4   *
   5   * See Copyright for the status of this software.
   6   *
   7   * daniel@veillard.com
   8   */
   9  
  10  #define IN_LIBXML
  11  #include "libxml.h"
  12  
  13  #if defined(WIN32) && !defined (__CYGWIN__)
  14  #define XML_DIR_SEP '\\'
  15  #else
  16  #define XML_DIR_SEP '/'
  17  #endif
  18  
  19  #include <string.h>
  20  #ifdef HAVE_CTYPE_H
  21  #include <ctype.h>
  22  #endif
  23  #ifdef HAVE_STDLIB_H
  24  #include <stdlib.h>
  25  #endif
  26  #ifdef HAVE_SYS_STAT_H
  27  #include <sys/stat.h>
  28  #endif
  29  #ifdef HAVE_FCNTL_H
  30  #include <fcntl.h>
  31  #endif
  32  #ifdef HAVE_UNISTD_H
  33  #include <unistd.h>
  34  #endif
  35  #ifdef HAVE_ZLIB_H
  36  #include <zlib.h>
  37  #endif
  38  
  39  #include <libxml/xmlmemory.h>
  40  #include <libxml/tree.h>
  41  #include <libxml/parser.h>
  42  #include <libxml/parserInternals.h>
  43  #include <libxml/valid.h>
  44  #include <libxml/entities.h>
  45  #include <libxml/xmlerror.h>
  46  #include <libxml/encoding.h>
  47  #include <libxml/valid.h>
  48  #include <libxml/xmlIO.h>
  49  #include <libxml/uri.h>
  50  #include <libxml/dict.h>
  51  #include <libxml/SAX.h>
  52  #ifdef LIBXML_CATALOG_ENABLED
  53  #include <libxml/catalog.h>
  54  #endif
  55  #include <libxml/globals.h>
  56  #include <libxml/chvalid.h>
  57  
  58  #define CUR(ctxt) ctxt->input->cur
  59  #define END(ctxt) ctxt->input->end
  60  #define VALID_CTXT(ctxt) (CUR(ctxt) <= END(ctxt))
  61  
  62  #include "buf.h"
  63  #include "enc.h"
  64  
  65  /*
  66   * Various global defaults for parsing
  67   */
  68  
  69  /**
  70   * xmlCheckVersion:
  71   * @version: the include version number
  72   *
  73   * check the compiled lib version against the include one.
  74   * This can warn or immediately kill the application
  75   */
  76  void
  77  xmlCheckVersion(int version) {
  78      int myversion = (int) LIBXML_VERSION;
  79  
  80      xmlInitParser();
  81  
  82      if ((myversion / 10000) != (version / 10000)) {
  83  	xmlGenericError(xmlGenericErrorContext,
  84  		"Fatal: program compiled against libxml %d using libxml %d\n",
  85  		(version / 10000), (myversion / 10000));
  86  	fprintf(stderr,
  87  		"Fatal: program compiled against libxml %d using libxml %d\n",
  88  		(version / 10000), (myversion / 10000));
  89      }
  90      if ((myversion / 100) < (version / 100)) {
  91  	xmlGenericError(xmlGenericErrorContext,
  92  		"Warning: program compiled against libxml %d using older %d\n",
  93  		(version / 100), (myversion / 100));
  94      }
  95  }
  96  
  97  
  98  /************************************************************************
  99   *									*
 100   *		Some factorized error routines				*
 101   *									*
 102   ************************************************************************/
 103  
 104  
 105  /**
 106   * xmlErrMemory:
 107   * @ctxt:  an XML parser context
 108   * @extra:  extra informations
 109   *
 110   * Handle a redefinition of attribute error
 111   */
 112  void
 113  xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra)
 114  {
 115      if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
 116          (ctxt->instate == XML_PARSER_EOF))
 117  	return;
 118      if (ctxt != NULL) {
 119          ctxt->errNo = XML_ERR_NO_MEMORY;
 120          ctxt->instate = XML_PARSER_EOF;
 121          ctxt->disableSAX = 1;
 122      }
 123      if (extra)
 124          __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
 125                          XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra,
 126                          NULL, NULL, 0, 0,
 127                          "Memory allocation failed : %s\n", extra);
 128      else
 129          __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
 130                          XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL,
 131                          NULL, NULL, 0, 0, "Memory allocation failed\n");
 132  }
 133  
 134  /**
 135   * __xmlErrEncoding:
 136   * @ctxt:  an XML parser context
 137   * @xmlerr:  the error number
 138   * @msg:  the error message
 139   * @str1:  an string info
 140   * @str2:  an string info
 141   *
 142   * Handle an encoding error
 143   */
 144  void
 145  __xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr,
 146                   const char *msg, const xmlChar * str1, const xmlChar * str2)
 147  {
 148      if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
 149          (ctxt->instate == XML_PARSER_EOF))
 150  	return;
 151      if (ctxt != NULL)
 152          ctxt->errNo = xmlerr;
 153  #pragma clang diagnostic push
 154  #pragma clang diagnostic ignored "-Wformat-nonliteral"
 155      __xmlRaiseError(NULL, NULL, NULL,
 156                      ctxt, NULL, XML_FROM_PARSER, xmlerr, XML_ERR_FATAL,
 157                      NULL, 0, (const char *) str1, (const char *) str2,
 158                      NULL, 0, 0, msg, str1, str2);
 159  #pragma clang diagnostic pop
 160      if (ctxt != NULL) {
 161          ctxt->wellFormed = 0;
 162          if (ctxt->recovery == 0)
 163              ctxt->disableSAX = 1;
 164      }
 165  }
 166  
 167  /**
 168   * xmlErrInternal:
 169   * @ctxt:  an XML parser context
 170   * @msg:  the error message
 171   * @str:  error informations
 172   *
 173   * Handle an internal error
 174   */
 175  static void LIBXML_ATTR_FORMAT(2,0)
 176  xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str)
 177  {
 178      if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
 179          (ctxt->instate == XML_PARSER_EOF))
 180  	return;
 181      if (ctxt != NULL)
 182          ctxt->errNo = XML_ERR_INTERNAL_ERROR;
 183  #pragma clang diagnostic push
 184  #pragma clang diagnostic ignored "-Wformat-nonliteral"
 185      __xmlRaiseError(NULL, NULL, NULL,
 186                      ctxt, NULL, XML_FROM_PARSER, XML_ERR_INTERNAL_ERROR,
 187                      XML_ERR_FATAL, NULL, 0, (const char *) str, NULL, NULL,
 188                      0, 0, msg, str);
 189  #pragma clang diagnostic pop
 190      if (ctxt != NULL) {
 191          ctxt->wellFormed = 0;
 192          if (ctxt->recovery == 0)
 193              ctxt->disableSAX = 1;
 194      }
 195  }
 196  
 197  /**
 198   * xmlErrEncodingInt:
 199   * @ctxt:  an XML parser context
 200   * @error:  the error number
 201   * @msg:  the error message
 202   * @val:  an integer value
 203   *
 204   * n encoding error
 205   */
 206  static void LIBXML_ATTR_FORMAT(3,0)
 207  xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
 208                    const char *msg, int val)
 209  {
 210      if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
 211          (ctxt->instate == XML_PARSER_EOF))
 212  	return;
 213      if (ctxt != NULL)
 214          ctxt->errNo = error;
 215  #pragma clang diagnostic push
 216  #pragma clang diagnostic ignored "-Wformat-nonliteral"
 217      __xmlRaiseError(NULL, NULL, NULL,
 218                      ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
 219                      NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
 220  #pragma clang diagnostic pop
 221      if (ctxt != NULL) {
 222          ctxt->wellFormed = 0;
 223          if (ctxt->recovery == 0)
 224              ctxt->disableSAX = 1;
 225      }
 226  }
 227  
 228  /**
 229   * xmlIsLetter:
 230   * @c:  an unicode character (int)
 231   *
 232   * Check whether the character is allowed by the production
 233   * [84] Letter ::= BaseChar | Ideographic
 234   *
 235   * Returns 0 if not, non-zero otherwise
 236   */
 237  int
 238  xmlIsLetter(int c) {
 239      return(IS_BASECHAR(c) || IS_IDEOGRAPHIC(c));
 240  }
 241  
 242  /************************************************************************
 243   *									*
 244   *		Input handling functions for progressive parsing	*
 245   *									*
 246   ************************************************************************/
 247  
 248  /* #define DEBUG_INPUT */
 249  /* #define DEBUG_STACK */
 250  /* #define DEBUG_PUSH */
 251  
 252  
 253  /* we need to keep enough input to show errors in context */
 254  #define LINE_LEN        80
 255  
 256  #ifdef DEBUG_INPUT
 257  #define CHECK_BUFFER(in) check_buffer(in)
 258  
 259  static
 260  void check_buffer(xmlParserInputPtr in) {
 261      if (in->base != xmlBufContent(in->buf->buffer)) {
 262          xmlGenericError(xmlGenericErrorContext,
 263  		"xmlParserInput: base mismatch problem\n");
 264      }
 265      if (in->cur < in->base) {
 266          xmlGenericError(xmlGenericErrorContext,
 267  		"xmlParserInput: cur < base problem\n");
 268      }
 269      if (in->cur > in->base + xmlBufUse(in->buf->buffer)) {
 270          xmlGenericError(xmlGenericErrorContext,
 271  		"xmlParserInput: cur > base + use problem\n");
 272      }
 273      xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d\n",
 274              (int) in, (int) xmlBufContent(in->buf->buffer), in->cur - in->base,
 275  	    xmlBufUse(in->buf->buffer));
 276  }
 277  
 278  #else
 279  #define CHECK_BUFFER(in)
 280  #endif
 281  
 282  
 283  /**
 284   * xmlParserInputRead:
 285   * @in:  an XML parser input
 286   * @len:  an indicative size for the lookahead
 287   *
 288   * This function was internal and is deprecated.
 289   *
 290   * Returns -1 as this is an error to use it.
 291   */
 292  int
 293  xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) {
 294      return(-1);
 295  }
 296  
 297  /**
 298   * xmlParserInputGrow:
 299   * @in:  an XML parser input
 300   * @len:  an indicative size for the lookahead
 301   *
 302   * This function increase the input for the parser. It tries to
 303   * preserve pointers to the input buffer, and keep already read data
 304   *
 305   * Returns the amount of char read, or -1 in case of error, 0 indicate the
 306   * end of this entity
 307   */
 308  int
 309  xmlParserInputGrow(xmlParserInputPtr in, int len) {
 310      int ret;
 311      size_t indx;
 312      const xmlChar *content;
 313  
 314      if ((in == NULL) || (len < 0)) return(-1);
 315  #ifdef DEBUG_INPUT
 316      xmlGenericError(xmlGenericErrorContext, "Grow\n");
 317  #endif
 318      if (in->buf == NULL) return(-1);
 319      if (in->base == NULL) return(-1);
 320      if (in->cur == NULL) return(-1);
 321      if (in->buf->buffer == NULL) return(-1);
 322  
 323      CHECK_BUFFER(in);
 324  
 325      indx = in->cur - in->base;
 326      if (xmlBufUse(in->buf->buffer) > (unsigned int) indx + INPUT_CHUNK) {
 327  
 328  	CHECK_BUFFER(in);
 329  
 330          return(0);
 331      }
 332      if (in->buf->readcallback != NULL) {
 333  	ret = xmlParserInputBufferGrow(in->buf, len);
 334      } else
 335          return(0);
 336  
 337      /*
 338       * NOTE : in->base may be a "dangling" i.e. freed pointer in this
 339       *        block, but we use it really as an integer to do some
 340       *        pointer arithmetic. Insure will raise it as a bug but in
 341       *        that specific case, that's not !
 342       */
 343  
 344      content = xmlBufContent(in->buf->buffer);
 345      if (in->base != content) {
 346          /*
 347  	 * the buffer has been reallocated
 348  	 */
 349  	indx = in->cur - in->base;
 350  	in->base = content;
 351  	in->cur = &content[indx];
 352      }
 353      in->end = xmlBufEnd(in->buf->buffer);
 354  
 355      CHECK_BUFFER(in);
 356  
 357      return(ret);
 358  }
 359  
 360  /**
 361   * xmlParserInputShrink:
 362   * @in:  an XML parser input
 363   *
 364   * This function removes used input for the parser.
 365   */
 366  void
 367  xmlParserInputShrink(xmlParserInputPtr in) {
 368      size_t used;
 369      size_t ret;
 370      size_t indx;
 371      const xmlChar *content;
 372  
 373  #ifdef DEBUG_INPUT
 374      xmlGenericError(xmlGenericErrorContext, "Shrink\n");
 375  #endif
 376      if (in == NULL) return;
 377      if (in->buf == NULL) return;
 378      if (in->base == NULL) return;
 379      if (in->cur == NULL) return;
 380      if (in->buf->buffer == NULL) return;
 381  
 382      CHECK_BUFFER(in);
 383  
 384      used = in->cur - xmlBufContent(in->buf->buffer);
 385      /*
 386       * Do not shrink on large buffers whose only a tiny fraction
 387       * was consumed
 388       */
 389      if (used > INPUT_CHUNK) {
 390  	ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN);
 391  	if (ret > 0) {
 392  	    in->cur -= ret;
 393  	    in->consumed += ret;
 394  	}
 395  	in->end = xmlBufEnd(in->buf->buffer);
 396      }
 397  
 398      CHECK_BUFFER(in);
 399  
 400      if (xmlBufUse(in->buf->buffer) > INPUT_CHUNK) {
 401          return;
 402      }
 403      xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
 404      content = xmlBufContent(in->buf->buffer);
 405      if (in->base != content) {
 406          /*
 407  	 * the buffer has been reallocated
 408  	 */
 409  	indx = in->cur - in->base;
 410  	in->base = content;
 411  	in->cur = &content[indx];
 412      }
 413      in->end = xmlBufEnd(in->buf->buffer);
 414  
 415      CHECK_BUFFER(in);
 416  }
 417  
 418  /************************************************************************
 419   *									*
 420   *		UTF8 character input and related functions		*
 421   *									*
 422   ************************************************************************/
 423  
 424  /**
 425   * xmlNextChar:
 426   * @ctxt:  the XML parser context
 427   *
 428   * Skip to the next char input char.
 429   */
 430  
 431  void
 432  xmlNextChar(xmlParserCtxtPtr ctxt)
 433  {
 434      if ((ctxt == NULL) || (ctxt->instate == XML_PARSER_EOF) ||
 435          (ctxt->input == NULL))
 436          return;
 437  
 438      if (!(VALID_CTXT(ctxt))) {
 439          xmlErrInternal(ctxt, "Parser input data memory error\n", NULL);
 440  	ctxt->errNo = XML_ERR_INTERNAL_ERROR;
 441          xmlStopParser(ctxt);
 442  	return;
 443      }
 444  
 445      if ((*ctxt->input->cur == 0) &&
 446          (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {
 447          if ((ctxt->instate != XML_PARSER_COMMENT))
 448              xmlPopInput(ctxt);
 449          return;
 450      }
 451  
 452      if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
 453          const unsigned char *cur;
 454          unsigned char c;
 455  
 456          /*
 457           *   2.11 End-of-Line Handling
 458           *   the literal two-character sequence "#xD#xA" or a standalone
 459           *   literal #xD, an XML processor must pass to the application
 460           *   the single character #xA.
 461           */
 462          if (*(ctxt->input->cur) == '\n') {
 463              ctxt->input->line++; ctxt->input->col = 1;
 464          } else
 465              ctxt->input->col++;
 466  
 467          /*
 468           * We are supposed to handle UTF8, check it's valid
 469           * From rfc2044: encoding of the Unicode values on UTF-8:
 470           *
 471           * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
 472           * 0000 0000-0000 007F   0xxxxxxx
 473           * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
 474           * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
 475           *
 476           * Check for the 0x110000 limit too
 477           */
 478          cur = ctxt->input->cur;
 479  
 480          c = *cur;
 481          if (c & 0x80) {
 482          if (c == 0xC0)
 483  	    goto encoding_error;
 484              if (cur[1] == 0) {
 485                  xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
 486                  cur = ctxt->input->cur;
 487              }
 488              if ((cur[1] & 0xc0) != 0x80)
 489                  goto encoding_error;
 490              if ((c & 0xe0) == 0xe0) {
 491                  unsigned int val;
 492  
 493                  if (cur[2] == 0) {
 494                      xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
 495                      cur = ctxt->input->cur;
 496                  }
 497                  if ((cur[2] & 0xc0) != 0x80)
 498                      goto encoding_error;
 499                  if ((c & 0xf0) == 0xf0) {
 500                      if (cur[3] == 0) {
 501                          xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
 502                          cur = ctxt->input->cur;
 503                      }
 504                      if (((c & 0xf8) != 0xf0) ||
 505                          ((cur[3] & 0xc0) != 0x80))
 506                          goto encoding_error;
 507                      /* 4-byte code */
 508                      ctxt->input->cur += 4;
 509                      val = (cur[0] & 0x7) << 18;
 510                      val |= (cur[1] & 0x3f) << 12;
 511                      val |= (cur[2] & 0x3f) << 6;
 512                      val |= cur[3] & 0x3f;
 513                  } else {
 514                      /* 3-byte code */
 515                      ctxt->input->cur += 3;
 516                      val = (cur[0] & 0xf) << 12;
 517                      val |= (cur[1] & 0x3f) << 6;
 518                      val |= cur[2] & 0x3f;
 519                  }
 520                  if (((val > 0xd7ff) && (val < 0xe000)) ||
 521                      ((val > 0xfffd) && (val < 0x10000)) ||
 522                      (val >= 0x110000)) {
 523  		xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
 524  				  "Char 0x%X out of allowed range\n",
 525  				  val);
 526                  }
 527              } else
 528                  /* 2-byte code */
 529                  ctxt->input->cur += 2;
 530          } else
 531              /* 1-byte code */
 532              ctxt->input->cur++;
 533  
 534          ctxt->nbChars++;
 535          if (*ctxt->input->cur == 0)
 536              xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
 537      } else {
 538          /*
 539           * Assume it's a fixed length encoding (1) with
 540           * a compatible encoding for the ASCII set, since
 541           * XML constructs only use < 128 chars
 542           */
 543  
 544          if (*(ctxt->input->cur) == '\n') {
 545              ctxt->input->line++; ctxt->input->col = 1;
 546          } else
 547              ctxt->input->col++;
 548          ctxt->input->cur++;
 549          ctxt->nbChars++;
 550          if (*ctxt->input->cur == 0)
 551              xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
 552      }
 553      if ((*ctxt->input->cur == '%') && (!ctxt->html))
 554          xmlParserHandlePEReference(ctxt);
 555      if ((*ctxt->input->cur == 0) &&
 556          (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
 557          xmlPopInput(ctxt);
 558      return;
 559  encoding_error:
 560      /*
 561       * If we detect an UTF8 error that probably mean that the
 562       * input encoding didn't get properly advertised in the
 563       * declaration header. Report the error and switch the encoding
 564       * to ISO-Latin-1 (if you don't like this policy, just declare the
 565       * encoding !)
 566       */
 567      if ((ctxt == NULL) || (ctxt->input == NULL) ||
 568          (ctxt->input->end - ctxt->input->cur < 4)) {
 569  	__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
 570  		     "Input is not proper UTF-8, indicate encoding !\n",
 571  		     NULL, NULL);
 572      } else {
 573          char buffer[150];
 574  
 575  	snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
 576  			ctxt->input->cur[0], ctxt->input->cur[1],
 577  			ctxt->input->cur[2], ctxt->input->cur[3]);
 578  	__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
 579  		     "Input is not proper UTF-8, indicate encoding !\n%s",
 580  		     BAD_CAST buffer, NULL);
 581      }
 582      ctxt->charset = XML_CHAR_ENCODING_8859_1;
 583      ctxt->input->cur++;
 584      return;
 585  }
 586  
/**
 * xmlCurrentChar:
 * @ctxt:  the XML parser context
 * @len:  pointer to the length of the char read
 *
 * The current char value, if using UTF-8 this may actually span multiple
 * bytes in the input buffer. Implement the end of line normalization:
 * 2.11 End-of-Line Handling
 * Wherever an external parsed entity or the literal entity value
 * of an internal parsed entity contains either the literal two-character
 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
 * must pass to the application the single character #xA.
 * This behavior can conveniently be produced by normalizing all
 * line breaks to #xA on input, before parsing.)
 *
 * The function returns 0 without touching @len when @ctxt, @len or the
 * context input is NULL, or when the parser state is XML_PARSER_EOF.
 * When a "#xD#xA" pair is met the input cursor is moved onto the #xA
 * and nbChars is incremented, so this function is not side-effect free.
 *
 * Returns the current char value and its length; 0 with *@len set to 0
 * when an invalid sequence is found less than 4 bytes before the end of
 * the input.
 */

int
xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
    if ((ctxt == NULL) || (len == NULL) || (ctxt->input == NULL)) return(0);
    if (ctxt->instate == XML_PARSER_EOF)
	return(0);

    /* fast path: bytes in 0x20..0x7F are returned as is, whatever the charset */
    if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
	    *len = 1;
	    return((int) *ctxt->input->cur);
    }
    if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
	/*
	 * We are supposed to handle UTF8, check it's valid
	 * From rfc2044: encoding of the Unicode values on UTF-8:
	 *
	 * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
	 * 0000 0000-0000 007F   0xxxxxxx
	 * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
	 * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
	 *
	 * Check for the 0x110000 limit too
	 */
	const unsigned char *cur = ctxt->input->cur;
	unsigned char c;
	unsigned int val;

	c = *cur;
	if (c & 0x80) {
	    /* a continuation byte (10xxxxxx) or 0xC0 cannot start a sequence */
	    if (((c & 0x40) == 0) || (c == 0xC0))
		goto encoding_error;
	    /*
	     * a 0 byte where a continuation byte is expected: try to read
	     * more input and reload cur from the context afterwards
	     */
	    if (cur[1] == 0) {
		xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                cur = ctxt->input->cur;
            }
	    if ((cur[1] & 0xc0) != 0x80)
		goto encoding_error;
	    if ((c & 0xe0) == 0xe0) {
		if (cur[2] == 0) {
		    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                    cur = ctxt->input->cur;
                }
		if ((cur[2] & 0xc0) != 0x80)
		    goto encoding_error;
		if ((c & 0xf0) == 0xf0) {
		    if (cur[3] == 0) {
			xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                        cur = ctxt->input->cur;
                    }
		    if (((c & 0xf8) != 0xf0) ||
			((cur[3] & 0xc0) != 0x80))
			goto encoding_error;
		    /* 4-byte code */
		    *len = 4;
		    val = (cur[0] & 0x7) << 18;
		    val |= (cur[1] & 0x3f) << 12;
		    val |= (cur[2] & 0x3f) << 6;
		    val |= cur[3] & 0x3f;
		    /* reject values that fit in a shorter sequence */
		    if (val < 0x10000)
			goto encoding_error;
		} else {
		  /* 3-byte code */
		    *len = 3;
		    val = (cur[0] & 0xf) << 12;
		    val |= (cur[1] & 0x3f) << 6;
		    val |= cur[2] & 0x3f;
		    /* reject values that fit in a shorter sequence */
		    if (val < 0x800)
			goto encoding_error;
		}
	    } else {
	      /* 2-byte code */
		*len = 2;
		val = (cur[0] & 0x1f) << 6;
		val |= cur[1] & 0x3f;
		/* reject values that fit in a single byte */
		if (val < 0x80)
		    goto encoding_error;
	    }
	    /*
	     * well-formed sequence but not an XML Char: report it, and
	     * still return the value unless the parser state is now
	     * XML_PARSER_EOF
	     */
	    if (!IS_CHAR(val)) {
	        xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
				  "Char 0x%X out of allowed range\n", val);
                if (ctxt->instate == XML_PARSER_EOF)
                    goto encoding_error;
	    }
	    return(val);
	} else {
	    /* 1-byte code */
	    *len = 1;
	    if (*ctxt->input->cur == 0)
		xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
	    /* a 0 byte located before the end of the input is an invalid char */
	    if ((*ctxt->input->cur == 0) &&
	        (ctxt->input->end > ctxt->input->cur)) {
	        xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
				  "Char 0x0 out of allowed range\n", 0);
                if (ctxt->instate == XML_PARSER_EOF)
                    goto encoding_error;
	    }
	    /*
	     * end of line normalization: #xD is returned as #xA, and for
	     * a #xD#xA pair the cursor is moved onto the #xA
	     */
	    if (*ctxt->input->cur == 0xD) {
		if (ctxt->input->cur[1] == 0xA) {
		    ctxt->nbChars++;
		    ctxt->input->cur++;
		}
		return(0xA);
	    }
	    return((int) *ctxt->input->cur);
	}
    }
    /*
     * Assume it's a fixed length encoding (1) with
     * a compatible encoding for the ASCII set, since
     * XML constructs only use < 128 chars
     */
    *len = 1;
    /* same end of line normalization as in the UTF-8 branch */
    if (*ctxt->input->cur == 0xD) {
	if (ctxt->input->cur[1] == 0xA) {
	    ctxt->nbChars++;
	    ctxt->input->cur++;
	}
	return(0xA);
    }
    return((int) *ctxt->input->cur);
encoding_error:
    /*
     * An encoding problem may arise from a truncated input buffer
     * splitting a character in the middle. In that case do not raise
     * an error but return 0 to indicate an end of stream problem
     */
    if (ctxt->input->end - ctxt->input->cur < 4) {
	*len = 0;
	return(0);
    }

    /*
     * If we detect an UTF8 error that probably mean that the
     * input encoding didn't get properly advertised in the
     * declaration header. Report the error and switch the encoding
     * to ISO-Latin-1 (if you don't like this policy, just declare the
     * encoding !)
     */
    {
        char buffer[150];

	/* at least 4 bytes are available here, see the check above */
	snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
			ctxt->input->cur[0], ctxt->input->cur[1],
			ctxt->input->cur[2], ctxt->input->cur[3]);
	__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
		     "Input is not proper UTF-8, indicate encoding !\n%s",
		     BAD_CAST buffer, NULL);
    }
    /* fall back to a single byte charset and return the raw byte */
    ctxt->charset = XML_CHAR_ENCODING_8859_1;
    *len = 1;
    return((int) *ctxt->input->cur);
}
 756  
 757  /**
 758   * xmlStringCurrentChar:
 759   * @ctxt:  the XML parser context
 760   * @cur:  pointer to the beginning of the char
 761   * @len:  pointer to the length of the char read
 762   *
 763   * The current char value, if using UTF-8 this may actually span multiple
 764   * bytes in the input buffer.
 765   *
 766   * Returns the current char value and its length
 767   */
 768  
 769  int
 770  xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
 771  {
 772      if ((len == NULL) || (cur == NULL)) return(0);
 773      if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
 774          /*
 775           * We are supposed to handle UTF8, check it's valid
 776           * From rfc2044: encoding of the Unicode values on UTF-8:
 777           *
 778           * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
 779           * 0000 0000-0000 007F   0xxxxxxx
 780           * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
 781           * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
 782           *
 783           * Check for the 0x110000 limit too
 784           */
 785          unsigned char c;
 786          unsigned int val;
 787  
 788          c = *cur;
 789          if (c & 0x80) {
 790              if ((cur[1] & 0xc0) != 0x80)
 791                  goto encoding_error;
 792              if ((c & 0xe0) == 0xe0) {
 793  
 794                  if ((cur[2] & 0xc0) != 0x80)
 795                      goto encoding_error;
 796                  if ((c & 0xf0) == 0xf0) {
 797                      if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80))
 798                          goto encoding_error;
 799                      /* 4-byte code */
 800                      *len = 4;
 801                      val = (cur[0] & 0x7) << 18;
 802                      val |= (cur[1] & 0x3f) << 12;
 803                      val |= (cur[2] & 0x3f) << 6;
 804                      val |= cur[3] & 0x3f;
 805                  } else {
 806                      /* 3-byte code */
 807                      *len = 3;
 808                      val = (cur[0] & 0xf) << 12;
 809                      val |= (cur[1] & 0x3f) << 6;
 810                      val |= cur[2] & 0x3f;
 811                  }
 812              } else {
 813                  /* 2-byte code */
 814                  *len = 2;
 815                  val = (cur[0] & 0x1f) << 6;
 816                  val |= cur[1] & 0x3f;
 817              }
 818              if (!IS_CHAR(val)) {
 819  	        xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
 820  				  "Char 0x%X out of allowed range\n", val);
 821              }
 822              return (val);
 823          } else {
 824              /* 1-byte code */
 825              *len = 1;
 826              return ((int) *cur);
 827          }
 828      }
 829      /*
 830       * Assume it's a fixed length encoding (1) with
 831       * a compatible encoding for the ASCII set, since
 832       * XML constructs only use < 128 chars
 833       */
 834      *len = 1;
 835      return ((int) *cur);
 836  encoding_error:
 837  
 838      /*
 839       * An encoding problem may arise from a truncated input buffer
 840       * splitting a character in the middle. In that case do not raise
 841       * an error but return 0 to endicate an end of stream problem
 842       */
 843      if ((ctxt == NULL) || (ctxt->input == NULL) ||
 844          (ctxt->input->end - ctxt->input->cur < 4)) {
 845  	*len = 0;
 846  	return(0);
 847      }
 848      /*
 849       * If we detect an UTF8 error that probably mean that the
 850       * input encoding didn't get properly advertised in the
 851       * declaration header. Report the error and switch the encoding
 852       * to ISO-Latin-1 (if you don't like this policy, just declare the
 853       * encoding !)
 854       */
 855      {
 856          char buffer[150];
 857  
 858  	snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
 859  			ctxt->input->cur[0], ctxt->input->cur[1],
 860  			ctxt->input->cur[2], ctxt->input->cur[3]);
 861  	__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
 862  		     "Input is not proper UTF-8, indicate encoding !\n%s",
 863  		     BAD_CAST buffer, NULL);
 864      }
 865      *len = 1;
 866      return ((int) *cur);
 867  }
 868  
 869  /**
 870   * xmlCopyCharMultiByte:
 871   * @out:  pointer to an array of xmlChar
 872   * @val:  the char value
 873   *
 874   * append the char value in the array
 875   *
 876   * Returns the number of xmlChar written
 877   */
 878  int
 879  xmlCopyCharMultiByte(xmlChar *out, int val) {
 880      if (out == NULL) return(0);
 881      /*
 882       * We are supposed to handle UTF8, check it's valid
 883       * From rfc2044: encoding of the Unicode values on UTF-8:
 884       *
 885       * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
 886       * 0000 0000-0000 007F   0xxxxxxx
 887       * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
 888       * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
 889       */
 890      if  (val >= 0x80) {
 891  	xmlChar *savedout = out;
 892  	int bits;
 893  	if (val <   0x800) { *out++= (val >>  6) | 0xC0;  bits=  0; }
 894  	else if (val < 0x10000) { *out++= (val >> 12) | 0xE0;  bits=  6;}
 895  	else if (val < 0x110000)  { *out++= (val >> 18) | 0xF0;  bits=  12; }
 896  	else {
 897  	    xmlErrEncodingInt(NULL, XML_ERR_INVALID_CHAR,
 898  		    "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n",
 899  			      val);
 900  	    return(0);
 901  	}
 902  	for ( ; bits >= 0; bits-= 6)
 903  	    *out++= ((val >> bits) & 0x3F) | 0x80 ;
 904  	return (out - savedout);
 905      }
 906      *out = (xmlChar) val;
 907      return 1;
 908  }
 909  
 910  /**
 911   * xmlCopyChar:
 912   * @len:  Ignored, compatibility
 913   * @out:  pointer to an array of xmlChar
 914   * @val:  the char value
 915   *
 916   * append the char value in the array
 917   *
 918   * Returns the number of xmlChar written
 919   */
 920  
 921  int
 922  xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
 923      if (out == NULL) return(0);
 924      /* the len parameter is ignored */
 925      if  (val >= 0x80) {
 926  	return(xmlCopyCharMultiByte (out, val));
 927      }
 928      *out = (xmlChar) val;
 929      return 1;
 930  }
 931  
 932  /************************************************************************
 933   *									*
 934   *		Commodity functions to switch encodings			*
 935   *									*
 936   ************************************************************************/
 937  
 938  static int
 939  xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt,
 940                         xmlCharEncodingHandlerPtr handler, int len);
 941  static int
 942  xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
 943                            xmlCharEncodingHandlerPtr handler, int len);
 944  /**
 945   * xmlSwitchEncoding:
 946   * @ctxt:  the parser context
 947   * @enc:  the encoding value (number)
 948   *
 949   * change the input functions when discovering the character encoding
 950   * of a given entity.
 951   *
 952   * Returns 0 in case of success, -1 otherwise
 953   */
 954  int
 955  xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
 956  {
 957      xmlCharEncodingHandlerPtr handler;
 958      int len = -1;
 959      int ret;
 960  
 961      if (ctxt == NULL) return(-1);
 962      switch (enc) {
 963  	case XML_CHAR_ENCODING_ERROR:
 964  	    __xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING,
 965  	                   "encoding unknown\n", NULL, NULL);
 966  	    return(-1);
 967  	case XML_CHAR_ENCODING_NONE:
 968  	    /* let's assume it's UTF-8 without the XML decl */
 969  	    ctxt->charset = XML_CHAR_ENCODING_UTF8;
 970  	    return(0);
 971  	case XML_CHAR_ENCODING_UTF8:
 972  	    /* default encoding, no conversion should be needed */
 973  	    ctxt->charset = XML_CHAR_ENCODING_UTF8;
 974  
 975  	    /*
 976  	     * Errata on XML-1.0 June 20 2001
 977  	     * Specific handling of the Byte Order Mark for
 978  	     * UTF-8
 979  	     */
 980  	    if ((ctxt->input != NULL) &&
 981  		(ctxt->input->cur[0] == 0xEF) &&
 982  		(ctxt->input->cur[1] == 0xBB) &&
 983  		(ctxt->input->cur[2] == 0xBF)) {
 984  		ctxt->input->cur += 3;
 985  	    }
 986  	    return(0);
 987      case XML_CHAR_ENCODING_UTF16LE:
 988      case XML_CHAR_ENCODING_UTF16BE:
 989          /*The raw input characters are encoded
 990           *in UTF-16. As we expect this function
 991           *to be called after xmlCharEncInFunc, we expect
 992           *ctxt->input->cur to contain UTF-8 encoded characters.
 993           *So the raw UTF16 Byte Order Mark
 994           *has also been converted into
 995           *an UTF-8 BOM. Let's skip that BOM.
 996           */
 997          if ((ctxt->input != NULL) && (ctxt->input->cur != NULL) &&
 998              (ctxt->input->cur[0] == 0xEF) &&
 999              (ctxt->input->cur[1] == 0xBB) &&
1000              (ctxt->input->cur[2] == 0xBF)) {
1001              ctxt->input->cur += 3;
1002          }
1003          len = 90;
1004  	break;
1005      case XML_CHAR_ENCODING_UCS2:
1006          len = 90;
1007  	break;
1008      case XML_CHAR_ENCODING_UCS4BE:
1009      case XML_CHAR_ENCODING_UCS4LE:
1010      case XML_CHAR_ENCODING_UCS4_2143:
1011      case XML_CHAR_ENCODING_UCS4_3412:
1012          len = 180;
1013  	break;
1014      case XML_CHAR_ENCODING_EBCDIC:
1015      case XML_CHAR_ENCODING_8859_1:
1016      case XML_CHAR_ENCODING_8859_2:
1017      case XML_CHAR_ENCODING_8859_3:
1018      case XML_CHAR_ENCODING_8859_4:
1019      case XML_CHAR_ENCODING_8859_5:
1020      case XML_CHAR_ENCODING_8859_6:
1021      case XML_CHAR_ENCODING_8859_7:
1022      case XML_CHAR_ENCODING_8859_8:
1023      case XML_CHAR_ENCODING_8859_9:
1024      case XML_CHAR_ENCODING_ASCII:
1025      case XML_CHAR_ENCODING_2022_JP:
1026      case XML_CHAR_ENCODING_SHIFT_JIS:
1027      case XML_CHAR_ENCODING_EUC_JP:
1028          len = 45;
1029  	break;
1030      }
1031      handler = xmlGetCharEncodingHandler(enc);
1032      if (handler == NULL) {
1033  	/*
1034  	 * Default handlers.
1035  	 */
1036  	switch (enc) {
1037  	    case XML_CHAR_ENCODING_ASCII:
1038  		/* default encoding, no conversion should be needed */
1039  		ctxt->charset = XML_CHAR_ENCODING_UTF8;
1040  		return(0);
1041  	    case XML_CHAR_ENCODING_UTF16LE:
1042  		break;
1043  	    case XML_CHAR_ENCODING_UTF16BE:
1044  		break;
1045  	    case XML_CHAR_ENCODING_UCS4LE:
1046  		__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1047  			       "encoding not supported %s\n",
1048  			       BAD_CAST "USC4 little endian", NULL);
1049  		break;
1050  	    case XML_CHAR_ENCODING_UCS4BE:
1051  		__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1052  			       "encoding not supported %s\n",
1053  			       BAD_CAST "USC4 big endian", NULL);
1054  		break;
1055  	    case XML_CHAR_ENCODING_EBCDIC:
1056  		__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1057  			       "encoding not supported %s\n",
1058  			       BAD_CAST "EBCDIC", NULL);
1059  		break;
1060  	    case XML_CHAR_ENCODING_UCS4_2143:
1061  		__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1062  			       "encoding not supported %s\n",
1063  			       BAD_CAST "UCS4 2143", NULL);
1064  		break;
1065  	    case XML_CHAR_ENCODING_UCS4_3412:
1066  		__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1067  			       "encoding not supported %s\n",
1068  			       BAD_CAST "UCS4 3412", NULL);
1069  		break;
1070  	    case XML_CHAR_ENCODING_UCS2:
1071  		__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1072  			       "encoding not supported %s\n",
1073  			       BAD_CAST "UCS2", NULL);
1074  		break;
1075  	    case XML_CHAR_ENCODING_8859_1:
1076  	    case XML_CHAR_ENCODING_8859_2:
1077  	    case XML_CHAR_ENCODING_8859_3:
1078  	    case XML_CHAR_ENCODING_8859_4:
1079  	    case XML_CHAR_ENCODING_8859_5:
1080  	    case XML_CHAR_ENCODING_8859_6:
1081  	    case XML_CHAR_ENCODING_8859_7:
1082  	    case XML_CHAR_ENCODING_8859_8:
1083  	    case XML_CHAR_ENCODING_8859_9:
1084  		/*
1085  		 * We used to keep the internal content in the
1086  		 * document encoding however this turns being unmaintainable
1087  		 * So xmlGetCharEncodingHandler() will return non-null
1088  		 * values for this now.
1089  		 */
1090  		if ((ctxt->inputNr == 1) &&
1091  		    (ctxt->encoding == NULL) &&
1092  		    (ctxt->input != NULL) &&
1093  		    (ctxt->input->encoding != NULL)) {
1094  		    ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1095  		}
1096  		ctxt->charset = enc;
1097  		return(0);
1098  	    case XML_CHAR_ENCODING_2022_JP:
1099  		__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1100  			       "encoding not supported %s\n",
1101  			       BAD_CAST "ISO-2022-JP", NULL);
1102  		break;
1103  	    case XML_CHAR_ENCODING_SHIFT_JIS:
1104  		__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1105  			       "encoding not supported %s\n",
1106  			       BAD_CAST "Shift_JIS", NULL);
1107  		break;
1108  	    case XML_CHAR_ENCODING_EUC_JP:
1109  		__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1110  			       "encoding not supported %s\n",
1111  			       BAD_CAST "EUC-JP", NULL);
1112  		break;
1113  	    default:
1114  	        break;
1115  	}
1116      }
1117      if (handler == NULL) {
1118          xmlStopParser(ctxt);
1119  	return(-1);
1120      }
1121      ctxt->charset = XML_CHAR_ENCODING_UTF8;
1122      ret = xmlSwitchToEncodingInt(ctxt, handler, len);
1123      if (((ret < 0) || (ctxt->errNo == XML_I18N_CONV_FAILED)) && !(ctxt->html)) {
1124          /*
1125  	 * on XML encoding conversion errors, stop the parser
1126  	 */
1127          xmlStopParser(ctxt);
1128  	ctxt->errNo = XML_I18N_CONV_FAILED;
1129      }
1130      return(ret);
1131  }
1132  
1133  /**
1134   * xmlSwitchInputEncoding:
1135   * @ctxt:  the parser context
1136   * @input:  the input stream
1137   * @handler:  the encoding handler
1138   * @len:  the number of bytes to convert for the first line or -1
1139   *
1140   * change the input functions when discovering the character encoding
1141   * of a given entity.
1142   *
1143   * Returns 0 in case of success, -1 otherwise
1144   */
1145  static int
1146  xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
1147                            xmlCharEncodingHandlerPtr handler, int len)
1148  {
1149      int nbchars;
1150  
1151      if (handler == NULL)
1152          return (-1);
1153      if (input == NULL)
1154          return (-1);
1155      if (input->buf != NULL) {
1156          if (input->buf->encoder != NULL) {
1157              /*
1158               * Check in case the auto encoding detetection triggered
1159               * in already.
1160               */
1161              if (input->buf->encoder == handler)
1162                  return (0);
1163  
1164              /*
1165               * "UTF-16" can be used for both LE and BE
1166               if ((!xmlStrncmp(BAD_CAST input->buf->encoder->name,
1167               BAD_CAST "UTF-16", 6)) &&
1168               (!xmlStrncmp(BAD_CAST handler->name,
1169               BAD_CAST "UTF-16", 6))) {
1170               return(0);
1171               }
1172               */
1173  
1174              /*
1175               * Note: this is a bit dangerous, but that's what it
1176               * takes to use nearly compatible signature for different
1177               * encodings.
1178               */
1179              xmlCharEncCloseFunc(input->buf->encoder);
1180              input->buf->encoder = handler;
1181              return (0);
1182          }
1183          input->buf->encoder = handler;
1184  
1185          /*
1186           * Is there already some content down the pipe to convert ?
1187           */
1188          if (xmlBufIsEmpty(input->buf->buffer) == 0) {
1189              int processed;
1190  	    unsigned int use;
1191  
1192              /*
1193               * Specific handling of the Byte Order Mark for
1194               * UTF-16
1195               */
1196              if ((handler->name != NULL) &&
1197                  (!strcmp(handler->name, "UTF-16LE") ||
1198                   !strcmp(handler->name, "UTF-16")) &&
1199                  (input->cur[0] == 0xFF) && (input->cur[1] == 0xFE)) {
1200                  input->cur += 2;
1201              }
1202              if ((handler->name != NULL) &&
1203                  (!strcmp(handler->name, "UTF-16BE")) &&
1204                  (input->cur[0] == 0xFE) && (input->cur[1] == 0xFF)) {
1205                  input->cur += 2;
1206              }
1207              /*
1208               * Errata on XML-1.0 June 20 2001
1209               * Specific handling of the Byte Order Mark for
1210               * UTF-8
1211               */
1212              if ((handler->name != NULL) &&
1213                  (!strcmp(handler->name, "UTF-8")) &&
1214                  (input->cur[0] == 0xEF) &&
1215                  (input->cur[1] == 0xBB) && (input->cur[2] == 0xBF)) {
1216                  input->cur += 3;
1217              }
1218  
1219              /*
1220               * Shrink the current input buffer.
1221               * Move it as the raw buffer and create a new input buffer
1222               */
1223              processed = input->cur - input->base;
1224              xmlBufShrink(input->buf->buffer, processed);
1225              input->buf->raw = input->buf->buffer;
1226              input->buf->buffer = xmlBufCreate();
1227  	    input->buf->rawconsumed = processed;
1228  	    use = xmlBufUse(input->buf->raw);
1229  
1230              if (ctxt->html) {
1231                  /*
1232                   * convert as much as possible of the buffer
1233                   */
1234                  nbchars = xmlCharEncInput(input->buf, 1);
1235              } else {
1236                  /*
1237                   * convert just enough to get
1238                   * '<?xml version="1.0" encoding="xxx"?>'
1239                   * parsed with the autodetected encoding
1240                   * into the parser reading buffer.
1241                   */
1242                  nbchars = xmlCharEncFirstLineInput(input->buf, len);
1243              }
1244              if (nbchars < 0) {
1245                  xmlBufFree(input->buf->buffer);
1246                  input->buf->buffer = input->buf->raw;
1247                  input->buf->raw = NULL;
1248                  input->buf->rawconsumed = 0;
1249              } else {
1250                  input->buf->rawconsumed += use - xmlBufUse(input->buf->raw);
1251              }
1252              xmlBufResetInput(input->buf->buffer, input);
1253              if (nbchars < 0) {
1254                  if (!ctxt->html) {
1255                      xmlErrInternal(ctxt,
1256                                     "switching encoding: encoder error\n",
1257                                     NULL);
1258                  }
1259                  return (-1);
1260              }
1261          }
1262          return (0);
1263      } else if (input->length == 0) {
1264  	/*
1265  	 * When parsing a static memory array one must know the
1266  	 * size to be able to convert the buffer.
1267  	 */
1268  	xmlErrInternal(ctxt, "switching encoding : no input\n", NULL);
1269  	return (-1);
1270      }
1271      return (0);
1272  }
1273  
1274  /**
1275   * xmlSwitchInputEncoding:
1276   * @ctxt:  the parser context
1277   * @input:  the input stream
1278   * @handler:  the encoding handler
1279   *
1280   * change the input functions when discovering the character encoding
1281   * of a given entity.
1282   *
1283   * Returns 0 in case of success, -1 otherwise
1284   */
1285  int
1286  xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
1287                            xmlCharEncodingHandlerPtr handler) {
1288      return(xmlSwitchInputEncodingInt(ctxt, input, handler, -1));
1289  }
1290  
1291  /**
1292   * xmlSwitchToEncodingInt:
1293   * @ctxt:  the parser context
1294   * @handler:  the encoding handler
1295   * @len: the length to convert or -1
1296   *
1297   * change the input functions when discovering the character encoding
1298   * of a given entity, and convert only @len bytes of the output, this
1299   * is needed on auto detect to allows any declared encoding later to
1300   * convert the actual content after the xmlDecl
1301   *
1302   * Returns 0 in case of success, -1 otherwise
1303   */
1304  static int
1305  xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt,
1306                         xmlCharEncodingHandlerPtr handler, int len) {
1307      int ret = 0;
1308  
1309      if (handler != NULL) {
1310          if (ctxt->input != NULL) {
1311  	    ret = xmlSwitchInputEncodingInt(ctxt, ctxt->input, handler, len);
1312  	} else {
1313  	    xmlErrInternal(ctxt, "xmlSwitchToEncoding : no input\n",
1314  	                   NULL);
1315  	    return(-1);
1316  	}
1317  	/*
1318  	 * The parsing is now done in UTF8 natively
1319  	 */
1320  	ctxt->charset = XML_CHAR_ENCODING_UTF8;
1321      } else
1322  	return(-1);
1323      return(ret);
1324  }
1325  
1326  /**
1327   * xmlSwitchToEncoding:
1328   * @ctxt:  the parser context
1329   * @handler:  the encoding handler
1330   *
1331   * change the input functions when discovering the character encoding
1332   * of a given entity.
1333   *
1334   * Returns 0 in case of success, -1 otherwise
1335   */
1336  int
1337  xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1338  {
1339      return (xmlSwitchToEncodingInt(ctxt, handler, -1));
1340  }
1341  
1342  /************************************************************************
1343   *									*
1344   *	Commodity functions to handle entities processing		*
1345   *									*
1346   ************************************************************************/
1347  
1348  /**
1349   * xmlFreeInputStream:
1350   * @input:  an xmlParserInputPtr
1351   *
1352   * Free up an input stream.
1353   */
1354  void
1355  xmlFreeInputStream(xmlParserInputPtr input) {
1356      if (input == NULL) return;
1357  
1358      if (input->filename != NULL) xmlFree((char *) input->filename);
1359      if (input->directory != NULL) xmlFree((char *) input->directory);
1360      if (input->encoding != NULL) xmlFree((char *) input->encoding);
1361      if (input->version != NULL) xmlFree((char *) input->version);
1362      if ((input->free != NULL) && (input->base != NULL))
1363          input->free((xmlChar *) input->base);
1364      if (input->buf != NULL)
1365          xmlFreeParserInputBuffer(input->buf);
1366      xmlFree(input);
1367  }
1368  
1369  /**
1370   * xmlNewInputStream:
1371   * @ctxt:  an XML parser context
1372   *
1373   * Create a new input stream structure.
1374   *
1375   * Returns the new input stream or NULL
1376   */
1377  xmlParserInputPtr
1378  xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1379      xmlParserInputPtr input;
1380  
1381      input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1382      if (input == NULL) {
1383          xmlErrMemory(ctxt,  "couldn't allocate a new input stream\n");
1384  	return(NULL);
1385      }
1386      memset(input, 0, sizeof(xmlParserInput));
1387      input->line = 1;
1388      input->col = 1;
1389      input->standalone = -1;
1390  
1391      /*
1392       * If the context is NULL the id cannot be initialized, but that
1393       * should not happen while parsing which is the situation where
1394       * the id is actually needed.
1395       */
1396      if (ctxt != NULL)
1397          input->id = ctxt->input_id++;
1398  
1399      return(input);
1400  }
1401  
1402  /**
1403   * xmlNewIOInputStream:
1404   * @ctxt:  an XML parser context
1405   * @input:  an I/O Input
1406   * @enc:  the charset encoding if known
1407   *
1408   * Create a new input stream structure encapsulating the @input into
1409   * a stream suitable for the parser.
1410   *
1411   * Returns the new input stream or NULL
1412   */
1413  xmlParserInputPtr
1414  xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1415  	            xmlCharEncoding enc) {
1416      xmlParserInputPtr inputStream;
1417  
1418      if (input == NULL) return(NULL);
1419      if (xmlParserDebugEntities)
1420  	xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
1421      inputStream = xmlNewInputStream(ctxt);
1422      if (inputStream == NULL) {
1423  	return(NULL);
1424      }
1425      inputStream->filename = NULL;
1426      inputStream->buf = input;
1427      xmlBufResetInput(inputStream->buf->buffer, inputStream);
1428  
1429      if (enc != XML_CHAR_ENCODING_NONE) {
1430          xmlSwitchEncoding(ctxt, enc);
1431      }
1432  
1433      return(inputStream);
1434  }
1435  
1436  /**
1437   * xmlNewEntityInputStream:
1438   * @ctxt:  an XML parser context
1439   * @entity:  an Entity pointer
1440   *
1441   * Create a new input stream based on an xmlEntityPtr
1442   *
1443   * Returns the new input stream or NULL
1444   */
1445  xmlParserInputPtr
1446  xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1447      xmlParserInputPtr input;
1448  
1449      if (entity == NULL) {
1450          xmlErrInternal(ctxt, "xmlNewEntityInputStream entity = NULL\n",
1451  	               NULL);
1452  	return(NULL);
1453      }
1454      if (xmlParserDebugEntities)
1455  	xmlGenericError(xmlGenericErrorContext,
1456  		"new input from entity: %s\n", entity->name);
1457      if (entity->content == NULL) {
1458  	switch (entity->etype) {
1459              case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1460  	        xmlErrInternal(ctxt, "Cannot parse entity %s\n",
1461  		               entity->name);
1462                  break;
1463              case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1464              case XML_EXTERNAL_PARAMETER_ENTITY:
1465  		return(xmlLoadExternalEntity((char *) entity->URI,
1466  		       (char *) entity->ExternalID, ctxt));
1467              case XML_INTERNAL_GENERAL_ENTITY:
1468  	        xmlErrInternal(ctxt,
1469  		      "Internal entity %s without content !\n",
1470  		               entity->name);
1471                  break;
1472              case XML_INTERNAL_PARAMETER_ENTITY:
1473  	        xmlErrInternal(ctxt,
1474  		      "Internal parameter entity %s without content !\n",
1475  		               entity->name);
1476                  break;
1477              case XML_INTERNAL_PREDEFINED_ENTITY:
1478  	        xmlErrInternal(ctxt,
1479  		      "Predefined entity %s without content !\n",
1480  		               entity->name);
1481                  break;
1482  	}
1483  	return(NULL);
1484      }
1485      input = xmlNewInputStream(ctxt);
1486      if (input == NULL) {
1487  	return(NULL);
1488      }
1489      if (entity->URI != NULL)
1490  	input->filename = (char *) xmlStrdup((xmlChar *) entity->URI);
1491      input->base = entity->content;
1492      if (entity->length == 0)
1493          entity->length = xmlStrlen(entity->content);
1494      input->cur = entity->content;
1495      input->length = entity->length;
1496      input->end = &entity->content[input->length];
1497      return(input);
1498  }
1499  
1500  /**
1501   * xmlNewStringInputStream:
1502   * @ctxt:  an XML parser context
1503   * @buffer:  an memory buffer
1504   *
1505   * Create a new input stream based on a memory buffer.
1506   * Returns the new input stream
1507   */
1508  xmlParserInputPtr
1509  xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
1510      xmlParserInputPtr input;
1511  
1512      if (buffer == NULL) {
1513          xmlErrInternal(ctxt, "xmlNewStringInputStream string = NULL\n",
1514  	               NULL);
1515  	return(NULL);
1516      }
1517      if (xmlParserDebugEntities)
1518  	xmlGenericError(xmlGenericErrorContext,
1519  		"new fixed input: %.30s\n", buffer);
1520      input = xmlNewInputStream(ctxt);
1521      if (input == NULL) {
1522          xmlErrMemory(ctxt,  "couldn't allocate a new input stream\n");
1523  	return(NULL);
1524      }
1525      input->base = buffer;
1526      input->cur = buffer;
1527      input->length = xmlStrlen(buffer);
1528      input->end = &buffer[input->length];
1529      return(input);
1530  }
1531  
1532  /**
1533   * xmlNewInputFromFile:
1534   * @ctxt:  an XML parser context
1535   * @filename:  the filename to use as entity
1536   *
1537   * Create a new input stream based on a file or an URL.
1538   *
1539   * Returns the new input stream or NULL in case of error
1540   */
1541  xmlParserInputPtr
1542  xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
1543      xmlParserInputBufferPtr buf;
1544      xmlParserInputPtr inputStream;
1545      char *directory = NULL;
1546      xmlChar *URI = NULL;
1547  
1548      if (xmlParserDebugEntities)
1549  	xmlGenericError(xmlGenericErrorContext,
1550  		"new input from file: %s\n", filename);
1551      if (ctxt == NULL) return(NULL);
1552      buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
1553      if (buf == NULL) {
1554  	if (filename == NULL)
1555  	    __xmlLoaderErr(ctxt,
1556  	                   "failed to load external entity: NULL filename \n",
1557  			   NULL);
1558  	else
1559  	    __xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n",
1560  			   (const char *) filename);
1561  	return(NULL);
1562      }
1563  
1564      inputStream = xmlNewInputStream(ctxt);
1565      if (inputStream == NULL)
1566  	return(NULL);
1567  
1568      inputStream->buf = buf;
1569      inputStream = xmlCheckHTTPInput(ctxt, inputStream);
1570      if (inputStream == NULL)
1571          return(NULL);
1572  
1573      if (inputStream->filename == NULL)
1574  	URI = xmlStrdup((xmlChar *) filename);
1575      else
1576  	URI = xmlStrdup((xmlChar *) inputStream->filename);
1577      directory = xmlParserGetDirectory((const char *) URI);
1578      if (inputStream->filename != NULL) xmlFree((char *)inputStream->filename);
1579      inputStream->filename = (char *) xmlCanonicPath((const xmlChar *) URI);
1580      if (URI != NULL) xmlFree((char *) URI);
1581      inputStream->directory = directory;
1582  
1583      xmlBufResetInput(inputStream->buf->buffer, inputStream);
1584      if ((ctxt->directory == NULL) && (directory != NULL))
1585          ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
1586      return(inputStream);
1587  }
1588  
1589  /************************************************************************
1590   *									*
1591   *		Commodity functions to handle parser contexts		*
1592   *									*
1593   ************************************************************************/
1594  
1595  /**
1596   * xmlInitParserCtxt:
1597   * @ctxt:  an XML parser context
1598   *
1599   * Initialize a parser context
1600   *
1601   * Returns 0 in case of success and -1 in case of error
1602   */
1603  
1604  int
1605  xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
1606  {
1607      xmlParserInputPtr input;
1608  
1609      if(ctxt==NULL) {
1610          xmlErrInternal(NULL, "Got NULL parser context\n", NULL);
1611          return(-1);
1612      }
1613  
1614      xmlDefaultSAXHandlerInit();
1615  
1616      if (ctxt->dict == NULL)
1617  	ctxt->dict = xmlDictCreate();
1618      if (ctxt->dict == NULL) {
1619          xmlErrMemory(NULL, "cannot initialize parser context\n");
1620  	return(-1);
1621      }
1622      xmlDictSetLimit(ctxt->dict, XML_MAX_DICTIONARY_LIMIT);
1623  
1624      if (ctxt->sax == NULL)
1625  	ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
1626      if (ctxt->sax == NULL) {
1627          xmlErrMemory(NULL, "cannot initialize parser context\n");
1628  	return(-1);
1629      }
1630      else
1631          xmlSAXVersion(ctxt->sax, 2);
1632  
1633      ctxt->maxatts = 0;
1634      ctxt->atts = NULL;
1635      /* Allocate the Input stack */
1636      if (ctxt->inputTab == NULL) {
1637  	ctxt->inputTab = (xmlParserInputPtr *)
1638  		    xmlMalloc(5 * sizeof(xmlParserInputPtr));
1639  	ctxt->inputMax = 5;
1640      }
1641      if (ctxt->inputTab == NULL) {
1642          xmlErrMemory(NULL, "cannot initialize parser context\n");
1643  	ctxt->inputNr = 0;
1644  	ctxt->inputMax = 0;
1645  	ctxt->input = NULL;
1646  	return(-1);
1647      }
1648      while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
1649          xmlFreeInputStream(input);
1650      }
1651      ctxt->inputNr = 0;
1652      ctxt->input = NULL;
1653  
1654      ctxt->version = NULL;
1655      ctxt->encoding = NULL;
1656      ctxt->standalone = -1;
1657      ctxt->hasExternalSubset = 0;
1658      ctxt->hasPErefs = 0;
1659      ctxt->html = 0;
1660      ctxt->external = 0;
1661      ctxt->instate = XML_PARSER_START;
1662      ctxt->token = 0;
1663      ctxt->directory = NULL;
1664  
1665      /* Allocate the Node stack */
1666      if (ctxt->nodeTab == NULL) {
1667  	ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
1668  	ctxt->nodeMax = 10;
1669      }
1670      if (ctxt->nodeTab == NULL) {
1671          xmlErrMemory(NULL, "cannot initialize parser context\n");
1672  	ctxt->nodeNr = 0;
1673  	ctxt->nodeMax = 0;
1674  	ctxt->node = NULL;
1675  	ctxt->inputNr = 0;
1676  	ctxt->inputMax = 0;
1677  	ctxt->input = NULL;
1678  	return(-1);
1679      }
1680      ctxt->nodeNr = 0;
1681      ctxt->node = NULL;
1682  
1683      /* Allocate the Name stack */
1684      if (ctxt->nameTab == NULL) {
1685  	ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
1686  	ctxt->nameMax = 10;
1687      }
1688      if (ctxt->nameTab == NULL) {
1689          xmlErrMemory(NULL, "cannot initialize parser context\n");
1690  	ctxt->nodeNr = 0;
1691  	ctxt->nodeMax = 0;
1692  	ctxt->node = NULL;
1693  	ctxt->inputNr = 0;
1694  	ctxt->inputMax = 0;
1695  	ctxt->input = NULL;
1696  	ctxt->nameNr = 0;
1697  	ctxt->nameMax = 0;
1698  	ctxt->name = NULL;
1699  	return(-1);
1700      }
1701      ctxt->nameNr = 0;
1702      ctxt->name = NULL;
1703  
1704      /* Allocate the space stack */
1705      if (ctxt->spaceTab == NULL) {
1706  	ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
1707  	ctxt->spaceMax = 10;
1708      }
1709      if (ctxt->spaceTab == NULL) {
1710          xmlErrMemory(NULL, "cannot initialize parser context\n");
1711  	ctxt->nodeNr = 0;
1712  	ctxt->nodeMax = 0;
1713  	ctxt->node = NULL;
1714  	ctxt->inputNr = 0;
1715  	ctxt->inputMax = 0;
1716  	ctxt->input = NULL;
1717  	ctxt->nameNr = 0;
1718  	ctxt->nameMax = 0;
1719  	ctxt->name = NULL;
1720  	ctxt->spaceNr = 0;
1721  	ctxt->spaceMax = 0;
1722  	ctxt->space = NULL;
1723  	return(-1);
1724      }
1725      ctxt->spaceNr = 1;
1726      ctxt->spaceMax = 10;
1727      ctxt->spaceTab[0] = -1;
1728      ctxt->space = &ctxt->spaceTab[0];
1729      ctxt->userData = ctxt;
1730      ctxt->myDoc = NULL;
1731      ctxt->wellFormed = 1;
1732      ctxt->nsWellFormed = 1;
1733      ctxt->valid = 1;
1734      ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
1735      if (ctxt->loadsubset) {
1736          ctxt->options |= XML_PARSE_DTDLOAD;
1737      }
1738      ctxt->validate = xmlDoValidityCheckingDefaultValue;
1739      ctxt->pedantic = xmlPedanticParserDefaultValue;
1740      if (ctxt->pedantic) {
1741          ctxt->options |= XML_PARSE_PEDANTIC;
1742      }
1743      ctxt->linenumbers = xmlLineNumbersDefaultValue;
1744      ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
1745      if (ctxt->keepBlanks == 0) {
1746  	ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
1747  	ctxt->options |= XML_PARSE_NOBLANKS;
1748      }
1749  
1750      ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0;
1751      ctxt->vctxt.userData = ctxt;
1752      ctxt->vctxt.error = xmlParserValidityError;
1753      ctxt->vctxt.warning = xmlParserValidityWarning;
1754      if (ctxt->validate) {
1755  	if (xmlGetWarningsDefaultValue == 0)
1756  	    ctxt->vctxt.warning = NULL;
1757  	else
1758  	    ctxt->vctxt.warning = xmlParserValidityWarning;
1759  	ctxt->vctxt.nodeMax = 0;
1760          ctxt->options |= XML_PARSE_DTDVALID;
1761      }
1762      ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
1763      if (ctxt->replaceEntities) {
1764          ctxt->options |= XML_PARSE_NOENT;
1765      }
1766      ctxt->record_info = 0;
1767      ctxt->nbChars = 0;
1768      ctxt->checkIndex = 0;
1769      ctxt->inSubset = 0;
1770      ctxt->errNo = XML_ERR_OK;
1771      ctxt->depth = 0;
1772      ctxt->charset = XML_CHAR_ENCODING_UTF8;
1773      ctxt->catalogs = NULL;
1774      ctxt->nbentities = 0;
1775      ctxt->sizeentities = 0;
1776      ctxt->sizeentcopy = 0;
1777      ctxt->input_id = 1;
1778      xmlInitNodeInfoSeq(&ctxt->node_seq);
1779      return(0);
1780  }
1781  
1782  /**
1783   * xmlFreeParserCtxt:
1784   * @ctxt:  an XML parser context
1785   *
1786   * Free all the memory used by a parser context. However the parsed
1787   * document in ctxt->myDoc is not freed.
1788   */
1789  
1790  void
1791  xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
1792  {
1793      xmlParserInputPtr input;
1794  
1795      if (ctxt == NULL) return;
1796  
1797      while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
1798          xmlFreeInputStream(input);
1799      }
1800      if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
1801      if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab);
1802      if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
1803      if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab);
1804      if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
1805      if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
1806      if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
1807      if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
1808      if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
1809  #ifdef LIBXML_SAX1_ENABLED
1810      if ((ctxt->sax != NULL) &&
1811          (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler))
1812  #else
1813      if (ctxt->sax != NULL)
1814  #endif /* LIBXML_SAX1_ENABLED */
1815          xmlFree(ctxt->sax);
1816      if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
1817      if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
1818      if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts);
1819      if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
1820      if (ctxt->nsTab != NULL) xmlFree((char *) ctxt->nsTab);
1821      if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab);
1822      if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs);
1823      if (ctxt->attsDefault != NULL)
1824          xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
1825      if (ctxt->attsSpecial != NULL)
1826          xmlHashFree(ctxt->attsSpecial, NULL);
1827      if (ctxt->freeElems != NULL) {
1828          xmlNodePtr cur, next;
1829  
1830  	cur = ctxt->freeElems;
1831  	while (cur != NULL) {
1832  	    next = cur->next;
1833  	    xmlFree(cur);
1834  	    cur = next;
1835  	}
1836      }
1837      if (ctxt->freeAttrs != NULL) {
1838          xmlAttrPtr cur, next;
1839  
1840  	cur = ctxt->freeAttrs;
1841  	while (cur != NULL) {
1842  	    next = cur->next;
1843  	    xmlFree(cur);
1844  	    cur = next;
1845  	}
1846      }
1847      /*
1848       * cleanup the error strings
1849       */
1850      if (ctxt->lastError.message != NULL)
1851          xmlFree(ctxt->lastError.message);
1852      if (ctxt->lastError.file != NULL)
1853          xmlFree(ctxt->lastError.file);
1854      if (ctxt->lastError.str1 != NULL)
1855          xmlFree(ctxt->lastError.str1);
1856      if (ctxt->lastError.str2 != NULL)
1857          xmlFree(ctxt->lastError.str2);
1858      if (ctxt->lastError.str3 != NULL)
1859          xmlFree(ctxt->lastError.str3);
1860  
1861  #ifdef LIBXML_CATALOG_ENABLED
1862      if (ctxt->catalogs != NULL)
1863  	xmlCatalogFreeLocal(ctxt->catalogs);
1864  #endif
1865      xmlFree(ctxt);
1866  }
1867  
1868  /**
1869   * xmlNewParserCtxt:
1870   *
1871   * Allocate and initialize a new parser context.
1872   *
1873   * Returns the xmlParserCtxtPtr or NULL
1874   */
1875  
1876  xmlParserCtxtPtr
1877  xmlNewParserCtxt(void)
1878  {
1879      xmlParserCtxtPtr ctxt;
1880  
1881      ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
1882      if (ctxt == NULL) {
1883  	xmlErrMemory(NULL, "cannot allocate parser context\n");
1884  	return(NULL);
1885      }
1886      memset(ctxt, 0, sizeof(xmlParserCtxt));
1887      if (xmlInitParserCtxt(ctxt) < 0) {
1888          xmlFreeParserCtxt(ctxt);
1889  	return(NULL);
1890      }
1891      return(ctxt);
1892  }
1893  
/************************************************************************
 *									*
 *		Handling of node information				*
 *									*
 ************************************************************************/
1899  
1900  /**
1901   * xmlClearParserCtxt:
1902   * @ctxt:  an XML parser context
1903   *
1904   * Clear (release owned resources) and reinitialize a parser context
1905   */
1906  
1907  void
1908  xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
1909  {
1910    if (ctxt==NULL)
1911      return;
1912    xmlClearNodeInfoSeq(&ctxt->node_seq);
1913    xmlCtxtReset(ctxt);
1914  }
1915  
1916  
1917  /**
1918   * xmlParserFindNodeInfo:
1919   * @ctx:  an XML parser context
1920   * @node:  an XML node within the tree
1921   *
1922   * Find the parser node info struct for a given node
1923   *
1924   * Returns an xmlParserNodeInfo block pointer or NULL
1925   */
1926  const xmlParserNodeInfo *
1927  xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx, const xmlNodePtr node)
1928  {
1929      unsigned long pos;
1930  
1931      if ((ctx == NULL) || (node == NULL))
1932          return (NULL);
1933      /* Find position where node should be at */
1934      pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
1935      if (pos < ctx->node_seq.length
1936          && ctx->node_seq.buffer[pos].node == node)
1937          return &ctx->node_seq.buffer[pos];
1938      else
1939          return NULL;
1940  }
1941  
1942  
1943  /**
1944   * xmlInitNodeInfoSeq:
1945   * @seq:  a node info sequence pointer
1946   *
1947   * -- Initialize (set to initial state) node info sequence
1948   */
1949  void
1950  xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1951  {
1952      if (seq == NULL)
1953          return;
1954      seq->length = 0;
1955      seq->maximum = 0;
1956      seq->buffer = NULL;
1957  }
1958  
1959  /**
1960   * xmlClearNodeInfoSeq:
1961   * @seq:  a node info sequence pointer
1962   *
1963   * -- Clear (release memory and reinitialize) node
1964   *   info sequence
1965   */
1966  void
1967  xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1968  {
1969      if (seq == NULL)
1970          return;
1971      if (seq->buffer != NULL)
1972          xmlFree(seq->buffer);
1973      xmlInitNodeInfoSeq(seq);
1974  }
1975  
1976  /**
1977   * xmlParserFindNodeInfoIndex:
1978   * @seq:  a node info sequence pointer
1979   * @node:  an XML node pointer
1980   *
1981   *
1982   * xmlParserFindNodeInfoIndex : Find the index that the info record for
1983   *   the given node is or should be at in a sorted sequence
1984   *
1985   * Returns a long indicating the position of the record
1986   */
1987  unsigned long
1988  xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,
1989                             const xmlNodePtr node)
1990  {
1991      unsigned long upper, lower, middle;
1992      int found = 0;
1993  
1994      if ((seq == NULL) || (node == NULL))
1995          return ((unsigned long) -1);
1996  
1997      /* Do a binary search for the key */
1998      lower = 1;
1999      upper = seq->length;
2000      middle = 0;
2001      while (lower <= upper && !found) {
2002          middle = lower + (upper - lower) / 2;
2003          if (node == seq->buffer[middle - 1].node)
2004              found = 1;
2005          else if (node < seq->buffer[middle - 1].node)
2006              upper = middle - 1;
2007          else
2008              lower = middle + 1;
2009      }
2010  
2011      /* Return position */
2012      if (middle == 0 || seq->buffer[middle - 1].node < node)
2013          return middle;
2014      else
2015          return middle - 1;
2016  }
2017  
2018  
2019  /**
2020   * xmlParserAddNodeInfo:
2021   * @ctxt:  an XML parser context
2022   * @info:  a node info sequence pointer
2023   *
2024   * Insert node info record into the sorted sequence
2025   */
2026  void
2027  xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
2028                       const xmlParserNodeInfoPtr info)
2029  {
2030      unsigned long pos;
2031  
2032      if ((ctxt == NULL) || (info == NULL)) return;
2033  
2034      /* Find pos and check to see if node is already in the sequence */
2035      pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr)
2036                                       info->node);
2037  
2038      if ((pos < ctxt->node_seq.length) &&
2039          (ctxt->node_seq.buffer != NULL) &&
2040          (ctxt->node_seq.buffer[pos].node == info->node)) {
2041          ctxt->node_seq.buffer[pos] = *info;
2042      }
2043  
2044      /* Otherwise, we need to add new node to buffer */
2045      else {
2046          if ((ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) ||
2047  	    (ctxt->node_seq.buffer == NULL)) {
2048              xmlParserNodeInfo *tmp_buffer;
2049              unsigned int byte_size;
2050  
2051              if (ctxt->node_seq.maximum == 0)
2052                  ctxt->node_seq.maximum = 2;
2053              byte_size = (sizeof(*ctxt->node_seq.buffer) *
2054  			(2 * ctxt->node_seq.maximum));
2055  
2056              if (ctxt->node_seq.buffer == NULL)
2057                  tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size);
2058              else
2059                  tmp_buffer =
2060                      (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer,
2061                                                       byte_size);
2062  
2063              if (tmp_buffer == NULL) {
2064  		xmlErrMemory(ctxt, "failed to allocate buffer\n");
2065                  return;
2066              }
2067              ctxt->node_seq.buffer = tmp_buffer;
2068              ctxt->node_seq.maximum *= 2;
2069          }
2070  
2071          /* If position is not at end, move elements out of the way */
2072          if (pos != ctxt->node_seq.length) {
2073              unsigned long i;
2074  
2075              for (i = ctxt->node_seq.length; i > pos; i--)
2076                  ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2077          }
2078  
2079          /* Copy element and increase length */
2080          ctxt->node_seq.buffer[pos] = *info;
2081          ctxt->node_seq.length++;
2082      }
2083  }
2084  
/************************************************************************
 *									*
 *		Default settings					*
 *									*
 ************************************************************************/
2090  /**
2091   * xmlPedanticParserDefault:
2092   * @val:  int 0 or 1
2093   *
2094   * Set and return the previous value for enabling pedantic warnings.
2095   *
2096   * Returns the last value for 0 for no substitution, 1 for substitution.
2097   */
2098  
2099  int
2100  xmlPedanticParserDefault(int val) {
2101      int old = xmlPedanticParserDefaultValue;
2102  
2103      xmlPedanticParserDefaultValue = val;
2104      return(old);
2105  }
2106  
2107  /**
2108   * xmlLineNumbersDefault:
2109   * @val:  int 0 or 1
2110   *
2111   * Set and return the previous value for enabling line numbers in elements
2112   * contents. This may break on old application and is turned off by default.
2113   *
2114   * Returns the last value for 0 for no substitution, 1 for substitution.
2115   */
2116  
2117  int
2118  xmlLineNumbersDefault(int val) {
2119      int old = xmlLineNumbersDefaultValue;
2120  
2121      xmlLineNumbersDefaultValue = val;
2122      return(old);
2123  }
2124  
2125  /**
2126   * xmlSubstituteEntitiesDefault:
2127   * @val:  int 0 or 1
2128   *
2129   * Set and return the previous value for default entity support.
2130   * Initially the parser always keep entity references instead of substituting
2131   * entity values in the output. This function has to be used to change the
2132   * default parser behavior
2133   * SAX::substituteEntities() has to be used for changing that on a file by
2134   * file basis.
2135   *
2136   * Returns the last value for 0 for no substitution, 1 for substitution.
2137   */
2138  
2139  int
2140  xmlSubstituteEntitiesDefault(int val) {
2141      int old = xmlSubstituteEntitiesDefaultValue;
2142  
2143      xmlSubstituteEntitiesDefaultValue = val;
2144      return(old);
2145  }
2146  
2147  /**
2148   * xmlKeepBlanksDefault:
2149   * @val:  int 0 or 1
2150   *
2151   * Set and return the previous value for default blanks text nodes support.
2152   * The 1.x version of the parser used an heuristic to try to detect
2153   * ignorable white spaces. As a result the SAX callback was generating
2154   * xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when
2155   * using the DOM output text nodes containing those blanks were not generated.
2156   * The 2.x and later version will switch to the XML standard way and
2157   * ignorableWhitespace() are only generated when running the parser in
2158   * validating mode and when the current element doesn't allow CDATA or
2159   * mixed content.
2160   * This function is provided as a way to force the standard behavior
2161   * on 1.X libs and to switch back to the old mode for compatibility when
2162   * running 1.X client code on 2.X . Upgrade of 1.X code should be done
2163   * by using xmlIsBlankNode() commodity function to detect the "empty"
2164   * nodes generated.
2165   * This value also affect autogeneration of indentation when saving code
2166   * if blanks sections are kept, indentation is not generated.
2167   *
2168   * Returns the last value for 0 for no substitution, 1 for substitution.
2169   */
2170  
2171  int
2172  xmlKeepBlanksDefault(int val) {
2173      int old = xmlKeepBlanksDefaultValue;
2174  
2175      xmlKeepBlanksDefaultValue = val;
2176      if (!val) xmlIndentTreeOutput = 1;
2177      return(old);
2178  }
2179  
2180  #define bottom_parserInternals
2181  #include "elfgcchack.h"