parser.c
1 /* 2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly 3 * implemented on top of the SAX interfaces 4 * 5 * References: 6 * The XML specification: 7 * http://www.w3.org/TR/REC-xml 8 * Original 1.0 version: 9 * http://www.w3.org/TR/1998/REC-xml-19980210 10 * XML second edition working draft 11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814 12 * 13 * Okay this is a big file, the parser core is around 7000 lines, then it 14 * is followed by the progressive parser top routines, then the various 15 * high level APIs to call the parser and a few miscellaneous functions. 16 * A number of helper functions and deprecated ones have been moved to 17 * parserInternals.c to reduce this file size. 18 * As much as possible the functions are associated with their relative 19 * production in the XML specification. A few productions defining the 20 * different ranges of character are actually implanted either in 21 * parserInternals.h or parserInternals.c 22 * The DOM tree build is realized from the default SAX callbacks in 23 * the module SAX.c. 24 * The routines doing the validation checks are in valid.c and called either 25 * from the SAX callbacks or as standalone functions using a preparsed 26 * document. 27 * 28 * See Copyright for the status of this software. 
29 * 30 * daniel@veillard.com 31 */ 32 33 #define IN_LIBXML 34 #include "libxml.h" 35 36 #if defined(WIN32) && !defined (__CYGWIN__) 37 #define XML_DIR_SEP '\\' 38 #else 39 #define XML_DIR_SEP '/' 40 #endif 41 42 #include <assert.h> 43 #include <stdlib.h> 44 #include <limits.h> 45 #include <string.h> 46 #include <stdarg.h> 47 #include <libxml/xmlmemory.h> 48 #include <libxml/threads.h> 49 #include <libxml/globals.h> 50 #include <libxml/tree.h> 51 #include <libxml/parser.h> 52 #include <libxml/parserInternals.h> 53 #include <libxml/valid.h> 54 #include <libxml/entities.h> 55 #include <libxml/xmlerror.h> 56 #include <libxml/encoding.h> 57 #include <libxml/xmlIO.h> 58 #include <libxml/uri.h> 59 #include <libxml/xmlversion.h> 60 #ifdef LIBXML_CATALOG_ENABLED 61 #include <libxml/catalog.h> 62 #endif 63 #ifdef LIBXML_SCHEMAS_ENABLED 64 #include <libxml/xmlschemastypes.h> 65 #include <libxml/relaxng.h> 66 #endif 67 #ifdef HAVE_CTYPE_H 68 #include <ctype.h> 69 #endif 70 #ifdef HAVE_STDLIB_H 71 #include <stdlib.h> 72 #endif 73 #ifdef HAVE_SYS_STAT_H 74 #include <sys/stat.h> 75 #endif 76 #ifdef HAVE_FCNTL_H 77 #include <fcntl.h> 78 #endif 79 #ifdef HAVE_UNISTD_H 80 #include <unistd.h> 81 #endif 82 #ifdef HAVE_ZLIB_H 83 #include <zlib.h> 84 #endif 85 #ifdef HAVE_LZMA_H 86 #include <lzma.h> 87 #endif 88 89 #include "buf.h" 90 #include "enc.h" 91 92 const int xmlEntityDecodingDepthMax = 40; 93 const int xmlEntityDecodingDepthHugeMax = 1024; 94 95 static void 96 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info); 97 98 static xmlParserCtxtPtr 99 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID, 100 const xmlChar *base, xmlParserCtxtPtr pctx); 101 102 static void xmlHaltParser(xmlParserCtxtPtr ctxt); 103 104 /************************************************************************ 105 * * 106 * Arbitrary limits set in the parser. 
See XML_PARSE_HUGE * 107 * * 108 ************************************************************************/ 109 110 #define XML_PARSER_BIG_ENTITY 1000 111 #define XML_PARSER_LOT_ENTITY 5000 112 113 /* 114 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity 115 * replacement over the size in byte of the input indicates that you have 116 * and eponential behaviour. A value of 10 correspond to at least 3 entity 117 * replacement per byte of input. 118 */ 119 #define XML_PARSER_NON_LINEAR 10 120 121 /* 122 * xmlParserEntityCheck 123 * 124 * Function to check non-linear entity expansion behaviour 125 * This is here to detect and stop exponential linear entity expansion 126 * This is not a limitation of the parser but a safety 127 * boundary feature. It can be disabled with the XML_PARSE_HUGE 128 * parser option. 129 */ 130 static int 131 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size, 132 xmlEntityPtr ent, size_t replacement) 133 { 134 size_t consumed = 0; 135 int i; 136 137 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE)) 138 return (0); 139 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP) 140 return (1); 141 142 /* 143 * This may look absurd but is needed to detect 144 * entities problems 145 */ 146 if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 147 (ent->content != NULL) && (ent->checked == 0) && 148 (ctxt->errNo != XML_ERR_ENTITY_LOOP)) { 149 unsigned long oldnbent = ctxt->nbentities; 150 xmlChar *rep; 151 152 ent->checked = 1; 153 154 ++ctxt->depth; 155 rep = xmlStringDecodeEntities(ctxt, ent->content, 156 XML_SUBSTITUTE_REF, 0, 0, 0); 157 --ctxt->depth; 158 if (ctxt->errNo == XML_ERR_ENTITY_LOOP) { 159 ent->content[0] = 0; 160 } 161 162 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2; 163 if (rep != NULL) { 164 if (xmlStrchr(rep, '<')) 165 ent->checked |= 1; 166 xmlFree(rep); 167 rep = NULL; 168 } 169 } 170 171 /* 172 * Prevent entity exponential check, not just replacement while 173 * parsing the 
DTD 174 * The check is potentially costly so do that only once in a thousand 175 */ 176 if ((ctxt->instate == XML_PARSER_DTD) && (ctxt->nbentities > 10000) && 177 (ctxt->nbentities % 1024 == 0)) { 178 for (i = 0;i < ctxt->inputNr;i++) { 179 consumed += ctxt->inputTab[i]->consumed + 180 (ctxt->inputTab[i]->cur - ctxt->inputTab[i]->base); 181 } 182 if (ctxt->nbentities > consumed * XML_PARSER_NON_LINEAR) { 183 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 184 ctxt->instate = XML_PARSER_EOF; 185 return (1); 186 } 187 consumed = 0; 188 } 189 190 191 192 if (replacement != 0) { 193 if (replacement < XML_MAX_TEXT_LENGTH) 194 return(0); 195 196 /* 197 * If the volume of entity copy reaches 10 times the 198 * amount of parsed data and over the large text threshold 199 * then that's very likely to be an abuse. 200 */ 201 if (ctxt->input != NULL) { 202 consumed = ctxt->input->consumed + 203 (ctxt->input->cur - ctxt->input->base); 204 } 205 consumed += ctxt->sizeentities; 206 207 if (replacement < XML_PARSER_NON_LINEAR * consumed) 208 return(0); 209 } else if (size != 0) { 210 /* 211 * Do the check based on the replacement size of the entity 212 */ 213 if (size < XML_PARSER_BIG_ENTITY) 214 return(0); 215 216 /* 217 * A limit on the amount of text data reasonably used 218 */ 219 if (ctxt->input != NULL) { 220 consumed = ctxt->input->consumed + 221 (ctxt->input->cur - ctxt->input->base); 222 } 223 consumed += ctxt->sizeentities; 224 225 if ((size < XML_PARSER_NON_LINEAR * consumed) && 226 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed)) 227 return (0); 228 } else if (ent != NULL) { 229 /* 230 * use the number of parsed entities in the replacement 231 */ 232 size = ent->checked / 2; 233 234 /* 235 * The amount of data parsed counting entities size only once 236 */ 237 if (ctxt->input != NULL) { 238 consumed = ctxt->input->consumed + 239 (ctxt->input->cur - ctxt->input->base); 240 } 241 consumed += ctxt->sizeentities; 242 243 /* 244 * Check the density of entities for 
the amount of data 245 * knowing an entity reference will take at least 3 bytes 246 */ 247 if (size * 3 < consumed * XML_PARSER_NON_LINEAR) 248 return (0); 249 } else { 250 /* 251 * strange we got no data for checking 252 */ 253 if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) && 254 (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) || 255 (ctxt->nbentities <= 10000)) 256 return (0); 257 } 258 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 259 return (1); 260 } 261 262 /** 263 * xmlParserMaxDepth: 264 * 265 * arbitrary depth limit for the XML documents that we allow to 266 * process. This is not a limitation of the parser but a safety 267 * boundary feature. It can be disabled with the XML_PARSE_HUGE 268 * parser option. 269 */ 270 unsigned int xmlParserMaxDepth = 256; 271 272 273 274 #define SAX2 1 275 #define XML_PARSER_BIG_BUFFER_SIZE 300 276 #define XML_PARSER_BUFFER_SIZE 100 277 #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document" 278 279 /** 280 * XML_PARSER_CHUNK_SIZE 281 * 282 * When calling GROW that's the minimal amount of data 283 * the parser expected to have received. 
It is not a hard 284 * limit but an optimization when reading strings like Names 285 * It is not strictly needed as long as inputs available characters 286 * are followed by 0, which should be provided by the I/O level 287 */ 288 #define XML_PARSER_CHUNK_SIZE 100 289 290 /* 291 * List of XML prefixed PI allowed by W3C specs 292 */ 293 294 static const char * const xmlW3CPIs[] = { 295 "xml-stylesheet", 296 "xml-model", 297 NULL 298 }; 299 300 301 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */ 302 static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt, 303 const xmlChar **str); 304 305 static xmlParserErrors 306 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 307 xmlSAXHandlerPtr sax, 308 void *user_data, int depth, const xmlChar *URL, 309 const xmlChar *ID, xmlNodePtr *list); 310 311 static int 312 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, 313 const char *encoding); 314 #ifdef LIBXML_LEGACY_ENABLED 315 static void 316 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 317 xmlNodePtr lastNode); 318 #endif /* LIBXML_LEGACY_ENABLED */ 319 320 static xmlParserErrors 321 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, 322 const xmlChar *string, void *user_data, xmlNodePtr *lst); 323 324 static int 325 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity); 326 327 /************************************************************************ 328 * * 329 * Some factorized error routines * 330 * * 331 ************************************************************************/ 332 333 /** 334 * xmlErrAttributeDup: 335 * @ctxt: an XML parser context 336 * @prefix: the attribute prefix 337 * @localname: the attribute localname 338 * 339 * Handle a redefinition of attribute error 340 */ 341 static void 342 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix, 343 const xmlChar * localname) 344 { 345 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 346 
(ctxt->instate == XML_PARSER_EOF)) 347 return; 348 if (ctxt != NULL) 349 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED; 350 351 if (prefix == NULL) 352 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 353 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0, 354 (const char *) localname, NULL, NULL, 0, 0, 355 "Attribute %s redefined\n", localname); 356 else 357 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 358 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0, 359 (const char *) prefix, (const char *) localname, 360 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix, 361 localname); 362 if (ctxt != NULL) { 363 ctxt->wellFormed = 0; 364 if (ctxt->recovery == 0) 365 ctxt->disableSAX = 1; 366 } 367 } 368 369 /** 370 * xmlFatalErr: 371 * @ctxt: an XML parser context 372 * @error: the error number 373 * @extra: extra information string 374 * 375 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 376 */ 377 static void 378 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info) 379 { 380 const char *errmsg; 381 382 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 383 (ctxt->instate == XML_PARSER_EOF)) 384 return; 385 switch (error) { 386 case XML_ERR_INVALID_HEX_CHARREF: 387 errmsg = "CharRef: invalid hexadecimal value"; 388 break; 389 case XML_ERR_INVALID_DEC_CHARREF: 390 errmsg = "CharRef: invalid decimal value"; 391 break; 392 case XML_ERR_INVALID_CHARREF: 393 errmsg = "CharRef: invalid value"; 394 break; 395 case XML_ERR_INTERNAL_ERROR: 396 errmsg = "internal error"; 397 break; 398 case XML_ERR_PEREF_AT_EOF: 399 errmsg = "PEReference at end of document"; 400 break; 401 case XML_ERR_PEREF_IN_PROLOG: 402 errmsg = "PEReference in prolog"; 403 break; 404 case XML_ERR_PEREF_IN_EPILOG: 405 errmsg = "PEReference in epilog"; 406 break; 407 case XML_ERR_PEREF_NO_NAME: 408 errmsg = "PEReference: no name"; 409 break; 410 case XML_ERR_PEREF_SEMICOL_MISSING: 411 errmsg = "PEReference: expecting ';'"; 412 break; 
413 case XML_ERR_ENTITY_LOOP: 414 errmsg = "Detected an entity reference loop"; 415 break; 416 case XML_ERR_ENTITY_NOT_STARTED: 417 errmsg = "EntityValue: \" or ' expected"; 418 break; 419 case XML_ERR_ENTITY_PE_INTERNAL: 420 errmsg = "PEReferences forbidden in internal subset"; 421 break; 422 case XML_ERR_ENTITY_NOT_FINISHED: 423 errmsg = "EntityValue: \" or ' expected"; 424 break; 425 case XML_ERR_ATTRIBUTE_NOT_STARTED: 426 errmsg = "AttValue: \" or ' expected"; 427 break; 428 case XML_ERR_LT_IN_ATTRIBUTE: 429 errmsg = "Unescaped '<' not allowed in attributes values"; 430 break; 431 case XML_ERR_LITERAL_NOT_STARTED: 432 errmsg = "SystemLiteral \" or ' expected"; 433 break; 434 case XML_ERR_LITERAL_NOT_FINISHED: 435 errmsg = "Unfinished System or Public ID \" or ' expected"; 436 break; 437 case XML_ERR_MISPLACED_CDATA_END: 438 errmsg = "Sequence ']]>' not allowed in content"; 439 break; 440 case XML_ERR_URI_REQUIRED: 441 errmsg = "SYSTEM or PUBLIC, the URI is missing"; 442 break; 443 case XML_ERR_PUBID_REQUIRED: 444 errmsg = "PUBLIC, the Public Identifier is missing"; 445 break; 446 case XML_ERR_HYPHEN_IN_COMMENT: 447 errmsg = "Comment must not contain '--' (double-hyphen)"; 448 break; 449 case XML_ERR_PI_NOT_STARTED: 450 errmsg = "xmlParsePI : no target name"; 451 break; 452 case XML_ERR_RESERVED_XML_NAME: 453 errmsg = "Invalid PI name"; 454 break; 455 case XML_ERR_NOTATION_NOT_STARTED: 456 errmsg = "NOTATION: Name expected here"; 457 break; 458 case XML_ERR_NOTATION_NOT_FINISHED: 459 errmsg = "'>' required to close NOTATION declaration"; 460 break; 461 case XML_ERR_VALUE_REQUIRED: 462 errmsg = "Entity value required"; 463 break; 464 case XML_ERR_URI_FRAGMENT: 465 errmsg = "Fragment not allowed"; 466 break; 467 case XML_ERR_ATTLIST_NOT_STARTED: 468 errmsg = "'(' required to start ATTLIST enumeration"; 469 break; 470 case XML_ERR_NMTOKEN_REQUIRED: 471 errmsg = "NmToken expected in ATTLIST enumeration"; 472 break; 473 case XML_ERR_ATTLIST_NOT_FINISHED: 474 errmsg = 
"')' required to finish ATTLIST enumeration"; 475 break; 476 case XML_ERR_MIXED_NOT_STARTED: 477 errmsg = "MixedContentDecl : '|' or ')*' expected"; 478 break; 479 case XML_ERR_PCDATA_REQUIRED: 480 errmsg = "MixedContentDecl : '#PCDATA' expected"; 481 break; 482 case XML_ERR_ELEMCONTENT_NOT_STARTED: 483 errmsg = "ContentDecl : Name or '(' expected"; 484 break; 485 case XML_ERR_ELEMCONTENT_NOT_FINISHED: 486 errmsg = "ContentDecl : ',' '|' or ')' expected"; 487 break; 488 case XML_ERR_PEREF_IN_INT_SUBSET: 489 errmsg = 490 "PEReference: forbidden within markup decl in internal subset"; 491 break; 492 case XML_ERR_GT_REQUIRED: 493 errmsg = "expected '>'"; 494 break; 495 case XML_ERR_CONDSEC_INVALID: 496 errmsg = "XML conditional section '[' expected"; 497 break; 498 case XML_ERR_EXT_SUBSET_NOT_FINISHED: 499 errmsg = "Content error in the external subset"; 500 break; 501 case XML_ERR_CONDSEC_INVALID_KEYWORD: 502 errmsg = 503 "conditional section INCLUDE or IGNORE keyword expected"; 504 break; 505 case XML_ERR_CONDSEC_NOT_FINISHED: 506 errmsg = "XML conditional section not closed"; 507 break; 508 case XML_ERR_XMLDECL_NOT_STARTED: 509 errmsg = "Text declaration '<?xml' required"; 510 break; 511 case XML_ERR_XMLDECL_NOT_FINISHED: 512 errmsg = "parsing XML declaration: '?>' expected"; 513 break; 514 case XML_ERR_EXT_ENTITY_STANDALONE: 515 errmsg = "external parsed entities cannot be standalone"; 516 break; 517 case XML_ERR_ENTITYREF_SEMICOL_MISSING: 518 errmsg = "EntityRef: expecting ';'"; 519 break; 520 case XML_ERR_DOCTYPE_NOT_FINISHED: 521 errmsg = "DOCTYPE improperly terminated"; 522 break; 523 case XML_ERR_LTSLASH_REQUIRED: 524 errmsg = "EndTag: '</' not found"; 525 break; 526 case XML_ERR_EQUAL_REQUIRED: 527 errmsg = "expected '='"; 528 break; 529 case XML_ERR_STRING_NOT_CLOSED: 530 errmsg = "String not closed expecting \" or '"; 531 break; 532 case XML_ERR_STRING_NOT_STARTED: 533 errmsg = "String not started expecting ' or \""; 534 break; 535 case 
XML_ERR_ENCODING_NAME: 536 errmsg = "Invalid XML encoding name"; 537 break; 538 case XML_ERR_STANDALONE_VALUE: 539 errmsg = "standalone accepts only 'yes' or 'no'"; 540 break; 541 case XML_ERR_DOCUMENT_EMPTY: 542 errmsg = "Document is empty"; 543 break; 544 case XML_ERR_DOCUMENT_END: 545 errmsg = "Extra content at the end of the document"; 546 break; 547 case XML_ERR_NOT_WELL_BALANCED: 548 errmsg = "chunk is not well balanced"; 549 break; 550 case XML_ERR_EXTRA_CONTENT: 551 errmsg = "extra content at the end of well balanced chunk"; 552 break; 553 case XML_ERR_VERSION_MISSING: 554 errmsg = "Malformed declaration expecting version"; 555 break; 556 case XML_ERR_NAME_TOO_LONG: 557 errmsg = "Name too long use XML_PARSE_HUGE option"; 558 break; 559 #if 0 560 case: 561 errmsg = ""; 562 break; 563 #endif 564 default: 565 errmsg = "Unregistered error message"; 566 } 567 if (ctxt != NULL) 568 ctxt->errNo = error; 569 if (info == NULL) { 570 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 571 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n", 572 errmsg); 573 } else { 574 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 575 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n", 576 errmsg, info); 577 } 578 if (ctxt != NULL) { 579 ctxt->wellFormed = 0; 580 if (ctxt->recovery == 0) 581 ctxt->disableSAX = 1; 582 } 583 } 584 585 /** 586 * xmlFatalErrMsg: 587 * @ctxt: an XML parser context 588 * @error: the error number 589 * @msg: the error message 590 * 591 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 592 */ 593 static void LIBXML_ATTR_FORMAT(3,0) 594 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, 595 const char *msg) 596 { 597 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 598 (ctxt->instate == XML_PARSER_EOF)) 599 return; 600 if (ctxt != NULL) 601 ctxt->errNo = error; 602 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 603 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg); 604 if (ctxt != NULL) { 605 ctxt->wellFormed = 0; 606 if (ctxt->recovery == 0) 607 ctxt->disableSAX = 1; 608 } 609 } 610 611 /** 612 * xmlWarningMsg: 613 * @ctxt: an XML parser context 614 * @error: the error number 615 * @msg: the error message 616 * @str1: extra data 617 * @str2: extra data 618 * 619 * Handle a warning. 620 */ 621 static void LIBXML_ATTR_FORMAT(3,0) 622 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, 623 const char *msg, const xmlChar *str1, const xmlChar *str2) 624 { 625 xmlStructuredErrorFunc schannel = NULL; 626 627 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 628 (ctxt->instate == XML_PARSER_EOF)) 629 return; 630 if ((ctxt != NULL) && (ctxt->sax != NULL) && 631 (ctxt->sax->initialized == XML_SAX2_MAGIC)) 632 schannel = ctxt->sax->serror; 633 if (ctxt != NULL) { 634 #pragma clang diagnostic push 635 #pragma clang diagnostic ignored "-Wformat-nonliteral" 636 __xmlRaiseError(schannel, 637 (ctxt->sax) ? 
ctxt->sax->warning : NULL, 638 ctxt->userData, 639 ctxt, NULL, XML_FROM_PARSER, error, 640 XML_ERR_WARNING, NULL, 0, 641 (const char *) str1, (const char *) str2, NULL, 0, 0, 642 msg, (const char *) str1, (const char *) str2); 643 #pragma clang diagnostic pop 644 } else { 645 #pragma clang diagnostic push 646 #pragma clang diagnostic ignored "-Wformat-nonliteral" 647 __xmlRaiseError(schannel, NULL, NULL, 648 ctxt, NULL, XML_FROM_PARSER, error, 649 XML_ERR_WARNING, NULL, 0, 650 (const char *) str1, (const char *) str2, NULL, 0, 0, 651 msg, (const char *) str1, (const char *) str2); 652 #pragma clang diagnostic pop 653 } 654 } 655 656 /** 657 * xmlValidityError: 658 * @ctxt: an XML parser context 659 * @error: the error number 660 * @msg: the error message 661 * @str1: extra data 662 * 663 * Handle a validity error. 664 */ 665 static void LIBXML_ATTR_FORMAT(3,0) 666 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error, 667 const char *msg, const xmlChar *str1, const xmlChar *str2) 668 { 669 xmlStructuredErrorFunc schannel = NULL; 670 671 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 672 (ctxt->instate == XML_PARSER_EOF)) 673 return; 674 if (ctxt != NULL) { 675 ctxt->errNo = error; 676 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC)) 677 schannel = ctxt->sax->serror; 678 } 679 if (ctxt != NULL) { 680 #pragma clang diagnostic push 681 #pragma clang diagnostic ignored "-Wformat-nonliteral" 682 __xmlRaiseError(schannel, 683 ctxt->vctxt.error, ctxt->vctxt.userData, 684 ctxt, NULL, XML_FROM_DTD, error, 685 XML_ERR_ERROR, NULL, 0, (const char *) str1, 686 (const char *) str2, NULL, 0, 0, 687 msg, (const char *) str1, (const char *) str2); 688 #pragma clang diagnostic pop 689 ctxt->valid = 0; 690 } else { 691 #pragma clang diagnostic push 692 #pragma clang diagnostic ignored "-Wformat-nonliteral" 693 __xmlRaiseError(schannel, NULL, NULL, 694 ctxt, NULL, XML_FROM_DTD, error, 695 XML_ERR_ERROR, NULL, 0, (const char *) str1, 696 (const char *) 
str2, NULL, 0, 0, 697 msg, (const char *) str1, (const char *) str2); 698 #pragma clang diagnostic pop 699 } 700 } 701 702 /** 703 * xmlFatalErrMsgInt: 704 * @ctxt: an XML parser context 705 * @error: the error number 706 * @msg: the error message 707 * @val: an integer value 708 * 709 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 710 */ 711 static void LIBXML_ATTR_FORMAT(3,0) 712 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, 713 const char *msg, int val) 714 { 715 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 716 (ctxt->instate == XML_PARSER_EOF)) 717 return; 718 if (ctxt != NULL) 719 ctxt->errNo = error; 720 #pragma clang diagnostic push 721 #pragma clang diagnostic ignored "-Wformat-nonliteral" 722 __xmlRaiseError(NULL, NULL, NULL, 723 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 724 NULL, 0, NULL, NULL, NULL, val, 0, msg, val); 725 #pragma clang diagnostic pop 726 if (ctxt != NULL) { 727 ctxt->wellFormed = 0; 728 if (ctxt->recovery == 0) 729 ctxt->disableSAX = 1; 730 } 731 } 732 733 /** 734 * xmlFatalErrMsgStrIntStr: 735 * @ctxt: an XML parser context 736 * @error: the error number 737 * @msg: the error message 738 * @str1: an string info 739 * @val: an integer value 740 * @str2: an string info 741 * 742 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 743 */ 744 static void LIBXML_ATTR_FORMAT(3,0) 745 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 746 const char *msg, const xmlChar *str1, int val, 747 const xmlChar *str2) 748 { 749 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 750 (ctxt->instate == XML_PARSER_EOF)) 751 return; 752 if (ctxt != NULL) 753 ctxt->errNo = error; 754 #pragma clang diagnostic push 755 #pragma clang diagnostic ignored "-Wformat-nonliteral" 756 __xmlRaiseError(NULL, NULL, NULL, 757 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 758 NULL, 0, (const char *) str1, (const char *) str2, 759 NULL, val, 0, msg, str1, val, str2); 760 #pragma clang diagnostic pop 761 if (ctxt != NULL) { 762 ctxt->wellFormed = 0; 763 if (ctxt->recovery == 0) 764 ctxt->disableSAX = 1; 765 } 766 } 767 768 /** 769 * xmlFatalErrMsgStr: 770 * @ctxt: an XML parser context 771 * @error: the error number 772 * @msg: the error message 773 * @val: a string value 774 * 775 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 776 */ 777 static void LIBXML_ATTR_FORMAT(3,0) 778 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 779 const char *msg, const xmlChar * val) 780 { 781 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 782 (ctxt->instate == XML_PARSER_EOF)) 783 return; 784 if (ctxt != NULL) 785 ctxt->errNo = error; 786 #pragma clang diagnostic push 787 #pragma clang diagnostic ignored "-Wformat-nonliteral" 788 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, 789 XML_FROM_PARSER, error, XML_ERR_FATAL, 790 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 791 val); 792 #pragma clang diagnostic pop 793 if (ctxt != NULL) { 794 ctxt->wellFormed = 0; 795 if (ctxt->recovery == 0) 796 ctxt->disableSAX = 1; 797 } 798 } 799 800 /** 801 * xmlErrMsgStr: 802 * @ctxt: an XML parser context 803 * @error: the error number 804 * @msg: the error message 805 * @val: a string value 806 * 807 * Handle a non fatal parser error 808 */ 809 static void LIBXML_ATTR_FORMAT(3,0) 810 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 811 const char *msg, const xmlChar * val) 812 { 813 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 814 (ctxt->instate == XML_PARSER_EOF)) 815 return; 816 if (ctxt != NULL) 817 ctxt->errNo = error; 818 #pragma clang diagnostic push 819 #pragma clang diagnostic ignored "-Wformat-nonliteral" 820 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, 821 XML_FROM_PARSER, error, XML_ERR_ERROR, 822 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 823 val); 824 #pragma clang diagnostic pop 825 } 826 827 /** 828 * xmlNsErr: 829 * @ctxt: an XML parser context 830 * @error: the error number 831 * @msg: the message 832 * @info1: extra information string 833 * @info2: extra information string 834 * 835 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 836 */ 837 static void LIBXML_ATTR_FORMAT(3,0) 838 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 839 const char *msg, 840 const xmlChar * info1, const xmlChar * info2, 841 const xmlChar * info3) 842 { 843 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 844 (ctxt->instate == XML_PARSER_EOF)) 845 return; 846 if (ctxt != NULL) 847 ctxt->errNo = error; 848 #pragma clang diagnostic push 849 #pragma clang diagnostic ignored "-Wformat-nonliteral" 850 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, 851 XML_ERR_ERROR, NULL, 0, (const char *) info1, 852 (const char *) info2, (const char *) info3, 0, 0, msg, 853 info1, info2, info3); 854 #pragma clang diagnostic pop 855 if (ctxt != NULL) 856 ctxt->nsWellFormed = 0; 857 } 858 859 /** 860 * xmlNsWarn 861 * @ctxt: an XML parser context 862 * @error: the error number 863 * @msg: the message 864 * @info1: extra information string 865 * @info2: extra information string 866 * 867 * Handle a namespace warning error 868 */ 869 static void LIBXML_ATTR_FORMAT(3,0) 870 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error, 871 const char *msg, 872 const xmlChar * info1, const xmlChar * info2, 873 const xmlChar * info3) 874 { 875 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 876 (ctxt->instate == XML_PARSER_EOF)) 877 return; 878 #pragma clang diagnostic push 879 #pragma clang diagnostic ignored "-Wformat-nonliteral" 880 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, 881 XML_ERR_WARNING, NULL, 0, (const char *) info1, 882 (const char *) info2, (const char *) info3, 0, 0, msg, 883 info1, info2, info3); 884 #pragma clang diagnostic pop 885 } 886 887 /************************************************************************ 888 * * 889 * Library wide options * 890 * * 891 ************************************************************************/ 892 893 /** 894 * xmlHasFeature: 895 * @feature: the feature to be examined 896 * 897 * Examines if 
the library has been compiled with a given feature. 898 * 899 * Returns a non-zero value if the feature exist, otherwise zero. 900 * Returns zero (0) if the feature does not exist or an unknown 901 * unknown feature is requested, non-zero otherwise. 902 */ 903 int 904 xmlHasFeature(xmlFeature feature) 905 { 906 switch (feature) { 907 case XML_WITH_THREAD: 908 #ifdef LIBXML_THREAD_ENABLED 909 return(1); 910 #else 911 return(0); 912 #endif 913 case XML_WITH_TREE: 914 #ifdef LIBXML_TREE_ENABLED 915 return(1); 916 #else 917 return(0); 918 #endif 919 case XML_WITH_OUTPUT: 920 #ifdef LIBXML_OUTPUT_ENABLED 921 return(1); 922 #else 923 return(0); 924 #endif 925 case XML_WITH_PUSH: 926 #ifdef LIBXML_PUSH_ENABLED 927 return(1); 928 #else 929 return(0); 930 #endif 931 case XML_WITH_READER: 932 #ifdef LIBXML_READER_ENABLED 933 return(1); 934 #else 935 return(0); 936 #endif 937 case XML_WITH_PATTERN: 938 #ifdef LIBXML_PATTERN_ENABLED 939 return(1); 940 #else 941 return(0); 942 #endif 943 case XML_WITH_WRITER: 944 #ifdef LIBXML_WRITER_ENABLED 945 return(1); 946 #else 947 return(0); 948 #endif 949 case XML_WITH_SAX1: 950 #ifdef LIBXML_SAX1_ENABLED 951 return(1); 952 #else 953 return(0); 954 #endif 955 case XML_WITH_FTP: 956 #ifdef LIBXML_FTP_ENABLED 957 return(1); 958 #else 959 return(0); 960 #endif 961 case XML_WITH_HTTP: 962 #ifdef LIBXML_HTTP_ENABLED 963 return(1); 964 #else 965 return(0); 966 #endif 967 case XML_WITH_VALID: 968 #ifdef LIBXML_VALID_ENABLED 969 return(1); 970 #else 971 return(0); 972 #endif 973 case XML_WITH_HTML: 974 #ifdef LIBXML_HTML_ENABLED 975 return(1); 976 #else 977 return(0); 978 #endif 979 case XML_WITH_LEGACY: 980 #ifdef LIBXML_LEGACY_ENABLED 981 return(1); 982 #else 983 return(0); 984 #endif 985 case XML_WITH_C14N: 986 #ifdef LIBXML_C14N_ENABLED 987 return(1); 988 #else 989 return(0); 990 #endif 991 case XML_WITH_CATALOG: 992 #ifdef LIBXML_CATALOG_ENABLED 993 return(1); 994 #else 995 return(0); 996 #endif 997 case XML_WITH_XPATH: 998 #ifdef 
LIBXML_XPATH_ENABLED 999 return(1); 1000 #else 1001 return(0); 1002 #endif 1003 case XML_WITH_XPTR: 1004 #ifdef LIBXML_XPTR_ENABLED 1005 return(1); 1006 #else 1007 return(0); 1008 #endif 1009 case XML_WITH_XINCLUDE: 1010 #ifdef LIBXML_XINCLUDE_ENABLED 1011 return(1); 1012 #else 1013 return(0); 1014 #endif 1015 case XML_WITH_ICONV: 1016 #ifdef LIBXML_ICONV_ENABLED 1017 return(1); 1018 #else 1019 return(0); 1020 #endif 1021 case XML_WITH_ISO8859X: 1022 #ifdef LIBXML_ISO8859X_ENABLED 1023 return(1); 1024 #else 1025 return(0); 1026 #endif 1027 case XML_WITH_UNICODE: 1028 #ifdef LIBXML_UNICODE_ENABLED 1029 return(1); 1030 #else 1031 return(0); 1032 #endif 1033 case XML_WITH_REGEXP: 1034 #ifdef LIBXML_REGEXP_ENABLED 1035 return(1); 1036 #else 1037 return(0); 1038 #endif 1039 case XML_WITH_AUTOMATA: 1040 #ifdef LIBXML_AUTOMATA_ENABLED 1041 return(1); 1042 #else 1043 return(0); 1044 #endif 1045 case XML_WITH_EXPR: 1046 #ifdef LIBXML_EXPR_ENABLED 1047 return(1); 1048 #else 1049 return(0); 1050 #endif 1051 case XML_WITH_SCHEMAS: 1052 #ifdef LIBXML_SCHEMAS_ENABLED 1053 return(1); 1054 #else 1055 return(0); 1056 #endif 1057 case XML_WITH_SCHEMATRON: 1058 #ifdef LIBXML_SCHEMATRON_ENABLED 1059 return(1); 1060 #else 1061 return(0); 1062 #endif 1063 case XML_WITH_MODULES: 1064 #ifdef LIBXML_MODULES_ENABLED 1065 return(1); 1066 #else 1067 return(0); 1068 #endif 1069 case XML_WITH_DEBUG: 1070 #ifdef LIBXML_DEBUG_ENABLED 1071 return(1); 1072 #else 1073 return(0); 1074 #endif 1075 case XML_WITH_DEBUG_MEM: 1076 #ifdef DEBUG_MEMORY_LOCATION 1077 return(1); 1078 #else 1079 return(0); 1080 #endif 1081 case XML_WITH_DEBUG_RUN: 1082 #ifdef LIBXML_DEBUG_RUNTIME 1083 return(1); 1084 #else 1085 return(0); 1086 #endif 1087 case XML_WITH_ZLIB: 1088 #ifdef LIBXML_ZLIB_ENABLED 1089 return(1); 1090 #else 1091 return(0); 1092 #endif 1093 case XML_WITH_LZMA: 1094 #ifdef LIBXML_LZMA_ENABLED 1095 return(1); 1096 #else 1097 return(0); 1098 #endif 1099 case XML_WITH_ICU: 1100 #ifdef LIBXML_ICU_ENABLED 
1101 return(1); 1102 #else 1103 return(0); 1104 #endif 1105 default: 1106 break; 1107 } 1108 return(0); 1109 } 1110 1111 /************************************************************************ 1112 * * 1113 * SAX2 defaulted attributes handling * 1114 * * 1115 ************************************************************************/ 1116 1117 /** 1118 * xmlDetectSAX2: 1119 * @ctxt: an XML parser context 1120 * 1121 * Do the SAX2 detection and specific intialization 1122 */ 1123 static void 1124 xmlDetectSAX2(xmlParserCtxtPtr ctxt) { 1125 if (ctxt == NULL) return; 1126 #ifdef LIBXML_SAX1_ENABLED 1127 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) && 1128 ((ctxt->sax->startElementNs != NULL) || 1129 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1; 1130 #else 1131 ctxt->sax2 = 1; 1132 #endif /* LIBXML_SAX1_ENABLED */ 1133 1134 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 1135 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 1136 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 1137 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) || 1138 (ctxt->str_xml_ns == NULL)) { 1139 xmlErrMemory(ctxt, NULL); 1140 } 1141 } 1142 1143 typedef struct _xmlDefAttrs xmlDefAttrs; 1144 typedef xmlDefAttrs *xmlDefAttrsPtr; 1145 struct _xmlDefAttrs { 1146 int nbAttrs; /* number of defaulted attributes on that element */ 1147 int maxAttrs; /* the size of the array */ 1148 const xmlChar *values[5]; /* array of localname/prefix/values/external */ 1149 }; 1150 1151 /** 1152 * xmlAttrNormalizeSpace: 1153 * @src: the source string 1154 * @dst: the target string 1155 * 1156 * Normalize the space in non CDATA attribute values: 1157 * If the attribute type is not CDATA, then the XML processor MUST further 1158 * process the normalized attribute value by discarding any leading and 1159 * trailing space (#x20) characters, and by replacing sequences of space 1160 * (#x20) characters by a single space (#x20) character. 
1161 * Note that the size of dst need to be at least src, and if one doesn't need 1162 * to preserve dst (and it doesn't come from a dictionary or read-only) then 1163 * passing src as dst is just fine. 1164 * 1165 * Returns a pointer to the normalized value (dst) or NULL if no conversion 1166 * is needed. 1167 */ 1168 static xmlChar * 1169 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst) 1170 { 1171 if ((src == NULL) || (dst == NULL)) 1172 return(NULL); 1173 1174 while (*src == 0x20) src++; 1175 while (*src != 0) { 1176 if (*src == 0x20) { 1177 while (*src == 0x20) src++; 1178 if (*src != 0) 1179 *dst++ = 0x20; 1180 } else { 1181 *dst++ = *src++; 1182 } 1183 } 1184 *dst = 0; 1185 if (dst == src) 1186 return(NULL); 1187 return(dst); 1188 } 1189 1190 /** 1191 * xmlAttrNormalizeSpace2: 1192 * @src: the source string 1193 * 1194 * Normalize the space in non CDATA attribute values, a slightly more complex 1195 * front end to avoid allocation problems when running on attribute values 1196 * coming from the input. 1197 * 1198 * Returns a pointer to the normalized value (dst) or NULL if no conversion 1199 * is needed. 
1200 */ 1201 static const xmlChar * 1202 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len) 1203 { 1204 int i; 1205 int remove_head = 0; 1206 int need_realloc = 0; 1207 const xmlChar *cur; 1208 1209 if ((ctxt == NULL) || (src == NULL) || (len == NULL)) 1210 return(NULL); 1211 i = *len; 1212 if (i <= 0) 1213 return(NULL); 1214 1215 cur = src; 1216 while (*cur == 0x20) { 1217 cur++; 1218 remove_head++; 1219 } 1220 while (*cur != 0) { 1221 if (*cur == 0x20) { 1222 cur++; 1223 if ((*cur == 0x20) || (*cur == 0)) { 1224 need_realloc = 1; 1225 break; 1226 } 1227 } else 1228 cur++; 1229 } 1230 if (need_realloc) { 1231 xmlChar *ret; 1232 1233 ret = xmlStrndup(src + remove_head, i - remove_head + 1); 1234 if (ret == NULL) { 1235 xmlErrMemory(ctxt, NULL); 1236 return(NULL); 1237 } 1238 xmlAttrNormalizeSpace(ret, ret); 1239 *len = (int) strlen((const char *)ret); 1240 return(ret); 1241 } else if (remove_head) { 1242 *len -= remove_head; 1243 memmove(src, src + remove_head, 1 + *len); 1244 return(src); 1245 } 1246 return(NULL); 1247 } 1248 1249 /** 1250 * xmlAddDefAttrs: 1251 * @ctxt: an XML parser context 1252 * @fullname: the element fullname 1253 * @fullattr: the attribute fullname 1254 * @value: the attribute value 1255 * 1256 * Add a defaulted attribute for an element 1257 */ 1258 static void 1259 xmlAddDefAttrs(xmlParserCtxtPtr ctxt, 1260 const xmlChar *fullname, 1261 const xmlChar *fullattr, 1262 const xmlChar *value) { 1263 xmlDefAttrsPtr defaults; 1264 int len; 1265 const xmlChar *name; 1266 const xmlChar *prefix; 1267 1268 /* 1269 * Allows to detect attribute redefinitions 1270 */ 1271 if (ctxt->attsSpecial != NULL) { 1272 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) 1273 return; 1274 } 1275 1276 if (ctxt->attsDefault == NULL) { 1277 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict); 1278 if (ctxt->attsDefault == NULL) 1279 goto mem_error; 1280 } 1281 1282 /* 1283 * split the element name into prefix:localname , the string 
found 1284 * are within the DTD and then not associated to namespace names. 1285 */ 1286 name = xmlSplitQName3(fullname, &len); 1287 if (name == NULL) { 1288 name = xmlDictLookup(ctxt->dict, fullname, -1); 1289 prefix = NULL; 1290 } else { 1291 name = xmlDictLookup(ctxt->dict, name, -1); 1292 prefix = xmlDictLookup(ctxt->dict, fullname, len); 1293 } 1294 1295 /* 1296 * make sure there is some storage 1297 */ 1298 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix); 1299 if (defaults == NULL) { 1300 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) + 1301 (4 * 5) * sizeof(const xmlChar *)); 1302 if (defaults == NULL) 1303 goto mem_error; 1304 defaults->nbAttrs = 0; 1305 defaults->maxAttrs = 4; 1306 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, 1307 defaults, NULL) < 0) { 1308 xmlFree(defaults); 1309 goto mem_error; 1310 } 1311 } else if (defaults->nbAttrs >= defaults->maxAttrs) { 1312 xmlDefAttrsPtr temp; 1313 1314 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) + 1315 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *)); 1316 if (temp == NULL) 1317 goto mem_error; 1318 defaults = temp; 1319 defaults->maxAttrs *= 2; 1320 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, 1321 defaults, NULL) < 0) { 1322 xmlFree(defaults); 1323 goto mem_error; 1324 } 1325 } 1326 1327 /* 1328 * Split the element name into prefix:localname , the string found 1329 * are within the DTD and hen not associated to namespace names. 
1330 */ 1331 name = xmlSplitQName3(fullattr, &len); 1332 if (name == NULL) { 1333 name = xmlDictLookup(ctxt->dict, fullattr, -1); 1334 prefix = NULL; 1335 } else { 1336 name = xmlDictLookup(ctxt->dict, name, -1); 1337 prefix = xmlDictLookup(ctxt->dict, fullattr, len); 1338 } 1339 1340 defaults->values[5 * defaults->nbAttrs] = name; 1341 defaults->values[5 * defaults->nbAttrs + 1] = prefix; 1342 /* intern the string and precompute the end */ 1343 len = xmlStrlen(value); 1344 value = xmlDictLookup(ctxt->dict, value, len); 1345 defaults->values[5 * defaults->nbAttrs + 2] = value; 1346 defaults->values[5 * defaults->nbAttrs + 3] = value + len; 1347 if (ctxt->external) 1348 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external"; 1349 else 1350 defaults->values[5 * defaults->nbAttrs + 4] = NULL; 1351 defaults->nbAttrs++; 1352 1353 return; 1354 1355 mem_error: 1356 xmlErrMemory(ctxt, NULL); 1357 return; 1358 } 1359 1360 /** 1361 * xmlAddSpecialAttr: 1362 * @ctxt: an XML parser context 1363 * @fullname: the element fullname 1364 * @fullattr: the attribute fullname 1365 * @type: the attribute type 1366 * 1367 * Register this attribute type 1368 */ 1369 static void 1370 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt, 1371 const xmlChar *fullname, 1372 const xmlChar *fullattr, 1373 int type) 1374 { 1375 if (ctxt->attsSpecial == NULL) { 1376 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict); 1377 if (ctxt->attsSpecial == NULL) 1378 goto mem_error; 1379 } 1380 1381 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) 1382 return; 1383 1384 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr, 1385 (void *) (long) type); 1386 return; 1387 1388 mem_error: 1389 xmlErrMemory(ctxt, NULL); 1390 return; 1391 } 1392 1393 /** 1394 * xmlCleanSpecialAttrCallback: 1395 * 1396 * Removes CDATA attributes from the special attribute table 1397 */ 1398 static void 1399 xmlCleanSpecialAttrCallback(void *payload, void *data, 1400 const xmlChar *fullname, const xmlChar 
*fullattr, 1401 const xmlChar *unused ATTRIBUTE_UNUSED) { 1402 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data; 1403 1404 if (((long) payload) == XML_ATTRIBUTE_CDATA) { 1405 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL); 1406 } 1407 } 1408 1409 /** 1410 * xmlCleanSpecialAttr: 1411 * @ctxt: an XML parser context 1412 * 1413 * Trim the list of attributes defined to remove all those of type 1414 * CDATA as they are not special. This call should be done when finishing 1415 * to parse the DTD and before starting to parse the document root. 1416 */ 1417 static void 1418 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt) 1419 { 1420 if (ctxt->attsSpecial == NULL) 1421 return; 1422 1423 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt); 1424 1425 if (xmlHashSize(ctxt->attsSpecial) == 0) { 1426 xmlHashFree(ctxt->attsSpecial, NULL); 1427 ctxt->attsSpecial = NULL; 1428 } 1429 return; 1430 } 1431 1432 /** 1433 * xmlCheckLanguageID: 1434 * @lang: pointer to the string value 1435 * 1436 * Checks that the value conforms to the LanguageID production: 1437 * 1438 * NOTE: this is somewhat deprecated, those productions were removed from 1439 * the XML Second edition. 
*
 * [33] LanguageID ::= Langcode ('-' Subcode)*
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
 * [38] Subcode ::= ([a-z] | [A-Z])+
 *
 * The current REC references the successors of RFC 1766, currently 5646
 *
 * http://www.rfc-editor.org/rfc/rfc5646.txt
 * langtag       = language
 *                 ["-" script]
 *                 ["-" region]
 *                 *("-" variant)
 *                 *("-" extension)
 *                 ["-" privateuse]
 * language      = 2*3ALPHA            ; shortest ISO 639 code
 *                 ["-" extlang]       ; sometimes followed by
 *                                     ; extended language subtags
 *               / 4ALPHA              ; or reserved for future use
 *               / 5*8ALPHA            ; or registered language subtag
 *
 * extlang       = 3ALPHA              ; selected ISO 639 codes
 *                 *2("-" 3ALPHA)      ; permanently reserved
 *
 * script        = 4ALPHA              ; ISO 15924 code
 *
 * region        = 2ALPHA              ; ISO 3166-1 code
 *               / 3DIGIT              ; UN M.49 code
 *
 * variant       = 5*8alphanum         ; registered variants
 *               / (DIGIT 3alphanum)
 *
 * extension     = singleton 1*("-" (2*8alphanum))
 *
 *                                     ; Single alphanumerics
 *                                     ; "x" reserved for private use
 * singleton     = DIGIT               ; 0 - 9
 *               / %x41-57             ; A - W
 *               / %x59-5A             ; Y - Z
 *               / %x61-77             ; a - w
 *               / %x79-7A             ; y - z
 *
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
 * The parser below doesn't try to cope with extension or privateuse
 * that could be added but that's not interoperable anyway
 *
 * The check is a hand-written scanner: @cur marks the start of the subtag
 * being examined and @nxt its end; the labels below are entered once a
 * subtag of the corresponding kind has been recognized.
 *
 * Returns 1 if correct 0 otherwise
 **/
int
xmlCheckLanguageID(const xmlChar * lang)
{
    const xmlChar *cur = lang, *nxt;

    if (cur == NULL)
        return (0);
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
        ((cur[0] == 'I') && (cur[1] == '-')) ||
        ((cur[0] == 'x') && (cur[1] == '-')) ||
        ((cur[0] == 'X') && (cur[1] == '-'))) {
        /*
         * Still allow IANA code and user code which were coming
         * from the previous version of the XML-1.0 specification
         * it's deprecated but we should not fail
         */
        cur += 2;
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
            cur++;
        /* valid only if letters run up to the end of the string */
        return(cur[0] == 0);
    }
    nxt = cur;
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
           nxt++;
    if (nxt - cur >= 4) {
        /*
         * Reserved
         */
        /* 4 to 8 letters are accepted only as the whole value */
        if ((nxt - cur > 8) || (nxt[0] != 0))
            return(0);
        return(1);
    }
    if (nxt - cur < 2)
        return(0);
    /* we got an ISO 639 code */
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    nxt++;
    cur = nxt;
    /* now we can have extlang or script or region or variant */
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
        goto region_m49;

    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
           nxt++;
    /* the subtag length alone decides which kind was read */
    if (nxt - cur == 4)
        goto script;
    if (nxt - cur == 2)
        goto region;
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
        goto variant;
    if (nxt - cur != 3)
        return(0);
    /* we parsed an extlang */
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    nxt++;
    cur = nxt;
    /* now we can have script or region or variant */
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
        goto region_m49;

    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
           nxt++;
    if (nxt - cur == 2)
        goto region;
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
        goto variant;
    if (nxt - cur != 4)
        return(0);
    /* we parsed a script */
script:
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    nxt++;
    cur = nxt;
    /* now we can have region or variant */
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
        goto region_m49;

    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
           nxt++;

    if ((nxt - cur >= 5) && (nxt - cur <= 8))
        goto variant;
    if (nxt - cur != 2)
        return(0);
    /* we parsed a region */
region:
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    nxt++;
    cur = nxt;
    /* now we can just have a variant */
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
           nxt++;

    if ((nxt - cur < 5) || (nxt - cur > 8))
        return(0);

    /* we parsed a variant */
variant:
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);
    /* extensions and private use subtags not checked */
    return (1);

region_m49:
    /* entered on a leading digit: two more digits make a UN M.49 region */
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
        nxt += 3;
        goto region;
    }
    return(0);
}

/************************************************************************
 *                                                                      *
 *              Parser stacks related functions and macros              *
 *                                                                      *
 ************************************************************************/

static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
                                            const xmlChar ** str);

#ifdef SAX2
/**
 * nsPush:
 * @ctxt:  an XML parser context
 * @prefix:  the namespace prefix or NULL
 * @URL:  the namespace name
 *
 * Pushes a new parser namespace on top of the ns stack
 *
 * Returns -1 in case of error, -2 if the namespace should be discarded
 *         and the index in the stack otherwise.
1646 */ 1647 static int 1648 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL) 1649 { 1650 if (ctxt->options & XML_PARSE_NSCLEAN) { 1651 int i; 1652 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) { 1653 if (ctxt->nsTab[i] == prefix) { 1654 /* in scope */ 1655 if (ctxt->nsTab[i + 1] == URL) 1656 return(-2); 1657 /* out of scope keep it */ 1658 break; 1659 } 1660 } 1661 } 1662 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) { 1663 ctxt->nsMax = 10; 1664 ctxt->nsNr = 0; 1665 ctxt->nsTab = (const xmlChar **) 1666 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *)); 1667 if (ctxt->nsTab == NULL) { 1668 xmlErrMemory(ctxt, NULL); 1669 ctxt->nsMax = 0; 1670 return (-1); 1671 } 1672 } else if (ctxt->nsNr >= ctxt->nsMax) { 1673 const xmlChar ** tmp; 1674 ctxt->nsMax *= 2; 1675 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab, 1676 ctxt->nsMax * sizeof(ctxt->nsTab[0])); 1677 if (tmp == NULL) { 1678 xmlErrMemory(ctxt, NULL); 1679 ctxt->nsMax /= 2; 1680 return (-1); 1681 } 1682 ctxt->nsTab = tmp; 1683 } 1684 ctxt->nsTab[ctxt->nsNr++] = prefix; 1685 ctxt->nsTab[ctxt->nsNr++] = URL; 1686 return (ctxt->nsNr); 1687 } 1688 /** 1689 * nsPop: 1690 * @ctxt: an XML parser context 1691 * @nr: the number to pop 1692 * 1693 * Pops the top @nr parser prefix/namespace from the ns stack 1694 * 1695 * Returns the number of namespaces removed 1696 */ 1697 static int 1698 nsPop(xmlParserCtxtPtr ctxt, int nr) 1699 { 1700 int i; 1701 1702 if (ctxt->nsTab == NULL) return(0); 1703 if (ctxt->nsNr < nr) { 1704 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr); 1705 nr = ctxt->nsNr; 1706 } 1707 if (ctxt->nsNr <= 0) 1708 return (0); 1709 1710 for (i = 0;i < nr;i++) { 1711 ctxt->nsNr--; 1712 ctxt->nsTab[ctxt->nsNr] = NULL; 1713 } 1714 return(nr); 1715 } 1716 #endif 1717 1718 static int 1719 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) { 1720 const xmlChar **atts; 1721 int *attallocs; 1722 int maxatts; 1723 1724 if (ctxt->atts == NULL) { 1725 maxatts = 55; /* allow for 
10 attrs by default */ 1726 atts = (const xmlChar **) 1727 xmlMalloc(maxatts * sizeof(xmlChar *)); 1728 if (atts == NULL) goto mem_error; 1729 ctxt->atts = atts; 1730 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int)); 1731 if (attallocs == NULL) goto mem_error; 1732 ctxt->attallocs = attallocs; 1733 ctxt->maxatts = maxatts; 1734 } else if (nr + 5 > ctxt->maxatts) { 1735 maxatts = (nr + 5) * 2; 1736 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts, 1737 maxatts * sizeof(const xmlChar *)); 1738 if (atts == NULL) goto mem_error; 1739 ctxt->atts = atts; 1740 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs, 1741 (maxatts / 5) * sizeof(int)); 1742 if (attallocs == NULL) goto mem_error; 1743 ctxt->attallocs = attallocs; 1744 ctxt->maxatts = maxatts; 1745 } 1746 return(ctxt->maxatts); 1747 mem_error: 1748 xmlErrMemory(ctxt, NULL); 1749 return(-1); 1750 } 1751 1752 /** 1753 * inputPush: 1754 * @ctxt: an XML parser context 1755 * @value: the parser input 1756 * 1757 * Pushes a new parser input on top of the input stack 1758 * 1759 * Returns -1 in case of error, the index in the stack otherwise 1760 */ 1761 int 1762 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value) 1763 { 1764 if ((ctxt == NULL) || (value == NULL)) 1765 return(-1); 1766 if (ctxt->inputNr >= ctxt->inputMax) { 1767 ctxt->inputMax *= 2; 1768 ctxt->inputTab = 1769 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab, 1770 ctxt->inputMax * 1771 sizeof(ctxt->inputTab[0])); 1772 if (ctxt->inputTab == NULL) { 1773 xmlErrMemory(ctxt, NULL); 1774 xmlFreeInputStream(value); 1775 ctxt->inputMax /= 2; 1776 value = NULL; 1777 return (-1); 1778 } 1779 } 1780 ctxt->inputTab[ctxt->inputNr] = value; 1781 ctxt->input = value; 1782 return (ctxt->inputNr++); 1783 } 1784 /** 1785 * inputPop: 1786 * @ctxt: an XML parser context 1787 * 1788 * Pops the top parser input from the input stack 1789 * 1790 * Returns the input just removed 1791 */ 1792 xmlParserInputPtr 1793 inputPop(xmlParserCtxtPtr ctxt) 1794 
{ 1795 xmlParserInputPtr ret; 1796 1797 if (ctxt == NULL) 1798 return(NULL); 1799 if (ctxt->inputNr <= 0) 1800 return (NULL); 1801 ctxt->inputNr--; 1802 if (ctxt->inputNr > 0) 1803 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1]; 1804 else 1805 ctxt->input = NULL; 1806 ret = ctxt->inputTab[ctxt->inputNr]; 1807 ctxt->inputTab[ctxt->inputNr] = NULL; 1808 return (ret); 1809 } 1810 /** 1811 * nodePush: 1812 * @ctxt: an XML parser context 1813 * @value: the element node 1814 * 1815 * Pushes a new element node on top of the node stack 1816 * 1817 * Returns -1 in case of error, the index in the stack otherwise 1818 */ 1819 int 1820 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value) 1821 { 1822 if (ctxt == NULL) return(0); 1823 if (ctxt->nodeNr >= ctxt->nodeMax) { 1824 xmlNodePtr *tmp; 1825 1826 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab, 1827 ctxt->nodeMax * 2 * 1828 sizeof(ctxt->nodeTab[0])); 1829 if (tmp == NULL) { 1830 xmlErrMemory(ctxt, NULL); 1831 return (-1); 1832 } 1833 ctxt->nodeTab = tmp; 1834 ctxt->nodeMax *= 2; 1835 } 1836 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) && 1837 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 1838 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, 1839 "Excessive depth in document: %d use XML_PARSE_HUGE option\n", 1840 xmlParserMaxDepth); 1841 xmlHaltParser(ctxt); 1842 return(-1); 1843 } 1844 ctxt->nodeTab[ctxt->nodeNr] = value; 1845 ctxt->node = value; 1846 return (ctxt->nodeNr++); 1847 } 1848 1849 /** 1850 * nodePop: 1851 * @ctxt: an XML parser context 1852 * 1853 * Pops the top element node from the node stack 1854 * 1855 * Returns the node just removed 1856 */ 1857 xmlNodePtr 1858 nodePop(xmlParserCtxtPtr ctxt) 1859 { 1860 xmlNodePtr ret; 1861 1862 if (ctxt == NULL) return(NULL); 1863 if (ctxt->nodeNr <= 0) 1864 return (NULL); 1865 ctxt->nodeNr--; 1866 if (ctxt->nodeNr > 0) 1867 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1]; 1868 else 1869 ctxt->node = NULL; 1870 ret = ctxt->nodeTab[ctxt->nodeNr]; 1871 
ctxt->nodeTab[ctxt->nodeNr] = NULL; 1872 return (ret); 1873 } 1874 1875 #ifdef LIBXML_PUSH_ENABLED 1876 /** 1877 * nameNsPush: 1878 * @ctxt: an XML parser context 1879 * @value: the element name 1880 * @prefix: the element prefix 1881 * @URI: the element namespace name 1882 * 1883 * Pushes a new element name/prefix/URL on top of the name stack 1884 * 1885 * Returns -1 in case of error, the index in the stack otherwise 1886 */ 1887 static int 1888 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value, 1889 const xmlChar *prefix, const xmlChar *URI, int nsNr) 1890 { 1891 if (ctxt->nameNr >= ctxt->nameMax) { 1892 const xmlChar * *tmp; 1893 void **tmp2; 1894 ctxt->nameMax *= 2; 1895 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 1896 ctxt->nameMax * 1897 sizeof(ctxt->nameTab[0])); 1898 if (tmp == NULL) { 1899 ctxt->nameMax /= 2; 1900 goto mem_error; 1901 } 1902 ctxt->nameTab = tmp; 1903 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab, 1904 ctxt->nameMax * 3 * 1905 sizeof(ctxt->pushTab[0])); 1906 if (tmp2 == NULL) { 1907 ctxt->nameMax /= 2; 1908 goto mem_error; 1909 } 1910 ctxt->pushTab = tmp2; 1911 } 1912 ctxt->nameTab[ctxt->nameNr] = value; 1913 ctxt->name = value; 1914 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix; 1915 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI; 1916 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr; 1917 return (ctxt->nameNr++); 1918 mem_error: 1919 xmlErrMemory(ctxt, NULL); 1920 return (-1); 1921 } 1922 /** 1923 * nameNsPop: 1924 * @ctxt: an XML parser context 1925 * 1926 * Pops the top element/prefix/URI name from the name stack 1927 * 1928 * Returns the name just removed 1929 */ 1930 static const xmlChar * 1931 nameNsPop(xmlParserCtxtPtr ctxt) 1932 { 1933 const xmlChar *ret; 1934 1935 if (ctxt->nameNr <= 0) 1936 return (NULL); 1937 ctxt->nameNr--; 1938 if (ctxt->nameNr > 0) 1939 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 1940 else 1941 ctxt->name = NULL; 1942 ret = ctxt->nameTab[ctxt->nameNr]; 1943 
ctxt->nameTab[ctxt->nameNr] = NULL; 1944 return (ret); 1945 } 1946 #endif /* LIBXML_PUSH_ENABLED */ 1947 1948 /** 1949 * namePush: 1950 * @ctxt: an XML parser context 1951 * @value: the element name 1952 * 1953 * Pushes a new element name on top of the name stack 1954 * 1955 * Returns -1 in case of error, the index in the stack otherwise 1956 */ 1957 int 1958 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value) 1959 { 1960 if (ctxt == NULL) return (-1); 1961 1962 if (ctxt->nameNr >= ctxt->nameMax) { 1963 const xmlChar * *tmp; 1964 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 1965 ctxt->nameMax * 2 * 1966 sizeof(ctxt->nameTab[0])); 1967 if (tmp == NULL) { 1968 goto mem_error; 1969 } 1970 ctxt->nameTab = tmp; 1971 ctxt->nameMax *= 2; 1972 } 1973 ctxt->nameTab[ctxt->nameNr] = value; 1974 ctxt->name = value; 1975 return (ctxt->nameNr++); 1976 mem_error: 1977 xmlErrMemory(ctxt, NULL); 1978 return (-1); 1979 } 1980 /** 1981 * namePop: 1982 * @ctxt: an XML parser context 1983 * 1984 * Pops the top element name from the name stack 1985 * 1986 * Returns the name just removed 1987 */ 1988 const xmlChar * 1989 namePop(xmlParserCtxtPtr ctxt) 1990 { 1991 const xmlChar *ret; 1992 1993 if ((ctxt == NULL) || (ctxt->nameNr <= 0)) 1994 return (NULL); 1995 ctxt->nameNr--; 1996 if (ctxt->nameNr > 0) 1997 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 1998 else 1999 ctxt->name = NULL; 2000 ret = ctxt->nameTab[ctxt->nameNr]; 2001 ctxt->nameTab[ctxt->nameNr] = NULL; 2002 return (ret); 2003 } 2004 2005 static int spacePush(xmlParserCtxtPtr ctxt, int val) { 2006 if (ctxt->spaceNr >= ctxt->spaceMax) { 2007 int *tmp; 2008 2009 ctxt->spaceMax *= 2; 2010 tmp = (int *) xmlRealloc(ctxt->spaceTab, 2011 ctxt->spaceMax * sizeof(ctxt->spaceTab[0])); 2012 if (tmp == NULL) { 2013 xmlErrMemory(ctxt, NULL); 2014 ctxt->spaceMax /=2; 2015 return(-1); 2016 } 2017 ctxt->spaceTab = tmp; 2018 } 2019 ctxt->spaceTab[ctxt->spaceNr] = val; 2020 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr]; 
2021 return(ctxt->spaceNr++); 2022 } 2023 2024 static int spacePop(xmlParserCtxtPtr ctxt) { 2025 int ret; 2026 if (ctxt->spaceNr <= 0) return(0); 2027 ctxt->spaceNr--; 2028 if (ctxt->spaceNr > 0) 2029 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1]; 2030 else 2031 ctxt->space = &ctxt->spaceTab[0]; 2032 ret = ctxt->spaceTab[ctxt->spaceNr]; 2033 ctxt->spaceTab[ctxt->spaceNr] = -1; 2034 return(ret); 2035 } 2036 2037 /* 2038 * Macros for accessing the content. Those should be used only by the parser, 2039 * and not exported. 2040 * 2041 * Dirty macros, i.e. one often need to make assumption on the context to 2042 * use them 2043 * 2044 * CUR_PTR return the current pointer to the xmlChar to be parsed. 2045 * To be used with extreme caution since operations consuming 2046 * characters may move the input buffer to a different location ! 2047 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled 2048 * This should be used internally by the parser 2049 * only to compare to ASCII values otherwise it would break when 2050 * running with UTF-8 encoding. 2051 * RAW same as CUR but in the input buffer, bypass any token 2052 * extraction that may have been done 2053 * NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only 2054 * to compare on ASCII based substring. 2055 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined 2056 * strings without newlines within the parser. 2057 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII 2058 * defined char within the parser. 2059 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding 2060 * 2061 * NEXT Skip to the next character, this does the proper decoding 2062 * in UTF-8 mode. It also pop-up unfinished entities on the fly. 2063 * NEXTL(l) Skip the current unicode character of l xmlChars long. 2064 * CUR_CHAR(l) returns the current unicode character (int), set l 2065 * to the number of xmlChars used for the encoding [0-5]. 
*   CUR_SCHAR same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 *
 * All of these expand to code that uses a variable named ctxt, which must
 * be in scope at the point of use.
 */

#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/*
 * CMPn(s, c1..cn): true when the first n bytes at s are c1..cn.
 * The arguments are not parenthesized in the expansion, so only plain
 * identifiers and character constants should be passed.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \
    ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) s)[ 8 ] == c9 )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) s)[ 9 ] == c10 )

/*
 * SKIP(val): advance by val bytes, updating the column and the character
 * count but not the line number; then handle a '%' found at the new
 * position and pop the input if it is exhausted and cannot be grown.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SKIPL(val): same as SKIP but walks byte by byte so that newlines
 * update the line and column counters.
 * NOTE(review): val is used unparenthesized in the loop condition.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<val; skipl++) {					\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SHRINK: outside progressive mode, discard consumed input once more than
 * 2 * INPUT_CHUNK bytes lie behind the cursor and fewer than that remain
 * ahead. The expansion is a bare if statement that already ends with ';'.
 */
#define SHRINK if ((ctxt->progressive == 0) &&				\
		   (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
		   (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
	xmlSHRINK (ctxt);

/*
 * xmlSHRINK: shrink the current input buffer, then pop the input if it is
 * exhausted and cannot be grown any further.
 */
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
    xmlParserInputShrink(ctxt->input);
    if ((*ctxt->input->cur == 0) &&
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
	    xmlPopInput(ctxt);
  }

/*
 * GROW: outside progressive mode, read more input when fewer than
 * INPUT_CHUNK bytes remain ahead of the cursor. The expansion is a bare
 * if statement that already ends with ';'.
 */
#define GROW if ((ctxt->progressive == 0) &&				\
		 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK))	\
	xmlGROW (ctxt);

/*
 * xmlGROW: grow the current input buffer.
 *
 * Unless XML_PARSE_HUGE is set, the parser is stopped with an internal
 * error when the distance from the cursor to either end of the buffer
 * exceeds XML_MAX_LOOKUP_LIMIT; the limit is only enforced for inputs
 * backed by a buffer whose read callback is not xmlNop. The cursor is
 * checked against the buffer bounds after growing, and an exhausted
 * input that cannot be grown is popped.
 */
static void xmlGROW (xmlParserCtxtPtr ctxt) {
    unsigned long curEnd = ctxt->input->end - ctxt->input->cur;
    unsigned long curBase = ctxt->input->cur - ctxt->input->base;

    if (((curEnd > (unsigned long) XML_MAX_LOOKUP_LIMIT) ||
         (curBase > (unsigned long) XML_MAX_LOOKUP_LIMIT)) &&
         ((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) &&
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
        xmlHaltParser(ctxt);
	return;
    }
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
    if ((ctxt->input->cur > ctxt->input->end) ||
        (ctxt->input->cur < ctxt->input->base)) {
        /*
         * NOTE(review): this branch halts the parser before reporting,
         * while the branch above reports before halting - confirm that
         * the error is still delivered once the parser is halted.
         */
        xmlHaltParser(ctxt);
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
	return;
    }
    if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
	    xmlPopInput(ctxt);
}

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance by exactly one byte and refill the buffer when its end
 * is reached. Expands to a brace block, not a do/while statement.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): skip the current character of l bytes, tracking line and
 * column; nothing happens if that would move past the end of the buffer.
 * NOTE(review): l is used unparenthesized in the expansion.
 */
#define NEXTL(l) do {							\
    if (ctxt->input->cur + l <= ctxt->input->end) {			\
        if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
        } else ctxt->input->col++;					\
        ctxt->input->cur += l;					\
    }									\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i, advancing
 * i; a one-byte character is stored directly. Expands to a bare if/else
 * without a trailing ';', so it must not be used as the unbraced body of
 * another if.
 */
#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = (xmlChar) v;					\
    else i += xmlCopyCharMultiByte(&b[i],v)

/**
 * xmlSkipBlankChars:
 * @ctxt:  the XML parser context
 *
 * skip all blanks character found at that point in the input streams.
 * It pops up finished entities in the process if allowable at that point.
 *
 * Returns the number of space chars skipped
 */

int
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
    int res = 0;

    /*
     * It's Okay to use CUR/NEXT here since all the blanks are on
     * the ASCII range.
     */
    if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
	const xmlChar *cur;
	/*
	 * if we are in the document content, go really fast
	 */
	cur = ctxt->input->cur;
	while (IS_BLANK_CH(*cur)) {
	    if (*cur == '\n') {
		ctxt->input->line++; ctxt->input->col = 1;
	    } else {
		ctxt->input->col++;
	    }
	    cur++;
	    res++;
	    if (*cur == 0) {
		/* publish the position before refilling, then reload it */
		ctxt->input->cur = cur;
		xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
		cur = ctxt->input->cur;
	    }
	}
	ctxt->input->cur = cur;
    } else {
	/* slow path: several inputs are stacked or we are inside the DTD */
	int cur;
	do {
	    cur = CUR;
	    while ((IS_BLANK_CH(cur) && /* CHECKED tstblanks.xml */
	           (ctxt->instate != XML_PARSER_EOF))) {
		NEXT;
		cur = CUR;
		res++;
	    }
	    /* leave exhausted nested inputs, except inside a comment */
	    while ((cur == 0) && (ctxt->inputNr > 1) &&
		   (ctxt->instate != XML_PARSER_COMMENT)) {
		xmlPopInput(ctxt);
		cur = CUR;
	    }
	    /*
	     * Need to handle support of entities branching here
	     */
	    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
	} while ((IS_BLANK(cur)) && /* CHECKED tstblanks.xml */
	         (ctxt->instate != XML_PARSER_EOF));
    }
    return(res);
}

/************************************************************************
 *									*
 *		Commodity functions to handle entities			*
 *									*
 ************************************************************************/

/**
 * xmlPopInput:
 * @ctxt:  an XML parser context
 *
 * xmlPopInput: the current input pointed by ctxt->input came to an end
 *          pop it and return the next char.
2263 * 2264 * Returns the current xmlChar in the parser context 2265 */ 2266 xmlChar 2267 xmlPopInput(xmlParserCtxtPtr ctxt) { 2268 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0); 2269 if (xmlParserDebugEntities) 2270 xmlGenericError(xmlGenericErrorContext, 2271 "Popping input %d\n", ctxt->inputNr); 2272 xmlFreeInputStream(inputPop(ctxt)); 2273 if ((*ctxt->input->cur == 0) && 2274 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 2275 return(xmlPopInput(ctxt)); 2276 return(CUR); 2277 } 2278 2279 /** 2280 * xmlPushInput: 2281 * @ctxt: an XML parser context 2282 * @input: an XML parser input fragment (entity, XML fragment ...). 2283 * 2284 * xmlPushInput: switch to a new input stream which is stacked on top 2285 * of the previous one(s). 2286 * Returns -1 in case of error or the index in the input stack 2287 */ 2288 int 2289 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { 2290 int ret; 2291 if (input == NULL) return(-1); 2292 2293 if (xmlParserDebugEntities) { 2294 if ((ctxt->input != NULL) && (ctxt->input->filename)) 2295 xmlGenericError(xmlGenericErrorContext, 2296 "%s(%d): ", ctxt->input->filename, 2297 ctxt->input->line); 2298 xmlGenericError(xmlGenericErrorContext, 2299 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur); 2300 } 2301 ret = inputPush(ctxt, input); 2302 if (ctxt->instate == XML_PARSER_EOF) 2303 return(-1); 2304 GROW; 2305 return(ret); 2306 } 2307 2308 /** 2309 * xmlParseCharRef: 2310 * @ctxt: an XML parser context 2311 * 2312 * parse Reference declarations 2313 * 2314 * [66] CharRef ::= '&#' [0-9]+ ';' | 2315 * '&#x' [0-9a-fA-F]+ ';' 2316 * 2317 * [ WFC: Legal Character ] 2318 * Characters referred to using character references must match the 2319 * production for Char. 
2320 * 2321 * Returns the value parsed (as an int), 0 in case of error 2322 */ 2323 int 2324 xmlParseCharRef(xmlParserCtxtPtr ctxt) { 2325 unsigned int val = 0; 2326 int count = 0; 2327 unsigned int outofrange = 0; 2328 2329 /* 2330 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here 2331 */ 2332 if ((RAW == '&') && (NXT(1) == '#') && 2333 (NXT(2) == 'x')) { 2334 SKIP(3); 2335 GROW; 2336 while (RAW != ';') { /* loop blocked by count */ 2337 if (count++ > 20) { 2338 count = 0; 2339 GROW; 2340 if (ctxt->instate == XML_PARSER_EOF) 2341 return(0); 2342 } 2343 if ((RAW >= '0') && (RAW <= '9')) 2344 val = val * 16 + (CUR - '0'); 2345 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20)) 2346 val = val * 16 + (CUR - 'a') + 10; 2347 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20)) 2348 val = val * 16 + (CUR - 'A') + 10; 2349 else { 2350 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); 2351 val = 0; 2352 break; 2353 } 2354 if (val > 0x10FFFF) 2355 outofrange = val; 2356 2357 NEXT; 2358 count++; 2359 } 2360 if (RAW == ';') { 2361 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 2362 ctxt->input->col++; 2363 ctxt->nbChars ++; 2364 ctxt->input->cur++; 2365 } 2366 } else if ((RAW == '&') && (NXT(1) == '#')) { 2367 SKIP(2); 2368 GROW; 2369 while (RAW != ';') { /* loop blocked by count */ 2370 if (count++ > 20) { 2371 count = 0; 2372 GROW; 2373 if (ctxt->instate == XML_PARSER_EOF) 2374 return(0); 2375 } 2376 if ((RAW >= '0') && (RAW <= '9')) 2377 val = val * 10 + (CUR - '0'); 2378 else { 2379 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); 2380 val = 0; 2381 break; 2382 } 2383 if (val > 0x10FFFF) 2384 outofrange = val; 2385 2386 NEXT; 2387 count++; 2388 } 2389 if (RAW == ';') { 2390 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 2391 ctxt->input->col++; 2392 ctxt->nbChars ++; 2393 ctxt->input->cur++; 2394 } 2395 } else { 2396 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); 2397 } 2398 2399 /* 2400 * [ WFC: 
Legal Character ] 2401 * Characters referred to using character references must match the 2402 * production for Char. 2403 */ 2404 if ((IS_CHAR(val) && (outofrange == 0))) { 2405 return(val); 2406 } else { 2407 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 2408 "xmlParseCharRef: invalid xmlChar value %d\n", 2409 val); 2410 } 2411 return(0); 2412 } 2413 2414 /** 2415 * xmlParseStringCharRef: 2416 * @ctxt: an XML parser context 2417 * @str: a pointer to an index in the string 2418 * 2419 * parse Reference declarations, variant parsing from a string rather 2420 * than an an input flow. 2421 * 2422 * [66] CharRef ::= '&#' [0-9]+ ';' | 2423 * '&#x' [0-9a-fA-F]+ ';' 2424 * 2425 * [ WFC: Legal Character ] 2426 * Characters referred to using character references must match the 2427 * production for Char. 2428 * 2429 * Returns the value parsed (as an int), 0 in case of error, str will be 2430 * updated to the current value of the index 2431 */ 2432 static int 2433 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { 2434 const xmlChar *ptr; 2435 xmlChar cur; 2436 unsigned int val = 0; 2437 unsigned int outofrange = 0; 2438 2439 if ((str == NULL) || (*str == NULL)) return(0); 2440 ptr = *str; 2441 cur = *ptr; 2442 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) { 2443 ptr += 3; 2444 cur = *ptr; 2445 while (cur != ';') { /* Non input consuming loop */ 2446 if ((cur >= '0') && (cur <= '9')) 2447 val = val * 16 + (cur - '0'); 2448 else if ((cur >= 'a') && (cur <= 'f')) 2449 val = val * 16 + (cur - 'a') + 10; 2450 else if ((cur >= 'A') && (cur <= 'F')) 2451 val = val * 16 + (cur - 'A') + 10; 2452 else { 2453 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); 2454 val = 0; 2455 break; 2456 } 2457 if (val > 0x10FFFF) 2458 outofrange = val; 2459 2460 ptr++; 2461 cur = *ptr; 2462 } 2463 if (cur == ';') 2464 ptr++; 2465 } else if ((cur == '&') && (ptr[1] == '#')){ 2466 ptr += 2; 2467 cur = *ptr; 2468 while (cur != ';') { /* Non input consuming loops */ 2469 
if ((cur >= '0') && (cur <= '9')) 2470 val = val * 10 + (cur - '0'); 2471 else { 2472 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); 2473 val = 0; 2474 break; 2475 } 2476 if (val > 0x10FFFF) 2477 outofrange = val; 2478 2479 ptr++; 2480 cur = *ptr; 2481 } 2482 if (cur == ';') 2483 ptr++; 2484 } else { 2485 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); 2486 return(0); 2487 } 2488 *str = ptr; 2489 2490 /* 2491 * [ WFC: Legal Character ] 2492 * Characters referred to using character references must match the 2493 * production for Char. 2494 */ 2495 if ((IS_CHAR(val) && (outofrange == 0))) { 2496 return(val); 2497 } else { 2498 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 2499 "xmlParseStringCharRef: invalid xmlChar value %d\n", 2500 val); 2501 } 2502 return(0); 2503 } 2504 2505 /** 2506 * xmlNewBlanksWrapperInputStream: 2507 * @ctxt: an XML parser context 2508 * @entity: an Entity pointer 2509 * 2510 * Create a new input stream for wrapping 2511 * blanks around a PEReference 2512 * 2513 * Returns the new input stream or NULL 2514 */ 2515 2516 static void deallocblankswrapper (xmlChar *str) {xmlFree(str);} 2517 2518 static xmlParserInputPtr 2519 xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 2520 xmlParserInputPtr input; 2521 xmlChar *buffer; 2522 size_t length; 2523 if (entity == NULL) { 2524 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 2525 "xmlNewBlanksWrapperInputStream entity\n"); 2526 return(NULL); 2527 } 2528 if (xmlParserDebugEntities) 2529 xmlGenericError(xmlGenericErrorContext, 2530 "new blanks wrapper for entity: %s\n", entity->name); 2531 input = xmlNewInputStream(ctxt); 2532 if (input == NULL) { 2533 return(NULL); 2534 } 2535 length = xmlStrlen(entity->name) + 5; 2536 buffer = xmlMallocAtomic(length); 2537 if (buffer == NULL) { 2538 xmlErrMemory(ctxt, NULL); 2539 xmlFree(input); 2540 return(NULL); 2541 } 2542 buffer [0] = ' '; 2543 buffer [1] = '%'; 2544 buffer [length-3] = ';'; 2545 buffer [length-2] = ' '; 2546 
buffer [length-1] = 0; 2547 memcpy(buffer + 2, entity->name, length - 5); 2548 input->free = deallocblankswrapper; 2549 input->base = buffer; 2550 input->cur = buffer; 2551 input->length = length; 2552 input->end = &buffer[length]; 2553 return(input); 2554 } 2555 2556 /** 2557 * xmlParserHandlePEReference: 2558 * @ctxt: the parser context 2559 * 2560 * [69] PEReference ::= '%' Name ';' 2561 * 2562 * [ WFC: No Recursion ] 2563 * A parsed entity must not contain a recursive 2564 * reference to itself, either directly or indirectly. 2565 * 2566 * [ WFC: Entity Declared ] 2567 * In a document without any DTD, a document with only an internal DTD 2568 * subset which contains no parameter entity references, or a document 2569 * with "standalone='yes'", ... ... The declaration of a parameter 2570 * entity must precede any reference to it... 2571 * 2572 * [ VC: Entity Declared ] 2573 * In a document with an external subset or external parameter entities 2574 * with "standalone='no'", ... ... The declaration of a parameter entity 2575 * must precede any reference to it... 2576 * 2577 * [ WFC: In DTD ] 2578 * Parameter-entity references may only appear in the DTD. 2579 * NOTE: misleading but this is handled. 2580 * 2581 * A PEReference may have been detected in the current input stream 2582 * the handling is done accordingly to 2583 * http://www.w3.org/TR/REC-xml#entproc 2584 * i.e. 
2585 * - Included in literal in entity values 2586 * - Included as Parameter Entity reference within DTDs 2587 */ 2588 void 2589 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { 2590 const xmlChar *name; 2591 xmlEntityPtr entity = NULL; 2592 xmlParserInputPtr input; 2593 2594 if (RAW != '%') return; 2595 switch(ctxt->instate) { 2596 case XML_PARSER_CDATA_SECTION: 2597 return; 2598 case XML_PARSER_COMMENT: 2599 return; 2600 case XML_PARSER_START_TAG: 2601 return; 2602 case XML_PARSER_END_TAG: 2603 return; 2604 case XML_PARSER_EOF: 2605 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL); 2606 return; 2607 case XML_PARSER_PROLOG: 2608 case XML_PARSER_START: 2609 case XML_PARSER_MISC: 2610 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL); 2611 return; 2612 case XML_PARSER_ENTITY_DECL: 2613 case XML_PARSER_CONTENT: 2614 case XML_PARSER_ATTRIBUTE_VALUE: 2615 case XML_PARSER_PI: 2616 case XML_PARSER_SYSTEM_LITERAL: 2617 case XML_PARSER_PUBLIC_LITERAL: 2618 /* we just ignore it there */ 2619 return; 2620 case XML_PARSER_EPILOG: 2621 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL); 2622 return; 2623 case XML_PARSER_ENTITY_VALUE: 2624 /* 2625 * NOTE: in the case of entity values, we don't do the 2626 * substitution here since we need the literal 2627 * entity value to be able to save the internal 2628 * subset of the document. 2629 * This will be handled by xmlStringDecodeEntities 2630 */ 2631 return; 2632 case XML_PARSER_DTD: 2633 /* 2634 * [WFC: Well-Formedness Constraint: PEs in Internal Subset] 2635 * In the internal DTD subset, parameter-entity references 2636 * can occur only where markup declarations can occur, not 2637 * within markup declarations. 
2638 * In that case this is handled in xmlParseMarkupDecl 2639 */ 2640 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 2641 return; 2642 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0) 2643 return; 2644 break; 2645 case XML_PARSER_IGNORE: 2646 return; 2647 } 2648 2649 NEXT; 2650 name = xmlParseName(ctxt); 2651 if (xmlParserDebugEntities) 2652 xmlGenericError(xmlGenericErrorContext, 2653 "PEReference: %s\n", name); 2654 if (name == NULL) { 2655 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL); 2656 } else { 2657 if (RAW == ';') { 2658 NEXT; 2659 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL)) 2660 entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 2661 if (ctxt->instate == XML_PARSER_EOF) 2662 return; 2663 if (entity == NULL) { 2664 2665 /* 2666 * [ WFC: Entity Declared ] 2667 * In a document without any DTD, a document with only an 2668 * internal DTD subset which contains no parameter entity 2669 * references, or a document with "standalone='yes'", ... 2670 * ... The declaration of a parameter entity must precede 2671 * any reference to it... 2672 */ 2673 if ((ctxt->standalone == 1) || 2674 ((ctxt->hasExternalSubset == 0) && 2675 (ctxt->hasPErefs == 0))) { 2676 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 2677 "PEReference: %%%s; not found\n", name); 2678 } else { 2679 /* 2680 * [ VC: Entity Declared ] 2681 * In a document with an external subset or external 2682 * parameter entities with "standalone='no'", ... 2683 * ... The declaration of a parameter entity must precede 2684 * any reference to it... 
2685 */ 2686 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) { 2687 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY, 2688 "PEReference: %%%s; not found\n", 2689 name, NULL); 2690 } else 2691 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 2692 "PEReference: %%%s; not found\n", 2693 name, NULL); 2694 ctxt->valid = 0; 2695 } 2696 xmlParserEntityCheck(ctxt, 0, NULL, 0); 2697 } else if (ctxt->input->free != deallocblankswrapper) { 2698 input = xmlNewBlanksWrapperInputStream(ctxt, entity); 2699 if (xmlPushInput(ctxt, input) < 0) 2700 return; 2701 } else { 2702 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) || 2703 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) { 2704 xmlChar start[4]; 2705 xmlCharEncoding enc; 2706 2707 /* 2708 * Note: external parameter entities will not be loaded, it 2709 * is not required for a non-validating parser, unless the 2710 * option of validating, or substituting entities were 2711 * given. Doing so is far more secure as the parser will 2712 * only process data coming from the document entity by 2713 * default. 2714 */ 2715 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 2716 ((ctxt->options & XML_PARSE_NOENT) == 0) && 2717 ((ctxt->options & XML_PARSE_DTDVALID) == 0) && 2718 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) && 2719 ((ctxt->options & XML_PARSE_DTDATTR) == 0) && 2720 (ctxt->replaceEntities == 0) && 2721 (ctxt->validate == 0)) 2722 return; 2723 2724 /* 2725 * handle the extra spaces added before and after 2726 * c.f. http://www.w3.org/TR/REC-xml#as-PE 2727 * this is done independently. 2728 */ 2729 input = xmlNewEntityInputStream(ctxt, entity); 2730 if (xmlPushInput(ctxt, input) < 0) 2731 return; 2732 2733 /* 2734 * Get the 4 first bytes and decode the charset 2735 * if enc != XML_CHAR_ENCODING_NONE 2736 * plug some encoding conversion routines. 
2737 * Note that, since we may have some non-UTF8 2738 * encoding (like UTF16, bug 135229), the 'length' 2739 * is not known, but we can calculate based upon 2740 * the amount of data in the buffer. 2741 */ 2742 GROW 2743 if (ctxt->instate == XML_PARSER_EOF) 2744 return; 2745 if ((ctxt->input->end - ctxt->input->cur)>=4) { 2746 start[0] = RAW; 2747 start[1] = NXT(1); 2748 start[2] = NXT(2); 2749 start[3] = NXT(3); 2750 enc = xmlDetectCharEncoding(start, 4); 2751 if (enc != XML_CHAR_ENCODING_NONE) { 2752 xmlSwitchEncoding(ctxt, enc); 2753 } 2754 } 2755 2756 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 2757 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) && 2758 (IS_BLANK_CH(NXT(5)))) { 2759 xmlParseTextDecl(ctxt); 2760 } 2761 } else { 2762 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 2763 "PEReference: %s is not a parameter entity\n", 2764 name); 2765 } 2766 } 2767 } else { 2768 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL); 2769 } 2770 } 2771 } 2772 2773 /* 2774 * Macro used to grow the current buffer. 2775 * buffer##_size is expected to be a size_t 2776 * mem_error: is expected to handle memory allocation failures 2777 */ 2778 #define growBuffer(buffer, n) { \ 2779 xmlChar *tmp; \ 2780 size_t new_size = buffer##_size * 2 + n; \ 2781 if (new_size < buffer##_size) goto mem_error; \ 2782 tmp = (xmlChar *) xmlRealloc(buffer, new_size); \ 2783 if (tmp == NULL) goto mem_error; \ 2784 buffer = tmp; \ 2785 buffer##_size = new_size; \ 2786 } 2787 2788 /** 2789 * xmlStringLenDecodeEntities: 2790 * @ctxt: the parser context 2791 * @str: the input string 2792 * @len: the string length 2793 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF 2794 * @end: an end marker xmlChar, 0 if none 2795 * @end2: an end marker xmlChar, 0 if none 2796 * @end3: an end marker xmlChar, 0 if none 2797 * 2798 * Takes a entity string content and process to do the adequate substitutions. 
*
 * [67] Reference ::= EntityRef | CharRef
 *
 * [69] PEReference ::= '%' Name ';'
 *
 * Decoding stops at the first 0 character, at the first character equal
 * to @end, @end2 or @end3, or once @len bytes of @str have been consumed.
 * NULL is returned when @ctxt or @str is NULL, when @len is negative,
 * when ctxt->depth exceeds the entity nesting limit, on allocation
 * failure, or when an entity loop or internal error was recorded in
 * ctxt->lastError.
 *
 * Returns A newly allocated string with the substitution done. The caller
 *      must deallocate it !
 */
xmlChar *
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
		      int what, xmlChar end, xmlChar  end2, xmlChar end3) {
    xmlChar *buffer = NULL;	/* output buffer, grown via growBuffer() */
    size_t buffer_size = 0;
    size_t nbchars = 0;		/* bytes written to buffer so far */

    xmlChar *current = NULL;
    xmlChar *rep = NULL;	/* decoded content of a nested entity */
    const xmlChar *last;	/* first byte past the @len input bytes */
    xmlEntityPtr ent;
    int c, l = 0;

    if ((ctxt == NULL) || (str == NULL) || (len < 0))
	return(NULL);
    last = str + len;

    /*
     * Refuse to recurse deeper than xmlEntityDecodingDepthMax, or than
     * xmlEntityDecodingDepthHugeMax when XML_PARSE_HUGE is set.
     */
    if (((ctxt->depth > xmlEntityDecodingDepthMax) &&
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
	(ctxt->depth > xmlEntityDecodingDepthHugeMax)) {
	xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
	return(NULL);
    }

    /*
     * allocate a translation buffer.
     */
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
    if (buffer == NULL) goto mem_error;

    /*
     * OK loop until we reach one of the ending char or a size limit.
     * we are operating on already parsed values.
     */
    if (str < last)
	c = CUR_SCHAR(str, l);
    else
        c = 0;
    while ((c != 0) && (c != end) && /* non input consuming loop */
	   (c != end2) && (c != end3)) {

	if (c == 0) break;
        if ((c == '&') && (str[1] == '#')) {
	    /* character reference: the helper moves str past it */
	    int val = xmlParseStringCharRef(ctxt, &str);
	    if (val != 0) {
		COPY_BUF(0,buffer,nbchars,val);
	    }
	    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
	        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
	    }
	} else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
	    /* general entity reference, substituted on caller request */
	    if (xmlParserDebugEntities)
		xmlGenericError(xmlGenericErrorContext,
			"String decoding Entity Reference: %.30s\n",
			str);
	    ent = xmlParseStringEntityRef(ctxt, &str);
	    if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
	        (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
	        goto int_error;
	    xmlParserEntityCheck(ctxt, 0, ent, 0);
	    if (ent != NULL)
	        ctxt->nbentities += ent->checked / 2;
	    if ((ent != NULL) &&
		(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
		/* predefined entity: only content[0] is copied */
		if (ent->content != NULL) {
		    COPY_BUF(0,buffer,nbchars,ent->content[0]);
		    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
			growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
		    }
		} else {
		    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
			    "predefined entity has no content\n");
		}
	    } else if ((ent != NULL) && (ent->content != NULL)) {
		/* decode the entity content recursively, one level deeper */
		ctxt->depth++;
		rep = xmlStringDecodeEntities(ctxt, ent->content, what,
			                      0, 0, 0);
		ctxt->depth--;

		if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
		    (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
		    goto int_error;

		if (rep != NULL) {
		    current = rep;
		    while (*current != 0) { /* non input consuming loop */
			buffer[nbchars++] = *current++;
			if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
			    /* abort when the entity check reports non-zero */
			    if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
				goto int_error;
			    growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
			}
		    }
		    xmlFree(rep);
		    rep = NULL;
		}
	    } else if (ent != NULL) {
		/* entity without content: emit "&name;" unchanged */
		int i = xmlStrlen(ent->name);
		const xmlChar *cur = ent->name;

		buffer[nbchars++] = '&';
		if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
		    growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
		}
		for (;i > 0;i--)
		    buffer[nbchars++] = *cur++;
		buffer[nbchars++] = ';';
	    }
	} else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
	    /* parameter entity reference, substituted on caller request */
	    if (xmlParserDebugEntities)
		xmlGenericError(xmlGenericErrorContext,
			"String decoding PE Reference: %.30s\n", str);
	    ent = xmlParseStringPEReference(ctxt, &str);
	    if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
	        goto int_error;
	    xmlParserEntityCheck(ctxt, 0, ent, 0);
	    if (ent != NULL)
	        ctxt->nbentities += ent->checked / 2;
	    if (ent != NULL) {
                if (ent->content == NULL) {
		    /*
		     * Note: external parsed entities will not be loaded,
		     * it is not required for a non-validating parser to
		     * complete external PEreferences coming from the
		     * internal subset
		     */
		    if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
			((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
			(ctxt->validate != 0)) {
			xmlLoadEntityContent(ctxt, ent);
		    } else {
			xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
		  "not validating will not read content for PE entity %s\n",
		                      ent->name, NULL);
		    }
		}
		/*
		 * ent->content may still be NULL here; in that case
		 * xmlStringDecodeEntities() returns NULL and nothing
		 * is copied.
		 */
		ctxt->depth++;
		rep = xmlStringDecodeEntities(ctxt, ent->content, what,
			                      0, 0, 0);
		ctxt->depth--;
		if (rep != NULL) {
		    current = rep;
		    while (*current != 0) { /* non input consuming loop */
			buffer[nbchars++] = *current++;
			if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
			    /* abort when the entity check reports non-zero */
			    if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
			        goto int_error;
			    growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
			}
		    }
		    xmlFree(rep);
		    rep = NULL;
		}
	    }
	} else {
	    /* ordinary character: copy it and step over its l bytes */
	    COPY_BUF(l,buffer,nbchars,c);
	    str += l;
	    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
	        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
	    }
	}
	if (str < last)
	    c = CUR_SCHAR(str, l);
	else
	    c = 0;
    }
    buffer[nbchars] = 0;
    return(buffer);

/* allocation failure: report it, then share the cleanup below */
mem_error:
    xmlErrMemory(ctxt, NULL);
/* error already recorded: release the partial results and give up */
int_error:
    if (rep != NULL)
        xmlFree(rep);
    if (buffer != NULL)
        xmlFree(buffer);
    return(NULL);
}

/**
 * xmlStringDecodeEntities:
 * @ctxt:  the parser context
 * @str:  the input string
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
 * @end:  an end marker xmlChar, 0 if none
 * @end2:  an end marker xmlChar, 0 if none
 * @end3:  an end marker xmlChar, 0 if none
 *
 * Takes a entity string content and process to do the adequate substitutions.
 * This is xmlStringLenDecodeEntities() applied to the whole of @str, the
 * length being taken with xmlStrlen().
 *
 * [67] Reference ::= EntityRef | CharRef
 *
 * [69] PEReference ::= '%' Name ';'
 *
 * Returns A newly allocated string with the substitution done. The caller
 *      must deallocate it ! NULL when @ctxt or @str is NULL.
 */
xmlChar *
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
		        xmlChar end, xmlChar  end2, xmlChar end3) {
    if ((ctxt == NULL) || (str == NULL)) return(NULL);
    return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
           end, end2, end3));
}

/************************************************************************
 *									*
 *		Commodity functions, cleanup needed ?			*
 *									*
 ************************************************************************/

/**
 * areBlanks:
 * @ctxt:  an XML parser context
 * @str:  a xmlChar *
 * @len:  the size of @str
 * @blank_chars: we know the chars are blanks
 *
 * Is this a sequence of blank chars that one can ignore ?
 *
 * Returns 1 if ignorable 0 otherwise.
3029 */ 3030 3031 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, 3032 int blank_chars) { 3033 int i, ret; 3034 xmlNodePtr lastChild; 3035 3036 /* 3037 * Don't spend time trying to differentiate them, the same callback is 3038 * used ! 3039 */ 3040 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters) 3041 return(0); 3042 3043 /* 3044 * Check for xml:space value. 3045 */ 3046 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) || 3047 (*(ctxt->space) == -2)) 3048 return(0); 3049 3050 /* 3051 * Check that the string is made of blanks 3052 */ 3053 if (blank_chars == 0) { 3054 for (i = 0;i < len;i++) 3055 if (!(IS_BLANK_CH(str[i]))) return(0); 3056 } 3057 3058 /* 3059 * Look if the element is mixed content in the DTD if available 3060 */ 3061 if (ctxt->node == NULL) return(0); 3062 if (ctxt->myDoc != NULL) { 3063 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name); 3064 if (ret == 0) return(1); 3065 if (ret == 1) return(0); 3066 } 3067 3068 /* 3069 * Otherwise, heuristic :-\ 3070 */ 3071 if ((RAW != '<') && (RAW != 0xD)) return(0); 3072 if ((ctxt->node->children == NULL) && 3073 (RAW == '<') && (NXT(1) == '/')) return(0); 3074 3075 lastChild = xmlGetLastChild(ctxt->node); 3076 if (lastChild == NULL) { 3077 if ((ctxt->node->type != XML_ELEMENT_NODE) && 3078 (ctxt->node->content != NULL)) return(0); 3079 } else if (xmlNodeIsText(lastChild)) 3080 return(0); 3081 else if ((ctxt->node->children != NULL) && 3082 (xmlNodeIsText(ctxt->node->children))) 3083 return(0); 3084 return(1); 3085 } 3086 3087 /************************************************************************ 3088 * * 3089 * Extra stuff for namespace support * 3090 * Relates to http://www.w3.org/TR/WD-xml-names * 3091 * * 3092 ************************************************************************/ 3093 3094 /** 3095 * xmlSplitQName: 3096 * @ctxt: an XML parser context 3097 * @name: an XML parser context 3098 * @prefix: a xmlChar ** 3099 * 3100 * parse an UTF8 encoded XML 
qualified name string 3101 * 3102 * [NS 5] QName ::= (Prefix ':')? LocalPart 3103 * 3104 * [NS 6] Prefix ::= NCName 3105 * 3106 * [NS 7] LocalPart ::= NCName 3107 * 3108 * Returns the local part, and prefix is updated 3109 * to get the Prefix if any. 3110 */ 3111 3112 xmlChar * 3113 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) { 3114 xmlChar buf[XML_MAX_NAMELEN + 5]; 3115 xmlChar *buffer = NULL; 3116 int len = 0; 3117 int max = XML_MAX_NAMELEN; 3118 xmlChar *ret = NULL; 3119 const xmlChar *cur = name; 3120 int c; 3121 3122 if (prefix == NULL) return(NULL); 3123 *prefix = NULL; 3124 3125 if (cur == NULL) return(NULL); 3126 3127 #ifndef XML_XML_NAMESPACE 3128 /* xml: prefix is not really a namespace */ 3129 if ((cur[0] == 'x') && (cur[1] == 'm') && 3130 (cur[2] == 'l') && (cur[3] == ':')) 3131 return(xmlStrdup(name)); 3132 #endif 3133 3134 /* nasty but well=formed */ 3135 if (cur[0] == ':') 3136 return(xmlStrdup(name)); 3137 3138 c = *cur++; 3139 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */ 3140 buf[len++] = c; 3141 c = *cur++; 3142 } 3143 if (len >= max) { 3144 /* 3145 * Okay someone managed to make a huge name, so he's ready to pay 3146 * for the processing speed. 
3147 */ 3148 max = len * 2; 3149 3150 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3151 if (buffer == NULL) { 3152 xmlErrMemory(ctxt, NULL); 3153 return(NULL); 3154 } 3155 memcpy(buffer, buf, len); 3156 while ((c != 0) && (c != ':')) { /* tested bigname.xml */ 3157 if (len + 10 > max) { 3158 xmlChar *tmp; 3159 3160 max *= 2; 3161 tmp = (xmlChar *) xmlRealloc(buffer, 3162 max * sizeof(xmlChar)); 3163 if (tmp == NULL) { 3164 xmlFree(buffer); 3165 xmlErrMemory(ctxt, NULL); 3166 return(NULL); 3167 } 3168 buffer = tmp; 3169 } 3170 buffer[len++] = c; 3171 c = *cur++; 3172 } 3173 buffer[len] = 0; 3174 } 3175 3176 if ((c == ':') && (*cur == 0)) { 3177 if (buffer != NULL) 3178 xmlFree(buffer); 3179 *prefix = NULL; 3180 return(xmlStrdup(name)); 3181 } 3182 3183 if (buffer == NULL) 3184 ret = xmlStrndup(buf, len); 3185 else { 3186 ret = buffer; 3187 buffer = NULL; 3188 max = XML_MAX_NAMELEN; 3189 } 3190 3191 3192 if (c == ':') { 3193 c = *cur; 3194 *prefix = ret; 3195 if (c == 0) { 3196 return(xmlStrndup(BAD_CAST "", 0)); 3197 } 3198 len = 0; 3199 3200 /* 3201 * Check that the first character is proper to start 3202 * a new name 3203 */ 3204 if (!(((c >= 0x61) && (c <= 0x7A)) || 3205 ((c >= 0x41) && (c <= 0x5A)) || 3206 (c == '_') || (c == ':'))) { 3207 int l; 3208 int first = CUR_SCHAR(cur, l); 3209 3210 if (!IS_LETTER(first) && (first != '_')) { 3211 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME, 3212 "Name %s is not XML Namespace compliant\n", 3213 name); 3214 } 3215 } 3216 cur++; 3217 3218 while ((c != 0) && (len < max)) { /* tested bigname2.xml */ 3219 buf[len++] = c; 3220 c = *cur++; 3221 } 3222 if (len >= max) { 3223 /* 3224 * Okay someone managed to make a huge name, so he's ready to pay 3225 * for the processing speed. 
3226 */ 3227 max = len * 2; 3228 3229 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3230 if (buffer == NULL) { 3231 xmlErrMemory(ctxt, NULL); 3232 return(NULL); 3233 } 3234 memcpy(buffer, buf, len); 3235 while (c != 0) { /* tested bigname2.xml */ 3236 if (len + 10 > max) { 3237 xmlChar *tmp; 3238 3239 max *= 2; 3240 tmp = (xmlChar *) xmlRealloc(buffer, 3241 max * sizeof(xmlChar)); 3242 if (tmp == NULL) { 3243 xmlErrMemory(ctxt, NULL); 3244 xmlFree(buffer); 3245 return(NULL); 3246 } 3247 buffer = tmp; 3248 } 3249 buffer[len++] = c; 3250 c = *cur++; 3251 } 3252 buffer[len] = 0; 3253 } 3254 3255 if (buffer == NULL) 3256 ret = xmlStrndup(buf, len); 3257 else { 3258 ret = buffer; 3259 } 3260 } 3261 3262 return(ret); 3263 } 3264 3265 /************************************************************************ 3266 * * 3267 * The parser itself * 3268 * Relates to http://www.w3.org/TR/REC-xml * 3269 * * 3270 ************************************************************************/ 3271 3272 /************************************************************************ 3273 * * 3274 * Routines to parse Name, NCName and NmToken * 3275 * * 3276 ************************************************************************/ 3277 #ifdef DEBUG 3278 static unsigned long nbParseName = 0; 3279 static unsigned long nbParseNmToken = 0; 3280 static unsigned long nbParseNCName = 0; 3281 static unsigned long nbParseNCNameComplex = 0; 3282 static unsigned long nbParseNameComplex = 0; 3283 static unsigned long nbParseStringName = 0; 3284 #endif 3285 3286 /* 3287 * The two following functions are related to the change of accepted 3288 * characters for Name and NmToken in the Revision 5 of XML-1.0 3289 * They correspond to the modified production [4] and the new production [4a] 3290 * changes in that revision. Also note that the macros used for the 3291 * productions Letter, Digit, CombiningChar and Extender are not needed 3292 * anymore. 
3293 * We still keep compatibility to pre-revision5 parsing semantic if the 3294 * new XML_PARSE_OLD10 option is given to the parser. 3295 */ 3296 static int 3297 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) { 3298 if ((ctxt->options & XML_PARSE_OLD10) == 0) { 3299 /* 3300 * Use the new checks of production [4] [4a] amd [5] of the 3301 * Update 5 of XML-1.0 3302 */ 3303 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 3304 (((c >= 'a') && (c <= 'z')) || 3305 ((c >= 'A') && (c <= 'Z')) || 3306 (c == '_') || (c == ':') || 3307 ((c >= 0xC0) && (c <= 0xD6)) || 3308 ((c >= 0xD8) && (c <= 0xF6)) || 3309 ((c >= 0xF8) && (c <= 0x2FF)) || 3310 ((c >= 0x370) && (c <= 0x37D)) || 3311 ((c >= 0x37F) && (c <= 0x1FFF)) || 3312 ((c >= 0x200C) && (c <= 0x200D)) || 3313 ((c >= 0x2070) && (c <= 0x218F)) || 3314 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3315 ((c >= 0x3001) && (c <= 0xD7FF)) || 3316 ((c >= 0xF900) && (c <= 0xFDCF)) || 3317 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3318 ((c >= 0x10000) && (c <= 0xEFFFF)))) 3319 return(1); 3320 } else { 3321 if (IS_LETTER(c) || (c == '_') || (c == ':')) 3322 return(1); 3323 } 3324 return(0); 3325 } 3326 3327 static int 3328 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) { 3329 if ((ctxt->options & XML_PARSE_OLD10) == 0) { 3330 /* 3331 * Use the new checks of production [4] [4a] amd [5] of the 3332 * Update 5 of XML-1.0 3333 */ 3334 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 3335 (((c >= 'a') && (c <= 'z')) || 3336 ((c >= 'A') && (c <= 'Z')) || 3337 ((c >= '0') && (c <= '9')) || /* !start */ 3338 (c == '_') || (c == ':') || 3339 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */ 3340 ((c >= 0xC0) && (c <= 0xD6)) || 3341 ((c >= 0xD8) && (c <= 0xF6)) || 3342 ((c >= 0xF8) && (c <= 0x2FF)) || 3343 ((c >= 0x300) && (c <= 0x36F)) || /* !start */ 3344 ((c >= 0x370) && (c <= 0x37D)) || 3345 ((c >= 0x37F) && (c <= 0x1FFF)) || 3346 ((c >= 0x200C) && (c <= 0x200D)) || 3347 ((c >= 0x203F) && (c <= 0x2040)) || /* !start 
*/ 3348 ((c >= 0x2070) && (c <= 0x218F)) || 3349 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3350 ((c >= 0x3001) && (c <= 0xD7FF)) || 3351 ((c >= 0xF900) && (c <= 0xFDCF)) || 3352 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3353 ((c >= 0x10000) && (c <= 0xEFFFF)))) 3354 return(1); 3355 } else { 3356 if ((IS_LETTER(c)) || (IS_DIGIT(c)) || 3357 (c == '.') || (c == '-') || 3358 (c == '_') || (c == ':') || 3359 (IS_COMBINING(c)) || 3360 (IS_EXTENDER(c))) 3361 return(1); 3362 } 3363 return(0); 3364 } 3365 3366 static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, 3367 int *len, int *alloc, int normalize); 3368 3369 static const xmlChar * 3370 xmlParseNameComplex(xmlParserCtxtPtr ctxt) { 3371 int len = 0, l; 3372 int c; 3373 int count = 0; 3374 3375 #ifdef DEBUG 3376 nbParseNameComplex++; 3377 #endif 3378 3379 /* 3380 * Handler for more complex cases 3381 */ 3382 GROW; 3383 if (ctxt->instate == XML_PARSER_EOF) 3384 return(NULL); 3385 c = CUR_CHAR(l); 3386 if ((ctxt->options & XML_PARSE_OLD10) == 0) { 3387 /* 3388 * Use the new checks of production [4] [4a] amd [5] of the 3389 * Update 5 of XML-1.0 3390 */ 3391 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 3392 (!(((c >= 'a') && (c <= 'z')) || 3393 ((c >= 'A') && (c <= 'Z')) || 3394 (c == '_') || (c == ':') || 3395 ((c >= 0xC0) && (c <= 0xD6)) || 3396 ((c >= 0xD8) && (c <= 0xF6)) || 3397 ((c >= 0xF8) && (c <= 0x2FF)) || 3398 ((c >= 0x370) && (c <= 0x37D)) || 3399 ((c >= 0x37F) && (c <= 0x1FFF)) || 3400 ((c >= 0x200C) && (c <= 0x200D)) || 3401 ((c >= 0x2070) && (c <= 0x218F)) || 3402 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3403 ((c >= 0x3001) && (c <= 0xD7FF)) || 3404 ((c >= 0xF900) && (c <= 0xFDCF)) || 3405 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3406 ((c >= 0x10000) && (c <= 0xEFFFF))))) { 3407 return(NULL); 3408 } 3409 len += l; 3410 NEXTL(l); 3411 c = CUR_CHAR(l); 3412 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 3413 (((c >= 'a') && (c <= 'z')) || 3414 ((c >= 'A') && (c <= 'Z')) || 3415 
((c >= '0') && (c <= '9')) || /* !start */ 3416 (c == '_') || (c == ':') || 3417 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */ 3418 ((c >= 0xC0) && (c <= 0xD6)) || 3419 ((c >= 0xD8) && (c <= 0xF6)) || 3420 ((c >= 0xF8) && (c <= 0x2FF)) || 3421 ((c >= 0x300) && (c <= 0x36F)) || /* !start */ 3422 ((c >= 0x370) && (c <= 0x37D)) || 3423 ((c >= 0x37F) && (c <= 0x1FFF)) || 3424 ((c >= 0x200C) && (c <= 0x200D)) || 3425 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */ 3426 ((c >= 0x2070) && (c <= 0x218F)) || 3427 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3428 ((c >= 0x3001) && (c <= 0xD7FF)) || 3429 ((c >= 0xF900) && (c <= 0xFDCF)) || 3430 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3431 ((c >= 0x10000) && (c <= 0xEFFFF)) 3432 )) { 3433 if (count++ > XML_PARSER_CHUNK_SIZE) { 3434 count = 0; 3435 GROW; 3436 if (ctxt->instate == XML_PARSER_EOF) 3437 return(NULL); 3438 } 3439 len += l; 3440 NEXTL(l); 3441 c = CUR_CHAR(l); 3442 if (c == 0 && ctxt->instate == XML_PARSER_EOF) 3443 return(NULL); 3444 } 3445 } else { 3446 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 3447 (!IS_LETTER(c) && (c != '_') && 3448 (c != ':'))) { 3449 return(NULL); 3450 } 3451 len += l; 3452 NEXTL(l); 3453 c = CUR_CHAR(l); 3454 3455 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 3456 ((IS_LETTER(c)) || (IS_DIGIT(c)) || 3457 (c == '.') || (c == '-') || 3458 (c == '_') || (c == ':') || 3459 (IS_COMBINING(c)) || 3460 (IS_EXTENDER(c)))) { 3461 if (count++ > XML_PARSER_CHUNK_SIZE) { 3462 count = 0; 3463 GROW; 3464 if (ctxt->instate == XML_PARSER_EOF) 3465 return(NULL); 3466 } 3467 len += l; 3468 NEXTL(l); 3469 c = CUR_CHAR(l); 3470 } 3471 } 3472 3473 if ((len > XML_MAX_NAME_LENGTH) && 3474 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3475 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name"); 3476 return(NULL); 3477 } 3478 if (ctxt->input->cur - ctxt->input->base < len) { 3479 /* 3480 * There were a couple of bugs where PERefs lead to to a change 3481 * of the buffer. 
Check the buffer size to avoid passing an invalid 3482 * pointer to xmlDictLookup. 3483 */ 3484 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 3485 "unexpected change of input buffer"); 3486 return (NULL); 3487 } 3488 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r')) 3489 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len)); 3490 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); 3491 } 3492 3493 /** 3494 * xmlParseName: 3495 * @ctxt: an XML parser context 3496 * 3497 * parse an XML name. 3498 * 3499 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 3500 * CombiningChar | Extender 3501 * 3502 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 3503 * 3504 * [6] Names ::= Name (#x20 Name)* 3505 * 3506 * Returns the Name parsed or NULL 3507 */ 3508 3509 const xmlChar * 3510 xmlParseName(xmlParserCtxtPtr ctxt) { 3511 const xmlChar *in; 3512 const xmlChar *ret; 3513 int count = 0; 3514 3515 GROW; 3516 3517 #ifdef DEBUG 3518 nbParseName++; 3519 #endif 3520 3521 /* 3522 * Accelerator for simple ASCII names 3523 */ 3524 in = ctxt->input->cur; 3525 if (((*in >= 0x61) && (*in <= 0x7A)) || 3526 ((*in >= 0x41) && (*in <= 0x5A)) || 3527 (*in == '_') || (*in == ':')) { 3528 in++; 3529 while (((*in >= 0x61) && (*in <= 0x7A)) || 3530 ((*in >= 0x41) && (*in <= 0x5A)) || 3531 ((*in >= 0x30) && (*in <= 0x39)) || 3532 (*in == '_') || (*in == '-') || 3533 (*in == ':') || (*in == '.')) 3534 in++; 3535 if ((*in > 0) && (*in < 0x80)) { 3536 count = in - ctxt->input->cur; 3537 if ((count > XML_MAX_NAME_LENGTH) && 3538 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3539 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name"); 3540 return(NULL); 3541 } 3542 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); 3543 ctxt->input->cur = in; 3544 ctxt->nbChars += count; 3545 ctxt->input->col += count; 3546 if (ret == NULL) 3547 xmlErrMemory(ctxt, NULL); 3548 return(ret); 3549 } 3550 } 3551 /* accelerator for special cases */ 3552 
return(xmlParseNameComplex(ctxt)); 3553 } 3554 3555 static const xmlChar * 3556 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) { 3557 int len = 0, l; 3558 int c; 3559 int count = 0; 3560 size_t startPosition = 0; 3561 3562 #ifdef DEBUG 3563 nbParseNCNameComplex++; 3564 #endif 3565 3566 /* 3567 * Handler for more complex cases 3568 */ 3569 GROW; 3570 startPosition = CUR_PTR - BASE_PTR; 3571 c = CUR_CHAR(l); 3572 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 3573 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) { 3574 return(NULL); 3575 } 3576 3577 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 3578 (xmlIsNameChar(ctxt, c) && (c != ':'))) { 3579 if (count++ > XML_PARSER_CHUNK_SIZE) { 3580 if ((len > XML_MAX_NAME_LENGTH) && 3581 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3582 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3583 return(NULL); 3584 } 3585 count = 0; 3586 GROW; 3587 if (ctxt->instate == XML_PARSER_EOF) 3588 return(NULL); 3589 } 3590 len += l; 3591 NEXTL(l); 3592 c = CUR_CHAR(l); 3593 if (c == 0) { 3594 /* 3595 * A xmlStructuredErrorFunc could call xmlStopParser(), so 3596 * return early if that happens. 3597 */ 3598 if (ctxt->instate == XML_PARSER_EOF) 3599 return(NULL); 3600 count = 0; 3601 /* 3602 * when shrinking to extend the buffer we really need to preserve 3603 * the part of the name we already parsed. Hence rolling back 3604 * by current lenght. 3605 */ 3606 ctxt->input->cur -= l; 3607 GROW; 3608 if (ctxt->instate == XML_PARSER_EOF) 3609 return(NULL); 3610 ctxt->input->cur += l; 3611 c = CUR_CHAR(l); 3612 } 3613 } 3614 if ((len > XML_MAX_NAME_LENGTH) && 3615 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3616 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3617 return(NULL); 3618 } 3619 return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len)); 3620 } 3621 3622 /** 3623 * xmlParseNCName: 3624 * @ctxt: an XML parser context 3625 * @len: length of the string parsed 3626 * 3627 * parse an XML name. 
3628 * 3629 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' | 3630 * CombiningChar | Extender 3631 * 3632 * [5NS] NCName ::= (Letter | '_') (NCNameChar)* 3633 * 3634 * Returns the Name parsed or NULL 3635 */ 3636 3637 static const xmlChar * 3638 xmlParseNCName(xmlParserCtxtPtr ctxt) { 3639 const xmlChar *in, *e; 3640 const xmlChar *ret; 3641 int count = 0; 3642 3643 #ifdef DEBUG 3644 nbParseNCName++; 3645 #endif 3646 3647 /* 3648 * Accelerator for simple ASCII names 3649 */ 3650 in = ctxt->input->cur; 3651 e = ctxt->input->end; 3652 if ((((*in >= 0x61) && (*in <= 0x7A)) || 3653 ((*in >= 0x41) && (*in <= 0x5A)) || 3654 (*in == '_')) && (in < e)) { 3655 in++; 3656 while ((((*in >= 0x61) && (*in <= 0x7A)) || 3657 ((*in >= 0x41) && (*in <= 0x5A)) || 3658 ((*in >= 0x30) && (*in <= 0x39)) || 3659 (*in == '_') || (*in == '-') || 3660 (*in == '.')) && (in < e)) 3661 in++; 3662 if (in >= e) 3663 goto complex; 3664 if ((*in > 0) && (*in < 0x80)) { 3665 count = in - ctxt->input->cur; 3666 if ((count > XML_MAX_NAME_LENGTH) && 3667 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3668 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3669 return(NULL); 3670 } 3671 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); 3672 ctxt->input->cur = in; 3673 ctxt->nbChars += count; 3674 ctxt->input->col += count; 3675 if (ret == NULL) { 3676 xmlErrMemory(ctxt, NULL); 3677 } 3678 return(ret); 3679 } 3680 } 3681 complex: 3682 return(xmlParseNCNameComplex(ctxt)); 3683 } 3684 3685 /** 3686 * xmlParseNameAndCompare: 3687 * @ctxt: an XML parser context 3688 * 3689 * parse an XML name and compares for match 3690 * (specialized for endtag parsing) 3691 * 3692 * Returns NULL for an illegal name, (xmlChar*) 1 for success 3693 * and the name for mismatch 3694 */ 3695 3696 static const xmlChar * 3697 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) { 3698 register const xmlChar *cmp = other; 3699 register const xmlChar *in; 3700 const xmlChar *ret; 3701 3702 GROW; 3703 if 
(ctxt->instate == XML_PARSER_EOF) 3704 return(NULL); 3705 3706 in = ctxt->input->cur; 3707 while (*in != 0 && *in == *cmp) { 3708 ++in; 3709 ++cmp; 3710 ctxt->input->col++; 3711 } 3712 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { 3713 /* success */ 3714 ctxt->input->cur = in; 3715 return (const xmlChar*) 1; 3716 } 3717 /* failure (or end of input buffer), check with full function */ 3718 ret = xmlParseName (ctxt); 3719 /* strings coming from the dictionary direct compare possible */ 3720 if (ret == other) { 3721 return (const xmlChar*) 1; 3722 } 3723 return ret; 3724 } 3725 3726 /** 3727 * xmlParseStringName: 3728 * @ctxt: an XML parser context 3729 * @str: a pointer to the string pointer (IN/OUT) 3730 * 3731 * parse an XML name. 3732 * 3733 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 3734 * CombiningChar | Extender 3735 * 3736 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 3737 * 3738 * [6] Names ::= Name (#x20 Name)* 3739 * 3740 * Returns the Name parsed or NULL. The @str pointer 3741 * is updated to the current location in the string. 3742 */ 3743 3744 static xmlChar * 3745 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { 3746 xmlChar buf[XML_MAX_NAMELEN + 5]; 3747 const xmlChar *cur = *str; 3748 int len = 0, l; 3749 int c; 3750 3751 #ifdef DEBUG 3752 nbParseStringName++; 3753 #endif 3754 3755 c = CUR_SCHAR(cur, l); 3756 if (!xmlIsNameStartChar(ctxt, c)) { 3757 return(NULL); 3758 } 3759 3760 COPY_BUF(l,buf,len,c); 3761 cur += l; 3762 c = CUR_SCHAR(cur, l); 3763 while (xmlIsNameChar(ctxt, c)) { 3764 COPY_BUF(l,buf,len,c); 3765 cur += l; 3766 c = CUR_SCHAR(cur, l); 3767 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */ 3768 /* 3769 * Okay someone managed to make a huge name, so he's ready to pay 3770 * for the processing speed. 
3771 */ 3772 xmlChar *buffer; 3773 int max = len * 2; 3774 3775 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3776 if (buffer == NULL) { 3777 xmlErrMemory(ctxt, NULL); 3778 return(NULL); 3779 } 3780 memcpy(buffer, buf, len); 3781 while (xmlIsNameChar(ctxt, c)) { 3782 if (len + 10 > max) { 3783 xmlChar *tmp; 3784 3785 if ((len > XML_MAX_NAME_LENGTH) && 3786 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3787 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3788 xmlFree(buffer); 3789 return(NULL); 3790 } 3791 max *= 2; 3792 tmp = (xmlChar *) xmlRealloc(buffer, 3793 max * sizeof(xmlChar)); 3794 if (tmp == NULL) { 3795 xmlErrMemory(ctxt, NULL); 3796 xmlFree(buffer); 3797 return(NULL); 3798 } 3799 buffer = tmp; 3800 } 3801 COPY_BUF(l,buffer,len,c); 3802 cur += l; 3803 c = CUR_SCHAR(cur, l); 3804 } 3805 buffer[len] = 0; 3806 *str = cur; 3807 return(buffer); 3808 } 3809 } 3810 if ((len > XML_MAX_NAME_LENGTH) && 3811 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3812 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3813 return(NULL); 3814 } 3815 *str = cur; 3816 return(xmlStrndup(buf, len)); 3817 } 3818 3819 /** 3820 * xmlParseNmtoken: 3821 * @ctxt: an XML parser context 3822 * 3823 * parse an XML Nmtoken. 
3824 * 3825 * [7] Nmtoken ::= (NameChar)+ 3826 * 3827 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)* 3828 * 3829 * Returns the Nmtoken parsed or NULL 3830 */ 3831 3832 xmlChar * 3833 xmlParseNmtoken(xmlParserCtxtPtr ctxt) { 3834 xmlChar buf[XML_MAX_NAMELEN + 5]; 3835 int len = 0, l; 3836 int c; 3837 int count = 0; 3838 3839 #ifdef DEBUG 3840 nbParseNmToken++; 3841 #endif 3842 3843 GROW; 3844 if (ctxt->instate == XML_PARSER_EOF) 3845 return(NULL); 3846 c = CUR_CHAR(l); 3847 3848 while (xmlIsNameChar(ctxt, c)) { 3849 if (count++ > XML_PARSER_CHUNK_SIZE) { 3850 count = 0; 3851 GROW; 3852 } 3853 COPY_BUF(l,buf,len,c); 3854 NEXTL(l); 3855 c = CUR_CHAR(l); 3856 if (c == 0) { 3857 count = 0; 3858 GROW; 3859 if (ctxt->instate == XML_PARSER_EOF) 3860 return(NULL); 3861 c = CUR_CHAR(l); 3862 } 3863 if (len >= XML_MAX_NAMELEN) { 3864 /* 3865 * Okay someone managed to make a huge token, so he's ready to pay 3866 * for the processing speed. 3867 */ 3868 xmlChar *buffer; 3869 int max = len * 2; 3870 3871 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3872 if (buffer == NULL) { 3873 xmlErrMemory(ctxt, NULL); 3874 return(NULL); 3875 } 3876 memcpy(buffer, buf, len); 3877 while (xmlIsNameChar(ctxt, c)) { 3878 if (count++ > XML_PARSER_CHUNK_SIZE) { 3879 count = 0; 3880 GROW; 3881 if (ctxt->instate == XML_PARSER_EOF) { 3882 xmlFree(buffer); 3883 return(NULL); 3884 } 3885 } 3886 if (len + 10 > max) { 3887 xmlChar *tmp; 3888 3889 if ((max > XML_MAX_NAME_LENGTH) && 3890 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3891 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken"); 3892 xmlFree(buffer); 3893 return(NULL); 3894 } 3895 max *= 2; 3896 tmp = (xmlChar *) xmlRealloc(buffer, 3897 max * sizeof(xmlChar)); 3898 if (tmp == NULL) { 3899 xmlErrMemory(ctxt, NULL); 3900 xmlFree(buffer); 3901 return(NULL); 3902 } 3903 buffer = tmp; 3904 } 3905 COPY_BUF(l,buffer,len,c); 3906 NEXTL(l); 3907 c = CUR_CHAR(l); 3908 } 3909 buffer[len] = 0; 3910 return(buffer); 3911 } 3912 } 3913 if (len == 0) 
3914 return(NULL); 3915 if ((len > XML_MAX_NAME_LENGTH) && 3916 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3917 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken"); 3918 return(NULL); 3919 } 3920 return(xmlStrndup(buf, len)); 3921 } 3922 3923 /** 3924 * xmlParseEntityValue: 3925 * @ctxt: an XML parser context 3926 * @orig: if non-NULL store a copy of the original entity value 3927 * 3928 * parse a value for ENTITY declarations 3929 * 3930 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | 3931 * "'" ([^%&'] | PEReference | Reference)* "'" 3932 * 3933 * Returns the EntityValue parsed with reference substituted or NULL 3934 */ 3935 3936 xmlChar * 3937 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) { 3938 xmlChar *buf = NULL; 3939 int len = 0; 3940 int size = XML_PARSER_BUFFER_SIZE; 3941 int c, l; 3942 xmlChar stop; 3943 xmlChar *ret = NULL; 3944 const xmlChar *cur = NULL; 3945 xmlParserInputPtr input; 3946 3947 if (RAW == '"') stop = '"'; 3948 else if (RAW == '\'') stop = '\''; 3949 else { 3950 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL); 3951 return(NULL); 3952 } 3953 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 3954 if (buf == NULL) { 3955 xmlErrMemory(ctxt, NULL); 3956 return(NULL); 3957 } 3958 3959 /* 3960 * The content of the entity definition is copied in a buffer. 3961 */ 3962 3963 ctxt->instate = XML_PARSER_ENTITY_VALUE; 3964 input = ctxt->input; 3965 GROW; 3966 if (ctxt->instate == XML_PARSER_EOF) { 3967 xmlFree(buf); 3968 return(NULL); 3969 } 3970 NEXT; 3971 c = CUR_CHAR(l); 3972 /* 3973 * NOTE: 4.4.5 Included in Literal 3974 * When a parameter entity reference appears in a literal entity 3975 * value, ... a single or double quote character in the replacement 3976 * text is always treated as a normal data character and will not 3977 * terminate the literal. 
3978 * In practice it means we stop the loop only when back at parsing 3979 * the initial entity and the quote is found 3980 */ 3981 while (((IS_CHAR(c)) && ((c != stop) || /* checked */ 3982 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) { 3983 if (len + 5 >= size) { 3984 xmlChar *tmp; 3985 3986 size *= 2; 3987 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 3988 if (tmp == NULL) { 3989 xmlErrMemory(ctxt, NULL); 3990 xmlFree(buf); 3991 return(NULL); 3992 } 3993 buf = tmp; 3994 } 3995 COPY_BUF(l,buf,len,c); 3996 NEXTL(l); 3997 /* 3998 * Pop-up of finished entities. 3999 */ 4000 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */ 4001 xmlPopInput(ctxt); 4002 4003 GROW; 4004 c = CUR_CHAR(l); 4005 if (c == 0) { 4006 GROW; 4007 c = CUR_CHAR(l); 4008 } 4009 } 4010 buf[len] = 0; 4011 if (ctxt->instate == XML_PARSER_EOF) { 4012 xmlFree(buf); 4013 return(NULL); 4014 } 4015 4016 /* 4017 * Raise problem w.r.t. '&' and '%' being used in non-entities 4018 * reference constructs. Note Charref will be handled in 4019 * xmlStringDecodeEntities() 4020 */ 4021 cur = buf; 4022 while (*cur != 0) { /* non input consuming */ 4023 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) { 4024 xmlChar *name; 4025 xmlChar tmp = *cur; 4026 4027 cur++; 4028 name = xmlParseStringName(ctxt, &cur); 4029 if ((name == NULL) || (*cur != ';')) { 4030 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR, 4031 "EntityValue: '%c' forbidden except for entities references\n", 4032 tmp); 4033 } 4034 if ((tmp == '%') && (ctxt->inSubset == 1) && 4035 (ctxt->inputNr == 1)) { 4036 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL); 4037 } 4038 if (name != NULL) 4039 xmlFree(name); 4040 if (*cur == 0) 4041 break; 4042 } 4043 cur++; 4044 } 4045 4046 /* 4047 * Then PEReference entities are substituted. 
4048 */ 4049 if (c != stop) { 4050 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL); 4051 xmlFree(buf); 4052 } else { 4053 NEXT; 4054 /* 4055 * NOTE: 4.4.7 Bypassed 4056 * When a general entity reference appears in the EntityValue in 4057 * an entity declaration, it is bypassed and left as is. 4058 * so XML_SUBSTITUTE_REF is not set here. 4059 */ 4060 ++ctxt->depth; 4061 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF, 4062 0, 0, 0); 4063 --ctxt->depth; 4064 if (orig != NULL) 4065 *orig = buf; 4066 else 4067 xmlFree(buf); 4068 } 4069 4070 return(ret); 4071 } 4072 4073 /** 4074 * xmlParseAttValueComplex: 4075 * @ctxt: an XML parser context 4076 * @len: the resulting attribute len 4077 * @normalize: wether to apply the inner normalization 4078 * 4079 * parse a value for an attribute, this is the fallback function 4080 * of xmlParseAttValue() when the attribute parsing requires handling 4081 * of non-ASCII characters, or normalization compaction. 4082 * 4083 * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 4084 */ 4085 static xmlChar * 4086 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { 4087 xmlChar limit = 0; 4088 xmlChar *buf = NULL; 4089 xmlChar *rep = NULL; 4090 size_t len = 0; 4091 size_t buf_size = 0; 4092 int c, l, in_space = 0; 4093 xmlChar *current = NULL; 4094 xmlEntityPtr ent; 4095 4096 if (NXT(0) == '"') { 4097 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 4098 limit = '"'; 4099 NEXT; 4100 } else if (NXT(0) == '\'') { 4101 limit = '\''; 4102 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 4103 NEXT; 4104 } else { 4105 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); 4106 return(NULL); 4107 } 4108 4109 /* 4110 * allocate a translation buffer. 4111 */ 4112 buf_size = XML_PARSER_BUFFER_SIZE; 4113 buf = (xmlChar *) xmlMallocAtomic(buf_size); 4114 if (buf == NULL) goto mem_error; 4115 4116 /* 4117 * OK loop until we reach one of the ending char or a size limit. 
4118 */ 4119 c = CUR_CHAR(l); 4120 while (((NXT(0) != limit) && /* checked */ 4121 (IS_CHAR(c)) && (c != '<')) && 4122 (ctxt->instate != XML_PARSER_EOF)) { 4123 /* 4124 * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE 4125 * special option is given 4126 */ 4127 if ((len > XML_MAX_TEXT_LENGTH) && 4128 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 4129 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 4130 "AttValue length too long\n"); 4131 goto mem_error; 4132 } 4133 if (c == 0) break; 4134 if (c == '&') { 4135 in_space = 0; 4136 if (NXT(1) == '#') { 4137 int val = xmlParseCharRef(ctxt); 4138 4139 if (val == '&') { 4140 if (ctxt->replaceEntities) { 4141 if (len + 10 > buf_size) { 4142 growBuffer(buf, 10); 4143 } 4144 buf[len++] = '&'; 4145 } else { 4146 /* 4147 * The reparsing will be done in xmlStringGetNodeList() 4148 * called by the attribute() function in SAX.c 4149 */ 4150 if (len + 10 > buf_size) { 4151 growBuffer(buf, 10); 4152 } 4153 buf[len++] = '&'; 4154 buf[len++] = '#'; 4155 buf[len++] = '3'; 4156 buf[len++] = '8'; 4157 buf[len++] = ';'; 4158 } 4159 } else if (val != 0) { 4160 if (len + 10 > buf_size) { 4161 growBuffer(buf, 10); 4162 } 4163 len += xmlCopyChar(0, &buf[len], val); 4164 } 4165 } else { 4166 ent = xmlParseEntityRef(ctxt); 4167 ctxt->nbentities++; 4168 if (ent != NULL) 4169 ctxt->nbentities += ent->owner; 4170 if ((ent != NULL) && 4171 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 4172 if (len + 10 > buf_size) { 4173 growBuffer(buf, 10); 4174 } 4175 if ((ctxt->replaceEntities == 0) && 4176 (ent->content[0] == '&')) { 4177 buf[len++] = '&'; 4178 buf[len++] = '#'; 4179 buf[len++] = '3'; 4180 buf[len++] = '8'; 4181 buf[len++] = ';'; 4182 } else { 4183 buf[len++] = ent->content[0]; 4184 } 4185 } else if ((ent != NULL) && 4186 (ctxt->replaceEntities != 0)) { 4187 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) { 4188 ++ctxt->depth; 4189 rep = xmlStringDecodeEntities(ctxt, ent->content, 4190 XML_SUBSTITUTE_REF, 4191 0, 0, 
0); 4192 --ctxt->depth; 4193 if (rep != NULL) { 4194 current = rep; 4195 while (*current != 0) { /* non input consuming */ 4196 if ((*current == 0xD) || (*current == 0xA) || 4197 (*current == 0x9)) { 4198 buf[len++] = 0x20; 4199 current++; 4200 } else 4201 buf[len++] = *current++; 4202 if (len + 10 > buf_size) { 4203 growBuffer(buf, 10); 4204 } 4205 } 4206 xmlFree(rep); 4207 rep = NULL; 4208 } 4209 } else { 4210 if (len + 10 > buf_size) { 4211 growBuffer(buf, 10); 4212 } 4213 if (ent->content != NULL) 4214 buf[len++] = ent->content[0]; 4215 } 4216 } else if (ent != NULL) { 4217 int i = xmlStrlen(ent->name); 4218 const xmlChar *cur = ent->name; 4219 4220 /* 4221 * This may look absurd but is needed to detect 4222 * entities problems 4223 */ 4224 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 4225 (ent->content != NULL) && (ent->checked == 0)) { 4226 unsigned long oldnbent = ctxt->nbentities; 4227 4228 ++ctxt->depth; 4229 rep = xmlStringDecodeEntities(ctxt, ent->content, 4230 XML_SUBSTITUTE_REF, 0, 0, 0); 4231 --ctxt->depth; 4232 4233 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2; 4234 if (rep != NULL) { 4235 if (xmlStrchr(rep, '<')) 4236 ent->checked |= 1; 4237 xmlFree(rep); 4238 rep = NULL; 4239 } 4240 } 4241 4242 /* 4243 * Just output the reference 4244 */ 4245 buf[len++] = '&'; 4246 while (len + i + 10 > buf_size) { 4247 growBuffer(buf, i + 10); 4248 } 4249 for (;i > 0;i--) 4250 buf[len++] = *cur++; 4251 buf[len++] = ';'; 4252 } 4253 } 4254 } else { 4255 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) { 4256 if ((len != 0) || (!normalize)) { 4257 if ((!normalize) || (!in_space)) { 4258 COPY_BUF(l,buf,len,0x20); 4259 while (len + 10 > buf_size) { 4260 growBuffer(buf, 10); 4261 } 4262 } 4263 in_space = 1; 4264 } 4265 } else { 4266 in_space = 0; 4267 COPY_BUF(l,buf,len,c); 4268 if (len + 10 > buf_size) { 4269 growBuffer(buf, 10); 4270 } 4271 } 4272 NEXTL(l); 4273 } 4274 GROW; 4275 c = CUR_CHAR(l); 4276 } 4277 if (ctxt->instate == 
XML_PARSER_EOF) 4278 goto error; 4279 4280 if ((in_space) && (normalize)) { 4281 while ((len > 0) && (buf[len - 1] == 0x20)) len--; 4282 } 4283 buf[len] = 0; 4284 if (RAW == '<') { 4285 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL); 4286 } else if (RAW != limit) { 4287 if ((c != 0) && (!IS_CHAR(c))) { 4288 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, 4289 "invalid character in attribute value\n"); 4290 } else { 4291 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 4292 "AttValue: ' expected\n"); 4293 } 4294 } else 4295 NEXT; 4296 4297 /* 4298 * There we potentially risk an overflow, don't allow attribute value of 4299 * length more than INT_MAX it is a very reasonnable assumption ! 4300 */ 4301 if (len >= INT_MAX) { 4302 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 4303 "AttValue length too long\n"); 4304 goto mem_error; 4305 } 4306 4307 if (attlen != NULL) *attlen = (int) len; 4308 return(buf); 4309 4310 mem_error: 4311 xmlErrMemory(ctxt, NULL); 4312 error: 4313 if (buf != NULL) 4314 xmlFree(buf); 4315 if (rep != NULL) 4316 xmlFree(rep); 4317 return(NULL); 4318 } 4319 4320 /** 4321 * xmlParseAttValue: 4322 * @ctxt: an XML parser context 4323 * 4324 * parse a value for an attribute 4325 * Note: the parser won't do substitution of entities here, this 4326 * will be handled later in xmlStringGetNodeList 4327 * 4328 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | 4329 * "'" ([^<&'] | Reference)* "'" 4330 * 4331 * 3.3.3 Attribute-Value Normalization: 4332 * Before the value of an attribute is passed to the application or 4333 * checked for validity, the XML processor must normalize it as follows: 4334 * - a character reference is processed by appending the referenced 4335 * character to the attribute value 4336 * - an entity reference is processed by recursively processing the 4337 * replacement text of the entity 4338 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 4339 * appending #x20 to the normalized value, except that only a 
single 4340 * #x20 is appended for a "#xD#xA" sequence that is part of an external 4341 * parsed entity or the literal entity value of an internal parsed entity 4342 * - other characters are processed by appending them to the normalized value 4343 * If the declared value is not CDATA, then the XML processor must further 4344 * process the normalized attribute value by discarding any leading and 4345 * trailing space (#x20) characters, and by replacing sequences of space 4346 * (#x20) characters by a single space (#x20) character. 4347 * All attributes for which no declaration has been read should be treated 4348 * by a non-validating parser as if declared CDATA. 4349 * 4350 * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 4351 */ 4352 4353 4354 xmlChar * 4355 xmlParseAttValue(xmlParserCtxtPtr ctxt) { 4356 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL); 4357 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0)); 4358 } 4359 4360 /** 4361 * xmlParseSystemLiteral: 4362 * @ctxt: an XML parser context 4363 * 4364 * parse an XML Literal 4365 * 4366 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") 4367 * 4368 * Returns the SystemLiteral parsed or NULL 4369 */ 4370 4371 xmlChar * 4372 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { 4373 xmlChar *buf = NULL; 4374 int len = 0; 4375 int size = XML_PARSER_BUFFER_SIZE; 4376 int cur, l; 4377 xmlChar stop; 4378 int state = ctxt->instate; 4379 int count = 0; 4380 4381 SHRINK; 4382 if (RAW == '"') { 4383 NEXT; 4384 stop = '"'; 4385 } else if (RAW == '\'') { 4386 NEXT; 4387 stop = '\''; 4388 } else { 4389 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); 4390 return(NULL); 4391 } 4392 4393 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4394 if (buf == NULL) { 4395 xmlErrMemory(ctxt, NULL); 4396 return(NULL); 4397 } 4398 ctxt->instate = XML_PARSER_SYSTEM_LITERAL; 4399 cur = CUR_CHAR(l); 4400 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */ 4401 if (len + 5 >= 
size) { 4402 xmlChar *tmp; 4403 4404 if ((size > XML_MAX_NAME_LENGTH) && 4405 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 4406 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral"); 4407 xmlFree(buf); 4408 ctxt->instate = (xmlParserInputState) state; 4409 return(NULL); 4410 } 4411 size *= 2; 4412 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 4413 if (tmp == NULL) { 4414 xmlFree(buf); 4415 xmlErrMemory(ctxt, NULL); 4416 ctxt->instate = (xmlParserInputState) state; 4417 return(NULL); 4418 } 4419 buf = tmp; 4420 } 4421 count++; 4422 if (count > 50) { 4423 GROW; 4424 count = 0; 4425 if (ctxt->instate == XML_PARSER_EOF) { 4426 xmlFree(buf); 4427 return(NULL); 4428 } 4429 } 4430 COPY_BUF(l,buf,len,cur); 4431 NEXTL(l); 4432 cur = CUR_CHAR(l); 4433 if (cur == 0) { 4434 GROW; 4435 SHRINK; 4436 cur = CUR_CHAR(l); 4437 } 4438 } 4439 buf[len] = 0; 4440 ctxt->instate = (xmlParserInputState) state; 4441 if (!IS_CHAR(cur)) { 4442 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); 4443 } else { 4444 NEXT; 4445 } 4446 return(buf); 4447 } 4448 4449 /** 4450 * xmlParsePubidLiteral: 4451 * @ctxt: an XML parser context 4452 * 4453 * parse an XML public literal 4454 * 4455 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" 4456 * 4457 * Returns the PubidLiteral parsed or NULL. 
4458 */ 4459 4460 xmlChar * 4461 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { 4462 xmlChar *buf = NULL; 4463 int len = 0; 4464 int size = XML_PARSER_BUFFER_SIZE; 4465 xmlChar cur; 4466 xmlChar stop; 4467 int count = 0; 4468 xmlParserInputState oldstate = ctxt->instate; 4469 4470 SHRINK; 4471 if (RAW == '"') { 4472 NEXT; 4473 stop = '"'; 4474 } else if (RAW == '\'') { 4475 NEXT; 4476 stop = '\''; 4477 } else { 4478 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); 4479 return(NULL); 4480 } 4481 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4482 if (buf == NULL) { 4483 xmlErrMemory(ctxt, NULL); 4484 return(NULL); 4485 } 4486 ctxt->instate = XML_PARSER_PUBLIC_LITERAL; 4487 cur = CUR; 4488 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */ 4489 if (len + 1 >= size) { 4490 xmlChar *tmp; 4491 4492 if ((size > XML_MAX_NAME_LENGTH) && 4493 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 4494 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID"); 4495 xmlFree(buf); 4496 return(NULL); 4497 } 4498 size *= 2; 4499 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 4500 if (tmp == NULL) { 4501 xmlErrMemory(ctxt, NULL); 4502 xmlFree(buf); 4503 return(NULL); 4504 } 4505 buf = tmp; 4506 } 4507 buf[len++] = cur; 4508 count++; 4509 if (count > 50) { 4510 GROW; 4511 count = 0; 4512 if (ctxt->instate == XML_PARSER_EOF) { 4513 xmlFree(buf); 4514 return(NULL); 4515 } 4516 } 4517 NEXT; 4518 cur = CUR; 4519 if (cur == 0) { 4520 GROW; 4521 SHRINK; 4522 cur = CUR; 4523 } 4524 } 4525 buf[len] = 0; 4526 if (cur != stop) { 4527 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); 4528 } else { 4529 NEXT; 4530 } 4531 ctxt->instate = oldstate; 4532 return(buf); 4533 } 4534 4535 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata); 4536 4537 /* 4538 * used for the test in the inner loop of the char data testing 4539 */ 4540 static const unsigned char test_char_data[256] = { 4541 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4542 0x00, 0x09, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */ 4543 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4544 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4545 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */ 4546 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 4547 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 4548 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */ 4549 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 4550 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 4551 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 4552 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */ 4553 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 4554 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 4555 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 4556 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, 4557 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */ 4558 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4559 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4560 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4561 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4562 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4563 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4564 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4565 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4566 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4567 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4568 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4569 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4570 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4571 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4572 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 4573 }; 4574 4575 /** 4576 * xmlParseCharData: 4577 * @ctxt: an XML parser context 4578 * @cdata: int indicating whether we are within a CDATA section 4579 * 4580 * parse a CharData section. 4581 * if we are within a CDATA section ']]>' marks an end of section. 
 *
 * The right angle bracket (>) may be represented using the string "&gt;",
 * and must, for compatibility, be escaped using "&gt;" or a character
 * reference when it appears in the string "]]>" in content, when that
 * string is not marking the end of a CDATA section.
 *
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 *
 * When @cdata is zero a fast path scans the raw input bytes and hands
 * them to the SAX handlers straight from the input buffer. Anything the
 * fast path does not handle, and every call with @cdata non-zero, ends
 * in xmlParseCharDataComplex().
 */

void
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
    const xmlChar *in;
    int nbchar = 0;
    /*
     * Position snapshot: refreshed after each SAX delivery in the fast
     * path and written back to ctxt->input before the slow path runs.
     */
    int line = ctxt->input->line;
    int col = ctxt->input->col;
    int ccol;

    SHRINK;
    GROW;
    /*
     * Accelerated common case where input don't need to be
     * modified before passing it to the handler.
     *
     * NOTE(review): unlike xmlParseCharDataComplex(), the handlers are
     * invoked here without testing ctxt->disableSAX - confirm whether
     * that difference is intended.
     */
    if (!cdata) {
        in = ctxt->input->cur;
        do {
get_more_space:
            /* skip runs of spaces and line feeds, tracking col/line */
            while (*in == 0x20) { in++; ctxt->input->col++; }
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more_space;
            }
            if (*in == '<') {
                /*
                 * Only spaces and line feeds were seen since the last
                 * delivery: report them and stop at the markup.
                 */
                nbchar = in - ctxt->input->cur;
                if (nbchar > 0) {
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if ((ctxt->sax != NULL) &&
                        (ctxt->sax->ignorableWhitespace !=
                         ctxt->sax->characters)) {
                        if (areBlanks(ctxt, tmp, nbchar, 1)) {
                            if (ctxt->sax->ignorableWhitespace != NULL)
                                ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                               tmp, nbchar);
                        } else {
                            if (ctxt->sax->characters != NULL)
                                ctxt->sax->characters(ctxt->userData,
                                                      tmp, nbchar);
                            if (*ctxt->space == -1)
                                *ctxt->space = -2;
                        }
                    } else if ((ctxt->sax != NULL) &&
                               (ctxt->sax->characters != NULL)) {
                        ctxt->sax->characters(ctxt->userData,
                                              tmp, nbchar);
                    }
                }
                return;
            }

get_more:
            /* skip every byte flagged non-zero in test_char_data[] */
            ccol = ctxt->input->col;
            while (test_char_data[*in]) {
                in++;
                ccol++;
            }
            ctxt->input->col = ccol;
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more;
            }
            if (*in == ']') {
                /* "]]>" is an error here; a lone ']' is plain data */
                if ((in[1] == ']') && (in[2] == '>')) {
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
                    ctxt->input->cur = in;
                    return;
                }
                in++;
                ctxt->input->col++;
                goto get_more;
            }
            /* deliver the bytes between input->cur and the scan pointer */
            nbchar = in - ctxt->input->cur;
            if (nbchar > 0) {
                if ((ctxt->sax != NULL) &&
                    (ctxt->sax->ignorableWhitespace !=
                     ctxt->sax->characters) &&
                    (IS_BLANK_CH(*ctxt->input->cur))) {
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if (areBlanks(ctxt, tmp, nbchar, 0)) {
                        if (ctxt->sax->ignorableWhitespace != NULL)
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                           tmp, nbchar);
                    } else {
                        if (ctxt->sax->characters != NULL)
                            ctxt->sax->characters(ctxt->userData,
                                                  tmp, nbchar);
                        if (*ctxt->space == -1)
                            *ctxt->space = -2;
                    }
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                } else if (ctxt->sax != NULL) {
                    if (ctxt->sax->characters != NULL)
                        ctxt->sax->characters(ctxt->userData,
                                              ctxt->input->cur, nbchar);
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                }
                /* something really bad happened in the SAX callback */
                if (ctxt->instate != XML_PARSER_CONTENT)
                    return;
            }
            ctxt->input->cur = in;
            if (*in == 0xD) {
                /*
                 * CR LF: input->cur is left on the LF while the scan
                 * pointer moves past it; a lone CR is not consumed.
                 */
                in++;
                if (*in == 0xA) {
                    ctxt->input->cur = in;
                    in++;
                    ctxt->input->line++; ctxt->input->col = 1;
                    continue; /* while */
                }
                in--;
            }
            if (*in == '<') {
                return;
            }
            if (*in == '&') {
                return;
            }
            SHRINK;
            GROW;
            if (ctxt->instate == XML_PARSER_EOF)
                return;
            in = ctxt->input->cur;
        } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
        nbchar = 0;
    }
    /* put back the position recorded at the last delivery */
    ctxt->input->line = line;
    ctxt->input->col = col;
    xmlParseCharDataComplex(ctxt, cdata);
}

/**
 * xmlParseCharDataComplex:
4735 * @ctxt: an XML parser context 4736 * @cdata: int indicating whether we are within a CDATA section 4737 * 4738 * parse a CharData section.this is the fallback function 4739 * of xmlParseCharData() when the parsing requires handling 4740 * of non-ASCII characters. 4741 */ 4742 static void 4743 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) { 4744 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5]; 4745 int nbchar = 0; 4746 int cur, l; 4747 int count = 0; 4748 4749 SHRINK; 4750 GROW; 4751 cur = CUR_CHAR(l); 4752 while ((cur != '<') && /* checked */ 4753 (cur != '&') && 4754 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ { 4755 if ((cur == ']') && (NXT(1) == ']') && 4756 (NXT(2) == '>')) { 4757 if (cdata) break; 4758 else { 4759 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL); 4760 } 4761 } 4762 COPY_BUF(l,buf,nbchar,cur); 4763 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) { 4764 buf[nbchar] = 0; 4765 4766 /* 4767 * OK the segment is to be consumed as chars. 4768 */ 4769 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 4770 if (areBlanks(ctxt, buf, nbchar, 0)) { 4771 if (ctxt->sax->ignorableWhitespace != NULL) 4772 ctxt->sax->ignorableWhitespace(ctxt->userData, 4773 buf, nbchar); 4774 } else { 4775 if (ctxt->sax->characters != NULL) 4776 ctxt->sax->characters(ctxt->userData, buf, nbchar); 4777 if ((ctxt->sax->characters != 4778 ctxt->sax->ignorableWhitespace) && 4779 (*ctxt->space == -1)) 4780 *ctxt->space = -2; 4781 } 4782 } 4783 nbchar = 0; 4784 /* something really bad happened in the SAX callback */ 4785 if (ctxt->instate != XML_PARSER_CONTENT) 4786 return; 4787 } 4788 count++; 4789 if (count > 50) { 4790 GROW; 4791 count = 0; 4792 if (ctxt->instate == XML_PARSER_EOF) 4793 return; 4794 } 4795 NEXTL(l); 4796 cur = CUR_CHAR(l); 4797 } 4798 if (nbchar != 0) { 4799 buf[nbchar] = 0; 4800 /* 4801 * OK the segment is to be consumed as chars. 
4802 */ 4803 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 4804 if (areBlanks(ctxt, buf, nbchar, 0)) { 4805 if (ctxt->sax->ignorableWhitespace != NULL) 4806 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar); 4807 } else { 4808 if (ctxt->sax->characters != NULL) 4809 ctxt->sax->characters(ctxt->userData, buf, nbchar); 4810 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) && 4811 (*ctxt->space == -1)) 4812 *ctxt->space = -2; 4813 } 4814 } 4815 } 4816 if ((cur != 0) && (!IS_CHAR(cur))) { 4817 /* Generate the error and skip the offending character */ 4818 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4819 "PCDATA invalid Char value %d\n", 4820 cur); 4821 NEXTL(l); 4822 } 4823 } 4824 4825 /** 4826 * xmlParseExternalID: 4827 * @ctxt: an XML parser context 4828 * @publicID: a xmlChar** receiving PubidLiteral 4829 * @strict: indicate whether we should restrict parsing to only 4830 * production [75], see NOTE below 4831 * 4832 * Parse an External ID or a Public ID 4833 * 4834 * NOTE: Productions [75] and [83] interact badly since [75] can generate 4835 * 'PUBLIC' S PubidLiteral S SystemLiteral 4836 * 4837 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral 4838 * | 'PUBLIC' S PubidLiteral S SystemLiteral 4839 * 4840 * [83] PublicID ::= 'PUBLIC' S PubidLiteral 4841 * 4842 * Returns the function returns SystemLiteral and in the second 4843 * case publicID receives PubidLiteral, is strict is off 4844 * it is possible to return NULL and have publicID set. 
4845 */ 4846 4847 xmlChar * 4848 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) { 4849 xmlChar *URI = NULL; 4850 4851 SHRINK; 4852 4853 *publicID = NULL; 4854 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) { 4855 SKIP(6); 4856 if (!IS_BLANK_CH(CUR)) { 4857 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4858 "Space required after 'SYSTEM'\n"); 4859 } 4860 SKIP_BLANKS; 4861 URI = xmlParseSystemLiteral(ctxt); 4862 if (URI == NULL) { 4863 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 4864 } 4865 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) { 4866 SKIP(6); 4867 if (!IS_BLANK_CH(CUR)) { 4868 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4869 "Space required after 'PUBLIC'\n"); 4870 } 4871 SKIP_BLANKS; 4872 *publicID = xmlParsePubidLiteral(ctxt); 4873 if (*publicID == NULL) { 4874 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL); 4875 } 4876 if (strict) { 4877 /* 4878 * We don't handle [83] so "S SystemLiteral" is required. 4879 */ 4880 if (!IS_BLANK_CH(CUR)) { 4881 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4882 "Space required after the Public Identifier\n"); 4883 } 4884 } else { 4885 /* 4886 * We handle [83] so we return immediately, if 4887 * "S SystemLiteral" is not detected. From a purely parsing 4888 * point of view that's a nice mess. 4889 */ 4890 const xmlChar *ptr; 4891 GROW; 4892 4893 ptr = CUR_PTR; 4894 if (!IS_BLANK_CH(*ptr)) return(NULL); 4895 4896 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */ 4897 if ((*ptr != '\'') && (*ptr != '"')) return(NULL); 4898 } 4899 SKIP_BLANKS; 4900 URI = xmlParseSystemLiteral(ctxt); 4901 if (URI == NULL) { 4902 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 4903 } 4904 } 4905 return(URI); 4906 } 4907 4908 /** 4909 * xmlParseCommentComplex: 4910 * @ctxt: an XML parser context 4911 * @buf: the already parsed part of the buffer 4912 * @len: number of bytes filles in the buffer 4913 * @size: allocated size of the buffer 4914 * 4915 * Skip an XML (SGML) comment <!-- .... 
--> 4916 * The spec says that "For compatibility, the string "--" (double-hyphen) 4917 * must not occur within comments. " 4918 * This is the slow routine in case the accelerator for ascii didn't work 4919 * 4920 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 4921 */ 4922 static void 4923 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, 4924 size_t len, size_t size) { 4925 int q, ql; 4926 int r, rl; 4927 int cur, l; 4928 size_t count = 0; 4929 int inputid; 4930 4931 inputid = ctxt->input->id; 4932 4933 if (buf == NULL) { 4934 len = 0; 4935 size = XML_PARSER_BUFFER_SIZE; 4936 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4937 if (buf == NULL) { 4938 xmlErrMemory(ctxt, NULL); 4939 return; 4940 } 4941 } 4942 GROW; /* Assure there's enough input data */ 4943 q = CUR_CHAR(ql); 4944 if (q == 0) 4945 goto not_terminated; 4946 if (!IS_CHAR(q)) { 4947 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4948 "xmlParseComment: invalid xmlChar value %d\n", 4949 q); 4950 xmlFree (buf); 4951 return; 4952 } 4953 NEXTL(ql); 4954 r = CUR_CHAR(rl); 4955 if (r == 0) 4956 goto not_terminated; 4957 if (!IS_CHAR(r)) { 4958 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4959 "xmlParseComment: invalid xmlChar value %d\n", 4960 q); 4961 xmlFree (buf); 4962 return; 4963 } 4964 NEXTL(rl); 4965 cur = CUR_CHAR(l); 4966 if (cur == 0) 4967 goto not_terminated; 4968 while (IS_CHAR(cur) && /* checked */ 4969 ((cur != '>') || 4970 (r != '-') || (q != '-'))) { 4971 if ((r == '-') && (q == '-')) { 4972 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL); 4973 } 4974 if ((len > XML_MAX_TEXT_LENGTH) && 4975 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 4976 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4977 "Comment too big found", NULL); 4978 xmlFree (buf); 4979 return; 4980 } 4981 if (len + 5 >= size) { 4982 xmlChar *new_buf; 4983 size_t new_size; 4984 4985 new_size = size * 2; 4986 new_buf = (xmlChar *) xmlRealloc(buf, new_size); 4987 if (new_buf == NULL) { 
4988 xmlFree (buf); 4989 xmlErrMemory(ctxt, NULL); 4990 return; 4991 } 4992 buf = new_buf; 4993 size = new_size; 4994 } 4995 COPY_BUF(ql,buf,len,q); 4996 q = r; 4997 ql = rl; 4998 r = cur; 4999 rl = l; 5000 5001 count++; 5002 if (count > 50) { 5003 GROW; 5004 count = 0; 5005 if (ctxt->instate == XML_PARSER_EOF) { 5006 xmlFree(buf); 5007 return; 5008 } 5009 } 5010 NEXTL(l); 5011 cur = CUR_CHAR(l); 5012 if (cur == 0) { 5013 SHRINK; 5014 GROW; 5015 cur = CUR_CHAR(l); 5016 } 5017 } 5018 buf[len] = 0; 5019 if (cur == 0) { 5020 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 5021 "Comment not terminated \n<!--%.50s\n", buf); 5022 } else if (!IS_CHAR(cur)) { 5023 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 5024 "xmlParseComment: invalid xmlChar value %d\n", 5025 cur); 5026 } else { 5027 if (inputid != ctxt->input->id) { 5028 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5029 "Comment doesn't start and stop in the same entity\n"); 5030 } 5031 NEXT; 5032 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 5033 (!ctxt->disableSAX)) 5034 ctxt->sax->comment(ctxt->userData, buf); 5035 } 5036 xmlFree(buf); 5037 return; 5038 not_terminated: 5039 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 5040 "Comment not terminated\n", NULL); 5041 xmlFree(buf); 5042 return; 5043 } 5044 5045 /** 5046 * xmlParseComment: 5047 * @ctxt: an XML parser context 5048 * 5049 * Skip an XML (SGML) comment <!-- .... --> 5050 * The spec says that "For compatibility, the string "--" (double-hyphen) 5051 * must not occur within comments. " 5052 * 5053 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 5054 */ 5055 void 5056 xmlParseComment(xmlParserCtxtPtr ctxt) { 5057 xmlChar *buf = NULL; 5058 size_t size = XML_PARSER_BUFFER_SIZE; 5059 size_t len = 0; 5060 xmlParserInputState state; 5061 const xmlChar *in; 5062 size_t nbchar = 0; 5063 int ccol; 5064 int inputid; 5065 5066 /* 5067 * Check that there is a comment right here. 
5068 */ 5069 if ((RAW != '<') || (NXT(1) != '!') || 5070 (NXT(2) != '-') || (NXT(3) != '-')) return; 5071 state = ctxt->instate; 5072 ctxt->instate = XML_PARSER_COMMENT; 5073 inputid = ctxt->input->id; 5074 SKIP(4); 5075 SHRINK; 5076 GROW; 5077 5078 /* 5079 * Accelerated common case where input don't need to be 5080 * modified before passing it to the handler. 5081 */ 5082 in = ctxt->input->cur; 5083 do { 5084 if (*in == 0xA) { 5085 do { 5086 ctxt->input->line++; ctxt->input->col = 1; 5087 in++; 5088 } while (*in == 0xA); 5089 } 5090 get_more: 5091 ccol = ctxt->input->col; 5092 while (((*in > '-') && (*in <= 0x7F)) || 5093 ((*in >= 0x20) && (*in < '-')) || 5094 (*in == 0x09)) { 5095 in++; 5096 ccol++; 5097 } 5098 ctxt->input->col = ccol; 5099 if (*in == 0xA) { 5100 do { 5101 ctxt->input->line++; ctxt->input->col = 1; 5102 in++; 5103 } while (*in == 0xA); 5104 goto get_more; 5105 } 5106 nbchar = in - ctxt->input->cur; 5107 /* 5108 * save current set of data 5109 */ 5110 if (nbchar > 0) { 5111 if ((ctxt->sax != NULL) && 5112 (ctxt->sax->comment != NULL)) { 5113 if (buf == NULL) { 5114 if ((*in == '-') && (in[1] == '-')) 5115 size = nbchar + 1; 5116 else 5117 size = XML_PARSER_BUFFER_SIZE + nbchar; 5118 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 5119 if (buf == NULL) { 5120 xmlErrMemory(ctxt, NULL); 5121 ctxt->instate = state; 5122 return; 5123 } 5124 len = 0; 5125 } else if (len + nbchar + 1 >= size) { 5126 xmlChar *new_buf; 5127 size += len + nbchar + XML_PARSER_BUFFER_SIZE; 5128 new_buf = (xmlChar *) xmlRealloc(buf, 5129 size * sizeof(xmlChar)); 5130 if (new_buf == NULL) { 5131 xmlFree (buf); 5132 xmlErrMemory(ctxt, NULL); 5133 ctxt->instate = state; 5134 return; 5135 } 5136 buf = new_buf; 5137 } 5138 memcpy(&buf[len], ctxt->input->cur, nbchar); 5139 len += nbchar; 5140 buf[len] = 0; 5141 } 5142 } 5143 if ((len > XML_MAX_TEXT_LENGTH) && 5144 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 5145 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 5146 
"Comment too big found", NULL); 5147 if (buf != NULL) 5148 xmlFree(buf); 5149 return; 5150 } 5151 ctxt->input->cur = in; 5152 if (*in == 0xA) { 5153 in++; 5154 ctxt->input->line++; ctxt->input->col = 1; 5155 } 5156 if (*in == 0xD) { 5157 in++; 5158 if (*in == 0xA) { 5159 ctxt->input->cur = in; 5160 in++; 5161 ctxt->input->line++; ctxt->input->col = 1; 5162 continue; /* while */ 5163 } 5164 in--; 5165 } 5166 SHRINK; 5167 GROW; 5168 if (ctxt->instate == XML_PARSER_EOF) { 5169 if (buf != NULL) 5170 xmlFree(buf); 5171 return; 5172 } 5173 in = ctxt->input->cur; 5174 if (*in == '-') { 5175 if (in[1] == '-') { 5176 if (in[2] == '>') { 5177 if (ctxt->input->id != inputid) { 5178 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5179 "comment doesn't start and stop in the same entity\n"); 5180 } 5181 SKIP(3); 5182 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 5183 (!ctxt->disableSAX)) { 5184 if (buf != NULL) 5185 ctxt->sax->comment(ctxt->userData, buf); 5186 else 5187 ctxt->sax->comment(ctxt->userData, BAD_CAST ""); 5188 } 5189 if (buf != NULL) 5190 xmlFree(buf); 5191 if (ctxt->instate != XML_PARSER_EOF) 5192 ctxt->instate = state; 5193 return; 5194 } 5195 if (buf != NULL) { 5196 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, 5197 "Double hyphen within comment: " 5198 "<!--%.50s\n", 5199 buf); 5200 } else 5201 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, 5202 "Double hyphen within comment\n", NULL); 5203 /* Check if xmlStopParser() was called in xmlStructuredErrorFunc(). 
*/ 5204 if (ctxt->instate == XML_PARSER_EOF) { 5205 if (buf != NULL) 5206 xmlFree(buf); 5207 return; 5208 } 5209 in++; 5210 ctxt->input->col++; 5211 } 5212 in++; 5213 ctxt->input->col++; 5214 goto get_more; 5215 } 5216 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09)); 5217 xmlParseCommentComplex(ctxt, buf, len, size); 5218 if (ctxt->instate != XML_PARSER_EOF) 5219 ctxt->instate = state; 5220 return; 5221 } 5222 5223 5224 /** 5225 * xmlParsePITarget: 5226 * @ctxt: an XML parser context 5227 * 5228 * parse the name of a PI 5229 * 5230 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) 5231 * 5232 * Returns the PITarget name or NULL 5233 */ 5234 5235 const xmlChar * 5236 xmlParsePITarget(xmlParserCtxtPtr ctxt) { 5237 const xmlChar *name; 5238 5239 name = xmlParseName(ctxt); 5240 if ((name != NULL) && 5241 ((name[0] == 'x') || (name[0] == 'X')) && 5242 ((name[1] == 'm') || (name[1] == 'M')) && 5243 ((name[2] == 'l') || (name[2] == 'L'))) { 5244 int i; 5245 if ((name[0] == 'x') && (name[1] == 'm') && 5246 (name[2] == 'l') && (name[3] == 0)) { 5247 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME, 5248 "XML declaration allowed only at the start of the document\n"); 5249 return(name); 5250 } else if (name[3] == 0) { 5251 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL); 5252 return(name); 5253 } 5254 for (i = 0;;i++) { 5255 if (xmlW3CPIs[i] == NULL) break; 5256 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i])) 5257 return(name); 5258 } 5259 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME, 5260 "xmlParsePITarget: invalid name prefix 'xml'\n", 5261 NULL, NULL); 5262 } 5263 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) { 5264 xmlNsErr(ctxt, XML_NS_ERR_COLON, 5265 "colons are forbidden from PI names '%s'\n", name, NULL, NULL); 5266 } 5267 return(name); 5268 } 5269 5270 #ifdef LIBXML_CATALOG_ENABLED 5271 /** 5272 * xmlParseCatalogPI: 5273 * @ctxt: an XML parser context 5274 * @catalog: the PI value string 5275 * 5276 * parse an XML Catalog 
Processing Instruction. 5277 * 5278 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?> 5279 * 5280 * Occurs only if allowed by the user and if happening in the Misc 5281 * part of the document before any doctype informations 5282 * This will add the given catalog to the parsing context in order 5283 * to be used if there is a resolution need further down in the document 5284 */ 5285 5286 static void 5287 xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) { 5288 xmlChar *URL = NULL; 5289 const xmlChar *tmp, *base; 5290 xmlChar marker; 5291 5292 tmp = catalog; 5293 while (IS_BLANK_CH(*tmp)) tmp++; 5294 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7)) 5295 goto error; 5296 tmp += 7; 5297 while (IS_BLANK_CH(*tmp)) tmp++; 5298 if (*tmp != '=') { 5299 return; 5300 } 5301 tmp++; 5302 while (IS_BLANK_CH(*tmp)) tmp++; 5303 marker = *tmp; 5304 if ((marker != '\'') && (marker != '"')) 5305 goto error; 5306 tmp++; 5307 base = tmp; 5308 while ((*tmp != 0) && (*tmp != marker)) tmp++; 5309 if (*tmp == 0) 5310 goto error; 5311 URL = xmlStrndup(base, tmp - base); 5312 tmp++; 5313 while (IS_BLANK_CH(*tmp)) tmp++; 5314 if (*tmp != 0) 5315 goto error; 5316 5317 if (URL != NULL) { 5318 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL); 5319 xmlFree(URL); 5320 } 5321 return; 5322 5323 error: 5324 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI, 5325 "Catalog PI syntax error: %s\n", 5326 catalog, NULL); 5327 if (URL != NULL) 5328 xmlFree(URL); 5329 } 5330 #endif 5331 5332 /** 5333 * xmlParsePI: 5334 * @ctxt: an XML parser context 5335 * 5336 * parse an XML Processing Instruction. 5337 * 5338 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' 5339 * 5340 * The processing is transfered to SAX once parsed. 
5341 */ 5342 5343 void 5344 xmlParsePI(xmlParserCtxtPtr ctxt) { 5345 xmlChar *buf = NULL; 5346 size_t len = 0; 5347 size_t size = XML_PARSER_BUFFER_SIZE; 5348 int cur, l; 5349 const xmlChar *target; 5350 xmlParserInputState state; 5351 int count = 0; 5352 5353 if ((RAW == '<') && (NXT(1) == '?')) { 5354 xmlParserInputPtr input = ctxt->input; 5355 state = ctxt->instate; 5356 ctxt->instate = XML_PARSER_PI; 5357 /* 5358 * this is a Processing Instruction. 5359 */ 5360 SKIP(2); 5361 SHRINK; 5362 5363 /* 5364 * Parse the target name and check for special support like 5365 * namespace. 5366 */ 5367 target = xmlParsePITarget(ctxt); 5368 if (target != NULL) { 5369 if ((RAW == '?') && (NXT(1) == '>')) { 5370 if (input != ctxt->input) { 5371 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5372 "PI declaration doesn't start and stop in the same entity\n"); 5373 } 5374 SKIP(2); 5375 5376 /* 5377 * SAX: PI detected. 5378 */ 5379 if ((ctxt->sax) && (!ctxt->disableSAX) && 5380 (ctxt->sax->processingInstruction != NULL)) 5381 ctxt->sax->processingInstruction(ctxt->userData, 5382 target, NULL); 5383 if (ctxt->instate != XML_PARSER_EOF) 5384 ctxt->instate = state; 5385 return; 5386 } 5387 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 5388 if (buf == NULL) { 5389 xmlErrMemory(ctxt, NULL); 5390 ctxt->instate = state; 5391 return; 5392 } 5393 cur = CUR; 5394 if (!IS_BLANK(cur)) { 5395 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED, 5396 "ParsePI: PI %s space expected\n", target); 5397 } 5398 SKIP_BLANKS; 5399 cur = CUR_CHAR(l); 5400 while (IS_CHAR(cur) && /* checked */ 5401 ((cur != '?') || (NXT(1) != '>'))) { 5402 if (len + 5 >= size) { 5403 xmlChar *tmp; 5404 size_t new_size = size * 2; 5405 tmp = (xmlChar *) xmlRealloc(buf, new_size); 5406 if (tmp == NULL) { 5407 xmlErrMemory(ctxt, NULL); 5408 xmlFree(buf); 5409 ctxt->instate = state; 5410 return; 5411 } 5412 buf = tmp; 5413 size = new_size; 5414 } 5415 count++; 5416 if (count > 50) { 5417 GROW; 5418 if (ctxt->instate == 
XML_PARSER_EOF) { 5419 xmlFree(buf); 5420 return; 5421 } 5422 count = 0; 5423 if ((len > XML_MAX_TEXT_LENGTH) && 5424 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 5425 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, 5426 "PI %s too big found", target); 5427 xmlFree(buf); 5428 ctxt->instate = state; 5429 return; 5430 } 5431 } 5432 COPY_BUF(l,buf,len,cur); 5433 NEXTL(l); 5434 cur = CUR_CHAR(l); 5435 if (cur == 0) { 5436 SHRINK; 5437 GROW; 5438 cur = CUR_CHAR(l); 5439 } 5440 } 5441 if ((len > XML_MAX_TEXT_LENGTH) && 5442 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 5443 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, 5444 "PI %s too big found", target); 5445 xmlFree(buf); 5446 ctxt->instate = state; 5447 return; 5448 } 5449 buf[len] = 0; 5450 if (cur != '?') { 5451 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, 5452 "ParsePI: PI %s never end ...\n", target); 5453 } else { 5454 if (input != ctxt->input) { 5455 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5456 "PI declaration doesn't start and stop in the same entity\n"); 5457 } 5458 SKIP(2); 5459 5460 #ifdef LIBXML_CATALOG_ENABLED 5461 if (((state == XML_PARSER_MISC) || 5462 (state == XML_PARSER_START)) && 5463 (xmlStrEqual(target, XML_CATALOG_PI))) { 5464 xmlCatalogAllow allow = xmlCatalogGetDefaults(); 5465 if ((allow == XML_CATA_ALLOW_DOCUMENT) || 5466 (allow == XML_CATA_ALLOW_ALL)) 5467 xmlParseCatalogPI(ctxt, buf); 5468 } 5469 #endif 5470 5471 5472 /* 5473 * SAX: PI detected. 
5474 */ 5475 if ((ctxt->sax) && (!ctxt->disableSAX) && 5476 (ctxt->sax->processingInstruction != NULL)) 5477 ctxt->sax->processingInstruction(ctxt->userData, 5478 target, buf); 5479 } 5480 xmlFree(buf); 5481 } else { 5482 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL); 5483 } 5484 if (ctxt->instate != XML_PARSER_EOF) 5485 ctxt->instate = state; 5486 } 5487 } 5488 5489 /** 5490 * xmlParseNotationDecl: 5491 * @ctxt: an XML parser context 5492 * 5493 * parse a notation declaration 5494 * 5495 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' 5496 * 5497 * Hence there is actually 3 choices: 5498 * 'PUBLIC' S PubidLiteral 5499 * 'PUBLIC' S PubidLiteral S SystemLiteral 5500 * and 'SYSTEM' S SystemLiteral 5501 * 5502 * See the NOTE on xmlParseExternalID(). 5503 */ 5504 5505 void 5506 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) { 5507 const xmlChar *name; 5508 xmlChar *Pubid; 5509 xmlChar *Systemid; 5510 5511 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) { 5512 xmlParserInputPtr input = ctxt->input; 5513 SHRINK; 5514 SKIP(10); 5515 if (!IS_BLANK_CH(CUR)) { 5516 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5517 "Space required after '<!NOTATION'\n"); 5518 return; 5519 } 5520 SKIP_BLANKS; 5521 5522 name = xmlParseName(ctxt); 5523 if (name == NULL) { 5524 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 5525 return; 5526 } 5527 if (!IS_BLANK_CH(CUR)) { 5528 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5529 "Space required after the NOTATION name'\n"); 5530 return; 5531 } 5532 if (xmlStrchr(name, ':') != NULL) { 5533 xmlNsErr(ctxt, XML_NS_ERR_COLON, 5534 "colons are forbidden from notation names '%s'\n", 5535 name, NULL, NULL); 5536 } 5537 SKIP_BLANKS; 5538 5539 /* 5540 * Parse the IDs. 
5541 */ 5542 Systemid = xmlParseExternalID(ctxt, &Pubid, 0); 5543 SKIP_BLANKS; 5544 5545 if (RAW == '>') { 5546 if (input != ctxt->input) { 5547 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5548 "Notation declaration doesn't start and stop in the same entity\n"); 5549 } 5550 NEXT; 5551 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 5552 (ctxt->sax->notationDecl != NULL)) 5553 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid); 5554 } else { 5555 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 5556 } 5557 if (Systemid != NULL) xmlFree(Systemid); 5558 if (Pubid != NULL) xmlFree(Pubid); 5559 } 5560 } 5561 5562 /** 5563 * xmlParseEntityDecl: 5564 * @ctxt: an XML parser context 5565 * 5566 * parse <!ENTITY declarations 5567 * 5568 * [70] EntityDecl ::= GEDecl | PEDecl 5569 * 5570 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' 5571 * 5572 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' 5573 * 5574 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) 5575 * 5576 * [74] PEDef ::= EntityValue | ExternalID 5577 * 5578 * [76] NDataDecl ::= S 'NDATA' S Name 5579 * 5580 * [ VC: Notation Declared ] 5581 * The Name must match the declared name of a notation. 
5582 */ 5583 5584 void 5585 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { 5586 const xmlChar *name = NULL; 5587 xmlChar *value = NULL; 5588 xmlChar *URI = NULL, *literal = NULL; 5589 const xmlChar *ndata = NULL; 5590 int isParameter = 0; 5591 xmlChar *orig = NULL; 5592 int skipped; 5593 5594 /* GROW; done in the caller */ 5595 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) { 5596 xmlParserInputPtr input = ctxt->input; 5597 SHRINK; 5598 SKIP(8); 5599 skipped = SKIP_BLANKS; 5600 if (skipped == 0) { 5601 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5602 "Space required after '<!ENTITY'\n"); 5603 } 5604 5605 if (RAW == '%') { 5606 NEXT; 5607 skipped = SKIP_BLANKS; 5608 if (skipped == 0) { 5609 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5610 "Space required after '%%'\n"); 5611 } 5612 isParameter = 1; 5613 } 5614 5615 name = xmlParseName(ctxt); 5616 if (name == NULL) { 5617 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5618 "xmlParseEntityDecl: no name\n"); 5619 return; 5620 } 5621 if (xmlStrchr(name, ':') != NULL) { 5622 xmlNsErr(ctxt, XML_NS_ERR_COLON, 5623 "colons are forbidden from entities names '%s'\n", 5624 name, NULL, NULL); 5625 } 5626 skipped = SKIP_BLANKS; 5627 if (skipped == 0) { 5628 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5629 "Space required after the entity name\n"); 5630 } 5631 5632 ctxt->instate = XML_PARSER_ENTITY_DECL; 5633 /* 5634 * handle the various case of definitions... 
5635 */ 5636 if (isParameter) { 5637 if ((RAW == '"') || (RAW == '\'')) { 5638 value = xmlParseEntityValue(ctxt, &orig); 5639 if (value) { 5640 if ((ctxt->sax != NULL) && 5641 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5642 ctxt->sax->entityDecl(ctxt->userData, name, 5643 XML_INTERNAL_PARAMETER_ENTITY, 5644 NULL, NULL, value); 5645 } 5646 } else { 5647 URI = xmlParseExternalID(ctxt, &literal, 1); 5648 if ((URI == NULL) && (literal == NULL)) { 5649 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 5650 } 5651 if (URI) { 5652 xmlURIPtr uri; 5653 5654 uri = xmlParseURI((const char *) URI); 5655 if (uri == NULL) { 5656 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 5657 "Invalid URI: %s\n", URI); 5658 /* 5659 * This really ought to be a well formedness error 5660 * but the XML Core WG decided otherwise c.f. issue 5661 * E26 of the XML erratas. 5662 */ 5663 } else { 5664 if (uri->fragment != NULL) { 5665 /* 5666 * Okay this is foolish to block those but not 5667 * invalid URIs. 5668 */ 5669 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 5670 } else { 5671 if ((ctxt->sax != NULL) && 5672 (!ctxt->disableSAX) && 5673 (ctxt->sax->entityDecl != NULL)) 5674 ctxt->sax->entityDecl(ctxt->userData, name, 5675 XML_EXTERNAL_PARAMETER_ENTITY, 5676 literal, URI, NULL); 5677 } 5678 xmlFreeURI(uri); 5679 } 5680 } 5681 } 5682 } else { 5683 if ((RAW == '"') || (RAW == '\'')) { 5684 value = xmlParseEntityValue(ctxt, &orig); 5685 if ((ctxt->sax != NULL) && 5686 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5687 ctxt->sax->entityDecl(ctxt->userData, name, 5688 XML_INTERNAL_GENERAL_ENTITY, 5689 NULL, NULL, value); 5690 /* 5691 * For expat compatibility in SAX mode. 
5692 */ 5693 if ((ctxt->myDoc == NULL) || 5694 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 5695 if (ctxt->myDoc == NULL) { 5696 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 5697 if (ctxt->myDoc == NULL) { 5698 xmlErrMemory(ctxt, "New Doc failed"); 5699 return; 5700 } 5701 ctxt->myDoc->properties = XML_DOC_INTERNAL; 5702 } 5703 if (ctxt->myDoc->intSubset == NULL) 5704 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 5705 BAD_CAST "fake", NULL, NULL); 5706 5707 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY, 5708 NULL, NULL, value); 5709 } 5710 } else { 5711 URI = xmlParseExternalID(ctxt, &literal, 1); 5712 if ((URI == NULL) && (literal == NULL)) { 5713 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 5714 } 5715 if (URI) { 5716 xmlURIPtr uri; 5717 5718 uri = xmlParseURI((const char *)URI); 5719 if (uri == NULL) { 5720 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 5721 "Invalid URI: %s\n", URI); 5722 /* 5723 * This really ought to be a well formedness error 5724 * but the XML Core WG decided otherwise c.f. issue 5725 * E26 of the XML erratas. 5726 */ 5727 } else { 5728 if (uri->fragment != NULL) { 5729 /* 5730 * Okay this is foolish to block those but not 5731 * invalid URIs. 
5732 */ 5733 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 5734 } 5735 xmlFreeURI(uri); 5736 } 5737 } 5738 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) { 5739 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5740 "Space required before 'NDATA'\n"); 5741 } 5742 SKIP_BLANKS; 5743 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) { 5744 SKIP(5); 5745 if (!IS_BLANK_CH(CUR)) { 5746 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5747 "Space required after 'NDATA'\n"); 5748 } 5749 SKIP_BLANKS; 5750 ndata = xmlParseName(ctxt); 5751 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 5752 (ctxt->sax->unparsedEntityDecl != NULL)) 5753 ctxt->sax->unparsedEntityDecl(ctxt->userData, name, 5754 literal, URI, ndata); 5755 } else { 5756 if ((ctxt->sax != NULL) && 5757 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5758 ctxt->sax->entityDecl(ctxt->userData, name, 5759 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 5760 literal, URI, NULL); 5761 /* 5762 * For expat compatibility in SAX mode. 5763 * assuming the entity repalcement was asked for 5764 */ 5765 if ((ctxt->replaceEntities != 0) && 5766 ((ctxt->myDoc == NULL) || 5767 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) { 5768 if (ctxt->myDoc == NULL) { 5769 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 5770 if (ctxt->myDoc == NULL) { 5771 xmlErrMemory(ctxt, "New Doc failed"); 5772 return; 5773 } 5774 ctxt->myDoc->properties = XML_DOC_INTERNAL; 5775 } 5776 5777 if (ctxt->myDoc->intSubset == NULL) 5778 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 5779 BAD_CAST "fake", NULL, NULL); 5780 xmlSAX2EntityDecl(ctxt, name, 5781 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 5782 literal, URI, NULL); 5783 } 5784 } 5785 } 5786 } 5787 if (ctxt->instate == XML_PARSER_EOF) 5788 goto done; 5789 SKIP_BLANKS; 5790 if (RAW != '>') { 5791 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, 5792 "xmlParseEntityDecl: entity %s not terminated\n", name); 5793 xmlHaltParser(ctxt); 5794 } else { 5795 if (input != ctxt->input) { 5796 xmlFatalErrMsg(ctxt, 
XML_ERR_ENTITY_BOUNDARY, 5797 "Entity declaration doesn't start and stop in the same entity\n"); 5798 } 5799 NEXT; 5800 } 5801 if (orig != NULL) { 5802 /* 5803 * Ugly mechanism to save the raw entity value. 5804 */ 5805 xmlEntityPtr cur = NULL; 5806 5807 if (isParameter) { 5808 if ((ctxt->sax != NULL) && 5809 (ctxt->sax->getParameterEntity != NULL)) 5810 cur = ctxt->sax->getParameterEntity(ctxt->userData, name); 5811 } else { 5812 if ((ctxt->sax != NULL) && 5813 (ctxt->sax->getEntity != NULL)) 5814 cur = ctxt->sax->getEntity(ctxt->userData, name); 5815 if ((cur == NULL) && (ctxt->userData==ctxt)) { 5816 cur = xmlSAX2GetEntity(ctxt, name); 5817 } 5818 } 5819 if ((cur != NULL) && (cur->orig == NULL)) { 5820 cur->orig = orig; 5821 orig = NULL; 5822 } 5823 } 5824 5825 done: 5826 if (value != NULL) xmlFree(value); 5827 if (URI != NULL) xmlFree(URI); 5828 if (literal != NULL) xmlFree(literal); 5829 if (orig != NULL) xmlFree(orig); 5830 } 5831 } 5832 5833 /** 5834 * xmlParseDefaultDecl: 5835 * @ctxt: an XML parser context 5836 * @value: Receive a possible fixed default value for the attribute 5837 * 5838 * Parse an attribute default declaration 5839 * 5840 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) 5841 * 5842 * [ VC: Required Attribute ] 5843 * if the default declaration is the keyword #REQUIRED, then the 5844 * attribute must be specified for all elements of the type in the 5845 * attribute-list declaration. 5846 * 5847 * [ VC: Attribute Default Legal ] 5848 * The declared default value must meet the lexical constraints of 5849 * the declared attribute type c.f. xmlValidateAttributeDecl() 5850 * 5851 * [ VC: Fixed Attribute Default ] 5852 * if an attribute has a default value declared with the #FIXED 5853 * keyword, instances of that attribute must match the default value. 
5854 * 5855 * [ WFC: No < in Attribute Values ] 5856 * handled in xmlParseAttValue() 5857 * 5858 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED 5859 * or XML_ATTRIBUTE_FIXED. 5860 */ 5861 5862 int 5863 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) { 5864 int val; 5865 xmlChar *ret; 5866 5867 *value = NULL; 5868 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) { 5869 SKIP(9); 5870 return(XML_ATTRIBUTE_REQUIRED); 5871 } 5872 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) { 5873 SKIP(8); 5874 return(XML_ATTRIBUTE_IMPLIED); 5875 } 5876 val = XML_ATTRIBUTE_NONE; 5877 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) { 5878 SKIP(6); 5879 val = XML_ATTRIBUTE_FIXED; 5880 if (!IS_BLANK_CH(CUR)) { 5881 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5882 "Space required after '#FIXED'\n"); 5883 } 5884 SKIP_BLANKS; 5885 } 5886 ret = xmlParseAttValue(ctxt); 5887 ctxt->instate = XML_PARSER_DTD; 5888 if (ret == NULL) { 5889 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo, 5890 "Attribute default value declaration error\n"); 5891 } else 5892 *value = ret; 5893 return(val); 5894 } 5895 5896 /** 5897 * xmlParseNotationType: 5898 * @ctxt: an XML parser context 5899 * 5900 * parse an Notation attribute type. 5901 * 5902 * Note: the leading 'NOTATION' S part has already being parsed... 5903 * 5904 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' 5905 * 5906 * [ VC: Notation Attributes ] 5907 * Values of this type must match one of the notation names included 5908 * in the declaration; all notation names in the declaration must be declared. 
5909 * 5910 * Returns: the notation attribute tree built while parsing 5911 */ 5912 5913 xmlEnumerationPtr 5914 xmlParseNotationType(xmlParserCtxtPtr ctxt) { 5915 const xmlChar *name; 5916 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp; 5917 5918 if (RAW != '(') { 5919 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 5920 return(NULL); 5921 } 5922 SHRINK; 5923 do { 5924 NEXT; 5925 SKIP_BLANKS; 5926 name = xmlParseName(ctxt); 5927 if (name == NULL) { 5928 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5929 "Name expected in NOTATION declaration\n"); 5930 xmlFreeEnumeration(ret); 5931 return(NULL); 5932 } 5933 tmp = ret; 5934 while (tmp != NULL) { 5935 if (xmlStrEqual(name, tmp->name)) { 5936 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN, 5937 "standalone: attribute notation value token %s duplicated\n", 5938 name, NULL); 5939 if (!xmlDictOwns(ctxt->dict, name)) 5940 xmlFree((xmlChar *) name); 5941 break; 5942 } 5943 tmp = tmp->next; 5944 } 5945 if (tmp == NULL) { 5946 cur = xmlCreateEnumeration(name); 5947 if (cur == NULL) { 5948 xmlFreeEnumeration(ret); 5949 return(NULL); 5950 } 5951 if (last == NULL) ret = last = cur; 5952 else { 5953 last->next = cur; 5954 last = cur; 5955 } 5956 } 5957 SKIP_BLANKS; 5958 } while (RAW == '|'); 5959 if (RAW != ')') { 5960 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 5961 xmlFreeEnumeration(ret); 5962 return(NULL); 5963 } 5964 NEXT; 5965 return(ret); 5966 } 5967 5968 /** 5969 * xmlParseEnumerationType: 5970 * @ctxt: an XML parser context 5971 * 5972 * parse an Enumeration attribute type. 5973 * 5974 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? 
')' 5975 * 5976 * [ VC: Enumeration ] 5977 * Values of this type must match one of the Nmtoken tokens in 5978 * the declaration 5979 * 5980 * Returns: the enumeration attribute tree built while parsing 5981 */ 5982 5983 xmlEnumerationPtr 5984 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) { 5985 xmlChar *name; 5986 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp; 5987 5988 if (RAW != '(') { 5989 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL); 5990 return(NULL); 5991 } 5992 SHRINK; 5993 do { 5994 NEXT; 5995 SKIP_BLANKS; 5996 name = xmlParseNmtoken(ctxt); 5997 if (name == NULL) { 5998 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL); 5999 return(ret); 6000 } 6001 tmp = ret; 6002 while (tmp != NULL) { 6003 if (xmlStrEqual(name, tmp->name)) { 6004 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN, 6005 "standalone: attribute enumeration value token %s duplicated\n", 6006 name, NULL); 6007 if (!xmlDictOwns(ctxt->dict, name)) 6008 xmlFree(name); 6009 break; 6010 } 6011 tmp = tmp->next; 6012 } 6013 if (tmp == NULL) { 6014 cur = xmlCreateEnumeration(name); 6015 if (!xmlDictOwns(ctxt->dict, name)) 6016 xmlFree(name); 6017 if (cur == NULL) { 6018 xmlFreeEnumeration(ret); 6019 return(NULL); 6020 } 6021 if (last == NULL) ret = last = cur; 6022 else { 6023 last->next = cur; 6024 last = cur; 6025 } 6026 } 6027 SKIP_BLANKS; 6028 } while (RAW == '|'); 6029 if (RAW != ')') { 6030 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL); 6031 return(ret); 6032 } 6033 NEXT; 6034 return(ret); 6035 } 6036 6037 /** 6038 * xmlParseEnumeratedType: 6039 * @ctxt: an XML parser context 6040 * @tree: the enumeration tree built while parsing 6041 * 6042 * parse an Enumerated attribute type. 6043 * 6044 * [57] EnumeratedType ::= NotationType | Enumeration 6045 * 6046 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? 
')'
 *
 *
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION, or 0 when
 *          no enumeration list could be built
 */

int
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
    int kind;

    if (!CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
        /* Anything not introduced by 'NOTATION' is a plain enumeration. */
        *tree = xmlParseEnumerationType(ctxt);
        kind = XML_ATTRIBUTE_ENUMERATION;
    } else {
        SKIP(8);
        if (!IS_BLANK_CH(CUR)) {
            xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                           "Space required after 'NOTATION'\n");
            return(0);
        }
        SKIP_BLANKS;
        *tree = xmlParseNotationType(ctxt);
        kind = XML_ATTRIBUTE_NOTATION;
    }

    /* An empty list means the sub-parser failed. */
    return((*tree != NULL) ? kind : 0);
}

/**
 * xmlParseAttributeType:
 * @ctxt:  an XML parser context
 * @tree:  the enumeration tree built while parsing
 *
 * parse the Attribute list def for an element
 *
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
 *
 * [55] StringType ::= 'CDATA'
 *
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
 *
 * Validity constraints for attribute values syntax are checked in
 * xmlValidateAttributeValue()
 *
 * [ VC: ID ]
 * Values of type ID must match the Name production. A name must not
 * appear more than once in an XML document as a value of this type;
 * i.e., ID values must uniquely identify the elements which bear them.
 *
 * [ VC: One ID per Element Type ]
 * No element type may have more than one ID attribute specified.
 *
 * [ VC: ID Attribute Default ]
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
 *
 * [ VC: IDREF ]
 * Values of type IDREF must match the Name production, and values
 * of type IDREFS must match Names; each IDREF Name must match the value
 * of an ID attribute on some element in the XML document; i.e.
IDREF 6103 * values must match the value of some ID attribute. 6104 * 6105 * [ VC: Entity Name ] 6106 * Values of type ENTITY must match the Name production, values 6107 * of type ENTITIES must match Names; each Entity Name must match the 6108 * name of an unparsed entity declared in the DTD. 6109 * 6110 * [ VC: Name Token ] 6111 * Values of type NMTOKEN must match the Nmtoken production; values 6112 * of type NMTOKENS must match Nmtokens. 6113 * 6114 * Returns the attribute type 6115 */ 6116 int 6117 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 6118 SHRINK; 6119 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) { 6120 SKIP(5); 6121 return(XML_ATTRIBUTE_CDATA); 6122 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) { 6123 SKIP(6); 6124 return(XML_ATTRIBUTE_IDREFS); 6125 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) { 6126 SKIP(5); 6127 return(XML_ATTRIBUTE_IDREF); 6128 } else if ((RAW == 'I') && (NXT(1) == 'D')) { 6129 SKIP(2); 6130 return(XML_ATTRIBUTE_ID); 6131 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) { 6132 SKIP(6); 6133 return(XML_ATTRIBUTE_ENTITY); 6134 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) { 6135 SKIP(8); 6136 return(XML_ATTRIBUTE_ENTITIES); 6137 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) { 6138 SKIP(8); 6139 return(XML_ATTRIBUTE_NMTOKENS); 6140 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) { 6141 SKIP(7); 6142 return(XML_ATTRIBUTE_NMTOKEN); 6143 } 6144 return(xmlParseEnumeratedType(ctxt, tree)); 6145 } 6146 6147 /** 6148 * xmlParseAttributeListDecl: 6149 * @ctxt: an XML parser context 6150 * 6151 * : parse the Attribute list def for an element 6152 * 6153 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? 
'>' 6154 * 6155 * [53] AttDef ::= S Name S AttType S DefaultDecl 6156 * 6157 */ 6158 void 6159 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) { 6160 const xmlChar *elemName; 6161 const xmlChar *attrName; 6162 xmlEnumerationPtr tree; 6163 6164 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) { 6165 xmlParserInputPtr input = ctxt->input; 6166 6167 SKIP(9); 6168 if (!IS_BLANK_CH(CUR)) { 6169 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6170 "Space required after '<!ATTLIST'\n"); 6171 } 6172 SKIP_BLANKS; 6173 elemName = xmlParseName(ctxt); 6174 if (elemName == NULL) { 6175 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6176 "ATTLIST: no name for Element\n"); 6177 return; 6178 } 6179 SKIP_BLANKS; 6180 GROW; 6181 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) { 6182 const xmlChar *check = CUR_PTR; 6183 int type; 6184 int def; 6185 xmlChar *defaultValue = NULL; 6186 6187 GROW; 6188 tree = NULL; 6189 attrName = xmlParseName(ctxt); 6190 if (attrName == NULL) { 6191 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6192 "ATTLIST: no name for Attribute\n"); 6193 break; 6194 } 6195 GROW; 6196 if (!IS_BLANK_CH(CUR)) { 6197 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6198 "Space required after the attribute name\n"); 6199 break; 6200 } 6201 SKIP_BLANKS; 6202 6203 type = xmlParseAttributeType(ctxt, &tree); 6204 if (type <= 0) { 6205 break; 6206 } 6207 6208 GROW; 6209 if (!IS_BLANK_CH(CUR)) { 6210 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6211 "Space required after the attribute type\n"); 6212 if (tree != NULL) 6213 xmlFreeEnumeration(tree); 6214 break; 6215 } 6216 SKIP_BLANKS; 6217 6218 def = xmlParseDefaultDecl(ctxt, &defaultValue); 6219 if (def <= 0) { 6220 if (defaultValue != NULL) 6221 xmlFree(defaultValue); 6222 if (tree != NULL) 6223 xmlFreeEnumeration(tree); 6224 break; 6225 } 6226 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL)) 6227 xmlAttrNormalizeSpace(defaultValue, defaultValue); 6228 6229 GROW; 6230 if (RAW != '>') { 6231 if 
(!IS_BLANK_CH(CUR)) { 6232 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6233 "Space required after the attribute default value\n"); 6234 if (defaultValue != NULL) 6235 xmlFree(defaultValue); 6236 if (tree != NULL) 6237 xmlFreeEnumeration(tree); 6238 break; 6239 } 6240 SKIP_BLANKS; 6241 } 6242 if (check == CUR_PTR) { 6243 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 6244 "in xmlParseAttributeListDecl\n"); 6245 if (defaultValue != NULL) 6246 xmlFree(defaultValue); 6247 if (tree != NULL) 6248 xmlFreeEnumeration(tree); 6249 break; 6250 } 6251 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 6252 (ctxt->sax->attributeDecl != NULL)) 6253 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName, 6254 type, def, defaultValue, tree); 6255 else if (tree != NULL) 6256 xmlFreeEnumeration(tree); 6257 6258 if ((ctxt->sax2) && (defaultValue != NULL) && 6259 (def != XML_ATTRIBUTE_IMPLIED) && 6260 (def != XML_ATTRIBUTE_REQUIRED)) { 6261 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue); 6262 } 6263 if (ctxt->sax2) { 6264 xmlAddSpecialAttr(ctxt, elemName, attrName, type); 6265 } 6266 if (defaultValue != NULL) 6267 xmlFree(defaultValue); 6268 GROW; 6269 } 6270 if (RAW == '>') { 6271 if (input != ctxt->input) { 6272 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6273 "Attribute list declaration doesn't start and stop in the same entity\n", 6274 NULL, NULL); 6275 } 6276 NEXT; 6277 } 6278 } 6279 } 6280 6281 /** 6282 * xmlParseElementMixedContentDecl: 6283 * @ctxt: an XML parser context 6284 * @inputchk: the input used for the current entity, needed for boundary checks 6285 * 6286 * parse the declaration for a Mixed Element content 6287 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 6288 * 6289 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | 6290 * '(' S? '#PCDATA' S? 
')'
 *
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
 *
 * [ VC: No Duplicate Types ]
 * The same name must not appear more than once in a single
 * mixed-content declaration.
 *
 * The tree built for "(#PCDATA | a | b)*" is a right-leaning chain of
 * XML_ELEMENT_CONTENT_OR nodes whose first leaf is the PCDATA node.
 *
 * returns: the list of the xmlElementContentPtr describing the element
 *          choices, or NULL on a syntax error or allocation failure
 */
xmlElementContentPtr
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
    xmlElementContentPtr ret = NULL, cur = NULL, n;
    const xmlChar *elem = NULL;

    GROW;
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
        SKIP(7);
        SKIP_BLANKS;
        SHRINK;

        /* "(#PCDATA)" optionally followed by '*': a single PCDATA node. */
        if (RAW == ')') {
            if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
                xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
"Element content declaration doesn't start and stop in the same entity\n",
                                 NULL, NULL);
            }
            NEXT;
            ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
            if (ret == NULL)
                return(NULL);
            if (RAW == '*') {
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
                NEXT;
            }
            return(ret);
        }

        if ((RAW == '(') || (RAW == '|')) {
            ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
            if (ret == NULL) return(NULL);
        }

        /*
         * Each iteration consumes one '|' and one Name. The name is only
         * turned into a leaf on the next iteration (or after the loop),
         * once it is known whether another alternative follows.
         */
        while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
            NEXT;
            if (elem == NULL) {
                /* First '|': put the PCDATA node under a new OR root. */
                ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
                if (ret == NULL) {
                    /* The PCDATA node is only reachable through cur. */
                    xmlFreeDocElementContent(ctxt->myDoc, cur);
                    return(NULL);
                }
                ret->c1 = cur;
                if (cur != NULL)
                    cur->parent = ret;
                cur = ret;
            } else {
                /* Later '|': chain a new OR node holding the pending name. */
                n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
                if (n == NULL) {
                    /* Do not leak the tree built so far. */
                    xmlFreeDocElementContent(ctxt->myDoc, ret);
                    return(NULL);
                }
                n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
                if (n->c1 != NULL)
                    n->c1->parent = n;
                cur->c2 = n;
                n->parent = cur;
                cur = n;
            }
            SKIP_BLANKS;
            elem = xmlParseName(ctxt);
            if (elem == NULL) {
                xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
                        "xmlParseElementMixedContentDecl : Name expected\n");
                xmlFreeDocElementContent(ctxt->myDoc, ret);
                return(NULL);
            }
            SKIP_BLANKS;
            GROW;
        }

        /* A list with alternatives must be closed by ")*". */
        if ((RAW == ')') && (NXT(1) == '*')) {
            if (elem != NULL) {
                /* Attach the last pending name as the final leaf. */
                cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
                                               XML_ELEMENT_CONTENT_ELEMENT);
                if (cur->c2 != NULL)
                    cur->c2->parent = cur;
            }
            if (ret != NULL)
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
            if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
                xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
"Element content declaration doesn't start and stop in the same entity\n",
                                 NULL, NULL);
            }
            SKIP(2);
        } else {
            xmlFreeDocElementContent(ctxt->myDoc, ret);
            xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
            return(NULL);
        }

    } else {
        xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
    }
    return(ret);
}

/**
 * xmlParseElementChildrenContentDeclPriv:
 * @ctxt:  an XML parser context
 * @inputchk:  the input used for the current entity, needed for boundary checks
 * @depth: the level of recursion
 *
 * parse the declaration for an element's children content model
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
 *
 *
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
 *
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
 *
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
 *
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
 *
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
 * TODO Parameter-entity replacement text must be properly nested
 *	with parenthesized groups.
That is to say, if either of the 6409 * opening or closing parentheses in a choice, seq, or Mixed 6410 * construct is contained in the replacement text for a parameter 6411 * entity, both must be contained in the same replacement text. For 6412 * interoperability, if a parameter-entity reference appears in a 6413 * choice, seq, or Mixed construct, its replacement text should not 6414 * be empty, and neither the first nor last non-blank character of 6415 * the replacement text should be a connector (| or ,). 6416 * 6417 * Returns the tree of xmlElementContentPtr describing the element 6418 * hierarchy. 6419 */ 6420 static xmlElementContentPtr 6421 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk, 6422 int depth) { 6423 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL; 6424 const xmlChar *elem; 6425 xmlChar type = 0; 6426 6427 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) || 6428 (depth > 2048)) { 6429 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, 6430 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n", 6431 depth); 6432 return(NULL); 6433 } 6434 SKIP_BLANKS; 6435 GROW; 6436 if (RAW == '(') { 6437 int inputid = ctxt->input->id; 6438 6439 /* Recurse on first child */ 6440 NEXT; 6441 SKIP_BLANKS; 6442 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 6443 depth + 1); 6444 SKIP_BLANKS; 6445 GROW; 6446 } else { 6447 elem = xmlParseName(ctxt); 6448 if (elem == NULL) { 6449 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL); 6450 return(NULL); 6451 } 6452 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 6453 if (cur == NULL) { 6454 xmlErrMemory(ctxt, NULL); 6455 return(NULL); 6456 } 6457 GROW; 6458 if (RAW == '?') { 6459 cur->ocur = XML_ELEMENT_CONTENT_OPT; 6460 NEXT; 6461 } else if (RAW == '*') { 6462 cur->ocur = XML_ELEMENT_CONTENT_MULT; 6463 NEXT; 6464 } else if (RAW == '+') { 6465 cur->ocur = 
XML_ELEMENT_CONTENT_PLUS; 6466 NEXT; 6467 } else { 6468 cur->ocur = XML_ELEMENT_CONTENT_ONCE; 6469 } 6470 GROW; 6471 } 6472 SKIP_BLANKS; 6473 SHRINK; 6474 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) { 6475 /* 6476 * Each loop we parse one separator and one element. 6477 */ 6478 if (RAW == ',') { 6479 if (type == 0) type = CUR; 6480 6481 /* 6482 * Detect "Name | Name , Name" error 6483 */ 6484 else if (type != CUR) { 6485 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED, 6486 "xmlParseElementChildrenContentDecl : '%c' expected\n", 6487 type); 6488 if ((last != NULL) && (last != ret)) 6489 xmlFreeDocElementContent(ctxt->myDoc, last); 6490 if (ret != NULL) 6491 xmlFreeDocElementContent(ctxt->myDoc, ret); 6492 return(NULL); 6493 } 6494 NEXT; 6495 6496 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ); 6497 if (op == NULL) { 6498 if ((last != NULL) && (last != ret)) 6499 xmlFreeDocElementContent(ctxt->myDoc, last); 6500 xmlFreeDocElementContent(ctxt->myDoc, ret); 6501 return(NULL); 6502 } 6503 if (last == NULL) { 6504 op->c1 = ret; 6505 if (ret != NULL) 6506 ret->parent = op; 6507 ret = cur = op; 6508 } else { 6509 cur->c2 = op; 6510 if (op != NULL) 6511 op->parent = cur; 6512 op->c1 = last; 6513 if (last != NULL) 6514 last->parent = op; 6515 cur =op; 6516 last = NULL; 6517 } 6518 } else if (RAW == '|') { 6519 if (type == 0) type = CUR; 6520 6521 /* 6522 * Detect "Name , Name | Name" error 6523 */ 6524 else if (type != CUR) { 6525 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED, 6526 "xmlParseElementChildrenContentDecl : '%c' expected\n", 6527 type); 6528 if ((last != NULL) && (last != ret)) 6529 xmlFreeDocElementContent(ctxt->myDoc, last); 6530 if (ret != NULL) 6531 xmlFreeDocElementContent(ctxt->myDoc, ret); 6532 return(NULL); 6533 } 6534 NEXT; 6535 6536 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); 6537 if (op == NULL) { 6538 if ((last != NULL) && (last != ret)) 6539 
xmlFreeDocElementContent(ctxt->myDoc, last); 6540 if (ret != NULL) 6541 xmlFreeDocElementContent(ctxt->myDoc, ret); 6542 return(NULL); 6543 } 6544 if (last == NULL) { 6545 op->c1 = ret; 6546 if (ret != NULL) 6547 ret->parent = op; 6548 ret = cur = op; 6549 } else { 6550 cur->c2 = op; 6551 if (op != NULL) 6552 op->parent = cur; 6553 op->c1 = last; 6554 if (last != NULL) 6555 last->parent = op; 6556 cur =op; 6557 last = NULL; 6558 } 6559 } else { 6560 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL); 6561 if ((last != NULL) && (last != ret)) 6562 xmlFreeDocElementContent(ctxt->myDoc, last); 6563 if (ret != NULL) 6564 xmlFreeDocElementContent(ctxt->myDoc, ret); 6565 return(NULL); 6566 } 6567 GROW; 6568 SKIP_BLANKS; 6569 GROW; 6570 if (RAW == '(') { 6571 int inputid = ctxt->input->id; 6572 /* Recurse on second child */ 6573 NEXT; 6574 SKIP_BLANKS; 6575 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 6576 depth + 1); 6577 SKIP_BLANKS; 6578 } else { 6579 elem = xmlParseName(ctxt); 6580 if (elem == NULL) { 6581 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL); 6582 if (ret != NULL) 6583 xmlFreeDocElementContent(ctxt->myDoc, ret); 6584 return(NULL); 6585 } 6586 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 6587 if (last == NULL) { 6588 if (ret != NULL) 6589 xmlFreeDocElementContent(ctxt->myDoc, ret); 6590 return(NULL); 6591 } 6592 if (RAW == '?') { 6593 last->ocur = XML_ELEMENT_CONTENT_OPT; 6594 NEXT; 6595 } else if (RAW == '*') { 6596 last->ocur = XML_ELEMENT_CONTENT_MULT; 6597 NEXT; 6598 } else if (RAW == '+') { 6599 last->ocur = XML_ELEMENT_CONTENT_PLUS; 6600 NEXT; 6601 } else { 6602 last->ocur = XML_ELEMENT_CONTENT_ONCE; 6603 } 6604 } 6605 SKIP_BLANKS; 6606 GROW; 6607 } 6608 if ((cur != NULL) && (last != NULL)) { 6609 cur->c2 = last; 6610 if (last != NULL) 6611 last->parent = cur; 6612 } 6613 if ((ctxt->validate) && (ctxt->input->id != inputchk)) { 6614 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6615 
"Element content declaration doesn't start and stop in the same entity\n", 6616 NULL, NULL); 6617 } 6618 NEXT; 6619 if (RAW == '?') { 6620 if (ret != NULL) { 6621 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) || 6622 (ret->ocur == XML_ELEMENT_CONTENT_MULT)) 6623 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6624 else 6625 ret->ocur = XML_ELEMENT_CONTENT_OPT; 6626 } 6627 NEXT; 6628 } else if (RAW == '*') { 6629 if (ret != NULL) { 6630 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6631 cur = ret; 6632 /* 6633 * Some normalization: 6634 * (a | b* | c?)* == (a | b | c)* 6635 */ 6636 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) { 6637 if ((cur->c1 != NULL) && 6638 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 6639 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) 6640 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 6641 if ((cur->c2 != NULL) && 6642 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 6643 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) 6644 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 6645 cur = cur->c2; 6646 } 6647 } 6648 NEXT; 6649 } else if (RAW == '+') { 6650 if (ret != NULL) { 6651 int found = 0; 6652 6653 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) || 6654 (ret->ocur == XML_ELEMENT_CONTENT_MULT)) 6655 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6656 else 6657 ret->ocur = XML_ELEMENT_CONTENT_PLUS; 6658 /* 6659 * Some normalization: 6660 * (a | b*)+ == (a | b)* 6661 * (a | b?)+ == (a | b)* 6662 */ 6663 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) { 6664 if ((cur->c1 != NULL) && 6665 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 6666 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) { 6667 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 6668 found = 1; 6669 } 6670 if ((cur->c2 != NULL) && 6671 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 6672 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) { 6673 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 6674 found = 1; 6675 } 6676 cur = cur->c2; 6677 } 6678 if (found) 6679 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6680 } 6681 NEXT; 6682 
} 6683 return(ret); 6684 } 6685 6686 /** 6687 * xmlParseElementChildrenContentDecl: 6688 * @ctxt: an XML parser context 6689 * @inputchk: the input used for the current entity, needed for boundary checks 6690 * 6691 * parse the declaration for a Mixed Element content 6692 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 6693 * 6694 * [47] children ::= (choice | seq) ('?' | '*' | '+')? 6695 * 6696 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? 6697 * 6698 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')' 6699 * 6700 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' 6701 * 6702 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50] 6703 * TODO Parameter-entity replacement text must be properly nested 6704 * with parenthesized groups. That is to say, if either of the 6705 * opening or closing parentheses in a choice, seq, or Mixed 6706 * construct is contained in the replacement text for a parameter 6707 * entity, both must be contained in the same replacement text. For 6708 * interoperability, if a parameter-entity reference appears in a 6709 * choice, seq, or Mixed construct, its replacement text should not 6710 * be empty, and neither the first nor last non-blank character of 6711 * the replacement text should be a connector (| or ,). 6712 * 6713 * Returns the tree of xmlElementContentPtr describing the element 6714 * hierarchy. 6715 */ 6716 xmlElementContentPtr 6717 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { 6718 /* stub left for API/ABI compat */ 6719 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1)); 6720 } 6721 6722 /** 6723 * xmlParseElementContentDecl: 6724 * @ctxt: an XML parser context 6725 * @name: the name of the element being defined. 
6726 * @result: the Element Content pointer will be stored here if any 6727 * 6728 * parse the declaration for an Element content either Mixed or Children, 6729 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl 6730 * 6731 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children 6732 * 6733 * returns: the type of element content XML_ELEMENT_TYPE_xxx 6734 */ 6735 6736 int 6737 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name, 6738 xmlElementContentPtr *result) { 6739 6740 xmlElementContentPtr tree = NULL; 6741 int inputid = ctxt->input->id; 6742 int res; 6743 6744 *result = NULL; 6745 6746 if (RAW != '(') { 6747 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, 6748 "xmlParseElementContentDecl : %s '(' expected\n", name); 6749 return(-1); 6750 } 6751 NEXT; 6752 GROW; 6753 if (ctxt->instate == XML_PARSER_EOF) 6754 return(-1); 6755 SKIP_BLANKS; 6756 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) { 6757 tree = xmlParseElementMixedContentDecl(ctxt, inputid); 6758 res = XML_ELEMENT_TYPE_MIXED; 6759 } else { 6760 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1); 6761 res = XML_ELEMENT_TYPE_ELEMENT; 6762 } 6763 SKIP_BLANKS; 6764 *result = tree; 6765 return(res); 6766 } 6767 6768 /** 6769 * xmlParseElementDecl: 6770 * @ctxt: an XML parser context 6771 * 6772 * parse an Element declaration. 6773 * 6774 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? 
'>'
 *
 * [ VC: Unique Element Type Declaration ]
 * No element type may be declared more than once
 *
 * Returns the type of the element, or -1 in case of error
 */
int
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
    xmlParserInputPtr startInput;
    xmlElementContentPtr content = NULL;
    const xmlChar *name;
    int kind;

    /* GROW; done in the caller */
    if (!CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T'))
        return(-1);

    startInput = ctxt->input;
    SKIP(9);
    if (!IS_BLANK_CH(CUR)) {
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                       "Space required after 'ELEMENT'\n");
        return(-1);
    }
    SKIP_BLANKS;
    name = xmlParseName(ctxt);
    if (name == NULL) {
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
                       "xmlParseElementDecl: no name for Element\n");
        return(-1);
    }
    /* Leave any entity input that ended right after the name. */
    while ((RAW == 0) && (ctxt->inputNr > 1))
        xmlPopInput(ctxt);
    if (!IS_BLANK_CH(CUR)) {
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                       "Space required after the element name\n");
    }
    SKIP_BLANKS;

    /* contentspec ::= 'EMPTY' | 'ANY' | Mixed | children */
    if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
        /* Element must always be empty. */
        SKIP(5);
        kind = XML_ELEMENT_TYPE_EMPTY;
    } else if ((RAW == 'A') && (NXT(1) == 'N') && (NXT(2) == 'Y')) {
        /* Element is a generic container. */
        SKIP(3);
        kind = XML_ELEMENT_TYPE_ANY;
    } else if (RAW == '(') {
        kind = xmlParseElementContentDecl(ctxt, name, &content);
    } else {
        /*
         * [ WFC: PEs in Internal Subset ] error handling.
         */
        if ((RAW == '%') && (ctxt->external == 0) &&
            (ctxt->inputNr == 1)) {
            xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
          "PEReference: forbidden within markup decl in internal subset\n");
        } else {
            xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
                  "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
        }
        return(-1);
    }

    SKIP_BLANKS;
    /*
     * Pop-up of finished entities.
     */
    while ((RAW == 0) && (ctxt->inputNr > 1))
        xmlPopInput(ctxt);
    SKIP_BLANKS;

    if (RAW != '>') {
        xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
        if (content != NULL) {
            xmlFreeDocElementContent(ctxt->myDoc, content);
        }
        return(kind);
    }

    if (startInput != ctxt->input) {
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
    "Element declaration doesn't start and stop in the same entity\n");
    }
    NEXT;

    /* Without an active elementDecl callback nobody takes the tree. */
    if ((ctxt->sax == NULL) || (ctxt->disableSAX) ||
        (ctxt->sax->elementDecl == NULL)) {
        if (content != NULL) {
            xmlFreeDocElementContent(ctxt->myDoc, content);
        }
        return(kind);
    }

    if (content != NULL)
        content->parent = NULL;
    ctxt->sax->elementDecl(ctxt->userData, name, kind, content);
    if ((content != NULL) && (content->parent == NULL)) {
        /*
         * this is a trick: if xmlAddElementDecl is called,
         * instead of copying the full tree it is plugged directly
         * if called from the parser. Avoid duplicating the
         * interfaces or change the API/ABI
         */
        xmlFreeDocElementContent(ctxt->myDoc, content);
    }
    return(kind);
}

/**
 * xmlParseConditionalSections
 * @ctxt:  an XML parser context
 *
 * [61] conditionalSect ::= includeSect | ignoreSect
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S?
'[' ignoreSectContents* ']]>' 6891 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)* 6892 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*) 6893 */ 6894 6895 static void 6896 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { 6897 int id = ctxt->input->id; 6898 6899 SKIP(3); 6900 SKIP_BLANKS; 6901 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) { 6902 SKIP(7); 6903 SKIP_BLANKS; 6904 if (RAW != '[') { 6905 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); 6906 xmlHaltParser(ctxt); 6907 return; 6908 } else { 6909 if (ctxt->input->id != id) { 6910 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6911 "All markup of the conditional section is not in the same entity\n", 6912 NULL, NULL); 6913 } 6914 NEXT; 6915 } 6916 if (xmlParserDebugEntities) { 6917 if ((ctxt->input != NULL) && (ctxt->input->filename)) 6918 xmlGenericError(xmlGenericErrorContext, 6919 "%s(%d): ", ctxt->input->filename, 6920 ctxt->input->line); 6921 xmlGenericError(xmlGenericErrorContext, 6922 "Entering INCLUDE Conditional Section\n"); 6923 } 6924 6925 while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') || 6926 (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) { 6927 const xmlChar *check = CUR_PTR; 6928 unsigned int cons = ctxt->input->consumed; 6929 6930 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6931 xmlParseConditionalSections(ctxt); 6932 } else if (IS_BLANK_CH(CUR)) { 6933 NEXT; 6934 } else if (RAW == '%') { 6935 xmlParsePEReference(ctxt); 6936 } else 6937 xmlParseMarkupDecl(ctxt); 6938 6939 /* 6940 * Pop-up of finished entities. 
6941 */ 6942 while ((RAW == 0) && (ctxt->inputNr > 1)) 6943 xmlPopInput(ctxt); 6944 6945 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 6946 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 6947 xmlHaltParser(ctxt); 6948 break; 6949 } 6950 } 6951 if (xmlParserDebugEntities) { 6952 if ((ctxt->input != NULL) && (ctxt->input->filename)) 6953 xmlGenericError(xmlGenericErrorContext, 6954 "%s(%d): ", ctxt->input->filename, 6955 ctxt->input->line); 6956 xmlGenericError(xmlGenericErrorContext, 6957 "Leaving INCLUDE Conditional Section\n"); 6958 } 6959 6960 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) { 6961 int state; 6962 xmlParserInputState instate; 6963 int depth = 0; 6964 6965 SKIP(6); 6966 SKIP_BLANKS; 6967 if (RAW != '[') { 6968 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); 6969 xmlHaltParser(ctxt); 6970 return; 6971 } else { 6972 if (ctxt->input->id != id) { 6973 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6974 "All markup of the conditional section is not in the same entity\n", 6975 NULL, NULL); 6976 } 6977 NEXT; 6978 } 6979 if (xmlParserDebugEntities) { 6980 if ((ctxt->input != NULL) && (ctxt->input->filename)) 6981 xmlGenericError(xmlGenericErrorContext, 6982 "%s(%d): ", ctxt->input->filename, 6983 ctxt->input->line); 6984 xmlGenericError(xmlGenericErrorContext, 6985 "Entering IGNORE Conditional Section\n"); 6986 } 6987 6988 /* 6989 * Parse up to the end of the conditional section 6990 * But disable SAX event generating DTD building in the meantime 6991 */ 6992 state = ctxt->disableSAX; 6993 instate = ctxt->instate; 6994 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 6995 ctxt->instate = XML_PARSER_IGNORE; 6996 6997 while (((depth >= 0) && (RAW != 0)) && 6998 (ctxt->instate != XML_PARSER_EOF)) { 6999 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 7000 depth++; 7001 SKIP(3); 7002 continue; 7003 } 7004 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) { 7005 if (--depth >= 0) SKIP(3); 7006 continue; 7007 } 
7008 NEXT; 7009 continue; 7010 } 7011 7012 ctxt->disableSAX = state; 7013 ctxt->instate = instate; 7014 7015 if (xmlParserDebugEntities) { 7016 if ((ctxt->input != NULL) && (ctxt->input->filename)) 7017 xmlGenericError(xmlGenericErrorContext, 7018 "%s(%d): ", ctxt->input->filename, 7019 ctxt->input->line); 7020 xmlGenericError(xmlGenericErrorContext, 7021 "Leaving IGNORE Conditional Section\n"); 7022 } 7023 7024 } else { 7025 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL); 7026 xmlHaltParser(ctxt); 7027 return; 7028 } 7029 7030 if (RAW == 0) 7031 SHRINK; 7032 7033 if (RAW == 0) { 7034 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL); 7035 } else { 7036 if (ctxt->input->id != id) { 7037 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 7038 "All markup of the conditional section is not in the same entity\n", 7039 NULL, NULL); 7040 } 7041 if ((ctxt-> instate != XML_PARSER_EOF) && 7042 ((ctxt->input->cur + 3) <= ctxt->input->end)) 7043 SKIP(3); 7044 } 7045 } 7046 7047 /** 7048 * xmlParseMarkupDecl: 7049 * @ctxt: an XML parser context 7050 * 7051 * parse Markup declarations 7052 * 7053 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | 7054 * NotationDecl | PI | Comment 7055 * 7056 * [ VC: Proper Declaration/PE Nesting ] 7057 * Parameter-entity replacement text must be properly nested with 7058 * markup declarations. That is to say, if either the first character 7059 * or the last character of a markup declaration (markupdecl above) is 7060 * contained in the replacement text for a parameter-entity reference, 7061 * both must be contained in the same replacement text. 7062 * 7063 * [ WFC: PEs in Internal Subset ] 7064 * In the internal DTD subset, parameter-entity references can occur 7065 * only where markup declarations can occur, not within markup declarations. 7066 * (This does not apply to references that occur in external parameter 7067 * entities or to the external subset.) 
7068 */ 7069 void 7070 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) { 7071 GROW; 7072 if (CUR == '<') { 7073 if (NXT(1) == '!') { 7074 switch (NXT(2)) { 7075 case 'E': 7076 if (NXT(3) == 'L') 7077 xmlParseElementDecl(ctxt); 7078 else if (NXT(3) == 'N') 7079 xmlParseEntityDecl(ctxt); 7080 break; 7081 case 'A': 7082 xmlParseAttributeListDecl(ctxt); 7083 break; 7084 case 'N': 7085 xmlParseNotationDecl(ctxt); 7086 break; 7087 case '-': 7088 xmlParseComment(ctxt); 7089 break; 7090 default: 7091 /* there is an error but it will be detected later */ 7092 break; 7093 } 7094 } else if (NXT(1) == '?') { 7095 xmlParsePI(ctxt); 7096 } 7097 } 7098 7099 /* 7100 * detect requirement to exit there and act accordingly 7101 * and avoid having instate overriden later on 7102 */ 7103 if (ctxt->instate == XML_PARSER_EOF) 7104 return; 7105 7106 /* 7107 * This is only for internal subset. On external entities, 7108 * the replacement is done before parsing stage 7109 */ 7110 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 7111 xmlParsePEReference(ctxt); 7112 7113 /* 7114 * Conditional sections are allowed from entities included 7115 * by PE References in the internal subset. 7116 */ 7117 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) { 7118 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 7119 xmlParseConditionalSections(ctxt); 7120 } 7121 } 7122 7123 ctxt->instate = XML_PARSER_DTD; 7124 } 7125 7126 /** 7127 * xmlParseTextDecl: 7128 * @ctxt: an XML parser context 7129 * 7130 * parse an XML declaration header for external entities 7131 * 7132 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 7133 */ 7134 7135 void 7136 xmlParseTextDecl(xmlParserCtxtPtr ctxt) { 7137 xmlChar *version; 7138 const xmlChar *encoding; 7139 7140 /* 7141 * We know that '<?xml' is here. 
7142 */ 7143 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 7144 SKIP(5); 7145 } else { 7146 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL); 7147 return; 7148 } 7149 7150 if (!IS_BLANK_CH(CUR)) { 7151 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 7152 "Space needed after '<?xml'\n"); 7153 } 7154 SKIP_BLANKS; 7155 7156 /* 7157 * We may have the VersionInfo here. 7158 */ 7159 version = xmlParseVersionInfo(ctxt); 7160 if (version == NULL) 7161 version = xmlCharStrdup(XML_DEFAULT_VERSION); 7162 else { 7163 if (!IS_BLANK_CH(CUR)) { 7164 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 7165 "Space needed here\n"); 7166 } 7167 } 7168 ctxt->input->version = version; 7169 7170 /* 7171 * We must have the encoding declaration 7172 */ 7173 encoding = xmlParseEncodingDecl(ctxt); 7174 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 7175 /* 7176 * The XML REC instructs us to stop parsing right here 7177 */ 7178 return; 7179 } 7180 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) { 7181 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING, 7182 "Missing encoding in text declaration\n"); 7183 } 7184 7185 SKIP_BLANKS; 7186 if ((RAW == '?') && (NXT(1) == '>')) { 7187 SKIP(2); 7188 } else if (RAW == '>') { 7189 /* Deprecated old WD ... */ 7190 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 7191 NEXT; 7192 } else { 7193 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 7194 MOVETO_ENDTAG(CUR_PTR); 7195 NEXT; 7196 } 7197 } 7198 7199 /** 7200 * xmlParseExternalSubset: 7201 * @ctxt: an XML parser context 7202 * @ExternalID: the external identifier 7203 * @SystemID: the system identifier (or URL) 7204 * 7205 * parse Markup declarations from an external subset 7206 * 7207 * [30] extSubset ::= textDecl? 
extSubsetDecl 7208 * 7209 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) * 7210 */ 7211 void 7212 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID, 7213 const xmlChar *SystemID) { 7214 xmlDetectSAX2(ctxt); 7215 GROW; 7216 7217 if ((ctxt->encoding == NULL) && 7218 (ctxt->input->end - ctxt->input->cur >= 4)) { 7219 xmlChar start[4]; 7220 xmlCharEncoding enc; 7221 7222 start[0] = RAW; 7223 start[1] = NXT(1); 7224 start[2] = NXT(2); 7225 start[3] = NXT(3); 7226 enc = xmlDetectCharEncoding(start, 4); 7227 if (enc != XML_CHAR_ENCODING_NONE) 7228 xmlSwitchEncoding(ctxt, enc); 7229 } 7230 7231 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) { 7232 xmlParseTextDecl(ctxt); 7233 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 7234 /* 7235 * The XML REC instructs us to stop parsing right here 7236 */ 7237 xmlHaltParser(ctxt); 7238 return; 7239 } 7240 } 7241 if (ctxt->myDoc == NULL) { 7242 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 7243 if (ctxt->myDoc == NULL) { 7244 xmlErrMemory(ctxt, "New Doc failed"); 7245 return; 7246 } 7247 ctxt->myDoc->properties = XML_DOC_INTERNAL; 7248 } 7249 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL)) 7250 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID); 7251 7252 ctxt->instate = XML_PARSER_DTD; 7253 ctxt->external = 1; 7254 while (((RAW == '<') && (NXT(1) == '?')) || 7255 ((RAW == '<') && (NXT(1) == '!')) || 7256 (RAW == '%') || IS_BLANK_CH(CUR)) { 7257 const xmlChar *check = CUR_PTR; 7258 unsigned int cons = ctxt->input->consumed; 7259 7260 GROW; 7261 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 7262 xmlParseConditionalSections(ctxt); 7263 } else if (IS_BLANK_CH(CUR)) { 7264 NEXT; 7265 } else if (RAW == '%') { 7266 xmlParsePEReference(ctxt); 7267 } else 7268 xmlParseMarkupDecl(ctxt); 7269 7270 /* 7271 * Pop-up of finished entities. 
7272 */ 7273 while ((RAW == 0) && (ctxt->inputNr > 1)) 7274 xmlPopInput(ctxt); 7275 7276 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 7277 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 7278 break; 7279 } 7280 } 7281 7282 if (RAW != 0) { 7283 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 7284 } 7285 7286 } 7287 7288 /** 7289 * xmlParseReference: 7290 * @ctxt: an XML parser context 7291 * 7292 * parse and handle entity references in content, depending on the SAX 7293 * interface, this may end-up in a call to character() if this is a 7294 * CharRef, a predefined entity, if there is no reference() callback. 7295 * or if the parser was asked to switch to that mode. 7296 * 7297 * [67] Reference ::= EntityRef | CharRef 7298 */ 7299 void 7300 xmlParseReference(xmlParserCtxtPtr ctxt) { 7301 xmlEntityPtr ent; 7302 xmlChar *val; 7303 int was_checked; 7304 xmlNodePtr list = NULL; 7305 xmlParserErrors ret = XML_ERR_OK; 7306 7307 7308 if (RAW != '&') 7309 return; 7310 7311 /* 7312 * Simple case of a CharRef 7313 */ 7314 if (NXT(1) == '#') { 7315 int i = 0; 7316 xmlChar out[10]; 7317 int hex = NXT(2); 7318 int value = xmlParseCharRef(ctxt); 7319 7320 if (value == 0) 7321 return; 7322 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) { 7323 /* 7324 * So we are using non-UTF-8 buffers 7325 * Check that the char fit on 8bits, if not 7326 * generate a CharRef. 
7327 */ 7328 if (value <= 0xFF) { 7329 out[0] = value; 7330 out[1] = 0; 7331 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 7332 (!ctxt->disableSAX)) 7333 ctxt->sax->characters(ctxt->userData, out, 1); 7334 } else { 7335 if ((hex == 'x') || (hex == 'X')) 7336 snprintf((char *)out, sizeof(out), "#x%X", value); 7337 else 7338 snprintf((char *)out, sizeof(out), "#%d", value); 7339 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 7340 (!ctxt->disableSAX)) 7341 ctxt->sax->reference(ctxt->userData, out); 7342 } 7343 } else { 7344 /* 7345 * Just encode the value in UTF-8 7346 */ 7347 COPY_BUF(0 ,out, i, value); 7348 out[i] = 0; 7349 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 7350 (!ctxt->disableSAX)) 7351 ctxt->sax->characters(ctxt->userData, out, i); 7352 } 7353 return; 7354 } 7355 7356 /* 7357 * We are seeing an entity reference 7358 */ 7359 ent = xmlParseEntityRef(ctxt); 7360 if (ent == NULL) return; 7361 if (!ctxt->wellFormed) 7362 return; 7363 was_checked = ent->checked; 7364 7365 /* special case of predefined entities */ 7366 if ((ent->name == NULL) || 7367 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 7368 val = ent->content; 7369 if (val == NULL) return; 7370 /* 7371 * inline the entity. 7372 */ 7373 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 7374 (!ctxt->disableSAX)) 7375 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val)); 7376 return; 7377 } 7378 7379 /* 7380 * The first reference to the entity trigger a parsing phase 7381 * where the ent->children is filled with the result from 7382 * the parsing. 7383 * Note: external parsed entities will not be loaded, it is not 7384 * required for a non-validating parser, unless the parsing option 7385 * of validating, or substituting entities were given. Doing so is 7386 * far more secure as the parser will only process data coming from 7387 * the document entity by default. 
7388 */ 7389 if (((ent->checked == 0) || 7390 ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) && 7391 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) || 7392 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) { 7393 unsigned long oldnbent = ctxt->nbentities; 7394 7395 /* 7396 * This is a bit hackish but this seems the best 7397 * way to make sure both SAX and DOM entity support 7398 * behaves okay. 7399 */ 7400 void *user_data; 7401 if (ctxt->userData == ctxt) 7402 user_data = NULL; 7403 else 7404 user_data = ctxt->userData; 7405 7406 /* 7407 * Check that this entity is well formed 7408 * 4.3.2: An internal general parsed entity is well-formed 7409 * if its replacement text matches the production labeled 7410 * content. 7411 */ 7412 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { 7413 ctxt->depth++; 7414 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content, 7415 user_data, &list); 7416 ctxt->depth--; 7417 7418 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) { 7419 ctxt->depth++; 7420 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax, 7421 user_data, ctxt->depth, ent->URI, 7422 ent->ExternalID, &list); 7423 ctxt->depth--; 7424 } else { 7425 ret = XML_ERR_ENTITY_PE_INTERNAL; 7426 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, 7427 "invalid entity type found\n", NULL); 7428 } 7429 7430 /* 7431 * Store the number of entities needing parsing for this entity 7432 * content and do checkings 7433 */ 7434 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2; 7435 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<'))) 7436 ent->checked |= 1; 7437 if (ret == XML_ERR_ENTITY_LOOP) { 7438 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 7439 xmlFreeNodeList(list); 7440 return; 7441 } 7442 if (xmlParserEntityCheck(ctxt, 0, ent, 0)) { 7443 xmlFreeNodeList(list); 7444 return; 7445 } 7446 7447 if ((ret == XML_ERR_OK) && (list != NULL)) { 7448 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) || 7449 (ent->etype == 
XML_EXTERNAL_GENERAL_PARSED_ENTITY))&& 7450 (ent->children == NULL)) { 7451 ent->children = list; 7452 if (ctxt->replaceEntities) { 7453 /* 7454 * Prune it directly in the generated document 7455 * except for single text nodes. 7456 */ 7457 if (((list->type == XML_TEXT_NODE) && 7458 (list->next == NULL)) || 7459 (ctxt->parseMode == XML_PARSE_READER)) { 7460 list->parent = (xmlNodePtr) ent; 7461 list = NULL; 7462 ent->owner = 1; 7463 } else { 7464 ent->owner = 0; 7465 while (list != NULL) { 7466 list->parent = (xmlNodePtr) ctxt->node; 7467 list->doc = ctxt->myDoc; 7468 if (list->next == NULL) 7469 ent->last = list; 7470 list = list->next; 7471 } 7472 list = ent->children; 7473 #ifdef LIBXML_LEGACY_ENABLED 7474 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 7475 xmlAddEntityReference(ent, list, NULL); 7476 #endif /* LIBXML_LEGACY_ENABLED */ 7477 } 7478 } else { 7479 ent->owner = 1; 7480 while (list != NULL) { 7481 list->parent = (xmlNodePtr) ent; 7482 xmlSetTreeDoc(list, ent->doc); 7483 if (list->next == NULL) 7484 ent->last = list; 7485 list = list->next; 7486 } 7487 } 7488 } else { 7489 xmlFreeNodeList(list); 7490 list = NULL; 7491 } 7492 } else if ((ret != XML_ERR_OK) && 7493 (ret != XML_WAR_UNDECLARED_ENTITY)) { 7494 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7495 "Entity '%s' failed to parse\n", ent->name); 7496 xmlParserEntityCheck(ctxt, 0, ent, 0); 7497 } else if (list != NULL) { 7498 xmlFreeNodeList(list); 7499 list = NULL; 7500 } 7501 if (ent->checked == 0) 7502 ent->checked = 2; 7503 7504 /* Prevent entity from being parsed and expanded twice (Bug 760367). */ 7505 was_checked = 0; 7506 } else if (ent->checked != 1) { 7507 ctxt->nbentities += ent->checked / 2; 7508 } 7509 7510 /* 7511 * Now that the entity content has been gathered 7512 * provide it to the application, this can take different forms based 7513 * on the parsing modes. 
7514 */ 7515 if (ent->children == NULL) { 7516 /* 7517 * Probably running in SAX mode and the callbacks don't 7518 * build the entity content. So unless we already went 7519 * though parsing for first checking go though the entity 7520 * content to generate callbacks associated to the entity 7521 */ 7522 if (was_checked != 0) { 7523 void *user_data; 7524 /* 7525 * This is a bit hackish but this seems the best 7526 * way to make sure both SAX and DOM entity support 7527 * behaves okay. 7528 */ 7529 if (ctxt->userData == ctxt) 7530 user_data = NULL; 7531 else 7532 user_data = ctxt->userData; 7533 7534 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { 7535 ctxt->depth++; 7536 ret = xmlParseBalancedChunkMemoryInternal(ctxt, 7537 ent->content, user_data, NULL); 7538 ctxt->depth--; 7539 } else if (ent->etype == 7540 XML_EXTERNAL_GENERAL_PARSED_ENTITY) { 7541 ctxt->depth++; 7542 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, 7543 ctxt->sax, user_data, ctxt->depth, 7544 ent->URI, ent->ExternalID, NULL); 7545 ctxt->depth--; 7546 } else { 7547 ret = XML_ERR_ENTITY_PE_INTERNAL; 7548 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, 7549 "invalid entity type found\n", NULL); 7550 } 7551 if (ret == XML_ERR_ENTITY_LOOP) { 7552 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 7553 return; 7554 } 7555 } 7556 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 7557 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { 7558 /* 7559 * Entity reference callback comes second, it's somewhat 7560 * superfluous but a compatibility to historical behaviour 7561 */ 7562 ctxt->sax->reference(ctxt->userData, ent->name); 7563 } 7564 return; 7565 } 7566 7567 /* 7568 * If we didn't get any children for the entity being built 7569 */ 7570 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 7571 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { 7572 /* 7573 * Create a node. 
7574 */ 7575 ctxt->sax->reference(ctxt->userData, ent->name); 7576 return; 7577 } 7578 7579 if ((ctxt->replaceEntities) || (ent->children == NULL)) { 7580 /* 7581 * There is a problem on the handling of _private for entities 7582 * (bug 155816): Should we copy the content of the field from 7583 * the entity (possibly overwriting some value set by the user 7584 * when a copy is created), should we leave it alone, or should 7585 * we try to take care of different situations? The problem 7586 * is exacerbated by the usage of this field by the xmlReader. 7587 * To fix this bug, we look at _private on the created node 7588 * and, if it's NULL, we copy in whatever was in the entity. 7589 * If it's not NULL we leave it alone. This is somewhat of a 7590 * hack - maybe we should have further tests to determine 7591 * what to do. 7592 */ 7593 if ((ctxt->node != NULL) && (ent->children != NULL)) { 7594 /* 7595 * Seems we are generating the DOM content, do 7596 * a simple tree copy for all references except the first 7597 * In the first occurrence list contains the replacement. 7598 */ 7599 if (((list == NULL) && (ent->owner == 0)) || 7600 (ctxt->parseMode == XML_PARSE_READER)) { 7601 xmlNodePtr nw = NULL, cur, firstChild = NULL; 7602 7603 /* 7604 * We are copying here, make sure there is no abuse 7605 */ 7606 ctxt->sizeentcopy += ent->length + 5; 7607 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy)) 7608 return; 7609 7610 /* 7611 * when operating on a reader, the entities definitions 7612 * are always owning the entities subtree. 
7613 if (ctxt->parseMode == XML_PARSE_READER) 7614 ent->owner = 1; 7615 */ 7616 7617 cur = ent->children; 7618 while (cur != NULL) { 7619 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1); 7620 if (nw != NULL) { 7621 if (nw->_private == NULL) 7622 nw->_private = cur->_private; 7623 if (firstChild == NULL){ 7624 firstChild = nw; 7625 } 7626 nw = xmlAddChild(ctxt->node, nw); 7627 } 7628 if (cur == ent->last) { 7629 /* 7630 * needed to detect some strange empty 7631 * node cases in the reader tests 7632 */ 7633 if ((ctxt->parseMode == XML_PARSE_READER) && 7634 (nw != NULL) && 7635 (nw->type == XML_ELEMENT_NODE) && 7636 (nw->children == NULL)) 7637 nw->extra = 1; 7638 7639 break; 7640 } 7641 cur = cur->next; 7642 } 7643 #ifdef LIBXML_LEGACY_ENABLED 7644 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 7645 xmlAddEntityReference(ent, firstChild, nw); 7646 #endif /* LIBXML_LEGACY_ENABLED */ 7647 } else if ((list == NULL) || (ctxt->inputNr > 0)) { 7648 xmlNodePtr nw = NULL, cur, next, last, 7649 firstChild = NULL; 7650 7651 /* 7652 * We are copying here, make sure there is no abuse 7653 */ 7654 ctxt->sizeentcopy += ent->length + 5; 7655 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy)) 7656 return; 7657 7658 /* 7659 * Copy the entity child list and make it the new 7660 * entity child list. The goal is to make sure any 7661 * ID or REF referenced will be the one from the 7662 * document content and not the entity copy. 
7663 */ 7664 cur = ent->children; 7665 ent->children = NULL; 7666 last = ent->last; 7667 ent->last = NULL; 7668 while (cur != NULL) { 7669 next = cur->next; 7670 cur->next = NULL; 7671 cur->parent = NULL; 7672 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1); 7673 if (nw != NULL) { 7674 if (nw->_private == NULL) 7675 nw->_private = cur->_private; 7676 if (firstChild == NULL){ 7677 firstChild = cur; 7678 } 7679 xmlAddChild((xmlNodePtr) ent, nw); 7680 xmlAddChild(ctxt->node, cur); 7681 } 7682 if (cur == last) 7683 break; 7684 cur = next; 7685 } 7686 if (ent->owner == 0) 7687 ent->owner = 1; 7688 #ifdef LIBXML_LEGACY_ENABLED 7689 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 7690 xmlAddEntityReference(ent, firstChild, nw); 7691 #endif /* LIBXML_LEGACY_ENABLED */ 7692 } else { 7693 const xmlChar *nbktext; 7694 7695 /* 7696 * the name change is to avoid coalescing of the 7697 * node with a possible previous text one which 7698 * would make ent->children a dangling pointer 7699 */ 7700 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext", 7701 -1); 7702 if (ent->children->type == XML_TEXT_NODE) 7703 ent->children->name = nbktext; 7704 if ((ent->last != ent->children) && 7705 (ent->last->type == XML_TEXT_NODE)) 7706 ent->last->name = nbktext; 7707 xmlAddChildList(ctxt->node, ent->children); 7708 } 7709 7710 /* 7711 * This is to avoid a nasty side effect, see 7712 * characters() in SAX.c 7713 */ 7714 ctxt->nodemem = 0; 7715 ctxt->nodelen = 0; 7716 return; 7717 } 7718 } 7719 } 7720 7721 /** 7722 * xmlParseEntityRef: 7723 * @ctxt: an XML parser context 7724 * 7725 * parse ENTITY references declarations 7726 * 7727 * [68] EntityRef ::= '&' Name ';' 7728 * 7729 * [ WFC: Entity Declared ] 7730 * In a document without any DTD, a document with only an internal DTD 7731 * subset which contains no parameter entity references, or a document 7732 * with "standalone='yes'", the Name given in the entity reference 7733 * must match that in an entity declaration, except that 
well-formed 7734 * documents need not declare any of the following entities: amp, lt, 7735 * gt, apos, quot. The declaration of a parameter entity must precede 7736 * any reference to it. Similarly, the declaration of a general entity 7737 * must precede any reference to it which appears in a default value in an 7738 * attribute-list declaration. Note that if entities are declared in the 7739 * external subset or in external parameter entities, a non-validating 7740 * processor is not obligated to read and process their declarations; 7741 * for such documents, the rule that an entity must be declared is a 7742 * well-formedness constraint only if standalone='yes'. 7743 * 7744 * [ WFC: Parsed Entity ] 7745 * An entity reference must not contain the name of an unparsed entity 7746 * 7747 * Returns the xmlEntityPtr if found, or NULL otherwise. 7748 */ 7749 xmlEntityPtr 7750 xmlParseEntityRef(xmlParserCtxtPtr ctxt) { 7751 const xmlChar *name; 7752 xmlEntityPtr ent = NULL; 7753 7754 GROW; 7755 if (ctxt->instate == XML_PARSER_EOF) 7756 return(NULL); 7757 7758 if (RAW != '&') 7759 return(NULL); 7760 NEXT; 7761 name = xmlParseName(ctxt); 7762 if (name == NULL) { 7763 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7764 "xmlParseEntityRef: no name\n"); 7765 return(NULL); 7766 } 7767 if (RAW != ';') { 7768 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7769 return(NULL); 7770 } 7771 NEXT; 7772 7773 /* 7774 * Predefined entities override any extra definition 7775 */ 7776 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) { 7777 ent = xmlGetPredefinedEntity(name); 7778 if (ent != NULL) 7779 return(ent); 7780 } 7781 7782 /* 7783 * Increase the number of entity references parsed 7784 */ 7785 ctxt->nbentities++; 7786 7787 /* 7788 * Ask first SAX for entity resolution, otherwise try the 7789 * entities which may have stored in the parser context. 
7790 */ 7791 if (ctxt->sax != NULL) { 7792 if (ctxt->sax->getEntity != NULL) 7793 ent = ctxt->sax->getEntity(ctxt->userData, name); 7794 if ((ctxt->wellFormed == 1 ) && (ent == NULL) && 7795 (ctxt->options & XML_PARSE_OLDSAX)) 7796 ent = xmlGetPredefinedEntity(name); 7797 if ((ctxt->wellFormed == 1 ) && (ent == NULL) && 7798 (ctxt->userData==ctxt)) { 7799 ent = xmlSAX2GetEntity(ctxt, name); 7800 } 7801 } 7802 if (ctxt->instate == XML_PARSER_EOF) 7803 return(NULL); 7804 /* 7805 * [ WFC: Entity Declared ] 7806 * In a document without any DTD, a document with only an 7807 * internal DTD subset which contains no parameter entity 7808 * references, or a document with "standalone='yes'", the 7809 * Name given in the entity reference must match that in an 7810 * entity declaration, except that well-formed documents 7811 * need not declare any of the following entities: amp, lt, 7812 * gt, apos, quot. 7813 * The declaration of a parameter entity must precede any 7814 * reference to it. 7815 * Similarly, the declaration of a general entity must 7816 * precede any reference to it which appears in a default 7817 * value in an attribute-list declaration. Note that if 7818 * entities are declared in the external subset or in 7819 * external parameter entities, a non-validating processor 7820 * is not obligated to read and process their declarations; 7821 * for such documents, the rule that an entity must be 7822 * declared is a well-formedness constraint only if 7823 * standalone='yes'. 
7824 */ 7825 if (ent == NULL) { 7826 if ((ctxt->standalone == 1) || 7827 ((ctxt->hasExternalSubset == 0) && 7828 (ctxt->hasPErefs == 0))) { 7829 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7830 "Entity '%s' not defined\n", name); 7831 } else { 7832 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 7833 "Entity '%s' not defined\n", name); 7834 if ((ctxt->inSubset == 0) && 7835 (ctxt->sax != NULL) && 7836 (ctxt->sax->reference != NULL)) { 7837 ctxt->sax->reference(ctxt->userData, name); 7838 } 7839 } 7840 xmlParserEntityCheck(ctxt, 0, ent, 0); 7841 ctxt->valid = 0; 7842 } 7843 7844 /* 7845 * [ WFC: Parsed Entity ] 7846 * An entity reference must not contain the name of an 7847 * unparsed entity 7848 */ 7849 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 7850 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 7851 "Entity reference to unparsed entity %s\n", name); 7852 } 7853 7854 /* 7855 * [ WFC: No External Entity References ] 7856 * Attribute values cannot contain direct or indirect 7857 * entity references to external entities. 7858 */ 7859 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7860 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 7861 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 7862 "Attribute references external entity '%s'\n", name); 7863 } 7864 /* 7865 * [ WFC: No < in Attribute Values ] 7866 * The replacement text of any entity referred to directly or 7867 * indirectly in an attribute value (other than "<") must 7868 * not contain a <. 7869 */ 7870 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7871 (ent != NULL) && 7872 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) { 7873 if (((ent->checked & 1) || (ent->checked == 0)) && 7874 (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) { 7875 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 7876 "'<' in entity '%s' is not allowed in attributes values\n", name); 7877 } 7878 } 7879 7880 /* 7881 * Internal check, no parameter entities here ... 
7882 */ 7883 else { 7884 switch (ent->etype) { 7885 case XML_INTERNAL_PARAMETER_ENTITY: 7886 case XML_EXTERNAL_PARAMETER_ENTITY: 7887 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 7888 "Attempt to reference the parameter entity '%s'\n", 7889 name); 7890 break; 7891 default: 7892 break; 7893 } 7894 } 7895 7896 /* 7897 * [ WFC: No Recursion ] 7898 * A parsed entity must not contain a recursive reference 7899 * to itself, either directly or indirectly. 7900 * Done somewhere else 7901 */ 7902 return(ent); 7903 } 7904 7905 /** 7906 * xmlParseStringEntityRef: 7907 * @ctxt: an XML parser context 7908 * @str: a pointer to an index in the string 7909 * 7910 * parse ENTITY references declarations, but this version parses it from 7911 * a string value. 7912 * 7913 * [68] EntityRef ::= '&' Name ';' 7914 * 7915 * [ WFC: Entity Declared ] 7916 * In a document without any DTD, a document with only an internal DTD 7917 * subset which contains no parameter entity references, or a document 7918 * with "standalone='yes'", the Name given in the entity reference 7919 * must match that in an entity declaration, except that well-formed 7920 * documents need not declare any of the following entities: amp, lt, 7921 * gt, apos, quot. The declaration of a parameter entity must precede 7922 * any reference to it. Similarly, the declaration of a general entity 7923 * must precede any reference to it which appears in a default value in an 7924 * attribute-list declaration. Note that if entities are declared in the 7925 * external subset or in external parameter entities, a non-validating 7926 * processor is not obligated to read and process their declarations; 7927 * for such documents, the rule that an entity must be declared is a 7928 * well-formedness constraint only if standalone='yes'. 7929 * 7930 * [ WFC: Parsed Entity ] 7931 * An entity reference must not contain the name of an unparsed entity 7932 * 7933 * Returns the xmlEntityPtr if found, or NULL otherwise. 
The str pointer 7934 * is updated to the current location in the string. 7935 */ 7936 static xmlEntityPtr 7937 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) { 7938 xmlChar *name; 7939 const xmlChar *ptr; 7940 xmlChar cur; 7941 xmlEntityPtr ent = NULL; 7942 7943 if ((str == NULL) || (*str == NULL)) 7944 return(NULL); 7945 ptr = *str; 7946 cur = *ptr; 7947 if (cur != '&') 7948 return(NULL); 7949 7950 ptr++; 7951 name = xmlParseStringName(ctxt, &ptr); 7952 if (name == NULL) { 7953 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7954 "xmlParseStringEntityRef: no name\n"); 7955 *str = ptr; 7956 return(NULL); 7957 } 7958 if (*ptr != ';') { 7959 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7960 xmlFree(name); 7961 *str = ptr; 7962 return(NULL); 7963 } 7964 ptr++; 7965 7966 7967 /* 7968 * Predefined entities override any extra definition 7969 */ 7970 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) { 7971 ent = xmlGetPredefinedEntity(name); 7972 if (ent != NULL) { 7973 xmlFree(name); 7974 *str = ptr; 7975 return(ent); 7976 } 7977 } 7978 7979 /* 7980 * Increate the number of entity references parsed 7981 */ 7982 ctxt->nbentities++; 7983 7984 /* 7985 * Ask first SAX for entity resolution, otherwise try the 7986 * entities which may have stored in the parser context. 
7987 */ 7988 if (ctxt->sax != NULL) { 7989 if (ctxt->sax->getEntity != NULL) 7990 ent = ctxt->sax->getEntity(ctxt->userData, name); 7991 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX)) 7992 ent = xmlGetPredefinedEntity(name); 7993 if ((ent == NULL) && (ctxt->userData==ctxt)) { 7994 ent = xmlSAX2GetEntity(ctxt, name); 7995 } 7996 } 7997 if (ctxt->instate == XML_PARSER_EOF) { 7998 xmlFree(name); 7999 return(NULL); 8000 } 8001 8002 /* 8003 * [ WFC: Entity Declared ] 8004 * In a document without any DTD, a document with only an 8005 * internal DTD subset which contains no parameter entity 8006 * references, or a document with "standalone='yes'", the 8007 * Name given in the entity reference must match that in an 8008 * entity declaration, except that well-formed documents 8009 * need not declare any of the following entities: amp, lt, 8010 * gt, apos, quot. 8011 * The declaration of a parameter entity must precede any 8012 * reference to it. 8013 * Similarly, the declaration of a general entity must 8014 * precede any reference to it which appears in a default 8015 * value in an attribute-list declaration. Note that if 8016 * entities are declared in the external subset or in 8017 * external parameter entities, a non-validating processor 8018 * is not obligated to read and process their declarations; 8019 * for such documents, the rule that an entity must be 8020 * declared is a well-formedness constraint only if 8021 * standalone='yes'. 8022 */ 8023 if (ent == NULL) { 8024 if ((ctxt->standalone == 1) || 8025 ((ctxt->hasExternalSubset == 0) && 8026 (ctxt->hasPErefs == 0))) { 8027 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 8028 "Entity '%s' not defined\n", name); 8029 } else { 8030 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 8031 "Entity '%s' not defined\n", 8032 name); 8033 } 8034 xmlParserEntityCheck(ctxt, 0, ent, 0); 8035 /* TODO ? 
check regressions ctxt->valid = 0; */ 8036 } 8037 8038 /* 8039 * [ WFC: Parsed Entity ] 8040 * An entity reference must not contain the name of an 8041 * unparsed entity 8042 */ 8043 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 8044 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 8045 "Entity reference to unparsed entity %s\n", name); 8046 } 8047 8048 /* 8049 * [ WFC: No External Entity References ] 8050 * Attribute values cannot contain direct or indirect 8051 * entity references to external entities. 8052 */ 8053 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 8054 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 8055 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 8056 "Attribute references external entity '%s'\n", name); 8057 } 8058 /* 8059 * [ WFC: No < in Attribute Values ] 8060 * The replacement text of any entity referred to directly or 8061 * indirectly in an attribute value (other than "<") must 8062 * not contain a <. 8063 */ 8064 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 8065 (ent != NULL) && (ent->content != NULL) && 8066 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 8067 (xmlStrchr(ent->content, '<'))) { 8068 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 8069 "'<' in entity '%s' is not allowed in attributes values\n", 8070 name); 8071 } 8072 8073 /* 8074 * Internal check, no parameter entities here ... 8075 */ 8076 else { 8077 switch (ent->etype) { 8078 case XML_INTERNAL_PARAMETER_ENTITY: 8079 case XML_EXTERNAL_PARAMETER_ENTITY: 8080 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 8081 "Attempt to reference the parameter entity '%s'\n", 8082 name); 8083 break; 8084 default: 8085 break; 8086 } 8087 } 8088 8089 /* 8090 * [ WFC: No Recursion ] 8091 * A parsed entity must not contain a recursive reference 8092 * to itself, either directly or indirectly. 
8093 * Done somewhere else 8094 */ 8095 8096 xmlFree(name); 8097 *str = ptr; 8098 return(ent); 8099 } 8100 8101 /** 8102 * xmlParsePEReference: 8103 * @ctxt: an XML parser context 8104 * 8105 * parse PEReference declarations 8106 * The entity content is handled directly by pushing it's content as 8107 * a new input stream. 8108 * 8109 * [69] PEReference ::= '%' Name ';' 8110 * 8111 * [ WFC: No Recursion ] 8112 * A parsed entity must not contain a recursive 8113 * reference to itself, either directly or indirectly. 8114 * 8115 * [ WFC: Entity Declared ] 8116 * In a document without any DTD, a document with only an internal DTD 8117 * subset which contains no parameter entity references, or a document 8118 * with "standalone='yes'", ... ... The declaration of a parameter 8119 * entity must precede any reference to it... 8120 * 8121 * [ VC: Entity Declared ] 8122 * In a document with an external subset or external parameter entities 8123 * with "standalone='no'", ... ... The declaration of a parameter entity 8124 * must precede any reference to it... 8125 * 8126 * [ WFC: In DTD ] 8127 * Parameter-entity references may only appear in the DTD. 8128 * NOTE: misleading but this is handled. 
8129 */ 8130 void 8131 xmlParsePEReference(xmlParserCtxtPtr ctxt) 8132 { 8133 const xmlChar *name; 8134 xmlEntityPtr entity = NULL; 8135 xmlParserInputPtr input; 8136 8137 if (RAW != '%') 8138 return; 8139 NEXT; 8140 name = xmlParseName(ctxt); 8141 if (name == NULL) { 8142 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8143 "xmlParsePEReference: no name\n"); 8144 return; 8145 } 8146 if (RAW != ';') { 8147 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 8148 return; 8149 } 8150 8151 NEXT; 8152 8153 /* 8154 * Increate the number of entity references parsed 8155 */ 8156 ctxt->nbentities++; 8157 8158 /* 8159 * Request the entity from SAX 8160 */ 8161 if ((ctxt->sax != NULL) && 8162 (ctxt->sax->getParameterEntity != NULL)) 8163 entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 8164 if (ctxt->instate == XML_PARSER_EOF) 8165 return; 8166 if (entity == NULL) { 8167 /* 8168 * [ WFC: Entity Declared ] 8169 * In a document without any DTD, a document with only an 8170 * internal DTD subset which contains no parameter entity 8171 * references, or a document with "standalone='yes'", ... 8172 * ... The declaration of a parameter entity must precede 8173 * any reference to it... 8174 */ 8175 if ((ctxt->standalone == 1) || 8176 ((ctxt->hasExternalSubset == 0) && 8177 (ctxt->hasPErefs == 0))) { 8178 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 8179 "PEReference: %%%s; not found\n", 8180 name); 8181 } else { 8182 /* 8183 * [ VC: Entity Declared ] 8184 * In a document with an external subset or external 8185 * parameter entities with "standalone='no'", ... 8186 * ... The declaration of a parameter entity must 8187 * precede any reference to it... 
8188 */ 8189 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 8190 "PEReference: %%%s; not found\n", 8191 name, NULL); 8192 ctxt->valid = 0; 8193 } 8194 xmlParserEntityCheck(ctxt, 0, NULL, 0); 8195 } else { 8196 /* 8197 * Internal checking in case the entity quest barfed 8198 */ 8199 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 8200 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 8201 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 8202 "Internal: %%%s; is not a parameter entity\n", 8203 name, NULL); 8204 } else if (ctxt->input->free != deallocblankswrapper) { 8205 input = xmlNewBlanksWrapperInputStream(ctxt, entity); 8206 if (xmlPushInput(ctxt, input) < 0) 8207 return; 8208 } else { 8209 if (xmlParserEntityCheck(ctxt, 0, entity, 0)) 8210 return; 8211 8212 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 8213 ((ctxt->options & XML_PARSE_NOENT) == 0) && 8214 ((ctxt->options & XML_PARSE_DTDVALID) == 0) && 8215 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) && 8216 ((ctxt->options & XML_PARSE_DTDATTR) == 0) && 8217 (ctxt->replaceEntities == 0) && 8218 (ctxt->validate == 0)) 8219 return; 8220 8221 /* 8222 * TODO !!! 8223 * handle the extra spaces added before and after 8224 * c.f. 
http://www.w3.org/TR/REC-xml#as-PE 8225 */ 8226 input = xmlNewEntityInputStream(ctxt, entity); 8227 if (xmlPushInput(ctxt, input) < 0) 8228 return; 8229 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 8230 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && 8231 (IS_BLANK_CH(NXT(5)))) { 8232 xmlParseTextDecl(ctxt); 8233 if (ctxt->errNo == 8234 XML_ERR_UNSUPPORTED_ENCODING) { 8235 /* 8236 * The XML REC instructs us to stop parsing 8237 * right here 8238 */ 8239 xmlHaltParser(ctxt); 8240 return; 8241 } 8242 } 8243 } 8244 } 8245 ctxt->hasPErefs = 1; 8246 } 8247 8248 /** 8249 * xmlLoadEntityContent: 8250 * @ctxt: an XML parser context 8251 * @entity: an unloaded system entity 8252 * 8253 * Load the original content of the given system entity from the 8254 * ExternalID/SystemID given. This is to be used for Included in Literal 8255 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references 8256 * 8257 * Returns 0 in case of success and -1 in case of failure 8258 */ 8259 static int 8260 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 8261 xmlParserInputPtr input; 8262 xmlBufferPtr buf; 8263 int l, c; 8264 int count = 0; 8265 8266 if ((ctxt == NULL) || (entity == NULL) || 8267 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) && 8268 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) || 8269 (entity->content != NULL)) { 8270 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8271 "xmlLoadEntityContent parameter error"); 8272 return(-1); 8273 } 8274 8275 if (xmlParserDebugEntities) 8276 xmlGenericError(xmlGenericErrorContext, 8277 "Reading %s entity content input\n", entity->name); 8278 8279 buf = xmlBufferCreate(); 8280 if (buf == NULL) { 8281 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8282 "xmlLoadEntityContent parameter error"); 8283 return(-1); 8284 } 8285 8286 input = xmlNewEntityInputStream(ctxt, entity); 8287 if (input == NULL) { 8288 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8289 "xmlLoadEntityContent input error"); 8290 
xmlBufferFree(buf); 8291 return(-1); 8292 } 8293 8294 /* 8295 * Push the entity as the current input, read char by char 8296 * saving to the buffer until the end of the entity or an error 8297 */ 8298 if (xmlPushInput(ctxt, input) < 0) { 8299 xmlBufferFree(buf); 8300 return(-1); 8301 } 8302 8303 GROW; 8304 c = CUR_CHAR(l); 8305 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) && 8306 (IS_CHAR(c))) { 8307 xmlBufferAdd(buf, ctxt->input->cur, l); 8308 if (count++ > XML_PARSER_CHUNK_SIZE) { 8309 count = 0; 8310 GROW; 8311 if (ctxt->instate == XML_PARSER_EOF) { 8312 xmlBufferFree(buf); 8313 return(-1); 8314 } 8315 } 8316 NEXTL(l); 8317 c = CUR_CHAR(l); 8318 if (c == 0) { 8319 count = 0; 8320 GROW; 8321 if (ctxt->instate == XML_PARSER_EOF) { 8322 xmlBufferFree(buf); 8323 return(-1); 8324 } 8325 c = CUR_CHAR(l); 8326 } 8327 } 8328 8329 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) { 8330 xmlPopInput(ctxt); 8331 } else if (!IS_CHAR(c)) { 8332 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 8333 "xmlLoadEntityContent: invalid char value %d\n", 8334 c); 8335 xmlBufferFree(buf); 8336 return(-1); 8337 } 8338 entity->content = buf->content; 8339 buf->content = NULL; 8340 xmlBufferFree(buf); 8341 8342 return(0); 8343 } 8344 8345 /** 8346 * xmlParseStringPEReference: 8347 * @ctxt: an XML parser context 8348 * @str: a pointer to an index in the string 8349 * 8350 * parse PEReference declarations 8351 * 8352 * [69] PEReference ::= '%' Name ';' 8353 * 8354 * [ WFC: No Recursion ] 8355 * A parsed entity must not contain a recursive 8356 * reference to itself, either directly or indirectly. 8357 * 8358 * [ WFC: Entity Declared ] 8359 * In a document without any DTD, a document with only an internal DTD 8360 * subset which contains no parameter entity references, or a document 8361 * with "standalone='yes'", ... ... The declaration of a parameter 8362 * entity must precede any reference to it... 
8363 * 8364 * [ VC: Entity Declared ] 8365 * In a document with an external subset or external parameter entities 8366 * with "standalone='no'", ... ... The declaration of a parameter entity 8367 * must precede any reference to it... 8368 * 8369 * [ WFC: In DTD ] 8370 * Parameter-entity references may only appear in the DTD. 8371 * NOTE: misleading but this is handled. 8372 * 8373 * Returns the string of the entity content. 8374 * str is updated to the current value of the index 8375 */ 8376 static xmlEntityPtr 8377 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) { 8378 const xmlChar *ptr; 8379 xmlChar cur; 8380 xmlChar *name; 8381 xmlEntityPtr entity = NULL; 8382 8383 if ((str == NULL) || (*str == NULL)) return(NULL); 8384 ptr = *str; 8385 cur = *ptr; 8386 if (cur != '%') 8387 return(NULL); 8388 ptr++; 8389 name = xmlParseStringName(ctxt, &ptr); 8390 if (name == NULL) { 8391 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8392 "xmlParseStringPEReference: no name\n"); 8393 *str = ptr; 8394 return(NULL); 8395 } 8396 cur = *ptr; 8397 if (cur != ';') { 8398 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 8399 xmlFree(name); 8400 *str = ptr; 8401 return(NULL); 8402 } 8403 ptr++; 8404 8405 /* 8406 * Increate the number of entity references parsed 8407 */ 8408 ctxt->nbentities++; 8409 8410 /* 8411 * Request the entity from SAX 8412 */ 8413 if ((ctxt->sax != NULL) && 8414 (ctxt->sax->getParameterEntity != NULL)) 8415 entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 8416 if (ctxt->instate == XML_PARSER_EOF) { 8417 xmlFree(name); 8418 *str = ptr; 8419 return(NULL); 8420 } 8421 if (entity == NULL) { 8422 /* 8423 * [ WFC: Entity Declared ] 8424 * In a document without any DTD, a document with only an 8425 * internal DTD subset which contains no parameter entity 8426 * references, or a document with "standalone='yes'", ... 8427 * ... The declaration of a parameter entity must precede 8428 * any reference to it... 
8429 */ 8430 if ((ctxt->standalone == 1) || 8431 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) { 8432 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 8433 "PEReference: %%%s; not found\n", name); 8434 } else { 8435 /* 8436 * [ VC: Entity Declared ] 8437 * In a document with an external subset or external 8438 * parameter entities with "standalone='no'", ... 8439 * ... The declaration of a parameter entity must 8440 * precede any reference to it... 8441 */ 8442 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 8443 "PEReference: %%%s; not found\n", 8444 name, NULL); 8445 ctxt->valid = 0; 8446 } 8447 xmlParserEntityCheck(ctxt, 0, NULL, 0); 8448 } else { 8449 /* 8450 * Internal checking in case the entity quest barfed 8451 */ 8452 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 8453 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 8454 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 8455 "%%%s; is not a parameter entity\n", 8456 name, NULL); 8457 } 8458 } 8459 ctxt->hasPErefs = 1; 8460 xmlFree(name); 8461 *str = ptr; 8462 return(entity); 8463 } 8464 8465 /** 8466 * xmlParseDocTypeDecl: 8467 * @ctxt: an XML parser context 8468 * 8469 * parse a DOCTYPE declaration 8470 * 8471 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? 8472 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 8473 * 8474 * [ VC: Root Element Type ] 8475 * The Name in the document type declaration must match the element 8476 * type of the root element. 8477 */ 8478 8479 void 8480 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) { 8481 const xmlChar *name = NULL; 8482 xmlChar *ExternalID = NULL; 8483 xmlChar *URI = NULL; 8484 8485 /* 8486 * We know that '<!DOCTYPE' has been detected. 8487 */ 8488 SKIP(9); 8489 8490 SKIP_BLANKS; 8491 8492 /* 8493 * Parse the DOCTYPE name. 
8494 */ 8495 name = xmlParseName(ctxt); 8496 if (name == NULL) { 8497 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8498 "xmlParseDocTypeDecl : no DOCTYPE name !\n"); 8499 } 8500 ctxt->intSubName = name; 8501 8502 SKIP_BLANKS; 8503 8504 /* 8505 * Check for SystemID and ExternalID 8506 */ 8507 URI = xmlParseExternalID(ctxt, &ExternalID, 1); 8508 8509 if ((URI != NULL) || (ExternalID != NULL)) { 8510 ctxt->hasExternalSubset = 1; 8511 } 8512 ctxt->extSubURI = URI; 8513 ctxt->extSubSystem = ExternalID; 8514 8515 SKIP_BLANKS; 8516 8517 /* 8518 * Create and update the internal subset. 8519 */ 8520 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) && 8521 (!ctxt->disableSAX)) 8522 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI); 8523 if (ctxt->instate == XML_PARSER_EOF) 8524 return; 8525 8526 /* 8527 * Is there any internal subset declarations ? 8528 * they are handled separately in xmlParseInternalSubset() 8529 */ 8530 if (RAW == '[') 8531 return; 8532 8533 /* 8534 * We should be at the end of the DOCTYPE declaration. 8535 */ 8536 if (RAW != '>') { 8537 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 8538 } 8539 NEXT; 8540 } 8541 8542 /** 8543 * xmlParseInternalSubset: 8544 * @ctxt: an XML parser context 8545 * 8546 * parse the internal subset declaration 8547 * 8548 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 8549 */ 8550 8551 static void 8552 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) { 8553 /* 8554 * Is there any DTD definition ? 8555 */ 8556 if (RAW == '[') { 8557 ctxt->instate = XML_PARSER_DTD; 8558 NEXT; 8559 /* 8560 * Parse the succession of Markup declarations and 8561 * PEReferences. 
8562 * Subsequence (markupdecl | PEReference | S)* 8563 */ 8564 while ((RAW != ']') && (ctxt->instate != XML_PARSER_EOF)) { 8565 const xmlChar *check = CUR_PTR; 8566 unsigned int cons = ctxt->input->consumed; 8567 8568 SKIP_BLANKS; 8569 xmlParseMarkupDecl(ctxt); 8570 xmlParsePEReference(ctxt); 8571 8572 /* 8573 * Pop-up of finished entities. 8574 */ 8575 while ((RAW == 0) && (ctxt->inputNr > 1)) 8576 xmlPopInput(ctxt); 8577 8578 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 8579 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8580 "xmlParseInternalSubset: error detected in Markup declaration\n"); 8581 break; 8582 } 8583 } 8584 if (RAW == ']') { 8585 NEXT; 8586 SKIP_BLANKS; 8587 } 8588 } 8589 8590 /* 8591 * We should be at the end of the DOCTYPE declaration. 8592 */ 8593 if (RAW != '>') { 8594 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 8595 return; 8596 } 8597 NEXT; 8598 } 8599 8600 #ifdef LIBXML_SAX1_ENABLED 8601 /** 8602 * xmlParseAttribute: 8603 * @ctxt: an XML parser context 8604 * @value: a xmlChar ** used to store the value of the attribute 8605 * 8606 * parse an attribute 8607 * 8608 * [41] Attribute ::= Name Eq AttValue 8609 * 8610 * [ WFC: No External Entity References ] 8611 * Attribute values cannot contain direct or indirect entity references 8612 * to external entities. 8613 * 8614 * [ WFC: No < in Attribute Values ] 8615 * The replacement text of any entity referred to directly or indirectly in 8616 * an attribute value (other than "<") must not contain a <. 8617 * 8618 * [ VC: Attribute Value Type ] 8619 * The attribute must have been declared; the value must be of the type 8620 * declared for it. 8621 * 8622 * [25] Eq ::= S? '=' S? 8623 * 8624 * With namespace: 8625 * 8626 * [NS 11] Attribute ::= QName Eq AttValue 8627 * 8628 * Also the case QName == xmlns:??? is handled independently as a namespace 8629 * definition. 8630 * 8631 * Returns the attribute name, and the value in *value. 
8632 */ 8633 8634 const xmlChar * 8635 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) { 8636 const xmlChar *name; 8637 xmlChar *val; 8638 8639 *value = NULL; 8640 GROW; 8641 name = xmlParseName(ctxt); 8642 if (name == NULL) { 8643 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8644 "error parsing attribute name\n"); 8645 return(NULL); 8646 } 8647 8648 /* 8649 * read the value 8650 */ 8651 SKIP_BLANKS; 8652 if (RAW == '=') { 8653 NEXT; 8654 SKIP_BLANKS; 8655 val = xmlParseAttValue(ctxt); 8656 ctxt->instate = XML_PARSER_CONTENT; 8657 } else { 8658 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 8659 "Specification mandates value for attribute %s\n", name); 8660 return(NULL); 8661 } 8662 8663 /* 8664 * Check that xml:lang conforms to the specification 8665 * No more registered as an error, just generate a warning now 8666 * since this was deprecated in XML second edition 8667 */ 8668 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) { 8669 if (!xmlCheckLanguageID(val)) { 8670 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, 8671 "Malformed value for xml:lang : %s\n", 8672 val, NULL); 8673 } 8674 } 8675 8676 /* 8677 * Check that xml:space conforms to the specification 8678 */ 8679 if (xmlStrEqual(name, BAD_CAST "xml:space")) { 8680 if (xmlStrEqual(val, BAD_CAST "default")) 8681 *(ctxt->space) = 0; 8682 else if (xmlStrEqual(val, BAD_CAST "preserve")) 8683 *(ctxt->space) = 1; 8684 else { 8685 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE, 8686 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 8687 val, NULL); 8688 } 8689 } 8690 8691 *value = val; 8692 return(name); 8693 } 8694 8695 /** 8696 * xmlParseStartTag: 8697 * @ctxt: an XML parser context 8698 * 8699 * parse a start of tag either for rule element or 8700 * EmptyElement. In both case we don't parse the tag closing chars. 8701 * 8702 * [40] STag ::= '<' Name (S Attribute)* S? 
'>' 8703 * 8704 * [ WFC: Unique Att Spec ] 8705 * No attribute name may appear more than once in the same start-tag or 8706 * empty-element tag. 8707 * 8708 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 8709 * 8710 * [ WFC: Unique Att Spec ] 8711 * No attribute name may appear more than once in the same start-tag or 8712 * empty-element tag. 8713 * 8714 * With namespace: 8715 * 8716 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 8717 * 8718 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 8719 * 8720 * Returns the element name parsed 8721 */ 8722 8723 const xmlChar * 8724 xmlParseStartTag(xmlParserCtxtPtr ctxt) { 8725 const xmlChar *name; 8726 const xmlChar *attname; 8727 xmlChar *attvalue; 8728 const xmlChar **atts = ctxt->atts; 8729 int nbatts = 0; 8730 int maxatts = ctxt->maxatts; 8731 int i; 8732 8733 if (RAW != '<') return(NULL); 8734 NEXT1; 8735 8736 name = xmlParseName(ctxt); 8737 if (name == NULL) { 8738 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8739 "xmlParseStartTag: invalid element name\n"); 8740 return(NULL); 8741 } 8742 8743 /* 8744 * Now parse the attributes, it ends up with the ending 8745 * 8746 * (S Attribute)* S? 8747 */ 8748 SKIP_BLANKS; 8749 GROW; 8750 8751 while (((RAW != '>') && 8752 ((RAW != '/') || (NXT(1) != '>')) && 8753 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) { 8754 const xmlChar *q = CUR_PTR; 8755 unsigned int cons = ctxt->input->consumed; 8756 8757 attname = xmlParseAttribute(ctxt, &attvalue); 8758 if ((attname != NULL) && (attvalue != NULL)) { 8759 /* 8760 * [ WFC: Unique Att Spec ] 8761 * No attribute name may appear more than once in the same 8762 * start-tag or empty-element tag. 
8763 */ 8764 for (i = 0; i < nbatts;i += 2) { 8765 if (xmlStrEqual(atts[i], attname)) { 8766 xmlErrAttributeDup(ctxt, NULL, attname); 8767 xmlFree(attvalue); 8768 goto failed; 8769 } 8770 } 8771 /* 8772 * Add the pair to atts 8773 */ 8774 if (atts == NULL) { 8775 maxatts = 22; /* allow for 10 attrs by default */ 8776 atts = (const xmlChar **) 8777 xmlMalloc(maxatts * sizeof(xmlChar *)); 8778 if (atts == NULL) { 8779 xmlErrMemory(ctxt, NULL); 8780 if (attvalue != NULL) 8781 xmlFree(attvalue); 8782 goto failed; 8783 } 8784 ctxt->atts = atts; 8785 ctxt->maxatts = maxatts; 8786 } else if (nbatts + 4 > maxatts) { 8787 const xmlChar **n; 8788 8789 maxatts *= 2; 8790 n = (const xmlChar **) xmlRealloc((void *) atts, 8791 maxatts * sizeof(const xmlChar *)); 8792 if (n == NULL) { 8793 xmlErrMemory(ctxt, NULL); 8794 if (attvalue != NULL) 8795 xmlFree(attvalue); 8796 goto failed; 8797 } 8798 atts = n; 8799 ctxt->atts = atts; 8800 ctxt->maxatts = maxatts; 8801 } 8802 atts[nbatts++] = attname; 8803 atts[nbatts++] = attvalue; 8804 atts[nbatts] = NULL; 8805 atts[nbatts + 1] = NULL; 8806 } else { 8807 if (attvalue != NULL) 8808 xmlFree(attvalue); 8809 } 8810 8811 failed: 8812 8813 GROW 8814 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 8815 break; 8816 if (!IS_BLANK_CH(RAW)) { 8817 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 8818 "attributes construct error\n"); 8819 } 8820 SKIP_BLANKS; 8821 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) && 8822 (attname == NULL) && (attvalue == NULL)) { 8823 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, 8824 "xmlParseStartTag: problem parsing attributes\n"); 8825 break; 8826 } 8827 SHRINK; 8828 GROW; 8829 } 8830 8831 /* 8832 * SAX: Start of Element ! 
8833 */ 8834 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) && 8835 (!ctxt->disableSAX)) { 8836 if (nbatts > 0) 8837 ctxt->sax->startElement(ctxt->userData, name, atts); 8838 else 8839 ctxt->sax->startElement(ctxt->userData, name, NULL); 8840 } 8841 8842 if (atts != NULL) { 8843 /* Free only the content strings */ 8844 for (i = 1;i < nbatts;i+=2) 8845 if (atts[i] != NULL) 8846 xmlFree((xmlChar *) atts[i]); 8847 } 8848 return(name); 8849 } 8850 8851 /** 8852 * xmlParseEndTag1: 8853 * @ctxt: an XML parser context 8854 * @line: line of the start tag 8855 * @nsNr: number of namespaces on the start tag 8856 * 8857 * parse an end of tag 8858 * 8859 * [42] ETag ::= '</' Name S? '>' 8860 * 8861 * With namespace 8862 * 8863 * [NS 9] ETag ::= '</' QName S? '>' 8864 */ 8865 8866 static void 8867 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) { 8868 const xmlChar *name; 8869 8870 GROW; 8871 if ((RAW != '<') || (NXT(1) != '/')) { 8872 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED, 8873 "xmlParseEndTag: '</' not found\n"); 8874 return; 8875 } 8876 SKIP(2); 8877 8878 name = xmlParseNameAndCompare(ctxt,ctxt->name); 8879 8880 /* 8881 * We should definitely be at the ending "S? '>'" part 8882 */ 8883 GROW; 8884 SKIP_BLANKS; 8885 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { 8886 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 8887 } else 8888 NEXT1; 8889 8890 /* 8891 * [ WFC: Element Type Match ] 8892 * The Name in an element's end-tag must match the element type in the 8893 * start-tag. 
8894 * 8895 */ 8896 if (name != (xmlChar*)1) { 8897 if (name == NULL) name = BAD_CAST "unparseable"; 8898 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 8899 "Opening and ending tag mismatch: %s line %d and %s\n", 8900 ctxt->name, line, name); 8901 } 8902 8903 /* 8904 * SAX: End of Tag 8905 */ 8906 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 8907 (!ctxt->disableSAX)) 8908 ctxt->sax->endElement(ctxt->userData, ctxt->name); 8909 8910 namePop(ctxt); 8911 spacePop(ctxt); 8912 return; 8913 } 8914 8915 /** 8916 * xmlParseEndTag: 8917 * @ctxt: an XML parser context 8918 * 8919 * parse an end of tag 8920 * 8921 * [42] ETag ::= '</' Name S? '>' 8922 * 8923 * With namespace 8924 * 8925 * [NS 9] ETag ::= '</' QName S? '>' 8926 */ 8927 8928 void 8929 xmlParseEndTag(xmlParserCtxtPtr ctxt) { 8930 xmlParseEndTag1(ctxt, 0); 8931 } 8932 #endif /* LIBXML_SAX1_ENABLED */ 8933 8934 /************************************************************************ 8935 * * 8936 * SAX 2 specific operations * 8937 * * 8938 ************************************************************************/ 8939 8940 /* 8941 * xmlGetNamespace: 8942 * @ctxt: an XML parser context 8943 * @prefix: the prefix to lookup 8944 * 8945 * Lookup the namespace name for the @prefix (which ca be NULL) 8946 * The prefix must come from the @ctxt->dict dictionary 8947 * 8948 * Returns the namespace name or NULL if not bound 8949 */ 8950 static const xmlChar * 8951 xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) { 8952 int i; 8953 8954 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns); 8955 for (i = ctxt->nsNr - 2;i >= 0;i-=2) 8956 if (ctxt->nsTab[i] == prefix) { 8957 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0)) 8958 return(NULL); 8959 return(ctxt->nsTab[i + 1]); 8960 } 8961 return(NULL); 8962 } 8963 8964 /** 8965 * xmlParseQName: 8966 * @ctxt: an XML parser context 8967 * @prefix: pointer to store the prefix part 8968 * 8969 * parse an XML Namespace QName 8970 * 8971 * 
[6]  QName  ::= (Prefix ':')? LocalPart
 * [7]  Prefix  ::= NCName
 * [8]  LocalPart  ::= NCName
 *
 * Returns the Name parsed or NULL
 */

static const xmlChar *
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
    const xmlChar *first, *local;
    xmlChar *joined;

    GROW;

    first = xmlParseNCName(ctxt);
    if (first == NULL) {
        /*
         * Not an NCName. A name starting with ':' is still accepted as
         * a plain Name, with a namespace error and no prefix.
         */
        if (CUR == ':') {
            const xmlChar *plain = xmlParseName(ctxt);

            if (plain != NULL) {
                xmlNsErr(ctxt, XML_NS_ERR_QNAME,
                         "Failed to parse QName '%s'\n", plain, NULL, NULL);
                *prefix = NULL;
                return(plain);
            }
        }
        return(NULL);
    }

    /* No colon: the NCName is the whole QName. */
    if (CUR != ':') {
        *prefix = NULL;
        return(first);
    }

    NEXT;
    local = xmlParseNCName(ctxt);
    if (local == NULL) {
        /*
         * "first:" is not followed by an NCName. Recover by gluing
         * whatever Nmtoken follows (possibly nothing) to "first:" and
         * returning that as an unprefixed name.
         */
        xmlChar *token;

        if (ctxt->instate == XML_PARSER_EOF)
            return(NULL);
        xmlNsErr(ctxt, XML_NS_ERR_QNAME,
                 "Failed to parse QName '%s:'\n", first, NULL, NULL);
        token = xmlParseNmtoken(ctxt);
        if (token == NULL) {
            if (ctxt->instate == XML_PARSER_EOF)
                return(NULL);
            joined = xmlBuildQName(BAD_CAST "", first, NULL, 0);
        } else {
            joined = xmlBuildQName(token, first, NULL, 0);
            xmlFree((char *) token);
        }
        first = xmlDictLookup(ctxt->dict, joined, -1);
        if (joined != NULL) xmlFree(joined);
        *prefix = NULL;
        return(first);
    }

    if (CUR == ':') {
        /*
         * A second colon: "first:local:". Keep first as the prefix and
         * fold the rest of the name into the local part.
         */
        const xmlChar *rest;

        xmlNsErr(ctxt, XML_NS_ERR_QNAME,
                 "Failed to parse QName '%s:%s:'\n", first, local, NULL);
        NEXT;
        rest = xmlParseName(ctxt);
        if (rest != NULL) {
            joined = xmlBuildQName(rest, local, NULL, 0);
        } else {
            if (ctxt->instate == XML_PARSER_EOF)
                return(NULL);
            joined = xmlBuildQName(BAD_CAST "", local, NULL, 0);
        }
        local = xmlDictLookup(ctxt->dict, joined, -1);
        if (joined != NULL) xmlFree(joined);
    }

    *prefix = first;
    return(local);
}

/**
 * xmlParseQNameAndCompare:
 *
@ctxt: an XML parser context 9053 * @name: the localname 9054 * @prefix: the prefix, if any. 9055 * 9056 * parse an XML name and compares for match 9057 * (specialized for endtag parsing) 9058 * 9059 * Returns NULL for an illegal name, (xmlChar*) 1 for success 9060 * and the name for mismatch 9061 */ 9062 9063 static const xmlChar * 9064 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name, 9065 xmlChar const *prefix) { 9066 const xmlChar *cmp; 9067 const xmlChar *in; 9068 const xmlChar *ret; 9069 const xmlChar *prefix2; 9070 9071 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name)); 9072 9073 GROW; 9074 in = ctxt->input->cur; 9075 9076 cmp = prefix; 9077 while (*in != 0 && *in == *cmp) { 9078 ++in; 9079 ++cmp; 9080 } 9081 if ((*cmp == 0) && (*in == ':')) { 9082 in++; 9083 cmp = name; 9084 while (*in != 0 && *in == *cmp) { 9085 ++in; 9086 ++cmp; 9087 } 9088 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { 9089 /* success */ 9090 ctxt->input->cur = in; 9091 return((const xmlChar*) 1); 9092 } 9093 } 9094 /* 9095 * all strings coms from the dictionary, equality can be done directly 9096 */ 9097 ret = xmlParseQName (ctxt, &prefix2); 9098 if ((ret == name) && (prefix == prefix2)) 9099 return((const xmlChar*) 1); 9100 return ret; 9101 } 9102 9103 /** 9104 * xmlParseAttValueInternal: 9105 * @ctxt: an XML parser context 9106 * @len: attribute len result 9107 * @alloc: whether the attribute was reallocated as a new string 9108 * @normalize: if 1 then further non-CDATA normalization must be done 9109 * 9110 * parse a value for an attribute. 9111 * NOTE: if no normalization is needed, the routine will return pointers 9112 * directly from the data buffer. 
9113 * 9114 * 3.3.3 Attribute-Value Normalization: 9115 * Before the value of an attribute is passed to the application or 9116 * checked for validity, the XML processor must normalize it as follows: 9117 * - a character reference is processed by appending the referenced 9118 * character to the attribute value 9119 * - an entity reference is processed by recursively processing the 9120 * replacement text of the entity 9121 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 9122 * appending #x20 to the normalized value, except that only a single 9123 * #x20 is appended for a "#xD#xA" sequence that is part of an external 9124 * parsed entity or the literal entity value of an internal parsed entity 9125 * - other characters are processed by appending them to the normalized value 9126 * If the declared value is not CDATA, then the XML processor must further 9127 * process the normalized attribute value by discarding any leading and 9128 * trailing space (#x20) characters, and by replacing sequences of space 9129 * (#x20) characters by a single space (#x20) character. 9130 * All attributes for which no declaration has been read should be treated 9131 * by a non-validating parser as if declared CDATA. 9132 * 9133 * Returns the AttValue parsed or NULL. The value has to be freed by the 9134 * caller if it was copied, this can be detected by val[*len] == 0. 
9135 */ 9136 9137 static xmlChar * 9138 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc, 9139 int normalize) 9140 { 9141 xmlChar limit = 0; 9142 const xmlChar *in = NULL, *start, *end, *last; 9143 xmlChar *ret = NULL; 9144 int line, col; 9145 9146 GROW; 9147 in = (xmlChar *) CUR_PTR; 9148 line = ctxt->input->line; 9149 col = ctxt->input->col; 9150 if (*in != '"' && *in != '\'') { 9151 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); 9152 return (NULL); 9153 } 9154 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 9155 9156 /* 9157 * try to handle in this routine the most common case where no 9158 * allocation of a new string is required and where content is 9159 * pure ASCII. 9160 */ 9161 limit = *in++; 9162 col++; 9163 end = ctxt->input->end; 9164 start = in; 9165 if (in >= end) { 9166 const xmlChar *oldbase = ctxt->input->base; 9167 GROW; 9168 if (oldbase != ctxt->input->base) { 9169 long delta = ctxt->input->base - oldbase; 9170 start = start + delta; 9171 in = in + delta; 9172 } 9173 end = ctxt->input->end; 9174 } 9175 if (normalize) { 9176 /* 9177 * Skip any leading spaces 9178 */ 9179 while ((in < end) && (*in != limit) && 9180 ((*in == 0x20) || (*in == 0x9) || 9181 (*in == 0xA) || (*in == 0xD))) { 9182 if (*in == 0xA) { 9183 line++; col = 1; 9184 } else { 9185 col++; 9186 } 9187 in++; 9188 start = in; 9189 if (in >= end) { 9190 const xmlChar *oldbase = ctxt->input->base; 9191 GROW; 9192 if (ctxt->instate == XML_PARSER_EOF) 9193 return(NULL); 9194 if (oldbase != ctxt->input->base) { 9195 long delta = ctxt->input->base - oldbase; 9196 start = start + delta; 9197 in = in + delta; 9198 } 9199 end = ctxt->input->end; 9200 if (((in - start) > XML_MAX_TEXT_LENGTH) && 9201 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9202 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9203 "AttValue length too long\n"); 9204 return(NULL); 9205 } 9206 } 9207 } 9208 while ((in < end) && (*in != limit) && (*in >= 0x20) && 9209 (*in <= 0x7f) && (*in != '&') && 
(*in != '<')) { 9210 col++; 9211 if ((*in++ == 0x20) && (*in == 0x20)) break; 9212 if (in >= end) { 9213 const xmlChar *oldbase = ctxt->input->base; 9214 GROW; 9215 if (ctxt->instate == XML_PARSER_EOF) 9216 return(NULL); 9217 if (oldbase != ctxt->input->base) { 9218 long delta = ctxt->input->base - oldbase; 9219 start = start + delta; 9220 in = in + delta; 9221 } 9222 end = ctxt->input->end; 9223 if (((in - start) > XML_MAX_TEXT_LENGTH) && 9224 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9225 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9226 "AttValue length too long\n"); 9227 return(NULL); 9228 } 9229 } 9230 } 9231 last = in; 9232 /* 9233 * skip the trailing blanks 9234 */ 9235 while ((last[-1] == 0x20) && (last > start)) last--; 9236 while ((in < end) && (*in != limit) && 9237 ((*in == 0x20) || (*in == 0x9) || 9238 (*in == 0xA) || (*in == 0xD))) { 9239 if (*in == 0xA) { 9240 line++, col = 1; 9241 } else { 9242 col++; 9243 } 9244 in++; 9245 if (in >= end) { 9246 const xmlChar *oldbase = ctxt->input->base; 9247 GROW; 9248 if (ctxt->instate == XML_PARSER_EOF) 9249 return(NULL); 9250 if (oldbase != ctxt->input->base) { 9251 long delta = ctxt->input->base - oldbase; 9252 start = start + delta; 9253 in = in + delta; 9254 last = last + delta; 9255 } 9256 end = ctxt->input->end; 9257 if (((in - start) > XML_MAX_TEXT_LENGTH) && 9258 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9259 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9260 "AttValue length too long\n"); 9261 return(NULL); 9262 } 9263 } 9264 } 9265 if (((in - start) > XML_MAX_TEXT_LENGTH) && 9266 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9267 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9268 "AttValue length too long\n"); 9269 return(NULL); 9270 } 9271 if (*in != limit) goto need_complex; 9272 } else { 9273 while ((in < end) && (*in != limit) && (*in >= 0x20) && 9274 (*in <= 0x7f) && (*in != '&') && (*in != '<')) { 9275 in++; 9276 col++; 9277 if (in >= end) { 9278 const xmlChar *oldbase = 
ctxt->input->base; 9279 GROW; 9280 if (ctxt->instate == XML_PARSER_EOF) 9281 return(NULL); 9282 if (oldbase != ctxt->input->base) { 9283 long delta = ctxt->input->base - oldbase; 9284 start = start + delta; 9285 in = in + delta; 9286 } 9287 end = ctxt->input->end; 9288 if (((in - start) > XML_MAX_TEXT_LENGTH) && 9289 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9290 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9291 "AttValue length too long\n"); 9292 return(NULL); 9293 } 9294 } 9295 } 9296 last = in; 9297 if (((in - start) > XML_MAX_TEXT_LENGTH) && 9298 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9299 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9300 "AttValue length too long\n"); 9301 return(NULL); 9302 } 9303 if (*in != limit) goto need_complex; 9304 } 9305 in++; 9306 col++; 9307 if (len != NULL) { 9308 *len = last - start; 9309 ret = (xmlChar *) start; 9310 } else { 9311 if (alloc) *alloc = 1; 9312 ret = xmlStrndup(start, last - start); 9313 } 9314 CUR_PTR = in; 9315 ctxt->input->line = line; 9316 ctxt->input->col = col; 9317 if (alloc) *alloc = 0; 9318 return ret; 9319 need_complex: 9320 if (alloc) *alloc = 1; 9321 return xmlParseAttValueComplex(ctxt, len, normalize); 9322 } 9323 9324 /** 9325 * xmlParseAttribute2: 9326 * @ctxt: an XML parser context 9327 * @pref: the element prefix 9328 * @elem: the element name 9329 * @prefix: a xmlChar ** used to store the value of the attribute prefix 9330 * @value: a xmlChar ** used to store the value of the attribute 9331 * @len: an int * to save the length of the attribute 9332 * @alloc: an int * to indicate if the attribute was allocated 9333 * 9334 * parse an attribute in the new SAX2 framework. 9335 * 9336 * Returns the attribute name, and the value in *value, . 
9337 */ 9338 9339 static const xmlChar * 9340 xmlParseAttribute2(xmlParserCtxtPtr ctxt, 9341 const xmlChar * pref, const xmlChar * elem, 9342 const xmlChar ** prefix, xmlChar ** value, 9343 int *len, int *alloc) 9344 { 9345 const xmlChar *name; 9346 xmlChar *val, *internal_val = NULL; 9347 int normalize = 0; 9348 9349 *value = NULL; 9350 GROW; 9351 name = xmlParseQName(ctxt, prefix); 9352 if (name == NULL) { 9353 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 9354 "error parsing attribute name\n"); 9355 return (NULL); 9356 } 9357 9358 /* 9359 * get the type if needed 9360 */ 9361 if (ctxt->attsSpecial != NULL) { 9362 int type; 9363 9364 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial, 9365 pref, elem, *prefix, name); 9366 if (type != 0) 9367 normalize = 1; 9368 } 9369 9370 /* 9371 * read the value 9372 */ 9373 SKIP_BLANKS; 9374 if (RAW == '=') { 9375 NEXT; 9376 SKIP_BLANKS; 9377 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize); 9378 if (normalize) { 9379 /* 9380 * Sometimes a second normalisation pass for spaces is needed 9381 * but that only happens if charrefs or entities refernces 9382 * have been used in the attribute value, i.e. the attribute 9383 * value have been extracted in an allocated string already. 
9384 */ 9385 if (*alloc) { 9386 const xmlChar *val2; 9387 9388 val2 = xmlAttrNormalizeSpace2(ctxt, val, len); 9389 if ((val2 != NULL) && (val2 != val)) { 9390 xmlFree(val); 9391 val = (xmlChar *) val2; 9392 } 9393 } 9394 } 9395 ctxt->instate = XML_PARSER_CONTENT; 9396 } else { 9397 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 9398 "Specification mandates value for attribute %s\n", 9399 name); 9400 return (NULL); 9401 } 9402 9403 if (*prefix == ctxt->str_xml) { 9404 /* 9405 * Check that xml:lang conforms to the specification 9406 * No more registered as an error, just generate a warning now 9407 * since this was deprecated in XML second edition 9408 */ 9409 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) { 9410 internal_val = xmlStrndup(val, *len); 9411 if (!xmlCheckLanguageID(internal_val)) { 9412 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, 9413 "Malformed value for xml:lang : %s\n", 9414 internal_val, NULL); 9415 } 9416 } 9417 9418 /* 9419 * Check that xml:space conforms to the specification 9420 */ 9421 if (xmlStrEqual(name, BAD_CAST "space")) { 9422 internal_val = xmlStrndup(val, *len); 9423 if (xmlStrEqual(internal_val, BAD_CAST "default")) 9424 *(ctxt->space) = 0; 9425 else if (xmlStrEqual(internal_val, BAD_CAST "preserve")) 9426 *(ctxt->space) = 1; 9427 else { 9428 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE, 9429 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 9430 internal_val, NULL); 9431 } 9432 } 9433 if (internal_val) { 9434 xmlFree(internal_val); 9435 } 9436 } 9437 9438 *value = val; 9439 return (name); 9440 } 9441 /** 9442 * xmlParseStartTag2: 9443 * @ctxt: an XML parser context 9444 * 9445 * parse a start of tag either for rule element or 9446 * EmptyElement. In both case we don't parse the tag closing chars. 9447 * This routine is called when running SAX2 parsing 9448 * 9449 * [40] STag ::= '<' Name (S Attribute)* S? 
'>' 9450 * 9451 * [ WFC: Unique Att Spec ] 9452 * No attribute name may appear more than once in the same start-tag or 9453 * empty-element tag. 9454 * 9455 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 9456 * 9457 * [ WFC: Unique Att Spec ] 9458 * No attribute name may appear more than once in the same start-tag or 9459 * empty-element tag. 9460 * 9461 * With namespace: 9462 * 9463 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 9464 * 9465 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 9466 * 9467 * Returns the element name parsed 9468 */ 9469 9470 static const xmlChar * 9471 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref, 9472 const xmlChar **URI, int *tlen) { 9473 const xmlChar *localname; 9474 const xmlChar *prefix; 9475 const xmlChar *attname; 9476 const xmlChar *aprefix; 9477 const xmlChar *nsname; 9478 xmlChar *attvalue; 9479 const xmlChar **atts = ctxt->atts; 9480 int maxatts = ctxt->maxatts; 9481 int nratts, nbatts, nbdef; 9482 int i, j, nbNs, attval, oldline, oldcol, inputNr; 9483 const xmlChar *base; 9484 unsigned long cur; 9485 int nsNr = ctxt->nsNr; 9486 9487 if (RAW != '<') return(NULL); 9488 NEXT1; 9489 9490 /* 9491 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that 9492 * point since the attribute values may be stored as pointers to 9493 * the buffer and calling SHRINK would destroy them ! 9494 * The Shrinking is only possible once the full set of attribute 9495 * callbacks have been done. 9496 */ 9497 reparse: 9498 SHRINK; 9499 base = ctxt->input->base; 9500 cur = ctxt->input->cur - ctxt->input->base; 9501 inputNr = ctxt->inputNr; 9502 oldline = ctxt->input->line; 9503 oldcol = ctxt->input->col; 9504 nbatts = 0; 9505 nratts = 0; 9506 nbdef = 0; 9507 nbNs = 0; 9508 attval = 0; 9509 /* Forget any namespaces added during an earlier parse of this element. 
*/ 9510 ctxt->nsNr = nsNr; 9511 9512 localname = xmlParseQName(ctxt, &prefix); 9513 if (localname == NULL) { 9514 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 9515 "StartTag: invalid element name\n"); 9516 return(NULL); 9517 } 9518 *tlen = ctxt->input->cur - ctxt->input->base - cur; 9519 9520 /* 9521 * Now parse the attributes, it ends up with the ending 9522 * 9523 * (S Attribute)* S? 9524 */ 9525 SKIP_BLANKS; 9526 GROW; 9527 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr)) 9528 goto base_changed; 9529 9530 while (((RAW != '>') && 9531 ((RAW != '/') || (NXT(1) != '>')) && 9532 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) { 9533 const xmlChar *q = CUR_PTR; 9534 unsigned int cons = ctxt->input->consumed; 9535 int len = -1, alloc = 0; 9536 9537 attname = xmlParseAttribute2(ctxt, prefix, localname, 9538 &aprefix, &attvalue, &len, &alloc); 9539 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr)) { 9540 if ((attvalue != NULL) && (alloc != 0)) 9541 xmlFree(attvalue); 9542 attvalue = NULL; 9543 goto base_changed; 9544 } 9545 if ((attname != NULL) && (attvalue != NULL)) { 9546 if (len < 0) len = xmlStrlen(attvalue); 9547 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 9548 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); 9549 xmlURIPtr uri; 9550 9551 if (URL == NULL) { 9552 xmlErrMemory(ctxt, "dictionary allocation failure"); 9553 if ((attvalue != NULL) && (alloc != 0)) 9554 xmlFree(attvalue); 9555 return(NULL); 9556 } 9557 if (*URL != 0) { 9558 uri = xmlParseURI((const char *) URL); 9559 if (uri == NULL) { 9560 xmlNsErr(ctxt, XML_WAR_NS_URI, 9561 "xmlns: '%s' is not a valid URI\n", 9562 URL, NULL, NULL); 9563 } else { 9564 if (uri->scheme == NULL) { 9565 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE, 9566 "xmlns: URI %s is not absolute\n", 9567 URL, NULL, NULL); 9568 } 9569 xmlFreeURI(uri); 9570 } 9571 if (URL == ctxt->str_xml_ns) { 9572 if (attname != ctxt->str_xml) { 9573 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 
9574 "xml namespace URI cannot be the default namespace\n", 9575 NULL, NULL, NULL); 9576 } 9577 goto skip_default_ns; 9578 } 9579 if ((len == 29) && 9580 (xmlStrEqual(URL, 9581 BAD_CAST "http://www.w3.org/2000/xmlns/"))) { 9582 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9583 "reuse of the xmlns namespace name is forbidden\n", 9584 NULL, NULL, NULL); 9585 goto skip_default_ns; 9586 } 9587 } 9588 /* 9589 * check that it's not a defined namespace 9590 */ 9591 for (j = 1;j <= nbNs;j++) 9592 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 9593 break; 9594 if (j <= nbNs) 9595 xmlErrAttributeDup(ctxt, NULL, attname); 9596 else 9597 if (nsPush(ctxt, NULL, URL) > 0) nbNs++; 9598 skip_default_ns: 9599 if ((attvalue != NULL) && (alloc != 0)) { 9600 xmlFree(attvalue); 9601 attvalue = NULL; 9602 } 9603 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 9604 break; 9605 if (!IS_BLANK_CH(RAW)) { 9606 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 9607 "attributes construct error\n"); 9608 break; 9609 } 9610 SKIP_BLANKS; 9611 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr)) 9612 goto base_changed; 9613 continue; 9614 } 9615 if (aprefix == ctxt->str_xmlns) { 9616 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); 9617 xmlURIPtr uri; 9618 9619 if (attname == ctxt->str_xml) { 9620 if (URL != ctxt->str_xml_ns) { 9621 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9622 "xml namespace prefix mapped to wrong URI\n", 9623 NULL, NULL, NULL); 9624 } 9625 /* 9626 * Do not keep a namespace definition node 9627 */ 9628 goto skip_ns; 9629 } 9630 if (URL == ctxt->str_xml_ns) { 9631 if (attname != ctxt->str_xml) { 9632 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9633 "xml namespace URI mapped to wrong prefix\n", 9634 NULL, NULL, NULL); 9635 } 9636 goto skip_ns; 9637 } 9638 if (attname == ctxt->str_xmlns) { 9639 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9640 "redefinition of the xmlns prefix is forbidden\n", 9641 NULL, NULL, NULL); 9642 goto skip_ns; 9643 } 9644 if ((len == 29) && 
9645 (xmlStrEqual(URL, 9646 BAD_CAST "http://www.w3.org/2000/xmlns/"))) { 9647 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9648 "reuse of the xmlns namespace name is forbidden\n", 9649 NULL, NULL, NULL); 9650 goto skip_ns; 9651 } 9652 if ((URL == NULL) || (URL[0] == 0)) { 9653 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9654 "xmlns:%s: Empty XML namespace is not allowed\n", 9655 attname, NULL, NULL); 9656 goto skip_ns; 9657 } else { 9658 uri = xmlParseURI((const char *) URL); 9659 if (uri == NULL) { 9660 xmlNsErr(ctxt, XML_WAR_NS_URI, 9661 "xmlns:%s: '%s' is not a valid URI\n", 9662 attname, URL, NULL); 9663 } else { 9664 if ((ctxt->pedantic) && (uri->scheme == NULL)) { 9665 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE, 9666 "xmlns:%s: URI %s is not absolute\n", 9667 attname, URL, NULL); 9668 } 9669 xmlFreeURI(uri); 9670 } 9671 } 9672 9673 /* 9674 * check that it's not a defined namespace 9675 */ 9676 for (j = 1;j <= nbNs;j++) 9677 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 9678 break; 9679 if (j <= nbNs) 9680 xmlErrAttributeDup(ctxt, aprefix, attname); 9681 else 9682 if (nsPush(ctxt, attname, URL) > 0) nbNs++; 9683 skip_ns: 9684 if ((attvalue != NULL) && (alloc != 0)) { 9685 xmlFree(attvalue); 9686 attvalue = NULL; 9687 } 9688 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 9689 break; 9690 if (!IS_BLANK_CH(RAW)) { 9691 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 9692 "attributes construct error\n"); 9693 break; 9694 } 9695 SKIP_BLANKS; 9696 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr)) 9697 goto base_changed; 9698 continue; 9699 } 9700 9701 /* 9702 * Add the pair to atts 9703 */ 9704 if ((atts == NULL) || (nbatts + 5 > maxatts)) { 9705 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { 9706 if (attvalue[len] == 0) 9707 xmlFree(attvalue); 9708 goto failed; 9709 } 9710 maxatts = ctxt->maxatts; 9711 atts = ctxt->atts; 9712 } 9713 ctxt->attallocs[nratts++] = alloc; 9714 atts[nbatts++] = attname; 9715 atts[nbatts++] = aprefix; 9716 atts[nbatts++] = 
NULL; /* the URI will be fetched later */ 9717 atts[nbatts++] = attvalue; 9718 attvalue += len; 9719 atts[nbatts++] = attvalue; 9720 /* 9721 * tag if some deallocation is needed 9722 */ 9723 if (alloc != 0) attval = 1; 9724 } else { 9725 if ((attvalue != NULL) && (attvalue[len] == 0)) 9726 xmlFree(attvalue); 9727 } 9728 9729 failed: 9730 9731 GROW 9732 if (ctxt->instate == XML_PARSER_EOF) 9733 break; 9734 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr)) 9735 goto base_changed; 9736 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 9737 break; 9738 if (!IS_BLANK_CH(RAW)) { 9739 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 9740 "attributes construct error\n"); 9741 break; 9742 } 9743 SKIP_BLANKS; 9744 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) && 9745 (attname == NULL) && (attvalue == NULL)) { 9746 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 9747 "xmlParseStartTag: problem parsing attributes\n"); 9748 break; 9749 } 9750 GROW; 9751 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr)) 9752 goto base_changed; 9753 } 9754 9755 /* 9756 * The attributes defaulting 9757 */ 9758 if (ctxt->attsDefault != NULL) { 9759 xmlDefAttrsPtr defaults; 9760 9761 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix); 9762 if (defaults != NULL) { 9763 for (i = 0;i < defaults->nbAttrs;i++) { 9764 attname = defaults->values[5 * i]; 9765 aprefix = defaults->values[5 * i + 1]; 9766 9767 /* 9768 * special work for namespaces defaulted defs 9769 */ 9770 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 9771 /* 9772 * check that it's not a defined namespace 9773 */ 9774 for (j = 1;j <= nbNs;j++) 9775 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 9776 break; 9777 if (j <= nbNs) continue; 9778 9779 nsname = xmlGetNamespace(ctxt, NULL); 9780 if (nsname != defaults->values[5 * i + 2]) { 9781 if (nsPush(ctxt, NULL, 9782 defaults->values[5 * i + 2]) > 0) 9783 nbNs++; 9784 } 9785 } else if (aprefix == ctxt->str_xmlns) { 9786 /* 9787 * check that 
it's not a defined namespace 9788 */ 9789 for (j = 1;j <= nbNs;j++) 9790 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 9791 break; 9792 if (j <= nbNs) continue; 9793 9794 nsname = xmlGetNamespace(ctxt, attname); 9795 if (nsname != defaults->values[2]) { 9796 if (nsPush(ctxt, attname, 9797 defaults->values[5 * i + 2]) > 0) 9798 nbNs++; 9799 } 9800 } else { 9801 /* 9802 * check that it's not a defined attribute 9803 */ 9804 for (j = 0;j < nbatts;j+=5) { 9805 if ((attname == atts[j]) && (aprefix == atts[j+1])) 9806 break; 9807 } 9808 if (j < nbatts) continue; 9809 9810 if ((atts == NULL) || (nbatts + 5 > maxatts)) { 9811 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { 9812 return(NULL); 9813 } 9814 maxatts = ctxt->maxatts; 9815 atts = ctxt->atts; 9816 } 9817 atts[nbatts++] = attname; 9818 atts[nbatts++] = aprefix; 9819 if (aprefix == NULL) 9820 atts[nbatts++] = NULL; 9821 else 9822 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix); 9823 atts[nbatts++] = defaults->values[5 * i + 2]; 9824 atts[nbatts++] = defaults->values[5 * i + 3]; 9825 if ((ctxt->standalone == 1) && 9826 (defaults->values[5 * i + 4] != NULL)) { 9827 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED, 9828 "standalone: attribute %s on %s defaulted from external subset\n", 9829 attname, localname); 9830 } 9831 nbdef++; 9832 } 9833 } 9834 } 9835 } 9836 9837 /* 9838 * The attributes checkings 9839 */ 9840 for (i = 0; i < nbatts;i += 5) { 9841 /* 9842 * The default namespace does not apply to attribute names. 9843 */ 9844 if (atts[i + 1] != NULL) { 9845 nsname = xmlGetNamespace(ctxt, atts[i + 1]); 9846 if (nsname == NULL) { 9847 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 9848 "Namespace prefix %s for %s on %s is not defined\n", 9849 atts[i + 1], atts[i], localname); 9850 } 9851 atts[i + 2] = nsname; 9852 } else 9853 nsname = NULL; 9854 /* 9855 * [ WFC: Unique Att Spec ] 9856 * No attribute name may appear more than once in the same 9857 * start-tag or empty-element tag. 
9858 * As extended by the Namespace in XML REC. 9859 */ 9860 for (j = 0; j < i;j += 5) { 9861 if (atts[i] == atts[j]) { 9862 if (atts[i+1] == atts[j+1]) { 9863 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]); 9864 break; 9865 } 9866 if ((nsname != NULL) && (atts[j + 2] == nsname)) { 9867 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED, 9868 "Namespaced Attribute %s in '%s' redefined\n", 9869 atts[i], nsname, NULL); 9870 break; 9871 } 9872 } 9873 } 9874 } 9875 9876 nsname = xmlGetNamespace(ctxt, prefix); 9877 if ((prefix != NULL) && (nsname == NULL)) { 9878 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 9879 "Namespace prefix %s on %s is not defined\n", 9880 prefix, localname, NULL); 9881 } 9882 *pref = prefix; 9883 *URI = nsname; 9884 9885 /* 9886 * SAX: Start of Element ! 9887 */ 9888 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) && 9889 (!ctxt->disableSAX)) { 9890 if (nbNs > 0) 9891 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 9892 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs], 9893 nbatts / 5, nbdef, atts); 9894 else 9895 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 9896 nsname, 0, NULL, nbatts / 5, nbdef, atts); 9897 } 9898 9899 /* 9900 * Free up attribute allocated strings if needed 9901 */ 9902 if (attval != 0) { 9903 for (i = 3,j = 0; j < nratts;i += 5,j++) 9904 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL)) 9905 xmlFree((xmlChar *) atts[i]); 9906 } 9907 9908 return(localname); 9909 9910 base_changed: 9911 /* 9912 * the attribute strings are valid iif the base didn't changed 9913 */ 9914 if (attval != 0) { 9915 for (i = 3,j = 0; j < nratts;i += 5,j++) 9916 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL)) 9917 xmlFree((xmlChar *) atts[i]); 9918 } 9919 9920 /* 9921 * We can't switch from one entity to another in the middle 9922 * of a start tag 9923 */ 9924 if (inputNr != ctxt->inputNr) { 9925 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 9926 "Start tag doesn't start and stop in the same entity\n"); 9927 
return(NULL); 9928 } 9929 9930 ctxt->input->cur = ctxt->input->base + cur; 9931 ctxt->input->line = oldline; 9932 ctxt->input->col = oldcol; 9933 if (ctxt->wellFormed == 1) { 9934 goto reparse; 9935 } 9936 return(NULL); 9937 } 9938 9939 /** 9940 * xmlParseEndTag2: 9941 * @ctxt: an XML parser context 9942 * @line: line of the start tag 9943 * @nsNr: number of namespaces on the start tag 9944 * 9945 * parse an end of tag 9946 * 9947 * [42] ETag ::= '</' Name S? '>' 9948 * 9949 * With namespace 9950 * 9951 * [NS 9] ETag ::= '</' QName S? '>' 9952 */ 9953 9954 static void 9955 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix, 9956 const xmlChar *URI, int line, int nsNr, int tlen) { 9957 const xmlChar *name; 9958 size_t curLength; 9959 9960 GROW; 9961 if ((RAW != '<') || (NXT(1) != '/')) { 9962 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL); 9963 return; 9964 } 9965 SKIP(2); 9966 9967 curLength = ctxt->input->end - ctxt->input->cur; 9968 if ((tlen > 0) && (curLength >= (size_t)tlen) && 9969 (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) { 9970 if ((curLength >= (size_t)(tlen + 1)) && 9971 (ctxt->input->cur[tlen] == '>')) { 9972 ctxt->input->cur += tlen + 1; 9973 ctxt->input->col += tlen + 1; 9974 goto done; 9975 } 9976 ctxt->input->cur += tlen; 9977 ctxt->input->col += tlen; 9978 name = (xmlChar*)1; 9979 } else { 9980 if (prefix == NULL) 9981 name = xmlParseNameAndCompare(ctxt, ctxt->name); 9982 else 9983 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix); 9984 } 9985 9986 /* 9987 * We should definitely be at the ending "S? '>'" part 9988 */ 9989 GROW; 9990 if (ctxt->instate == XML_PARSER_EOF) 9991 return; 9992 SKIP_BLANKS; 9993 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { 9994 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 9995 } else 9996 NEXT1; 9997 9998 /* 9999 * [ WFC: Element Type Match ] 10000 * The Name in an element's end-tag must match the element type in the 10001 * start-tag. 
10002 * 10003 */ 10004 if (name != (xmlChar*)1) { 10005 if (name == NULL) name = BAD_CAST "unparseable"; 10006 if ((line == 0) && (ctxt->node != NULL)) 10007 line = ctxt->node->line; 10008 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 10009 "Opening and ending tag mismatch: %s line %d and %s\n", 10010 ctxt->name, line, name); 10011 } 10012 10013 /* 10014 * SAX: End of Tag 10015 */ 10016 done: 10017 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) && 10018 (!ctxt->disableSAX)) 10019 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI); 10020 10021 spacePop(ctxt); 10022 if (nsNr != 0) 10023 nsPop(ctxt, nsNr); 10024 return; 10025 } 10026 10027 /** 10028 * xmlParseCDSect: 10029 * @ctxt: an XML parser context 10030 * 10031 * Parse escaped pure raw content. 10032 * 10033 * [18] CDSect ::= CDStart CData CDEnd 10034 * 10035 * [19] CDStart ::= '<![CDATA[' 10036 * 10037 * [20] Data ::= (Char* - (Char* ']]>' Char*)) 10038 * 10039 * [21] CDEnd ::= ']]>' 10040 */ 10041 void 10042 xmlParseCDSect(xmlParserCtxtPtr ctxt) { 10043 xmlChar *buf = NULL; 10044 int len = 0; 10045 int size = XML_PARSER_BUFFER_SIZE; 10046 int r, rl; 10047 int s, sl; 10048 int cur, l; 10049 int count = 0; 10050 10051 /* Check 2.6.0 was NXT(0) not RAW */ 10052 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) { 10053 SKIP(9); 10054 } else 10055 return; 10056 10057 ctxt->instate = XML_PARSER_CDATA_SECTION; 10058 r = CUR_CHAR(rl); 10059 if (!IS_CHAR(r)) { 10060 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL); 10061 ctxt->instate = XML_PARSER_CONTENT; 10062 return; 10063 } 10064 NEXTL(rl); 10065 s = CUR_CHAR(sl); 10066 if (!IS_CHAR(s)) { 10067 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL); 10068 ctxt->instate = XML_PARSER_CONTENT; 10069 return; 10070 } 10071 NEXTL(sl); 10072 cur = CUR_CHAR(l); 10073 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 10074 if (buf == NULL) { 10075 xmlErrMemory(ctxt, NULL); 10076 return; 10077 } 10078 while 
(IS_CHAR(cur) && 10079 ((r != ']') || (s != ']') || (cur != '>'))) { 10080 if (len + 5 >= size) { 10081 xmlChar *tmp; 10082 10083 if ((size > XML_MAX_TEXT_LENGTH) && 10084 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 10085 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED, 10086 "CData section too big found", NULL); 10087 xmlFree (buf); 10088 return; 10089 } 10090 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar)); 10091 if (tmp == NULL) { 10092 xmlFree(buf); 10093 xmlErrMemory(ctxt, NULL); 10094 return; 10095 } 10096 buf = tmp; 10097 size *= 2; 10098 } 10099 COPY_BUF(rl,buf,len,r); 10100 r = s; 10101 rl = sl; 10102 s = cur; 10103 sl = l; 10104 count++; 10105 if (count > 50) { 10106 GROW; 10107 if (ctxt->instate == XML_PARSER_EOF) { 10108 xmlFree(buf); 10109 return; 10110 } 10111 count = 0; 10112 } 10113 NEXTL(l); 10114 cur = CUR_CHAR(l); 10115 } 10116 buf[len] = 0; 10117 ctxt->instate = XML_PARSER_CONTENT; 10118 if (cur != '>') { 10119 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED, 10120 "CData section not finished\n%.50s\n", buf); 10121 xmlFree(buf); 10122 return; 10123 } 10124 NEXTL(l); 10125 10126 /* 10127 * OK the buffer is to be consumed as cdata. 
10128 */ 10129 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 10130 if (ctxt->sax->cdataBlock != NULL) 10131 ctxt->sax->cdataBlock(ctxt->userData, buf, len); 10132 else if (ctxt->sax->characters != NULL) 10133 ctxt->sax->characters(ctxt->userData, buf, len); 10134 } 10135 xmlFree(buf); 10136 } 10137 10138 /** 10139 * xmlParseContent: 10140 * @ctxt: an XML parser context 10141 * 10142 * Parse a content: 10143 * 10144 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 10145 */ 10146 10147 void 10148 xmlParseContent(xmlParserCtxtPtr ctxt) { 10149 GROW; 10150 while ((RAW != 0) && 10151 ((RAW != '<') || (NXT(1) != '/')) && 10152 (ctxt->instate != XML_PARSER_EOF)) { 10153 const xmlChar *test = CUR_PTR; 10154 unsigned int cons = ctxt->input->consumed; 10155 const xmlChar *cur = ctxt->input->cur; 10156 10157 /* 10158 * First case : a Processing Instruction. 10159 */ 10160 if ((*cur == '<') && (cur[1] == '?')) { 10161 xmlParsePI(ctxt); 10162 } 10163 10164 /* 10165 * Second case : a CDSection 10166 */ 10167 /* 2.6.0 test was *cur not RAW */ 10168 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) { 10169 xmlParseCDSect(ctxt); 10170 } 10171 10172 /* 10173 * Third case : a comment 10174 */ 10175 else if ((*cur == '<') && (NXT(1) == '!') && 10176 (NXT(2) == '-') && (NXT(3) == '-')) { 10177 xmlParseComment(ctxt); 10178 if (ctxt->instate != XML_PARSER_EOF) 10179 ctxt->instate = XML_PARSER_CONTENT; 10180 } 10181 10182 /* 10183 * Fourth case : a sub-element. 10184 */ 10185 else if (*cur == '<') { 10186 xmlParseElement(ctxt); 10187 } 10188 10189 /* 10190 * Fifth case : a reference. If if has not been resolved, 10191 * parsing returns it's Name, create the node 10192 */ 10193 10194 else if (*cur == '&') { 10195 xmlParseReference(ctxt); 10196 } 10197 10198 /* 10199 * Last case, text. Note that References are handled directly. 
10200 */ 10201 else { 10202 xmlParseCharData(ctxt, 0); 10203 } 10204 10205 GROW; 10206 /* 10207 * Pop-up of finished entities. 10208 */ 10209 while ((RAW == 0) && (ctxt->inputNr > 1)) 10210 xmlPopInput(ctxt); 10211 SHRINK; 10212 10213 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { 10214 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 10215 "detected an error in element content\n"); 10216 xmlHaltParser(ctxt); 10217 break; 10218 } 10219 } 10220 } 10221 10222 /** 10223 * xmlParseElement: 10224 * @ctxt: an XML parser context 10225 * 10226 * parse an XML element, this is highly recursive 10227 * 10228 * [39] element ::= EmptyElemTag | STag content ETag 10229 * 10230 * [ WFC: Element Type Match ] 10231 * The Name in an element's end-tag must match the element type in the 10232 * start-tag. 10233 * 10234 */ 10235 10236 void 10237 xmlParseElement(xmlParserCtxtPtr ctxt) { 10238 const xmlChar *name; 10239 const xmlChar *prefix = NULL; 10240 const xmlChar *URI = NULL; 10241 xmlParserNodeInfo node_info; 10242 int line, tlen = 0; 10243 xmlNodePtr ret; 10244 int nsNr = ctxt->nsNr; 10245 10246 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) && 10247 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 10248 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, 10249 "Excessive depth in document: %d use XML_PARSE_HUGE option\n", 10250 xmlParserMaxDepth); 10251 xmlHaltParser(ctxt); 10252 return; 10253 } 10254 10255 /* Capture start position */ 10256 if (ctxt->record_info) { 10257 node_info.begin_pos = ctxt->input->consumed + 10258 (CUR_PTR - ctxt->input->base); 10259 node_info.begin_line = ctxt->input->line; 10260 } 10261 10262 if (ctxt->spaceNr == 0) 10263 spacePush(ctxt, -1); 10264 else if (*ctxt->space == -2) 10265 spacePush(ctxt, -1); 10266 else 10267 spacePush(ctxt, *ctxt->space); 10268 10269 line = ctxt->input->line; 10270 #ifdef LIBXML_SAX1_ENABLED 10271 if (ctxt->sax2) 10272 #endif /* LIBXML_SAX1_ENABLED */ 10273 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen); 10274 
#ifdef LIBXML_SAX1_ENABLED 10275 else 10276 name = xmlParseStartTag(ctxt); 10277 #endif /* LIBXML_SAX1_ENABLED */ 10278 if (ctxt->instate == XML_PARSER_EOF) 10279 return; 10280 if (name == NULL) { 10281 spacePop(ctxt); 10282 return; 10283 } 10284 namePush(ctxt, name); 10285 ret = ctxt->node; 10286 10287 #ifdef LIBXML_VALID_ENABLED 10288 /* 10289 * [ VC: Root Element Type ] 10290 * The Name in the document type declaration must match the element 10291 * type of the root element. 10292 */ 10293 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 10294 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 10295 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 10296 #endif /* LIBXML_VALID_ENABLED */ 10297 10298 /* 10299 * Check for an Empty Element. 10300 */ 10301 if ((RAW == '/') && (NXT(1) == '>')) { 10302 SKIP(2); 10303 if (ctxt->sax2) { 10304 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) && 10305 (!ctxt->disableSAX)) 10306 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI); 10307 #ifdef LIBXML_SAX1_ENABLED 10308 } else { 10309 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 10310 (!ctxt->disableSAX)) 10311 ctxt->sax->endElement(ctxt->userData, name); 10312 #endif /* LIBXML_SAX1_ENABLED */ 10313 } 10314 namePop(ctxt); 10315 spacePop(ctxt); 10316 if (nsNr != ctxt->nsNr) 10317 nsPop(ctxt, ctxt->nsNr - nsNr); 10318 if ( ret != NULL && ctxt->record_info ) { 10319 node_info.end_pos = ctxt->input->consumed + 10320 (CUR_PTR - ctxt->input->base); 10321 node_info.end_line = ctxt->input->line; 10322 node_info.node = ret; 10323 xmlParserAddNodeInfo(ctxt, &node_info); 10324 } 10325 return; 10326 } 10327 if (RAW == '>') { 10328 NEXT1; 10329 } else { 10330 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED, 10331 "Couldn't find end of Start Tag %s line %d\n", 10332 name, line, NULL); 10333 10334 /* 10335 * end of parsing of this node. 
10336 */ 10337 nodePop(ctxt); 10338 namePop(ctxt); 10339 spacePop(ctxt); 10340 if (nsNr != ctxt->nsNr) 10341 nsPop(ctxt, ctxt->nsNr - nsNr); 10342 10343 /* 10344 * Capture end position and add node 10345 */ 10346 if ( ret != NULL && ctxt->record_info ) { 10347 node_info.end_pos = ctxt->input->consumed + 10348 (CUR_PTR - ctxt->input->base); 10349 node_info.end_line = ctxt->input->line; 10350 node_info.node = ret; 10351 xmlParserAddNodeInfo(ctxt, &node_info); 10352 } 10353 return; 10354 } 10355 10356 /* 10357 * Parse the content of the element: 10358 */ 10359 xmlParseContent(ctxt); 10360 if (ctxt->instate == XML_PARSER_EOF) 10361 return; 10362 if (!IS_BYTE_CHAR(RAW)) { 10363 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED, 10364 "Premature end of data in tag %s line %d\n", 10365 name, line, NULL); 10366 10367 /* 10368 * end of parsing of this node. 10369 */ 10370 nodePop(ctxt); 10371 namePop(ctxt); 10372 spacePop(ctxt); 10373 if (nsNr != ctxt->nsNr) 10374 nsPop(ctxt, ctxt->nsNr - nsNr); 10375 return; 10376 } 10377 10378 /* 10379 * parse the end of tag: '</' should be here. 10380 */ 10381 if (ctxt->sax2) { 10382 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen); 10383 namePop(ctxt); 10384 } 10385 #ifdef LIBXML_SAX1_ENABLED 10386 else 10387 xmlParseEndTag1(ctxt, line); 10388 #endif /* LIBXML_SAX1_ENABLED */ 10389 10390 /* 10391 * Capture end position and add node 10392 */ 10393 if ( ret != NULL && ctxt->record_info ) { 10394 node_info.end_pos = ctxt->input->consumed + 10395 (CUR_PTR - ctxt->input->base); 10396 node_info.end_line = ctxt->input->line; 10397 node_info.node = ret; 10398 xmlParserAddNodeInfo(ctxt, &node_info); 10399 } 10400 } 10401 10402 /** 10403 * xmlParseVersionNum: 10404 * @ctxt: an XML parser context 10405 * 10406 * parse the XML version value. 10407 * 10408 * [26] VersionNum ::= '1.' 
[0-9]+ 10409 * 10410 * In practice allow [0-9].[0-9]+ at that level 10411 * 10412 * Returns the string giving the XML version number, or NULL 10413 */ 10414 xmlChar * 10415 xmlParseVersionNum(xmlParserCtxtPtr ctxt) { 10416 xmlChar *buf = NULL; 10417 int len = 0; 10418 int size = 10; 10419 xmlChar cur; 10420 10421 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 10422 if (buf == NULL) { 10423 xmlErrMemory(ctxt, NULL); 10424 return(NULL); 10425 } 10426 cur = CUR; 10427 if (!((cur >= '0') && (cur <= '9'))) { 10428 xmlFree(buf); 10429 return(NULL); 10430 } 10431 buf[len++] = cur; 10432 NEXT; 10433 cur=CUR; 10434 if (cur != '.') { 10435 xmlFree(buf); 10436 return(NULL); 10437 } 10438 buf[len++] = cur; 10439 NEXT; 10440 cur=CUR; 10441 while ((cur >= '0') && (cur <= '9')) { 10442 if (len + 1 >= size) { 10443 xmlChar *tmp; 10444 10445 size *= 2; 10446 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 10447 if (tmp == NULL) { 10448 xmlFree(buf); 10449 xmlErrMemory(ctxt, NULL); 10450 return(NULL); 10451 } 10452 buf = tmp; 10453 } 10454 buf[len++] = cur; 10455 NEXT; 10456 cur=CUR; 10457 } 10458 buf[len] = 0; 10459 return(buf); 10460 } 10461 10462 /** 10463 * xmlParseVersionInfo: 10464 * @ctxt: an XML parser context 10465 * 10466 * parse the XML version. 10467 * 10468 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 10469 * 10470 * [25] Eq ::= S? '=' S? 10471 * 10472 * Returns the version string, e.g. 
"1.0" 10473 */ 10474 10475 xmlChar * 10476 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) { 10477 xmlChar *version = NULL; 10478 10479 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) { 10480 SKIP(7); 10481 SKIP_BLANKS; 10482 if (RAW != '=') { 10483 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 10484 return(NULL); 10485 } 10486 NEXT; 10487 SKIP_BLANKS; 10488 if (RAW == '"') { 10489 NEXT; 10490 version = xmlParseVersionNum(ctxt); 10491 if (RAW != '"') { 10492 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10493 } else 10494 NEXT; 10495 } else if (RAW == '\''){ 10496 NEXT; 10497 version = xmlParseVersionNum(ctxt); 10498 if (RAW != '\'') { 10499 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10500 } else 10501 NEXT; 10502 } else { 10503 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 10504 } 10505 } 10506 return(version); 10507 } 10508 10509 /** 10510 * xmlParseEncName: 10511 * @ctxt: an XML parser context 10512 * 10513 * parse the XML encoding name 10514 * 10515 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 10516 * 10517 * Returns the encoding name value or NULL 10518 */ 10519 xmlChar * 10520 xmlParseEncName(xmlParserCtxtPtr ctxt) { 10521 xmlChar *buf = NULL; 10522 int len = 0; 10523 int size = 10; 10524 xmlChar cur; 10525 10526 cur = CUR; 10527 if (((cur >= 'a') && (cur <= 'z')) || 10528 ((cur >= 'A') && (cur <= 'Z'))) { 10529 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 10530 if (buf == NULL) { 10531 xmlErrMemory(ctxt, NULL); 10532 return(NULL); 10533 } 10534 10535 buf[len++] = cur; 10536 NEXT; 10537 cur = CUR; 10538 while (((cur >= 'a') && (cur <= 'z')) || 10539 ((cur >= 'A') && (cur <= 'Z')) || 10540 ((cur >= '0') && (cur <= '9')) || 10541 (cur == '.') || (cur == '_') || 10542 (cur == '-')) { 10543 if (len + 1 >= size) { 10544 xmlChar *tmp; 10545 10546 size *= 2; 10547 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 10548 if (tmp == NULL) { 10549 xmlErrMemory(ctxt, NULL); 10550 xmlFree(buf); 10551 return(NULL); 
10552 } 10553 buf = tmp; 10554 } 10555 buf[len++] = cur; 10556 NEXT; 10557 cur = CUR; 10558 if (cur == 0) { 10559 SHRINK; 10560 GROW; 10561 cur = CUR; 10562 } 10563 } 10564 buf[len] = 0; 10565 } else { 10566 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL); 10567 } 10568 return(buf); 10569 } 10570 10571 /** 10572 * xmlParseEncodingDecl: 10573 * @ctxt: an XML parser context 10574 * 10575 * parse the XML encoding declaration 10576 * 10577 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'") 10578 * 10579 * this setups the conversion filters. 10580 * 10581 * Returns the encoding value or NULL 10582 */ 10583 10584 const xmlChar * 10585 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) { 10586 xmlChar *encoding = NULL; 10587 10588 SKIP_BLANKS; 10589 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) { 10590 SKIP(8); 10591 SKIP_BLANKS; 10592 if (RAW != '=') { 10593 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 10594 return(NULL); 10595 } 10596 NEXT; 10597 SKIP_BLANKS; 10598 if (RAW == '"') { 10599 NEXT; 10600 encoding = xmlParseEncName(ctxt); 10601 if (RAW != '"') { 10602 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10603 xmlFree((xmlChar *) encoding); 10604 return(NULL); 10605 } else 10606 NEXT; 10607 } else if (RAW == '\''){ 10608 NEXT; 10609 encoding = xmlParseEncName(ctxt); 10610 if (RAW != '\'') { 10611 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10612 xmlFree((xmlChar *) encoding); 10613 return(NULL); 10614 } else 10615 NEXT; 10616 } else { 10617 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 10618 } 10619 10620 /* 10621 * Non standard parsing, allowing the user to ignore encoding 10622 */ 10623 if (ctxt->options & XML_PARSE_IGNORE_ENC) { 10624 xmlFree((xmlChar *) encoding); 10625 return(NULL); 10626 } 10627 10628 /* 10629 * UTF-16 encoding stwich has already taken place at this stage, 10630 * more over the little-endian/big-endian selection is already done 10631 */ 10632 if ((encoding != NULL) && 10633 
((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) || 10634 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) { 10635 /* 10636 * If no encoding was passed to the parser, that we are 10637 * using UTF-16 and no decoder is present i.e. the 10638 * document is apparently UTF-8 compatible, then raise an 10639 * encoding mismatch fatal error 10640 */ 10641 if ((ctxt->encoding == NULL) && 10642 (ctxt->input->buf != NULL) && 10643 (ctxt->input->buf->encoder == NULL)) { 10644 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING, 10645 "Document labelled UTF-16 but has UTF-8 content\n"); 10646 } 10647 if (ctxt->encoding != NULL) 10648 xmlFree((xmlChar *) ctxt->encoding); 10649 ctxt->encoding = encoding; 10650 } 10651 /* 10652 * UTF-8 encoding is handled natively 10653 */ 10654 else if ((encoding != NULL) && 10655 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) || 10656 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) { 10657 if (ctxt->encoding != NULL) 10658 xmlFree((xmlChar *) ctxt->encoding); 10659 ctxt->encoding = encoding; 10660 } 10661 else if (encoding != NULL) { 10662 xmlCharEncodingHandlerPtr handler; 10663 10664 if (ctxt->input->encoding != NULL) 10665 xmlFree((xmlChar *) ctxt->input->encoding); 10666 ctxt->input->encoding = encoding; 10667 10668 handler = xmlFindCharEncodingHandler((const char *) encoding); 10669 if (handler != NULL) { 10670 if (xmlSwitchToEncoding(ctxt, handler) < 0) { 10671 /* failed to convert */ 10672 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; 10673 return(NULL); 10674 } 10675 } else { 10676 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 10677 "Unsupported encoding %s\n", encoding); 10678 return(NULL); 10679 } 10680 } 10681 } 10682 return(encoding); 10683 } 10684 10685 /** 10686 * xmlParseSDDecl: 10687 * @ctxt: an XML parser context 10688 * 10689 * parse the XML standalone declaration 10690 * 10691 * [32] SDDecl ::= S 'standalone' Eq 10692 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"')) 10693 * 10694 * [ VC: Standalone Document Declaration ] 10695 
* TODO The standalone document declaration must have the value "no" 10696 * if any external markup declarations contain declarations of: 10697 * - attributes with default values, if elements to which these 10698 * attributes apply appear in the document without specifications 10699 * of values for these attributes, or 10700 * - entities (other than amp, lt, gt, apos, quot), if references 10701 * to those entities appear in the document, or 10702 * - attributes with values subject to normalization, where the 10703 * attribute appears in the document with a value which will change 10704 * as a result of normalization, or 10705 * - element types with element content, if white space occurs directly 10706 * within any instance of those types. 10707 * 10708 * Returns: 10709 * 1 if standalone="yes" 10710 * 0 if standalone="no" 10711 * -2 if standalone attribute is missing or invalid 10712 * (A standalone value of -2 means that the XML declaration was found, 10713 * but no value was specified for the standalone attribute). 
10714 */ 10715 10716 int 10717 xmlParseSDDecl(xmlParserCtxtPtr ctxt) { 10718 int standalone = -2; 10719 10720 SKIP_BLANKS; 10721 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) { 10722 SKIP(10); 10723 SKIP_BLANKS; 10724 if (RAW != '=') { 10725 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 10726 return(standalone); 10727 } 10728 NEXT; 10729 SKIP_BLANKS; 10730 if (RAW == '\''){ 10731 NEXT; 10732 if ((RAW == 'n') && (NXT(1) == 'o')) { 10733 standalone = 0; 10734 SKIP(2); 10735 } else if ((RAW == 'y') && (NXT(1) == 'e') && 10736 (NXT(2) == 's')) { 10737 standalone = 1; 10738 SKIP(3); 10739 } else { 10740 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL); 10741 } 10742 if (RAW != '\'') { 10743 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10744 } else 10745 NEXT; 10746 } else if (RAW == '"'){ 10747 NEXT; 10748 if ((RAW == 'n') && (NXT(1) == 'o')) { 10749 standalone = 0; 10750 SKIP(2); 10751 } else if ((RAW == 'y') && (NXT(1) == 'e') && 10752 (NXT(2) == 's')) { 10753 standalone = 1; 10754 SKIP(3); 10755 } else { 10756 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL); 10757 } 10758 if (RAW != '"') { 10759 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10760 } else 10761 NEXT; 10762 } else { 10763 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 10764 } 10765 } 10766 return(standalone); 10767 } 10768 10769 /** 10770 * xmlParseXMLDecl: 10771 * @ctxt: an XML parser context 10772 * 10773 * parse an XML declaration header 10774 * 10775 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' 10776 */ 10777 10778 void 10779 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) { 10780 xmlChar *version; 10781 10782 /* 10783 * This value for standalone indicates that the document has an 10784 * XML declaration but it does not have a standalone attribute. 10785 * It will be overwritten later if a standalone attribute is found. 10786 */ 10787 ctxt->input->standalone = -2; 10788 10789 /* 10790 * We know that '<?xml' is here. 
10791 */ 10792 SKIP(5); 10793 10794 if (!IS_BLANK_CH(RAW)) { 10795 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 10796 "Blank needed after '<?xml'\n"); 10797 } 10798 SKIP_BLANKS; 10799 10800 /* 10801 * We must have the VersionInfo here. 10802 */ 10803 version = xmlParseVersionInfo(ctxt); 10804 if (version == NULL) { 10805 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL); 10806 } else { 10807 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) { 10808 /* 10809 * Changed here for XML-1.0 5th edition 10810 */ 10811 if (ctxt->options & XML_PARSE_OLD10) { 10812 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION, 10813 "Unsupported version '%s'\n", 10814 version); 10815 } else { 10816 if ((version[0] == '1') && ((version[1] == '.'))) { 10817 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION, 10818 "Unsupported version '%s'\n", 10819 version, NULL); 10820 } else { 10821 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION, 10822 "Unsupported version '%s'\n", 10823 version); 10824 } 10825 } 10826 } 10827 if (ctxt->version != NULL) 10828 xmlFree((void *) ctxt->version); 10829 ctxt->version = version; 10830 } 10831 10832 /* 10833 * We may have the encoding declaration 10834 */ 10835 if (!IS_BLANK_CH(RAW)) { 10836 if ((RAW == '?') && (NXT(1) == '>')) { 10837 SKIP(2); 10838 return; 10839 } 10840 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); 10841 } 10842 xmlParseEncodingDecl(ctxt); 10843 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) || 10844 (ctxt->instate == XML_PARSER_EOF)) { 10845 /* 10846 * The XML REC instructs us to stop parsing right here 10847 */ 10848 return; 10849 } 10850 10851 /* 10852 * We may have the standalone status. 
10853 */ 10854 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) { 10855 if ((RAW == '?') && (NXT(1) == '>')) { 10856 SKIP(2); 10857 return; 10858 } 10859 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); 10860 } 10861 10862 /* 10863 * We can grow the input buffer freely at that point 10864 */ 10865 GROW; 10866 10867 SKIP_BLANKS; 10868 ctxt->input->standalone = xmlParseSDDecl(ctxt); 10869 10870 SKIP_BLANKS; 10871 if ((RAW == '?') && (NXT(1) == '>')) { 10872 SKIP(2); 10873 } else if (RAW == '>') { 10874 /* Deprecated old WD ... */ 10875 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 10876 NEXT; 10877 } else { 10878 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 10879 MOVETO_ENDTAG(CUR_PTR); 10880 NEXT; 10881 } 10882 } 10883 10884 /** 10885 * xmlParseMisc: 10886 * @ctxt: an XML parser context 10887 * 10888 * parse an XML Misc* optional field. 10889 * 10890 * [27] Misc ::= Comment | PI | S 10891 */ 10892 10893 void 10894 xmlParseMisc(xmlParserCtxtPtr ctxt) { 10895 while ((ctxt->instate != XML_PARSER_EOF) && 10896 (((RAW == '<') && (NXT(1) == '?')) || 10897 (CMP4(CUR_PTR, '<', '!', '-', '-')) || 10898 IS_BLANK_CH(CUR))) { 10899 if ((RAW == '<') && (NXT(1) == '?')) { 10900 xmlParsePI(ctxt); 10901 } else if (IS_BLANK_CH(CUR)) { 10902 NEXT; 10903 } else 10904 xmlParseComment(ctxt); 10905 } 10906 } 10907 10908 /** 10909 * xmlParseDocument: 10910 * @ctxt: an XML parser context 10911 * 10912 * parse an XML document (and build a tree if using the standard SAX 10913 * interface). 10914 * 10915 * [1] document ::= prolog element Misc* 10916 * 10917 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? 10918 * 10919 * Returns 0, -1 in case of error. the parser context is augmented 10920 * as a result of the parsing. 
10921 */ 10922 10923 int 10924 xmlParseDocument(xmlParserCtxtPtr ctxt) { 10925 xmlChar start[4]; 10926 xmlCharEncoding enc; 10927 10928 xmlInitParser(); 10929 10930 if ((ctxt == NULL) || (ctxt->input == NULL)) 10931 return(-1); 10932 10933 GROW; 10934 10935 /* 10936 * SAX: detecting the level. 10937 */ 10938 xmlDetectSAX2(ctxt); 10939 10940 /* 10941 * SAX: beginning of the document processing. 10942 */ 10943 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 10944 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 10945 if (ctxt->instate == XML_PARSER_EOF) 10946 return(-1); 10947 10948 if ((ctxt->encoding == NULL) && 10949 ((ctxt->input->end - ctxt->input->cur) >= 4)) { 10950 /* 10951 * Get the 4 first bytes and decode the charset 10952 * if enc != XML_CHAR_ENCODING_NONE 10953 * plug some encoding conversion routines. 10954 */ 10955 start[0] = RAW; 10956 start[1] = NXT(1); 10957 start[2] = NXT(2); 10958 start[3] = NXT(3); 10959 enc = xmlDetectCharEncoding(&start[0], 4); 10960 if (enc != XML_CHAR_ENCODING_NONE) { 10961 xmlSwitchEncoding(ctxt, enc); 10962 } 10963 } 10964 10965 10966 if (CUR == 0) { 10967 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 10968 return(-1); 10969 } 10970 10971 /* 10972 * Check for the XMLDecl in the Prolog. 10973 * do not GROW here to avoid the detected encoder to decode more 10974 * than just the first line, unless the amount of data is really 10975 * too small to hold "<?xml version="1.0" encoding="foo" 10976 */ 10977 if ((ctxt->input->end - ctxt->input->cur) < 35) { 10978 GROW; 10979 } 10980 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 10981 10982 /* 10983 * Note that we will switch encoding on the fly. 
10984 */ 10985 xmlParseXMLDecl(ctxt); 10986 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) || 10987 (ctxt->instate == XML_PARSER_EOF)) { 10988 /* 10989 * The XML REC instructs us to stop parsing right here 10990 */ 10991 return(-1); 10992 } 10993 ctxt->standalone = ctxt->input->standalone; 10994 SKIP_BLANKS; 10995 } else { 10996 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 10997 } 10998 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 10999 ctxt->sax->startDocument(ctxt->userData); 11000 if (ctxt->instate == XML_PARSER_EOF) 11001 return(-1); 11002 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) && 11003 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) { 11004 ctxt->myDoc->compression = ctxt->input->buf->compressed; 11005 } 11006 11007 /* 11008 * The Misc part of the Prolog 11009 */ 11010 GROW; 11011 xmlParseMisc(ctxt); 11012 11013 /* 11014 * Then possibly doc type declaration(s) and more Misc 11015 * (doctypedecl Misc*)? 11016 */ 11017 GROW; 11018 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) { 11019 11020 ctxt->inSubset = 1; 11021 xmlParseDocTypeDecl(ctxt); 11022 if (RAW == '[') { 11023 ctxt->instate = XML_PARSER_DTD; 11024 xmlParseInternalSubset(ctxt); 11025 if (ctxt->instate == XML_PARSER_EOF) 11026 return(-1); 11027 } 11028 11029 /* 11030 * Create and update the external subset. 
11031 */ 11032 ctxt->inSubset = 2; 11033 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) && 11034 (!ctxt->disableSAX)) 11035 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 11036 ctxt->extSubSystem, ctxt->extSubURI); 11037 if (ctxt->instate == XML_PARSER_EOF) 11038 return(-1); 11039 ctxt->inSubset = 0; 11040 11041 xmlCleanSpecialAttr(ctxt); 11042 11043 ctxt->instate = XML_PARSER_PROLOG; 11044 xmlParseMisc(ctxt); 11045 } 11046 11047 /* 11048 * Time to start parsing the tree itself 11049 */ 11050 GROW; 11051 if (RAW != '<') { 11052 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY, 11053 "Start tag expected, '<' not found\n"); 11054 } else { 11055 ctxt->instate = XML_PARSER_CONTENT; 11056 xmlParseElement(ctxt); 11057 ctxt->instate = XML_PARSER_EPILOG; 11058 11059 11060 /* 11061 * The Misc part at the end 11062 */ 11063 xmlParseMisc(ctxt); 11064 11065 if (RAW != 0) { 11066 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 11067 } 11068 ctxt->instate = XML_PARSER_EOF; 11069 } 11070 11071 /* 11072 * SAX: end of the document processing. 11073 */ 11074 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11075 ctxt->sax->endDocument(ctxt->userData); 11076 11077 /* 11078 * Remove locally kept entity definitions if the tree was not built 11079 */ 11080 if ((ctxt->myDoc != NULL) && 11081 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 11082 xmlFreeDoc(ctxt->myDoc); 11083 ctxt->myDoc = NULL; 11084 } 11085 11086 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) { 11087 ctxt->myDoc->properties |= XML_DOC_WELLFORMED; 11088 if (ctxt->valid) 11089 ctxt->myDoc->properties |= XML_DOC_DTDVALID; 11090 if (ctxt->nsWellFormed) 11091 ctxt->myDoc->properties |= XML_DOC_NSVALID; 11092 if (ctxt->options & XML_PARSE_OLD10) 11093 ctxt->myDoc->properties |= XML_DOC_OLD10; 11094 } 11095 if (! 
ctxt->wellFormed) { 11096 ctxt->valid = 0; 11097 return(-1); 11098 } 11099 return(0); 11100 } 11101 11102 /** 11103 * xmlParseExtParsedEnt: 11104 * @ctxt: an XML parser context 11105 * 11106 * parse a general parsed entity 11107 * An external general parsed entity is well-formed if it matches the 11108 * production labeled extParsedEnt. 11109 * 11110 * [78] extParsedEnt ::= TextDecl? content 11111 * 11112 * Returns 0, -1 in case of error. the parser context is augmented 11113 * as a result of the parsing. 11114 */ 11115 11116 int 11117 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) { 11118 xmlChar start[4]; 11119 xmlCharEncoding enc; 11120 11121 if ((ctxt == NULL) || (ctxt->input == NULL)) 11122 return(-1); 11123 11124 xmlDefaultSAXHandlerInit(); 11125 11126 xmlDetectSAX2(ctxt); 11127 11128 GROW; 11129 11130 /* 11131 * SAX: beginning of the document processing. 11132 */ 11133 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 11134 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 11135 11136 /* 11137 * Get the 4 first bytes and decode the charset 11138 * if enc != XML_CHAR_ENCODING_NONE 11139 * plug some encoding conversion routines. 11140 */ 11141 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 11142 start[0] = RAW; 11143 start[1] = NXT(1); 11144 start[2] = NXT(2); 11145 start[3] = NXT(3); 11146 enc = xmlDetectCharEncoding(start, 4); 11147 if (enc != XML_CHAR_ENCODING_NONE) { 11148 xmlSwitchEncoding(ctxt, enc); 11149 } 11150 } 11151 11152 11153 if (CUR == 0) { 11154 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 11155 } 11156 11157 /* 11158 * Check for the XMLDecl in the Prolog. 11159 */ 11160 GROW; 11161 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 11162 11163 /* 11164 * Note that we will switch encoding on the fly. 
11165 */ 11166 xmlParseXMLDecl(ctxt); 11167 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 11168 /* 11169 * The XML REC instructs us to stop parsing right here 11170 */ 11171 return(-1); 11172 } 11173 SKIP_BLANKS; 11174 } else { 11175 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 11176 } 11177 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 11178 ctxt->sax->startDocument(ctxt->userData); 11179 if (ctxt->instate == XML_PARSER_EOF) 11180 return(-1); 11181 11182 /* 11183 * Doing validity checking on chunk doesn't make sense 11184 */ 11185 ctxt->instate = XML_PARSER_CONTENT; 11186 ctxt->validate = 0; 11187 ctxt->loadsubset = 0; 11188 ctxt->depth = 0; 11189 11190 xmlParseContent(ctxt); 11191 if (ctxt->instate == XML_PARSER_EOF) 11192 return(-1); 11193 11194 if ((RAW == '<') && (NXT(1) == '/')) { 11195 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 11196 } else if (RAW != 0) { 11197 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 11198 } 11199 11200 /* 11201 * SAX: end of the document processing. 11202 */ 11203 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11204 ctxt->sax->endDocument(ctxt->userData); 11205 11206 if (! ctxt->wellFormed) return(-1); 11207 return(0); 11208 } 11209 11210 #ifdef LIBXML_PUSH_ENABLED 11211 /************************************************************************ 11212 * * 11213 * Progressive parsing interfaces * 11214 * * 11215 ************************************************************************/ 11216 11217 /** 11218 * xmlParseLookupSequence: 11219 * @ctxt: an XML parser context 11220 * @first: the first char to lookup 11221 * @next: the next char to lookup or zero 11222 * @third: the next char to lookup or zero 11223 * 11224 * Try to find if a sequence (first, next, third) or just (first next) or 11225 * (first) is available in the input stream. 
11226 * This function has a side effect of (possibly) incrementing ctxt->checkIndex 11227 * to avoid rescanning sequences of bytes, it DOES change the state of the 11228 * parser, do not use liberally. 11229 * 11230 * Returns the index to the current parsing point if the full sequence 11231 * is available, -1 otherwise. 11232 */ 11233 static int 11234 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first, 11235 xmlChar next, xmlChar third) { 11236 int base, len; 11237 xmlParserInputPtr in; 11238 const xmlChar *buf; 11239 11240 in = ctxt->input; 11241 if (in == NULL) return(-1); 11242 base = in->cur - in->base; 11243 if (base < 0) return(-1); 11244 if (ctxt->checkIndex > base) 11245 base = ctxt->checkIndex; 11246 if (in->buf == NULL) { 11247 buf = in->base; 11248 len = in->length; 11249 } else { 11250 buf = xmlBufContent(in->buf->buffer); 11251 len = xmlBufUse(in->buf->buffer); 11252 } 11253 /* take into account the sequence length */ 11254 if (third) len -= 2; 11255 else if (next) len --; 11256 for (;base < len;base++) { 11257 if (buf[base] == first) { 11258 if (third != 0) { 11259 if ((buf[base + 1] != next) || 11260 (buf[base + 2] != third)) continue; 11261 } else if (next != 0) { 11262 if (buf[base + 1] != next) continue; 11263 } 11264 ctxt->checkIndex = 0; 11265 #ifdef DEBUG_PUSH 11266 if (next == 0) 11267 xmlGenericError(xmlGenericErrorContext, 11268 "PP: lookup '%c' found at %d\n", 11269 first, base); 11270 else if (third == 0) 11271 xmlGenericError(xmlGenericErrorContext, 11272 "PP: lookup '%c%c' found at %d\n", 11273 first, next, base); 11274 else 11275 xmlGenericError(xmlGenericErrorContext, 11276 "PP: lookup '%c%c%c' found at %d\n", 11277 first, next, third, base); 11278 #endif 11279 return(base - (in->cur - in->base)); 11280 } 11281 } 11282 ctxt->checkIndex = base; 11283 #ifdef DEBUG_PUSH 11284 if (next == 0) 11285 xmlGenericError(xmlGenericErrorContext, 11286 "PP: lookup '%c' failed\n", first); 11287 else if (third == 0) 11288 
xmlGenericError(xmlGenericErrorContext, 11289 "PP: lookup '%c%c' failed\n", first, next); 11290 else 11291 xmlGenericError(xmlGenericErrorContext, 11292 "PP: lookup '%c%c%c' failed\n", first, next, third); 11293 #endif 11294 return(-1); 11295 } 11296 11297 /** 11298 * xmlParseGetLasts: 11299 * @ctxt: an XML parser context 11300 * @lastlt: pointer to store the last '<' from the input 11301 * @lastgt: pointer to store the last '>' from the input 11302 * 11303 * Lookup the last < and > in the current chunk 11304 */ 11305 static void 11306 xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt, 11307 const xmlChar **lastgt) { 11308 const xmlChar *tmp; 11309 11310 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) { 11311 xmlGenericError(xmlGenericErrorContext, 11312 "Internal error: xmlParseGetLasts\n"); 11313 return; 11314 } 11315 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) { 11316 tmp = ctxt->input->end; 11317 tmp--; 11318 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--; 11319 if (tmp < ctxt->input->base) { 11320 *lastlt = NULL; 11321 *lastgt = NULL; 11322 } else { 11323 *lastlt = tmp; 11324 tmp++; 11325 while ((tmp < ctxt->input->end) && (*tmp != '>')) { 11326 if (*tmp == '\'') { 11327 tmp++; 11328 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++; 11329 if (tmp < ctxt->input->end) tmp++; 11330 } else if (*tmp == '"') { 11331 tmp++; 11332 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++; 11333 if (tmp < ctxt->input->end) tmp++; 11334 } else 11335 tmp++; 11336 } 11337 if (tmp < ctxt->input->end) 11338 *lastgt = tmp; 11339 else { 11340 tmp = *lastlt; 11341 tmp--; 11342 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--; 11343 if (tmp >= ctxt->input->base) 11344 *lastgt = tmp; 11345 else 11346 *lastgt = NULL; 11347 } 11348 } 11349 } else { 11350 *lastlt = NULL; 11351 *lastgt = NULL; 11352 } 11353 } 11354 /** 11355 * xmlCheckCdataPush: 11356 * @cur: pointer to the block of characters 11357 * @len: length of the 
block in bytes 11358 * @complete: 1 if complete CDATA block is passed in, 0 if partial block 11359 * 11360 * Check that the block of characters is okay as SCdata content [20] 11361 * 11362 * Returns the number of bytes to pass if okay, a negative index where an 11363 * UTF-8 error occurred otherwise 11364 */ 11365 static int 11366 xmlCheckCdataPush(const xmlChar *utf, int len, int complete) { 11367 int ix; 11368 unsigned char c; 11369 int codepoint; 11370 11371 if ((utf == NULL) || (len <= 0)) 11372 return(0); 11373 11374 for (ix = 0; ix < len;) { /* string is 0-terminated */ 11375 c = utf[ix]; 11376 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */ 11377 if (c >= 0x20) 11378 ix++; 11379 else if ((c == 0xA) || (c == 0xD) || (c == 0x9)) 11380 ix++; 11381 else 11382 return(-ix); 11383 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */ 11384 if (ix + 2 > len) return(complete ? -ix : ix); 11385 if ((utf[ix+1] & 0xc0 ) != 0x80) 11386 return(-ix); 11387 codepoint = (utf[ix] & 0x1f) << 6; 11388 codepoint |= utf[ix+1] & 0x3f; 11389 if (!xmlIsCharQ(codepoint)) 11390 return(-ix); 11391 ix += 2; 11392 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */ 11393 if (ix + 3 > len) return(complete ? -ix : ix); 11394 if (((utf[ix+1] & 0xc0) != 0x80) || 11395 ((utf[ix+2] & 0xc0) != 0x80)) 11396 return(-ix); 11397 codepoint = (utf[ix] & 0xf) << 12; 11398 codepoint |= (utf[ix+1] & 0x3f) << 6; 11399 codepoint |= utf[ix+2] & 0x3f; 11400 if (!xmlIsCharQ(codepoint)) 11401 return(-ix); 11402 ix += 3; 11403 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */ 11404 if (ix + 4 > len) return(complete ? 
-ix : ix); 11405 if (((utf[ix+1] & 0xc0) != 0x80) || 11406 ((utf[ix+2] & 0xc0) != 0x80) || 11407 ((utf[ix+3] & 0xc0) != 0x80)) 11408 return(-ix); 11409 codepoint = (utf[ix] & 0x7) << 18; 11410 codepoint |= (utf[ix+1] & 0x3f) << 12; 11411 codepoint |= (utf[ix+2] & 0x3f) << 6; 11412 codepoint |= utf[ix+3] & 0x3f; 11413 if (!xmlIsCharQ(codepoint)) 11414 return(-ix); 11415 ix += 4; 11416 } else /* unknown encoding */ 11417 return(-ix); 11418 } 11419 return(ix); 11420 } 11421 11422 /** 11423 * xmlParseTryOrFinish: 11424 * @ctxt: an XML parser context 11425 * @terminate: last chunk indicator 11426 * 11427 * Try to progress on parsing 11428 * 11429 * Returns zero if no parsing was possible 11430 */ 11431 static int 11432 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { 11433 int ret = 0; 11434 int avail, tlen; 11435 xmlChar cur, next; 11436 const xmlChar *lastlt, *lastgt; 11437 11438 if (ctxt->input == NULL) 11439 return(0); 11440 11441 #ifdef DEBUG_PUSH 11442 switch (ctxt->instate) { 11443 case XML_PARSER_EOF: 11444 xmlGenericError(xmlGenericErrorContext, 11445 "PP: try EOF\n"); break; 11446 case XML_PARSER_START: 11447 xmlGenericError(xmlGenericErrorContext, 11448 "PP: try START\n"); break; 11449 case XML_PARSER_MISC: 11450 xmlGenericError(xmlGenericErrorContext, 11451 "PP: try MISC\n");break; 11452 case XML_PARSER_COMMENT: 11453 xmlGenericError(xmlGenericErrorContext, 11454 "PP: try COMMENT\n");break; 11455 case XML_PARSER_PROLOG: 11456 xmlGenericError(xmlGenericErrorContext, 11457 "PP: try PROLOG\n");break; 11458 case XML_PARSER_START_TAG: 11459 xmlGenericError(xmlGenericErrorContext, 11460 "PP: try START_TAG\n");break; 11461 case XML_PARSER_CONTENT: 11462 xmlGenericError(xmlGenericErrorContext, 11463 "PP: try CONTENT\n");break; 11464 case XML_PARSER_CDATA_SECTION: 11465 xmlGenericError(xmlGenericErrorContext, 11466 "PP: try CDATA_SECTION\n");break; 11467 case XML_PARSER_END_TAG: 11468 xmlGenericError(xmlGenericErrorContext, 11469 "PP: try 
END_TAG\n");break; 11470 case XML_PARSER_ENTITY_DECL: 11471 xmlGenericError(xmlGenericErrorContext, 11472 "PP: try ENTITY_DECL\n");break; 11473 case XML_PARSER_ENTITY_VALUE: 11474 xmlGenericError(xmlGenericErrorContext, 11475 "PP: try ENTITY_VALUE\n");break; 11476 case XML_PARSER_ATTRIBUTE_VALUE: 11477 xmlGenericError(xmlGenericErrorContext, 11478 "PP: try ATTRIBUTE_VALUE\n");break; 11479 case XML_PARSER_DTD: 11480 xmlGenericError(xmlGenericErrorContext, 11481 "PP: try DTD\n");break; 11482 case XML_PARSER_EPILOG: 11483 xmlGenericError(xmlGenericErrorContext, 11484 "PP: try EPILOG\n");break; 11485 case XML_PARSER_PI: 11486 xmlGenericError(xmlGenericErrorContext, 11487 "PP: try PI\n");break; 11488 case XML_PARSER_IGNORE: 11489 xmlGenericError(xmlGenericErrorContext, 11490 "PP: try IGNORE\n");break; 11491 } 11492 #endif 11493 11494 if ((ctxt->input != NULL) && 11495 (ctxt->input->cur - ctxt->input->base > 4096)) { 11496 xmlSHRINK(ctxt); 11497 ctxt->checkIndex = 0; 11498 } 11499 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 11500 11501 while (ctxt->instate != XML_PARSER_EOF) { 11502 if ((ctxt->wellFormed != 1) && (ctxt->disableSAX == 1)) 11503 return(0); 11504 11505 11506 /* 11507 * Pop-up of finished entities. 11508 */ 11509 while ((RAW == 0) && (ctxt->inputNr > 1)) 11510 xmlPopInput(ctxt); 11511 11512 if (ctxt->input == NULL) break; 11513 if (ctxt->input->buf == NULL) 11514 avail = ctxt->input->length - 11515 (ctxt->input->cur - ctxt->input->base); 11516 else { 11517 /* 11518 * If we are operating on converted input, try to flush 11519 * remainng chars to avoid them stalling in the non-converted 11520 * buffer. But do not do this in document start where 11521 * encoding="..." may not have been read and we work on a 11522 * guessed encoding. 
11523 */ 11524 if ((ctxt->instate != XML_PARSER_START) && 11525 (ctxt->input->buf->raw != NULL) && 11526 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) { 11527 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, 11528 ctxt->input); 11529 size_t current = ctxt->input->cur - ctxt->input->base; 11530 11531 xmlParserInputBufferPush(ctxt->input->buf, 0, ""); 11532 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, 11533 base, current); 11534 } 11535 avail = xmlBufUse(ctxt->input->buf->buffer) - 11536 (ctxt->input->cur - ctxt->input->base); 11537 } 11538 if (avail < 1) 11539 goto done; 11540 switch (ctxt->instate) { 11541 case XML_PARSER_EOF: 11542 /* 11543 * Document parsing is done ! 11544 */ 11545 goto done; 11546 case XML_PARSER_START: 11547 if (ctxt->charset == XML_CHAR_ENCODING_NONE) { 11548 xmlChar start[4]; 11549 xmlCharEncoding enc; 11550 11551 /* 11552 * Very first chars read from the document flow. 11553 */ 11554 if (avail < 4) 11555 goto done; 11556 11557 /* 11558 * Get the 4 first bytes and decode the charset 11559 * if enc != XML_CHAR_ENCODING_NONE 11560 * plug some encoding conversion routines, 11561 * else xmlSwitchEncoding will set to (default) 11562 * UTF8. 
11563 */ 11564 start[0] = RAW; 11565 start[1] = NXT(1); 11566 start[2] = NXT(2); 11567 start[3] = NXT(3); 11568 enc = xmlDetectCharEncoding(start, 4); 11569 xmlSwitchEncoding(ctxt, enc); 11570 break; 11571 } 11572 11573 if (avail < 2) 11574 goto done; 11575 cur = ctxt->input->cur[0]; 11576 next = ctxt->input->cur[1]; 11577 if (cur == 0) { 11578 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 11579 ctxt->sax->setDocumentLocator(ctxt->userData, 11580 &xmlDefaultSAXLocator); 11581 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 11582 xmlHaltParser(ctxt); 11583 #ifdef DEBUG_PUSH 11584 xmlGenericError(xmlGenericErrorContext, 11585 "PP: entering EOF\n"); 11586 #endif 11587 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11588 ctxt->sax->endDocument(ctxt->userData); 11589 goto done; 11590 } 11591 if ((cur == '<') && (next == '?')) { 11592 /* PI or XML decl */ 11593 if (avail < 5) return(ret); 11594 if ((!terminate) && 11595 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 11596 return(ret); 11597 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 11598 ctxt->sax->setDocumentLocator(ctxt->userData, 11599 &xmlDefaultSAXLocator); 11600 if ((ctxt->input->cur[2] == 'x') && 11601 (ctxt->input->cur[3] == 'm') && 11602 (ctxt->input->cur[4] == 'l') && 11603 (IS_BLANK_CH(ctxt->input->cur[5]))) { 11604 ret += 5; 11605 #ifdef DEBUG_PUSH 11606 xmlGenericError(xmlGenericErrorContext, 11607 "PP: Parsing XML Decl\n"); 11608 #endif 11609 xmlParseXMLDecl(ctxt); 11610 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 11611 /* 11612 * The XML REC instructs us to stop parsing right 11613 * here 11614 */ 11615 xmlHaltParser(ctxt); 11616 return(0); 11617 } 11618 ctxt->standalone = ctxt->input->standalone; 11619 if ((ctxt->encoding == NULL) && 11620 (ctxt->input->encoding != NULL)) 11621 ctxt->encoding = xmlStrdup(ctxt->input->encoding); 11622 if ((ctxt->sax) && (ctxt->sax->startDocument) && 11623 (!ctxt->disableSAX)) 11624 ctxt->sax->startDocument(ctxt->userData); 11625 
ctxt->instate = XML_PARSER_MISC; 11626 #ifdef DEBUG_PUSH 11627 xmlGenericError(xmlGenericErrorContext, 11628 "PP: entering MISC\n"); 11629 #endif 11630 } else { 11631 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 11632 if ((ctxt->sax) && (ctxt->sax->startDocument) && 11633 (!ctxt->disableSAX)) 11634 ctxt->sax->startDocument(ctxt->userData); 11635 ctxt->instate = XML_PARSER_MISC; 11636 #ifdef DEBUG_PUSH 11637 xmlGenericError(xmlGenericErrorContext, 11638 "PP: entering MISC\n"); 11639 #endif 11640 } 11641 } else { 11642 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 11643 ctxt->sax->setDocumentLocator(ctxt->userData, 11644 &xmlDefaultSAXLocator); 11645 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 11646 if (ctxt->version == NULL) { 11647 xmlErrMemory(ctxt, NULL); 11648 break; 11649 } 11650 if ((ctxt->sax) && (ctxt->sax->startDocument) && 11651 (!ctxt->disableSAX)) 11652 ctxt->sax->startDocument(ctxt->userData); 11653 ctxt->instate = XML_PARSER_MISC; 11654 #ifdef DEBUG_PUSH 11655 xmlGenericError(xmlGenericErrorContext, 11656 "PP: entering MISC\n"); 11657 #endif 11658 } 11659 break; 11660 case XML_PARSER_START_TAG: { 11661 const xmlChar *name; 11662 const xmlChar *prefix = NULL; 11663 const xmlChar *URI = NULL; 11664 int nsNr = ctxt->nsNr; 11665 11666 if ((avail < 2) && (ctxt->inputNr == 1)) 11667 goto done; 11668 cur = ctxt->input->cur[0]; 11669 if (cur != '<') { 11670 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 11671 xmlHaltParser(ctxt); 11672 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11673 ctxt->sax->endDocument(ctxt->userData); 11674 goto done; 11675 } 11676 if (!terminate) { 11677 if (ctxt->progressive) { 11678 /* > can be found unescaped in attribute values */ 11679 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt)) 11680 goto done; 11681 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) { 11682 goto done; 11683 } 11684 } 11685 if (ctxt->spaceNr == 0) 11686 spacePush(ctxt, -1); 11687 else if (*ctxt->space == -2) 11688 
spacePush(ctxt, -1); 11689 else 11690 spacePush(ctxt, *ctxt->space); 11691 #ifdef LIBXML_SAX1_ENABLED 11692 if (ctxt->sax2) 11693 #endif /* LIBXML_SAX1_ENABLED */ 11694 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen); 11695 #ifdef LIBXML_SAX1_ENABLED 11696 else 11697 name = xmlParseStartTag(ctxt); 11698 #endif /* LIBXML_SAX1_ENABLED */ 11699 if (ctxt->instate == XML_PARSER_EOF) 11700 goto done; 11701 if (name == NULL) { 11702 spacePop(ctxt); 11703 xmlHaltParser(ctxt); 11704 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11705 ctxt->sax->endDocument(ctxt->userData); 11706 goto done; 11707 } 11708 #ifdef LIBXML_VALID_ENABLED 11709 /* 11710 * [ VC: Root Element Type ] 11711 * The Name in the document type declaration must match 11712 * the element type of the root element. 11713 */ 11714 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 11715 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 11716 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 11717 #endif /* LIBXML_VALID_ENABLED */ 11718 11719 /* 11720 * Check for an Empty Element. 
11721 */ 11722 if ((RAW == '/') && (NXT(1) == '>')) { 11723 SKIP(2); 11724 11725 if (ctxt->sax2) { 11726 if ((ctxt->sax != NULL) && 11727 (ctxt->sax->endElementNs != NULL) && 11728 (!ctxt->disableSAX)) 11729 ctxt->sax->endElementNs(ctxt->userData, name, 11730 prefix, URI); 11731 if (ctxt->nsNr - nsNr > 0) 11732 nsPop(ctxt, ctxt->nsNr - nsNr); 11733 #ifdef LIBXML_SAX1_ENABLED 11734 } else { 11735 if ((ctxt->sax != NULL) && 11736 (ctxt->sax->endElement != NULL) && 11737 (!ctxt->disableSAX)) 11738 ctxt->sax->endElement(ctxt->userData, name); 11739 #endif /* LIBXML_SAX1_ENABLED */ 11740 } 11741 if (ctxt->instate == XML_PARSER_EOF) 11742 goto done; 11743 spacePop(ctxt); 11744 if (ctxt->nameNr == 0) { 11745 ctxt->instate = XML_PARSER_EPILOG; 11746 } else { 11747 ctxt->instate = XML_PARSER_CONTENT; 11748 } 11749 ctxt->progressive = 1; 11750 break; 11751 } 11752 if (RAW == '>') { 11753 NEXT; 11754 } else { 11755 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED, 11756 "Couldn't find end of Start Tag %s\n", 11757 name); 11758 nodePop(ctxt); 11759 spacePop(ctxt); 11760 } 11761 if (ctxt->sax2) 11762 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr); 11763 #ifdef LIBXML_SAX1_ENABLED 11764 else 11765 namePush(ctxt, name); 11766 #endif /* LIBXML_SAX1_ENABLED */ 11767 11768 ctxt->instate = XML_PARSER_CONTENT; 11769 ctxt->progressive = 1; 11770 break; 11771 } 11772 case XML_PARSER_CONTENT: { 11773 const xmlChar *test; 11774 unsigned int cons; 11775 if ((avail < 2) && (ctxt->inputNr == 1)) 11776 goto done; 11777 cur = ctxt->input->cur[0]; 11778 next = ctxt->input->cur[1]; 11779 11780 test = CUR_PTR; 11781 cons = ctxt->input->consumed; 11782 if ((cur == '<') && (next == '/')) { 11783 ctxt->instate = XML_PARSER_END_TAG; 11784 break; 11785 } else if ((cur == '<') && (next == '?')) { 11786 if ((!terminate) && 11787 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 11788 ctxt->progressive = XML_PARSER_PI; 11789 goto done; 11790 } 11791 xmlParsePI(ctxt); 11792 ctxt->instate = 
XML_PARSER_CONTENT; 11793 ctxt->progressive = 1; 11794 } else if ((cur == '<') && (next != '!')) { 11795 ctxt->instate = XML_PARSER_START_TAG; 11796 break; 11797 } else if ((cur == '<') && (next == '!') && 11798 (ctxt->input->cur[2] == '-') && 11799 (ctxt->input->cur[3] == '-')) { 11800 int term; 11801 11802 if (avail < 4) 11803 goto done; 11804 ctxt->input->cur += 4; 11805 term = xmlParseLookupSequence(ctxt, '-', '-', '>'); 11806 ctxt->input->cur -= 4; 11807 if ((!terminate) && (term < 0)) { 11808 ctxt->progressive = XML_PARSER_COMMENT; 11809 goto done; 11810 } 11811 xmlParseComment(ctxt); 11812 ctxt->instate = XML_PARSER_CONTENT; 11813 ctxt->progressive = 1; 11814 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') && 11815 (ctxt->input->cur[2] == '[') && 11816 (ctxt->input->cur[3] == 'C') && 11817 (ctxt->input->cur[4] == 'D') && 11818 (ctxt->input->cur[5] == 'A') && 11819 (ctxt->input->cur[6] == 'T') && 11820 (ctxt->input->cur[7] == 'A') && 11821 (ctxt->input->cur[8] == '[')) { 11822 SKIP(9); 11823 ctxt->instate = XML_PARSER_CDATA_SECTION; 11824 break; 11825 } else if ((cur == '<') && (next == '!') && 11826 (avail < 9)) { 11827 goto done; 11828 } else if (cur == '&') { 11829 if ((!terminate) && 11830 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0)) 11831 goto done; 11832 xmlParseReference(ctxt); 11833 } else { 11834 /* TODO Avoid the extra copy, handle directly !!! */ 11835 /* 11836 * Goal of the following test is: 11837 * - minimize calls to the SAX 'character' callback 11838 * when they are mergeable 11839 * - handle an problem for isBlank when we only parse 11840 * a sequence of blank chars and the next one is 11841 * not available to check against '<' presence. 11842 * - tries to homogenize the differences in SAX 11843 * callbacks between the push and pull versions 11844 * of the parser. 
11845 */ 11846 if ((ctxt->inputNr == 1) && 11847 (avail < XML_PARSER_BIG_BUFFER_SIZE)) { 11848 if (!terminate) { 11849 if (ctxt->progressive) { 11850 if ((lastlt == NULL) || 11851 (ctxt->input->cur > lastlt)) 11852 goto done; 11853 } else if (xmlParseLookupSequence(ctxt, 11854 '<', 0, 0) < 0) { 11855 goto done; 11856 } 11857 } 11858 } 11859 ctxt->checkIndex = 0; 11860 xmlParseCharData(ctxt, 0); 11861 } 11862 /* 11863 * Pop-up of finished entities. 11864 */ 11865 while ((RAW == 0) && (ctxt->inputNr > 1)) 11866 xmlPopInput(ctxt); 11867 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { 11868 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 11869 "detected an error in element content\n"); 11870 xmlHaltParser(ctxt); 11871 break; 11872 } 11873 break; 11874 } 11875 case XML_PARSER_END_TAG: 11876 if (avail < 2) 11877 goto done; 11878 if (!terminate) { 11879 if (ctxt->progressive) { 11880 /* > can be found unescaped in attribute values */ 11881 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt)) 11882 goto done; 11883 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) { 11884 goto done; 11885 } 11886 } 11887 if (ctxt->sax2) { 11888 xmlParseEndTag2(ctxt, 11889 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3], 11890 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0, 11891 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0); 11892 nameNsPop(ctxt); 11893 } 11894 #ifdef LIBXML_SAX1_ENABLED 11895 else 11896 xmlParseEndTag1(ctxt, 0); 11897 #endif /* LIBXML_SAX1_ENABLED */ 11898 if (ctxt->instate == XML_PARSER_EOF) { 11899 /* Nothing */ 11900 } else if (ctxt->nameNr == 0) { 11901 ctxt->instate = XML_PARSER_EPILOG; 11902 } else { 11903 ctxt->instate = XML_PARSER_CONTENT; 11904 } 11905 break; 11906 case XML_PARSER_CDATA_SECTION: { 11907 /* 11908 * The Push mode need to have the SAX callback for 11909 * cdataBlock merge back contiguous callbacks. 
11910 */ 11911 int base; 11912 11913 base = xmlParseLookupSequence(ctxt, ']', ']', '>'); 11914 if (base < 0) { 11915 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) { 11916 int tmp; 11917 11918 tmp = xmlCheckCdataPush(ctxt->input->cur, 11919 XML_PARSER_BIG_BUFFER_SIZE, 0); 11920 if (tmp < 0) { 11921 tmp = -tmp; 11922 ctxt->input->cur += tmp; 11923 goto encoding_error; 11924 } 11925 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 11926 if (ctxt->sax->cdataBlock != NULL) 11927 ctxt->sax->cdataBlock(ctxt->userData, 11928 ctxt->input->cur, tmp); 11929 else if (ctxt->sax->characters != NULL) 11930 ctxt->sax->characters(ctxt->userData, 11931 ctxt->input->cur, tmp); 11932 } 11933 if (ctxt->instate == XML_PARSER_EOF) 11934 goto done; 11935 SKIPL(tmp); 11936 ctxt->checkIndex = 0; 11937 } 11938 goto done; 11939 } else { 11940 int tmp; 11941 11942 tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1); 11943 if ((tmp < 0) || (tmp != base)) { 11944 tmp = -tmp; 11945 ctxt->input->cur += tmp; 11946 goto encoding_error; 11947 } 11948 if ((ctxt->sax != NULL) && (base == 0) && 11949 (ctxt->sax->cdataBlock != NULL) && 11950 (!ctxt->disableSAX)) { 11951 /* 11952 * Special case to provide identical behaviour 11953 * between pull and push parsers on enpty CDATA 11954 * sections 11955 */ 11956 if ((ctxt->input->cur - ctxt->input->base >= 9) && 11957 (!strncmp((const char *)&ctxt->input->cur[-9], 11958 "<![CDATA[", 9))) 11959 ctxt->sax->cdataBlock(ctxt->userData, 11960 BAD_CAST "", 0); 11961 } else if ((ctxt->sax != NULL) && (base > 0) && 11962 (!ctxt->disableSAX)) { 11963 if (ctxt->sax->cdataBlock != NULL) 11964 ctxt->sax->cdataBlock(ctxt->userData, 11965 ctxt->input->cur, base); 11966 else if (ctxt->sax->characters != NULL) 11967 ctxt->sax->characters(ctxt->userData, 11968 ctxt->input->cur, base); 11969 } 11970 if (ctxt->instate == XML_PARSER_EOF) 11971 goto done; 11972 SKIPL(base + 3); 11973 ctxt->checkIndex = 0; 11974 ctxt->instate = XML_PARSER_CONTENT; 11975 #ifdef DEBUG_PUSH 11976 
xmlGenericError(xmlGenericErrorContext, 11977 "PP: entering CONTENT\n"); 11978 #endif 11979 } 11980 break; 11981 } 11982 case XML_PARSER_MISC: 11983 SKIP_BLANKS; 11984 if (ctxt->input->buf == NULL) 11985 avail = ctxt->input->length - 11986 (ctxt->input->cur - ctxt->input->base); 11987 else 11988 avail = xmlBufUse(ctxt->input->buf->buffer) - 11989 (ctxt->input->cur - ctxt->input->base); 11990 if (avail < 2) 11991 goto done; 11992 cur = ctxt->input->cur[0]; 11993 next = ctxt->input->cur[1]; 11994 if ((cur == '<') && (next == '?')) { 11995 if ((!terminate) && 11996 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 11997 ctxt->progressive = XML_PARSER_PI; 11998 goto done; 11999 } 12000 #ifdef DEBUG_PUSH 12001 xmlGenericError(xmlGenericErrorContext, 12002 "PP: Parsing PI\n"); 12003 #endif 12004 xmlParsePI(ctxt); 12005 if (ctxt->instate == XML_PARSER_EOF) 12006 goto done; 12007 ctxt->instate = XML_PARSER_MISC; 12008 ctxt->progressive = 1; 12009 ctxt->checkIndex = 0; 12010 } else if ((cur == '<') && (next == '!') && 12011 (ctxt->input->cur[2] == '-') && 12012 (ctxt->input->cur[3] == '-')) { 12013 if ((!terminate) && 12014 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) { 12015 ctxt->progressive = XML_PARSER_COMMENT; 12016 goto done; 12017 } 12018 #ifdef DEBUG_PUSH 12019 xmlGenericError(xmlGenericErrorContext, 12020 "PP: Parsing Comment\n"); 12021 #endif 12022 xmlParseComment(ctxt); 12023 if (ctxt->instate == XML_PARSER_EOF) 12024 goto done; 12025 ctxt->instate = XML_PARSER_MISC; 12026 ctxt->progressive = 1; 12027 ctxt->checkIndex = 0; 12028 } else if ((cur == '<') && (next == '!') && 12029 (ctxt->input->cur[2] == 'D') && 12030 (ctxt->input->cur[3] == 'O') && 12031 (ctxt->input->cur[4] == 'C') && 12032 (ctxt->input->cur[5] == 'T') && 12033 (ctxt->input->cur[6] == 'Y') && 12034 (ctxt->input->cur[7] == 'P') && 12035 (ctxt->input->cur[8] == 'E')) { 12036 if ((!terminate) && 12037 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) { 12038 ctxt->progressive = XML_PARSER_DTD; 
12039 goto done; 12040 } 12041 #ifdef DEBUG_PUSH 12042 xmlGenericError(xmlGenericErrorContext, 12043 "PP: Parsing internal subset\n"); 12044 #endif 12045 ctxt->inSubset = 1; 12046 ctxt->progressive = 0; 12047 ctxt->checkIndex = 0; 12048 xmlParseDocTypeDecl(ctxt); 12049 if (ctxt->instate == XML_PARSER_EOF) 12050 goto done; 12051 if (RAW == '[') { 12052 ctxt->instate = XML_PARSER_DTD; 12053 #ifdef DEBUG_PUSH 12054 xmlGenericError(xmlGenericErrorContext, 12055 "PP: entering DTD\n"); 12056 #endif 12057 } else { 12058 /* 12059 * Create and update the external subset. 12060 */ 12061 ctxt->inSubset = 2; 12062 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 12063 (ctxt->sax->externalSubset != NULL)) 12064 ctxt->sax->externalSubset(ctxt->userData, 12065 ctxt->intSubName, ctxt->extSubSystem, 12066 ctxt->extSubURI); 12067 ctxt->inSubset = 0; 12068 xmlCleanSpecialAttr(ctxt); 12069 ctxt->instate = XML_PARSER_PROLOG; 12070 #ifdef DEBUG_PUSH 12071 xmlGenericError(xmlGenericErrorContext, 12072 "PP: entering PROLOG\n"); 12073 #endif 12074 } 12075 } else if ((cur == '<') && (next == '!') && 12076 (avail < 9)) { 12077 goto done; 12078 } else { 12079 ctxt->instate = XML_PARSER_START_TAG; 12080 ctxt->progressive = XML_PARSER_START_TAG; 12081 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 12082 #ifdef DEBUG_PUSH 12083 xmlGenericError(xmlGenericErrorContext, 12084 "PP: entering START_TAG\n"); 12085 #endif 12086 } 12087 break; 12088 case XML_PARSER_PROLOG: 12089 SKIP_BLANKS; 12090 if (ctxt->input->buf == NULL) 12091 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 12092 else 12093 avail = xmlBufUse(ctxt->input->buf->buffer) - 12094 (ctxt->input->cur - ctxt->input->base); 12095 if (avail < 2) 12096 goto done; 12097 cur = ctxt->input->cur[0]; 12098 next = ctxt->input->cur[1]; 12099 if ((cur == '<') && (next == '?')) { 12100 if ((!terminate) && 12101 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 12102 ctxt->progressive = XML_PARSER_PI; 12103 goto done; 12104 } 12105 
#ifdef DEBUG_PUSH 12106 xmlGenericError(xmlGenericErrorContext, 12107 "PP: Parsing PI\n"); 12108 #endif 12109 xmlParsePI(ctxt); 12110 if (ctxt->instate == XML_PARSER_EOF) 12111 goto done; 12112 ctxt->instate = XML_PARSER_PROLOG; 12113 ctxt->progressive = 1; 12114 } else if ((cur == '<') && (next == '!') && 12115 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 12116 if ((!terminate) && 12117 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) { 12118 ctxt->progressive = XML_PARSER_COMMENT; 12119 goto done; 12120 } 12121 #ifdef DEBUG_PUSH 12122 xmlGenericError(xmlGenericErrorContext, 12123 "PP: Parsing Comment\n"); 12124 #endif 12125 xmlParseComment(ctxt); 12126 if (ctxt->instate == XML_PARSER_EOF) 12127 goto done; 12128 ctxt->instate = XML_PARSER_PROLOG; 12129 ctxt->progressive = 1; 12130 } else if ((cur == '<') && (next == '!') && 12131 (avail < 4)) { 12132 goto done; 12133 } else { 12134 ctxt->instate = XML_PARSER_START_TAG; 12135 if (ctxt->progressive == 0) 12136 ctxt->progressive = XML_PARSER_START_TAG; 12137 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 12138 #ifdef DEBUG_PUSH 12139 xmlGenericError(xmlGenericErrorContext, 12140 "PP: entering START_TAG\n"); 12141 #endif 12142 } 12143 break; 12144 case XML_PARSER_EPILOG: 12145 SKIP_BLANKS; 12146 if (ctxt->input->buf == NULL) 12147 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 12148 else 12149 avail = xmlBufUse(ctxt->input->buf->buffer) - 12150 (ctxt->input->cur - ctxt->input->base); 12151 if (avail < 2) 12152 goto done; 12153 cur = ctxt->input->cur[0]; 12154 next = ctxt->input->cur[1]; 12155 if ((cur == '<') && (next == '?')) { 12156 if ((!terminate) && 12157 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 12158 ctxt->progressive = XML_PARSER_PI; 12159 goto done; 12160 } 12161 #ifdef DEBUG_PUSH 12162 xmlGenericError(xmlGenericErrorContext, 12163 "PP: Parsing PI\n"); 12164 #endif 12165 xmlParsePI(ctxt); 12166 if (ctxt->instate == XML_PARSER_EOF) 12167 goto done; 12168 
ctxt->instate = XML_PARSER_EPILOG; 12169 ctxt->progressive = 1; 12170 } else if ((cur == '<') && (next == '!') && 12171 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 12172 if ((!terminate) && 12173 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) { 12174 ctxt->progressive = XML_PARSER_COMMENT; 12175 goto done; 12176 } 12177 #ifdef DEBUG_PUSH 12178 xmlGenericError(xmlGenericErrorContext, 12179 "PP: Parsing Comment\n"); 12180 #endif 12181 xmlParseComment(ctxt); 12182 if (ctxt->instate == XML_PARSER_EOF) 12183 goto done; 12184 ctxt->instate = XML_PARSER_EPILOG; 12185 ctxt->progressive = 1; 12186 } else if ((cur == '<') && (next == '!') && 12187 (avail < 4)) { 12188 goto done; 12189 } else { 12190 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 12191 xmlHaltParser(ctxt); 12192 #ifdef DEBUG_PUSH 12193 xmlGenericError(xmlGenericErrorContext, 12194 "PP: entering EOF\n"); 12195 #endif 12196 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 12197 ctxt->sax->endDocument(ctxt->userData); 12198 goto done; 12199 } 12200 break; 12201 case XML_PARSER_DTD: { 12202 /* 12203 * Sorry but progressive parsing of the internal subset 12204 * is not expected to be supported. We first check that 12205 * the full content of the internal subset is available and 12206 * the parsing is launched only at that point. 12207 * Internal subset ends up with "']' S? '>'" in an unescaped 12208 * section and not in a ']]>' sequence which are conditional 12209 * sections (whoever argued to keep that crap in XML deserve 12210 * a place in hell !). 
12211 */ 12212 int base, i; 12213 xmlChar *buf; 12214 xmlChar quote = 0; 12215 size_t use; 12216 12217 base = ctxt->input->cur - ctxt->input->base; 12218 if (base < 0) return(0); 12219 if (ctxt->checkIndex > base) 12220 base = ctxt->checkIndex; 12221 buf = xmlBufContent(ctxt->input->buf->buffer); 12222 use = xmlBufUse(ctxt->input->buf->buffer); 12223 for (;(unsigned int) base < use; base++) { 12224 if (quote != 0) { 12225 if (buf[base] == quote) 12226 quote = 0; 12227 continue; 12228 } 12229 if ((quote == 0) && (buf[base] == '<')) { 12230 int found = 0; 12231 /* special handling of comments */ 12232 if (((unsigned int) base + 4 < use) && 12233 (buf[base + 1] == '!') && 12234 (buf[base + 2] == '-') && 12235 (buf[base + 3] == '-')) { 12236 for (;(unsigned int) base + 3 < use; base++) { 12237 if ((buf[base] == '-') && 12238 (buf[base + 1] == '-') && 12239 (buf[base + 2] == '>')) { 12240 found = 1; 12241 base += 2; 12242 break; 12243 } 12244 } 12245 if (!found) { 12246 #if 0 12247 fprintf(stderr, "unfinished comment\n"); 12248 #endif 12249 break; /* for */ 12250 } 12251 continue; 12252 } 12253 } 12254 if (buf[base] == '"') { 12255 quote = '"'; 12256 continue; 12257 } 12258 if (buf[base] == '\'') { 12259 quote = '\''; 12260 continue; 12261 } 12262 if (buf[base] == ']') { 12263 #if 0 12264 fprintf(stderr, "%c%c%c%c: ", buf[base], 12265 buf[base + 1], buf[base + 2], buf[base + 3]); 12266 #endif 12267 if ((unsigned int) base +1 >= use) 12268 break; 12269 if (buf[base + 1] == ']') { 12270 /* conditional crap, skip both ']' ! 
*/ 12271 base++; 12272 continue; 12273 } 12274 for (i = 1; (unsigned int) base + i < use; i++) { 12275 if (buf[base + i] == '>') { 12276 #if 0 12277 fprintf(stderr, "found\n"); 12278 #endif 12279 goto found_end_int_subset; 12280 } 12281 if (!IS_BLANK_CH(buf[base + i])) { 12282 #if 0 12283 fprintf(stderr, "not found\n"); 12284 #endif 12285 goto not_end_of_int_subset; 12286 } 12287 } 12288 #if 0 12289 fprintf(stderr, "end of stream\n"); 12290 #endif 12291 break; 12292 12293 } 12294 not_end_of_int_subset: 12295 continue; /* for */ 12296 } 12297 /* 12298 * We didn't found the end of the Internal subset 12299 */ 12300 if (quote == 0) 12301 ctxt->checkIndex = base; 12302 else 12303 ctxt->checkIndex = 0; 12304 #ifdef DEBUG_PUSH 12305 if (next == 0) 12306 xmlGenericError(xmlGenericErrorContext, 12307 "PP: lookup of int subset end filed\n"); 12308 #endif 12309 goto done; 12310 12311 found_end_int_subset: 12312 ctxt->checkIndex = 0; 12313 xmlParseInternalSubset(ctxt); 12314 if (ctxt->instate == XML_PARSER_EOF) 12315 goto done; 12316 ctxt->inSubset = 2; 12317 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 12318 (ctxt->sax->externalSubset != NULL)) 12319 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 12320 ctxt->extSubSystem, ctxt->extSubURI); 12321 ctxt->inSubset = 0; 12322 xmlCleanSpecialAttr(ctxt); 12323 if (ctxt->instate == XML_PARSER_EOF) 12324 goto done; 12325 ctxt->instate = XML_PARSER_PROLOG; 12326 ctxt->checkIndex = 0; 12327 #ifdef DEBUG_PUSH 12328 xmlGenericError(xmlGenericErrorContext, 12329 "PP: entering PROLOG\n"); 12330 #endif 12331 break; 12332 } 12333 case XML_PARSER_COMMENT: 12334 xmlGenericError(xmlGenericErrorContext, 12335 "PP: internal error, state == COMMENT\n"); 12336 ctxt->instate = XML_PARSER_CONTENT; 12337 #ifdef DEBUG_PUSH 12338 xmlGenericError(xmlGenericErrorContext, 12339 "PP: entering CONTENT\n"); 12340 #endif 12341 break; 12342 case XML_PARSER_IGNORE: 12343 xmlGenericError(xmlGenericErrorContext, 12344 "PP: internal error, state 
== IGNORE"); 12345 ctxt->instate = XML_PARSER_DTD; 12346 #ifdef DEBUG_PUSH 12347 xmlGenericError(xmlGenericErrorContext, 12348 "PP: entering DTD\n"); 12349 #endif 12350 break; 12351 case XML_PARSER_PI: 12352 xmlGenericError(xmlGenericErrorContext, 12353 "PP: internal error, state == PI\n"); 12354 ctxt->instate = XML_PARSER_CONTENT; 12355 #ifdef DEBUG_PUSH 12356 xmlGenericError(xmlGenericErrorContext, 12357 "PP: entering CONTENT\n"); 12358 #endif 12359 break; 12360 case XML_PARSER_ENTITY_DECL: 12361 xmlGenericError(xmlGenericErrorContext, 12362 "PP: internal error, state == ENTITY_DECL\n"); 12363 ctxt->instate = XML_PARSER_DTD; 12364 #ifdef DEBUG_PUSH 12365 xmlGenericError(xmlGenericErrorContext, 12366 "PP: entering DTD\n"); 12367 #endif 12368 break; 12369 case XML_PARSER_ENTITY_VALUE: 12370 xmlGenericError(xmlGenericErrorContext, 12371 "PP: internal error, state == ENTITY_VALUE\n"); 12372 ctxt->instate = XML_PARSER_CONTENT; 12373 #ifdef DEBUG_PUSH 12374 xmlGenericError(xmlGenericErrorContext, 12375 "PP: entering DTD\n"); 12376 #endif 12377 break; 12378 case XML_PARSER_ATTRIBUTE_VALUE: 12379 xmlGenericError(xmlGenericErrorContext, 12380 "PP: internal error, state == ATTRIBUTE_VALUE\n"); 12381 ctxt->instate = XML_PARSER_START_TAG; 12382 #ifdef DEBUG_PUSH 12383 xmlGenericError(xmlGenericErrorContext, 12384 "PP: entering START_TAG\n"); 12385 #endif 12386 break; 12387 case XML_PARSER_SYSTEM_LITERAL: 12388 xmlGenericError(xmlGenericErrorContext, 12389 "PP: internal error, state == SYSTEM_LITERAL\n"); 12390 ctxt->instate = XML_PARSER_START_TAG; 12391 #ifdef DEBUG_PUSH 12392 xmlGenericError(xmlGenericErrorContext, 12393 "PP: entering START_TAG\n"); 12394 #endif 12395 break; 12396 case XML_PARSER_PUBLIC_LITERAL: 12397 xmlGenericError(xmlGenericErrorContext, 12398 "PP: internal error, state == PUBLIC_LITERAL\n"); 12399 ctxt->instate = XML_PARSER_START_TAG; 12400 #ifdef DEBUG_PUSH 12401 xmlGenericError(xmlGenericErrorContext, 12402 "PP: entering START_TAG\n"); 12403 #endif 
12404 break; 12405 } 12406 } 12407 done: 12408 #ifdef DEBUG_PUSH 12409 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret); 12410 #endif 12411 return(ret); 12412 encoding_error: 12413 { 12414 char buffer[150]; 12415 12416 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 12417 ctxt->input->cur[0], ctxt->input->cur[1], 12418 ctxt->input->cur[2], ctxt->input->cur[3]); 12419 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 12420 "Input is not proper UTF-8, indicate encoding !\n%s", 12421 BAD_CAST buffer, NULL); 12422 } 12423 return(0); 12424 } 12425 12426 /** 12427 * xmlParseCheckTransition: 12428 * @ctxt: an XML parser context 12429 * @chunk: a char array 12430 * @size: the size in byte of the chunk 12431 * 12432 * Check depending on the current parser state if the chunk given must be 12433 * processed immediately or one need more data to advance on parsing. 12434 * 12435 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed 12436 */ 12437 static int 12438 xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) { 12439 if ((ctxt == NULL) || (chunk == NULL) || (size < 0)) 12440 return(-1); 12441 if (ctxt->instate == XML_PARSER_START_TAG) { 12442 if (memchr(chunk, '>', size) != NULL) 12443 return(1); 12444 return(0); 12445 } 12446 if (ctxt->progressive == XML_PARSER_COMMENT) { 12447 if (memchr(chunk, '>', size) != NULL) 12448 return(1); 12449 return(0); 12450 } 12451 if (ctxt->instate == XML_PARSER_CDATA_SECTION) { 12452 if (memchr(chunk, '>', size) != NULL) 12453 return(1); 12454 return(0); 12455 } 12456 if (ctxt->progressive == XML_PARSER_PI) { 12457 if (memchr(chunk, '>', size) != NULL) 12458 return(1); 12459 return(0); 12460 } 12461 if (ctxt->instate == XML_PARSER_END_TAG) { 12462 if (memchr(chunk, '>', size) != NULL) 12463 return(1); 12464 return(0); 12465 } 12466 if ((ctxt->progressive == XML_PARSER_DTD) || 12467 (ctxt->instate == XML_PARSER_DTD)) { 12468 if (memchr(chunk, '>', size) != NULL) 12469 
return(1); 12470 return(0); 12471 } 12472 return(1); 12473 } 12474 12475 /** 12476 * xmlParseChunk: 12477 * @ctxt: an XML parser context 12478 * @chunk: an char array 12479 * @size: the size in byte of the chunk 12480 * @terminate: last chunk indicator 12481 * 12482 * Parse a Chunk of memory 12483 * 12484 * Returns zero if no error, the xmlParserErrors otherwise. 12485 */ 12486 int 12487 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, 12488 int terminate) { 12489 int end_in_lf = 0; 12490 int remain = 0; 12491 size_t old_avail = 0; 12492 size_t avail = 0; 12493 12494 if (ctxt == NULL) 12495 return(XML_ERR_INTERNAL_ERROR); 12496 if ((ctxt->wellFormed != 1) && (ctxt->disableSAX == 1)) 12497 return(ctxt->errNo); 12498 if (ctxt->instate == XML_PARSER_EOF) 12499 return(-1); 12500 if (ctxt->instate == XML_PARSER_START) 12501 xmlDetectSAX2(ctxt); 12502 if ((size > 0) && (chunk != NULL) && (!terminate) && 12503 (chunk[size - 1] == '\r')) { 12504 end_in_lf = 1; 12505 size--; 12506 } 12507 12508 xmldecl_done: 12509 12510 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 12511 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { 12512 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); 12513 size_t cur = ctxt->input->cur - ctxt->input->base; 12514 int res; 12515 12516 old_avail = xmlBufUse(ctxt->input->buf->buffer); 12517 /* 12518 * Specific handling if we autodetected an encoding, we should not 12519 * push more than the first line ... 
which depend on the encoding 12520 * And only push the rest once the final encoding was detected 12521 */ 12522 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) && 12523 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) { 12524 unsigned int len = 45; 12525 12526 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 12527 BAD_CAST "UTF-16")) || 12528 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 12529 BAD_CAST "UTF16"))) 12530 len = 90; 12531 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 12532 BAD_CAST "UCS-4")) || 12533 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 12534 BAD_CAST "UCS4"))) 12535 len = 180; 12536 12537 if (ctxt->input->buf->rawconsumed < len) 12538 len -= ctxt->input->buf->rawconsumed; 12539 12540 /* 12541 * Change size for reading the initial declaration only 12542 * if size is greater than len. Otherwise, memmove in xmlBufferAdd 12543 * will blindly copy extra bytes from memory. 12544 */ 12545 if ((unsigned int) size > len) { 12546 remain = size - len; 12547 size = len; 12548 } else { 12549 remain = 0; 12550 } 12551 } 12552 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 12553 if (res < 0) { 12554 ctxt->errNo = XML_PARSER_EOF; 12555 xmlHaltParser(ctxt); 12556 return (XML_PARSER_EOF); 12557 } 12558 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); 12559 #ifdef DEBUG_PUSH 12560 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 12561 #endif 12562 12563 } else if (ctxt->instate != XML_PARSER_EOF) { 12564 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) { 12565 xmlParserInputBufferPtr in = ctxt->input->buf; 12566 if ((in->encoder != NULL) && (in->buffer != NULL) && 12567 (in->raw != NULL)) { 12568 int nbchars; 12569 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input); 12570 size_t current = ctxt->input->cur - ctxt->input->base; 12571 12572 nbchars = xmlCharEncInput(in, terminate); 12573 if (nbchars < 0) { 
12574 /* TODO 2.6.0 */ 12575 xmlGenericError(xmlGenericErrorContext, 12576 "xmlParseChunk: encoder error\n"); 12577 xmlHaltParser(ctxt); 12578 return(XML_ERR_INVALID_ENCODING); 12579 } 12580 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current); 12581 } 12582 } 12583 } 12584 if (remain != 0) { 12585 xmlParseTryOrFinish(ctxt, 0); 12586 } else { 12587 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) 12588 avail = xmlBufUse(ctxt->input->buf->buffer); 12589 /* 12590 * Depending on the current state it may not be such 12591 * a good idea to try parsing if there is nothing in the chunk 12592 * which would be worth doing a parser state transition and we 12593 * need to wait for more data 12594 */ 12595 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) || 12596 (old_avail == 0) || (avail == 0) || 12597 (xmlParseCheckTransition(ctxt, 12598 (const char *)&ctxt->input->base[old_avail], 12599 avail - old_avail))) 12600 xmlParseTryOrFinish(ctxt, terminate); 12601 } 12602 if (ctxt->instate == XML_PARSER_EOF) 12603 return(ctxt->errNo); 12604 12605 if ((ctxt->input != NULL) && 12606 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) || 12607 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) && 12608 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 12609 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup"); 12610 xmlHaltParser(ctxt); 12611 } 12612 if ((ctxt->wellFormed != 1) && (ctxt->disableSAX == 1)) 12613 return(ctxt->errNo); 12614 12615 if (remain != 0) { 12616 chunk += size; 12617 size = remain; 12618 remain = 0; 12619 goto xmldecl_done; 12620 } 12621 if ((end_in_lf == 1) && (ctxt->input != NULL) && 12622 (ctxt->input->buf != NULL)) { 12623 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, 12624 ctxt->input); 12625 size_t current = ctxt->input->cur - ctxt->input->base; 12626 12627 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r"); 12628 12629 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, 12630 base, 
current); 12631 } 12632 if (terminate) { 12633 /* 12634 * Check for termination 12635 */ 12636 int cur_avail = 0; 12637 12638 if (ctxt->input != NULL) { 12639 if (ctxt->input->buf == NULL) 12640 cur_avail = ctxt->input->length - 12641 (ctxt->input->cur - ctxt->input->base); 12642 else 12643 cur_avail = xmlBufUse(ctxt->input->buf->buffer) - 12644 (ctxt->input->cur - ctxt->input->base); 12645 } 12646 12647 if ((ctxt->instate != XML_PARSER_EOF) && 12648 (ctxt->instate != XML_PARSER_EPILOG)) { 12649 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 12650 } 12651 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) { 12652 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 12653 } 12654 if (ctxt->instate != XML_PARSER_EOF) { 12655 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 12656 ctxt->sax->endDocument(ctxt->userData); 12657 } 12658 ctxt->instate = XML_PARSER_EOF; 12659 } 12660 if (ctxt->wellFormed == 0) 12661 return((xmlParserErrors) ctxt->errNo); 12662 else 12663 return(0); 12664 } 12665 12666 /************************************************************************ 12667 * * 12668 * I/O front end functions to the parser * 12669 * * 12670 ************************************************************************/ 12671 12672 /** 12673 * xmlCreatePushParserCtxt: 12674 * @sax: a SAX handler 12675 * @user_data: The user data returned on SAX callbacks 12676 * @chunk: a pointer to an array of chars 12677 * @size: number of chars in the array 12678 * @filename: an optional file name or URI 12679 * 12680 * Create a parser context for using the XML parser in push mode. 12681 * If @buffer and @size are non-NULL, the data is used to detect 12682 * the encoding. The remaining characters will be parsed so they 12683 * don't need to be fed in again through xmlParseChunk. 12684 * To allow content encoding detection, @size should be >= 4 12685 * The value of @filename is used for fetching external entities 12686 * and error/warning reports. 
12687 * 12688 * Returns the new parser context or NULL 12689 */ 12690 12691 xmlParserCtxtPtr 12692 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 12693 const char *chunk, int size, const char *filename) { 12694 xmlParserCtxtPtr ctxt; 12695 xmlParserInputPtr inputStream; 12696 xmlParserInputBufferPtr buf; 12697 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 12698 12699 /* 12700 * plug some encoding conversion routines 12701 */ 12702 if ((chunk != NULL) && (size >= 4)) 12703 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 12704 12705 buf = xmlAllocParserInputBuffer(enc); 12706 if (buf == NULL) return(NULL); 12707 12708 ctxt = xmlNewParserCtxt(); 12709 if (ctxt == NULL) { 12710 xmlErrMemory(NULL, "creating parser: out of memory\n"); 12711 xmlFreeParserInputBuffer(buf); 12712 return(NULL); 12713 } 12714 ctxt->dictNames = 1; 12715 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *)); 12716 if (ctxt->pushTab == NULL) { 12717 xmlErrMemory(ctxt, NULL); 12718 xmlFreeParserInputBuffer(buf); 12719 xmlFreeParserCtxt(ctxt); 12720 return(NULL); 12721 } 12722 if (sax != NULL) { 12723 #ifdef LIBXML_SAX1_ENABLED 12724 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 12725 #endif /* LIBXML_SAX1_ENABLED */ 12726 xmlFree(ctxt->sax); 12727 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 12728 if (ctxt->sax == NULL) { 12729 xmlErrMemory(ctxt, NULL); 12730 xmlFreeParserInputBuffer(buf); 12731 xmlFreeParserCtxt(ctxt); 12732 return(NULL); 12733 } 12734 memset(ctxt->sax, 0, sizeof(xmlSAXHandler)); 12735 if (sax->initialized == XML_SAX2_MAGIC) 12736 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 12737 else 12738 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); 12739 if (user_data != NULL) 12740 ctxt->userData = user_data; 12741 } 12742 if (filename == NULL) { 12743 ctxt->directory = NULL; 12744 } else { 12745 ctxt->directory = xmlParserGetDirectory(filename); 12746 } 12747 12748 inputStream = xmlNewInputStream(ctxt); 
12749 if (inputStream == NULL) { 12750 xmlFreeParserCtxt(ctxt); 12751 xmlFreeParserInputBuffer(buf); 12752 return(NULL); 12753 } 12754 12755 if (filename == NULL) 12756 inputStream->filename = NULL; 12757 else { 12758 inputStream->filename = (char *) 12759 xmlCanonicPath((const xmlChar *) filename); 12760 if (inputStream->filename == NULL) { 12761 xmlFreeParserCtxt(ctxt); 12762 xmlFreeParserInputBuffer(buf); 12763 return(NULL); 12764 } 12765 } 12766 inputStream->buf = buf; 12767 xmlBufResetInput(inputStream->buf->buffer, inputStream); 12768 inputPush(ctxt, inputStream); 12769 12770 /* 12771 * If the caller didn't provide an initial 'chunk' for determining 12772 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so 12773 * that it can be automatically determined later 12774 */ 12775 if ((size == 0) || (chunk == NULL)) { 12776 ctxt->charset = XML_CHAR_ENCODING_NONE; 12777 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) { 12778 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); 12779 size_t cur = ctxt->input->cur - ctxt->input->base; 12780 12781 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 12782 12783 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); 12784 #ifdef DEBUG_PUSH 12785 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 12786 #endif 12787 } 12788 12789 if (enc != XML_CHAR_ENCODING_NONE) { 12790 xmlSwitchEncoding(ctxt, enc); 12791 } 12792 12793 return(ctxt); 12794 } 12795 #endif /* LIBXML_PUSH_ENABLED */ 12796 12797 /** 12798 * xmlHaltParser: 12799 * @ctxt: an XML parser context 12800 * 12801 * Blocks further parser processing don't override error 12802 * for internal use 12803 */ 12804 static void 12805 xmlHaltParser(xmlParserCtxtPtr ctxt) { 12806 if (ctxt == NULL) 12807 return; 12808 ctxt->instate = XML_PARSER_EOF; 12809 ctxt->disableSAX = 1; 12810 if (ctxt->input != NULL) { 12811 /* 12812 * in case there was a specific allocation deallocate before 12813 * 
overriding base 12814 */ 12815 if (ctxt->input->free != NULL) { 12816 ctxt->input->free((xmlChar *) ctxt->input->base); 12817 ctxt->input->free = NULL; 12818 } 12819 ctxt->input->cur = BAD_CAST""; 12820 ctxt->input->base = ctxt->input->cur; 12821 ctxt->input->end = ctxt->input->cur; 12822 } 12823 } 12824 12825 /** 12826 * xmlStopParser: 12827 * @ctxt: an XML parser context 12828 * 12829 * Blocks further parser processing 12830 */ 12831 void 12832 xmlStopParser(xmlParserCtxtPtr ctxt) { 12833 if (ctxt == NULL) 12834 return; 12835 assert(!ctxt->html); 12836 xmlHaltParser(ctxt); 12837 ctxt->errNo = XML_ERR_USER_STOP; 12838 } 12839 12840 /** 12841 * xmlCreateIOParserCtxt: 12842 * @sax: a SAX handler 12843 * @user_data: The user data returned on SAX callbacks 12844 * @ioread: an I/O read function 12845 * @ioclose: an I/O close function 12846 * @ioctx: an I/O handler 12847 * @enc: the charset encoding if known 12848 * 12849 * Create a parser context for using the XML parser with an existing 12850 * I/O stream 12851 * 12852 * Returns the new parser context or NULL 12853 */ 12854 xmlParserCtxtPtr 12855 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 12856 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 12857 void *ioctx, xmlCharEncoding enc) { 12858 xmlParserCtxtPtr ctxt; 12859 xmlParserInputPtr inputStream; 12860 xmlParserInputBufferPtr buf; 12861 12862 if (ioread == NULL) return(NULL); 12863 12864 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc); 12865 if (buf == NULL) { 12866 if (ioclose != NULL) 12867 ioclose(ioctx); 12868 return (NULL); 12869 } 12870 12871 ctxt = xmlNewParserCtxt(); 12872 if (ctxt == NULL) { 12873 xmlFreeParserInputBuffer(buf); 12874 return(NULL); 12875 } 12876 if (sax != NULL) { 12877 #ifdef LIBXML_SAX1_ENABLED 12878 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 12879 #endif /* LIBXML_SAX1_ENABLED */ 12880 xmlFree(ctxt->sax); 12881 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 
12882 if (ctxt->sax == NULL) { 12883 xmlErrMemory(ctxt, NULL); 12884 xmlFreeParserCtxt(ctxt); 12885 return(NULL); 12886 } 12887 memset(ctxt->sax, 0, sizeof(xmlSAXHandler)); 12888 if (sax->initialized == XML_SAX2_MAGIC) 12889 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 12890 else 12891 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); 12892 if (user_data != NULL) 12893 ctxt->userData = user_data; 12894 } 12895 12896 inputStream = xmlNewIOInputStream(ctxt, buf, enc); 12897 if (inputStream == NULL) { 12898 xmlFreeParserCtxt(ctxt); 12899 return(NULL); 12900 } 12901 inputPush(ctxt, inputStream); 12902 12903 return(ctxt); 12904 } 12905 12906 #ifdef LIBXML_VALID_ENABLED 12907 /************************************************************************ 12908 * * 12909 * Front ends when parsing a DTD * 12910 * * 12911 ************************************************************************/ 12912 12913 /** 12914 * xmlIOParseDTD: 12915 * @sax: the SAX handler block or NULL 12916 * @input: an Input Buffer 12917 * @enc: the charset encoding if known 12918 * 12919 * Load and parse a DTD 12920 * 12921 * Returns the resulting xmlDtdPtr or NULL in case of error. 12922 * @input will be freed by the function in any case. 
12923 */ 12924 12925 xmlDtdPtr 12926 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input, 12927 xmlCharEncoding enc) { 12928 xmlDtdPtr ret = NULL; 12929 xmlParserCtxtPtr ctxt; 12930 xmlParserInputPtr pinput = NULL; 12931 xmlChar start[4]; 12932 12933 if (input == NULL) 12934 return(NULL); 12935 12936 ctxt = xmlNewParserCtxt(); 12937 if (ctxt == NULL) { 12938 xmlFreeParserInputBuffer(input); 12939 return(NULL); 12940 } 12941 12942 /* We are loading a DTD */ 12943 ctxt->options |= XML_PARSE_DTDLOAD; 12944 12945 /* 12946 * Set-up the SAX context 12947 */ 12948 if (sax != NULL) { 12949 if (ctxt->sax != NULL) 12950 xmlFree(ctxt->sax); 12951 ctxt->sax = sax; 12952 ctxt->userData = ctxt; 12953 } 12954 xmlDetectSAX2(ctxt); 12955 12956 /* 12957 * generate a parser input from the I/O handler 12958 */ 12959 12960 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 12961 if (pinput == NULL) { 12962 if (sax != NULL) ctxt->sax = NULL; 12963 xmlFreeParserInputBuffer(input); 12964 xmlFreeParserCtxt(ctxt); 12965 return(NULL); 12966 } 12967 12968 /* 12969 * plug some encoding conversion routines here. 12970 */ 12971 if (xmlPushInput(ctxt, pinput) < 0) { 12972 if (sax != NULL) ctxt->sax = NULL; 12973 xmlFreeParserCtxt(ctxt); 12974 return(NULL); 12975 } 12976 if (enc != XML_CHAR_ENCODING_NONE) { 12977 xmlSwitchEncoding(ctxt, enc); 12978 } 12979 12980 pinput->filename = NULL; 12981 pinput->line = 1; 12982 pinput->col = 1; 12983 pinput->base = ctxt->input->cur; 12984 pinput->cur = ctxt->input->cur; 12985 pinput->free = NULL; 12986 12987 /* 12988 * let's parse that entity knowing it's an external subset. 
12989 */ 12990 ctxt->inSubset = 2; 12991 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 12992 if (ctxt->myDoc == NULL) { 12993 xmlErrMemory(ctxt, "New Doc failed"); 12994 return(NULL); 12995 } 12996 ctxt->myDoc->properties = XML_DOC_INTERNAL; 12997 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 12998 BAD_CAST "none", BAD_CAST "none"); 12999 13000 if ((enc == XML_CHAR_ENCODING_NONE) && 13001 ((ctxt->input->end - ctxt->input->cur) >= 4)) { 13002 /* 13003 * Get the 4 first bytes and decode the charset 13004 * if enc != XML_CHAR_ENCODING_NONE 13005 * plug some encoding conversion routines. 13006 */ 13007 start[0] = RAW; 13008 start[1] = NXT(1); 13009 start[2] = NXT(2); 13010 start[3] = NXT(3); 13011 enc = xmlDetectCharEncoding(start, 4); 13012 if (enc != XML_CHAR_ENCODING_NONE) { 13013 xmlSwitchEncoding(ctxt, enc); 13014 } 13015 } 13016 13017 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none"); 13018 13019 if (ctxt->myDoc != NULL) { 13020 if (ctxt->wellFormed) { 13021 ret = ctxt->myDoc->extSubset; 13022 ctxt->myDoc->extSubset = NULL; 13023 if (ret != NULL) { 13024 xmlNodePtr tmp; 13025 13026 ret->doc = NULL; 13027 tmp = ret->children; 13028 while (tmp != NULL) { 13029 tmp->doc = NULL; 13030 tmp = tmp->next; 13031 } 13032 } 13033 } else { 13034 ret = NULL; 13035 } 13036 xmlFreeDoc(ctxt->myDoc); 13037 ctxt->myDoc = NULL; 13038 } 13039 if (sax != NULL) ctxt->sax = NULL; 13040 xmlFreeParserCtxt(ctxt); 13041 13042 return(ret); 13043 } 13044 13045 /** 13046 * xmlSAXParseDTD: 13047 * @sax: the SAX handler block 13048 * @ExternalID: a NAME* containing the External ID of the DTD 13049 * @SystemID: a NAME* containing the URL to the DTD 13050 * 13051 * Load and parse an external subset. 13052 * 13053 * Returns the resulting xmlDtdPtr or NULL in case of error. 
13054 */ 13055 13056 xmlDtdPtr 13057 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID, 13058 const xmlChar *SystemID) { 13059 xmlDtdPtr ret = NULL; 13060 xmlParserCtxtPtr ctxt; 13061 xmlParserInputPtr input = NULL; 13062 xmlCharEncoding enc; 13063 xmlChar* systemIdCanonic; 13064 13065 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL); 13066 13067 ctxt = xmlNewParserCtxt(); 13068 if (ctxt == NULL) { 13069 return(NULL); 13070 } 13071 13072 /* We are loading a DTD */ 13073 ctxt->options |= XML_PARSE_DTDLOAD; 13074 13075 /* 13076 * Set-up the SAX context 13077 */ 13078 if (sax != NULL) { 13079 if (ctxt->sax != NULL) 13080 xmlFree(ctxt->sax); 13081 ctxt->sax = sax; 13082 ctxt->userData = ctxt; 13083 } 13084 13085 /* 13086 * Canonicalise the system ID 13087 */ 13088 systemIdCanonic = xmlCanonicPath(SystemID); 13089 if ((SystemID != NULL) && (systemIdCanonic == NULL)) { 13090 xmlFreeParserCtxt(ctxt); 13091 return(NULL); 13092 } 13093 13094 /* 13095 * Ask the Entity resolver to load the damn thing 13096 */ 13097 13098 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL)) 13099 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, 13100 systemIdCanonic); 13101 if (input == NULL) { 13102 if (sax != NULL) ctxt->sax = NULL; 13103 xmlFreeParserCtxt(ctxt); 13104 if (systemIdCanonic != NULL) 13105 xmlFree(systemIdCanonic); 13106 return(NULL); 13107 } 13108 13109 /* 13110 * plug some encoding conversion routines here. 
13111 */ 13112 if (xmlPushInput(ctxt, input) < 0) { 13113 if (sax != NULL) ctxt->sax = NULL; 13114 xmlFreeParserCtxt(ctxt); 13115 if (systemIdCanonic != NULL) 13116 xmlFree(systemIdCanonic); 13117 return(NULL); 13118 } 13119 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 13120 enc = xmlDetectCharEncoding(ctxt->input->cur, 4); 13121 xmlSwitchEncoding(ctxt, enc); 13122 } 13123 13124 if (input->filename == NULL) 13125 input->filename = (char *) systemIdCanonic; 13126 else 13127 xmlFree(systemIdCanonic); 13128 input->line = 1; 13129 input->col = 1; 13130 input->base = ctxt->input->cur; 13131 input->cur = ctxt->input->cur; 13132 input->free = NULL; 13133 13134 /* 13135 * let's parse that entity knowing it's an external subset. 13136 */ 13137 ctxt->inSubset = 2; 13138 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 13139 if (ctxt->myDoc == NULL) { 13140 xmlErrMemory(ctxt, "New Doc failed"); 13141 if (sax != NULL) ctxt->sax = NULL; 13142 xmlFreeParserCtxt(ctxt); 13143 return(NULL); 13144 } 13145 ctxt->myDoc->properties = XML_DOC_INTERNAL; 13146 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 13147 ExternalID, SystemID); 13148 xmlParseExternalSubset(ctxt, ExternalID, SystemID); 13149 13150 if (ctxt->myDoc != NULL) { 13151 if (ctxt->wellFormed) { 13152 ret = ctxt->myDoc->extSubset; 13153 ctxt->myDoc->extSubset = NULL; 13154 if (ret != NULL) { 13155 xmlNodePtr tmp; 13156 13157 ret->doc = NULL; 13158 tmp = ret->children; 13159 while (tmp != NULL) { 13160 tmp->doc = NULL; 13161 tmp = tmp->next; 13162 } 13163 } 13164 } else { 13165 ret = NULL; 13166 } 13167 xmlFreeDoc(ctxt->myDoc); 13168 ctxt->myDoc = NULL; 13169 } 13170 if (sax != NULL) ctxt->sax = NULL; 13171 xmlFreeParserCtxt(ctxt); 13172 13173 return(ret); 13174 } 13175 13176 13177 /** 13178 * xmlParseDTD: 13179 * @ExternalID: a NAME* containing the External ID of the DTD 13180 * @SystemID: a NAME* containing the URL to the DTD 13181 * 13182 * Load and parse an external subset. 
13183 * 13184 * Returns the resulting xmlDtdPtr or NULL in case of error. 13185 */ 13186 13187 xmlDtdPtr 13188 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) { 13189 return(xmlSAXParseDTD(NULL, ExternalID, SystemID)); 13190 } 13191 #endif /* LIBXML_VALID_ENABLED */ 13192 13193 /************************************************************************ 13194 * * 13195 * Front ends when parsing an Entity * 13196 * * 13197 ************************************************************************/ 13198 13199 /** 13200 * xmlParseCtxtExternalEntity: 13201 * @ctx: the existing parsing context 13202 * @URL: the URL for the entity to load 13203 * @ID: the System ID for the entity to load 13204 * @lst: the return value for the set of parsed nodes 13205 * 13206 * Parse an external general entity within an existing parsing context 13207 * An external general parsed entity is well-formed if it matches the 13208 * production labeled extParsedEnt. 13209 * 13210 * [78] extParsedEnt ::= TextDecl? 
content 13211 * 13212 * Returns 0 if the entity is well formed, -1 in case of args problem and 13213 * the parser error code otherwise 13214 */ 13215 13216 int 13217 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, 13218 const xmlChar *ID, xmlNodePtr *lst) { 13219 xmlParserCtxtPtr ctxt; 13220 xmlDocPtr newDoc; 13221 xmlNodePtr newRoot; 13222 xmlSAXHandlerPtr oldsax = NULL; 13223 int ret = 0; 13224 xmlChar start[4]; 13225 xmlCharEncoding enc; 13226 13227 if (ctx == NULL) return(-1); 13228 13229 if (((ctx->depth > xmlEntityDecodingDepthMax) && ((ctx->options & XML_PARSE_HUGE) == 0)) || 13230 (ctx->depth > xmlEntityDecodingDepthHugeMax)) { 13231 return(XML_ERR_ENTITY_LOOP); 13232 } 13233 13234 if (lst != NULL) 13235 *lst = NULL; 13236 if ((URL == NULL) && (ID == NULL)) 13237 return(-1); 13238 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */ 13239 return(-1); 13240 13241 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx); 13242 if (ctxt == NULL) { 13243 return(-1); 13244 } 13245 13246 oldsax = ctxt->sax; 13247 ctxt->sax = ctx->sax; 13248 xmlDetectSAX2(ctxt); 13249 newDoc = xmlNewDoc(BAD_CAST "1.0"); 13250 if (newDoc == NULL) { 13251 xmlFreeParserCtxt(ctxt); 13252 return(-1); 13253 } 13254 newDoc->properties = XML_DOC_INTERNAL; 13255 if (ctx->myDoc->dict) { 13256 newDoc->dict = ctx->myDoc->dict; 13257 xmlDictReference(newDoc->dict); 13258 } 13259 if (ctx->myDoc != NULL) { 13260 newDoc->intSubset = ctx->myDoc->intSubset; 13261 newDoc->extSubset = ctx->myDoc->extSubset; 13262 } 13263 if (ctx->myDoc->URL != NULL) { 13264 newDoc->URL = xmlStrdup(ctx->myDoc->URL); 13265 } 13266 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 13267 if (newRoot == NULL) { 13268 ctxt->sax = oldsax; 13269 xmlFreeParserCtxt(ctxt); 13270 newDoc->intSubset = NULL; 13271 newDoc->extSubset = NULL; 13272 xmlFreeDoc(newDoc); 13273 return(-1); 13274 } 13275 xmlAddChild((xmlNodePtr) newDoc, newRoot); 13276 nodePush(ctxt, 
newDoc->children); 13277 if (ctx->myDoc == NULL) { 13278 ctxt->myDoc = newDoc; 13279 } else { 13280 ctxt->myDoc = ctx->myDoc; 13281 newDoc->children->doc = ctx->myDoc; 13282 } 13283 13284 /* 13285 * Get the 4 first bytes and decode the charset 13286 * if enc != XML_CHAR_ENCODING_NONE 13287 * plug some encoding conversion routines. 13288 */ 13289 GROW 13290 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 13291 start[0] = RAW; 13292 start[1] = NXT(1); 13293 start[2] = NXT(2); 13294 start[3] = NXT(3); 13295 enc = xmlDetectCharEncoding(start, 4); 13296 if (enc != XML_CHAR_ENCODING_NONE) { 13297 xmlSwitchEncoding(ctxt, enc); 13298 } 13299 } 13300 13301 /* 13302 * Parse a possible text declaration first 13303 */ 13304 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 13305 xmlParseTextDecl(ctxt); 13306 /* 13307 * An XML-1.0 document can't reference an entity not XML-1.0 13308 */ 13309 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) && 13310 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) { 13311 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH, 13312 "Version mismatch between document and entity\n"); 13313 } 13314 } 13315 13316 /* 13317 * If the user provided its own SAX callbacks then reuse the 13318 * useData callback field, otherwise the expected setup in a 13319 * DOM builder is to have userData == ctxt 13320 */ 13321 if (ctx->userData == ctx) 13322 ctxt->userData = ctxt; 13323 else 13324 ctxt->userData = ctx->userData; 13325 13326 /* 13327 * Doing validity checking on chunk doesn't make sense 13328 */ 13329 ctxt->instate = XML_PARSER_CONTENT; 13330 ctxt->validate = ctx->validate; 13331 ctxt->valid = ctx->valid; 13332 ctxt->loadsubset = ctx->loadsubset; 13333 ctxt->depth = ctx->depth + 1; 13334 ctxt->replaceEntities = ctx->replaceEntities; 13335 if (ctxt->validate) { 13336 ctxt->vctxt.error = ctx->vctxt.error; 13337 ctxt->vctxt.warning = ctx->vctxt.warning; 13338 } else { 13339 ctxt->vctxt.error = NULL; 13340 ctxt->vctxt.warning = NULL; 
13341 } 13342 ctxt->vctxt.nodeTab = NULL; 13343 ctxt->vctxt.nodeNr = 0; 13344 ctxt->vctxt.nodeMax = 0; 13345 ctxt->vctxt.node = NULL; 13346 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 13347 ctxt->dict = ctx->dict; 13348 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 13349 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 13350 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 13351 ctxt->dictNames = ctx->dictNames; 13352 ctxt->attsDefault = ctx->attsDefault; 13353 ctxt->attsSpecial = ctx->attsSpecial; 13354 ctxt->linenumbers = ctx->linenumbers; 13355 13356 xmlParseContent(ctxt); 13357 13358 ctx->validate = ctxt->validate; 13359 ctx->valid = ctxt->valid; 13360 if ((RAW == '<') && (NXT(1) == '/')) { 13361 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13362 } else if (RAW != 0) { 13363 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13364 } 13365 if (ctxt->node != newDoc->children) { 13366 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13367 } 13368 13369 if (!ctxt->wellFormed) { 13370 if (ctxt->errNo == 0) 13371 ret = 1; 13372 else 13373 ret = ctxt->errNo; 13374 } else { 13375 if (lst != NULL) { 13376 xmlNodePtr cur; 13377 13378 /* 13379 * Return the newly created nodeset after unlinking it from 13380 * they pseudo parent. 
13381 */ 13382 cur = newDoc->children->children; 13383 *lst = cur; 13384 while (cur != NULL) { 13385 cur->parent = NULL; 13386 cur = cur->next; 13387 } 13388 newDoc->children->children = NULL; 13389 } 13390 ret = 0; 13391 } 13392 ctxt->sax = oldsax; 13393 ctxt->dict = NULL; 13394 ctxt->attsDefault = NULL; 13395 ctxt->attsSpecial = NULL; 13396 xmlFreeParserCtxt(ctxt); 13397 newDoc->intSubset = NULL; 13398 newDoc->extSubset = NULL; 13399 xmlFreeDoc(newDoc); 13400 13401 return(ret); 13402 } 13403 13404 /** 13405 * xmlParseExternalEntityPrivate: 13406 * @doc: the document the chunk pertains to 13407 * @oldctxt: the previous parser context if available 13408 * @sax: the SAX handler bloc (possibly NULL) 13409 * @user_data: The user data returned on SAX callbacks (possibly NULL) 13410 * @depth: Used for loop detection, use 0 13411 * @URL: the URL for the entity to load 13412 * @ID: the System ID for the entity to load 13413 * @list: the return value for the set of parsed nodes 13414 * 13415 * Private version of xmlParseExternalEntity() 13416 * 13417 * Returns 0 if the entity is well formed, -1 in case of args problem and 13418 * the parser error code otherwise 13419 */ 13420 13421 static xmlParserErrors 13422 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 13423 xmlSAXHandlerPtr sax, 13424 void *user_data, int depth, const xmlChar *URL, 13425 const xmlChar *ID, xmlNodePtr *list) { 13426 xmlParserCtxtPtr ctxt; 13427 xmlDocPtr newDoc; 13428 xmlNodePtr newRoot; 13429 xmlSAXHandlerPtr oldsax = NULL; 13430 xmlParserErrors ret = XML_ERR_OK; 13431 xmlChar start[4]; 13432 xmlCharEncoding enc; 13433 13434 if (((depth > xmlEntityDecodingDepthMax) && 13435 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) || 13436 (depth > xmlEntityDecodingDepthHugeMax)) { 13437 return(XML_ERR_ENTITY_LOOP); 13438 } 13439 13440 if (list != NULL) 13441 *list = NULL; 13442 if ((URL == NULL) && (ID == NULL)) 13443 return(XML_ERR_INTERNAL_ERROR); 13444 if (doc == 
NULL) 13445 return(XML_ERR_INTERNAL_ERROR); 13446 13447 13448 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt); 13449 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY); 13450 ctxt->userData = ctxt; 13451 if (oldctxt != NULL) { 13452 ctxt->_private = oldctxt->_private; 13453 ctxt->loadsubset = oldctxt->loadsubset; 13454 ctxt->validate = oldctxt->validate; 13455 ctxt->external = oldctxt->external; 13456 ctxt->record_info = oldctxt->record_info; 13457 ctxt->node_seq.maximum = oldctxt->node_seq.maximum; 13458 ctxt->node_seq.length = oldctxt->node_seq.length; 13459 ctxt->node_seq.buffer = oldctxt->node_seq.buffer; 13460 } else { 13461 /* 13462 * Doing validity checking on chunk without context 13463 * doesn't make sense 13464 */ 13465 ctxt->_private = NULL; 13466 ctxt->validate = 0; 13467 ctxt->external = 2; 13468 ctxt->loadsubset = 0; 13469 } 13470 if (sax != NULL) { 13471 oldsax = ctxt->sax; 13472 ctxt->sax = sax; 13473 if (user_data != NULL) 13474 ctxt->userData = user_data; 13475 } 13476 xmlDetectSAX2(ctxt); 13477 newDoc = xmlNewDoc(BAD_CAST "1.0"); 13478 if (newDoc == NULL) { 13479 ctxt->node_seq.maximum = 0; 13480 ctxt->node_seq.length = 0; 13481 ctxt->node_seq.buffer = NULL; 13482 xmlFreeParserCtxt(ctxt); 13483 return(XML_ERR_INTERNAL_ERROR); 13484 } 13485 newDoc->properties = XML_DOC_INTERNAL; 13486 newDoc->intSubset = doc->intSubset; 13487 newDoc->extSubset = doc->extSubset; 13488 newDoc->dict = doc->dict; 13489 xmlDictReference(newDoc->dict); 13490 13491 if (doc->URL != NULL) { 13492 newDoc->URL = xmlStrdup(doc->URL); 13493 } 13494 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 13495 if (newRoot == NULL) { 13496 if (sax != NULL) 13497 ctxt->sax = oldsax; 13498 ctxt->node_seq.maximum = 0; 13499 ctxt->node_seq.length = 0; 13500 ctxt->node_seq.buffer = NULL; 13501 xmlFreeParserCtxt(ctxt); 13502 newDoc->intSubset = NULL; 13503 newDoc->extSubset = NULL; 13504 xmlFreeDoc(newDoc); 13505 return(XML_ERR_INTERNAL_ERROR); 13506 } 
13507 xmlAddChild((xmlNodePtr) newDoc, newRoot); 13508 nodePush(ctxt, newDoc->children); 13509 ctxt->myDoc = doc; 13510 newRoot->doc = doc; 13511 13512 /* 13513 * Get the 4 first bytes and decode the charset 13514 * if enc != XML_CHAR_ENCODING_NONE 13515 * plug some encoding conversion routines. 13516 */ 13517 GROW; 13518 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 13519 start[0] = RAW; 13520 start[1] = NXT(1); 13521 start[2] = NXT(2); 13522 start[3] = NXT(3); 13523 enc = xmlDetectCharEncoding(start, 4); 13524 if (enc != XML_CHAR_ENCODING_NONE) { 13525 xmlSwitchEncoding(ctxt, enc); 13526 } 13527 } 13528 13529 /* 13530 * Parse a possible text declaration first 13531 */ 13532 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 13533 xmlParseTextDecl(ctxt); 13534 } 13535 13536 ctxt->instate = XML_PARSER_CONTENT; 13537 ctxt->depth = depth; 13538 13539 xmlParseContent(ctxt); 13540 13541 if ((RAW == '<') && (NXT(1) == '/')) { 13542 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13543 } else if (RAW != 0) { 13544 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13545 } 13546 if (ctxt->node != newDoc->children) { 13547 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13548 } 13549 13550 if (!ctxt->wellFormed) { 13551 if (ctxt->errNo == 0) 13552 ret = XML_ERR_INTERNAL_ERROR; 13553 else 13554 ret = (xmlParserErrors)ctxt->errNo; 13555 } else { 13556 if (list != NULL) { 13557 xmlNodePtr cur; 13558 13559 /* 13560 * Return the newly created nodeset after unlinking it from 13561 * they pseudo parent. 13562 */ 13563 cur = newDoc->children->children; 13564 *list = cur; 13565 while (cur != NULL) { 13566 cur->parent = NULL; 13567 cur = cur->next; 13568 } 13569 newDoc->children->children = NULL; 13570 } 13571 ret = XML_ERR_OK; 13572 } 13573 13574 /* 13575 * Record in the parent context the number of entities replacement 13576 * done when parsing that reference. 
13577 */ 13578 if (oldctxt != NULL) 13579 oldctxt->nbentities += ctxt->nbentities; 13580 13581 /* 13582 * Also record the size of the entity parsed 13583 */ 13584 if (ctxt->input != NULL && oldctxt != NULL) { 13585 oldctxt->sizeentities += ctxt->input->consumed; 13586 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base); 13587 } 13588 /* 13589 * And record the last error if any 13590 */ 13591 if ((oldctxt != NULL) && (ctxt->lastError.code != XML_ERR_OK)) 13592 xmlCopyError(&ctxt->lastError, &oldctxt->lastError); 13593 13594 if (sax != NULL) 13595 ctxt->sax = oldsax; 13596 if (oldctxt != NULL) { 13597 oldctxt->node_seq.maximum = ctxt->node_seq.maximum; 13598 oldctxt->node_seq.length = ctxt->node_seq.length; 13599 oldctxt->node_seq.buffer = ctxt->node_seq.buffer; 13600 } 13601 ctxt->node_seq.maximum = 0; 13602 ctxt->node_seq.length = 0; 13603 ctxt->node_seq.buffer = NULL; 13604 xmlFreeParserCtxt(ctxt); 13605 newDoc->intSubset = NULL; 13606 newDoc->extSubset = NULL; 13607 xmlFreeDoc(newDoc); 13608 13609 return(ret); 13610 } 13611 13612 #ifdef LIBXML_SAX1_ENABLED 13613 /** 13614 * xmlParseExternalEntity: 13615 * @doc: the document the chunk pertains to 13616 * @sax: the SAX handler bloc (possibly NULL) 13617 * @user_data: The user data returned on SAX callbacks (possibly NULL) 13618 * @depth: Used for loop detection, use 0 13619 * @URL: the URL for the entity to load 13620 * @ID: the System ID for the entity to load 13621 * @lst: the return value for the set of parsed nodes 13622 * 13623 * Parse an external general entity 13624 * An external general parsed entity is well-formed if it matches the 13625 * production labeled extParsedEnt. 13626 * 13627 * [78] extParsedEnt ::= TextDecl? 
content 13628 * 13629 * Returns 0 if the entity is well formed, -1 in case of args problem and 13630 * the parser error code otherwise 13631 */ 13632 13633 int 13634 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data, 13635 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) { 13636 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL, 13637 ID, lst)); 13638 } 13639 13640 /** 13641 * xmlParseBalancedChunkMemory: 13642 * @doc: the document the chunk pertains to 13643 * @sax: the SAX handler bloc (possibly NULL) 13644 * @user_data: The user data returned on SAX callbacks (possibly NULL) 13645 * @depth: Used for loop detection, use 0 13646 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 13647 * @lst: the return value for the set of parsed nodes 13648 * 13649 * Parse a well-balanced chunk of an XML document 13650 * called by the parser 13651 * The allowed sequence for the Well Balanced Chunk is the one defined by 13652 * the content production in the XML grammar: 13653 * 13654 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 13655 * 13656 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 13657 * the parser error code otherwise 13658 */ 13659 13660 int 13661 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax, 13662 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) { 13663 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data, 13664 depth, string, lst, 0 ); 13665 } 13666 #endif /* LIBXML_SAX1_ENABLED */ 13667 13668 /** 13669 * xmlParseBalancedChunkMemoryInternal: 13670 * @oldctxt: the existing parsing context 13671 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 13672 * @user_data: the user data field for the parser context 13673 * @lst: the return value for the set of parsed nodes 13674 * 13675 * 13676 * Parse a well-balanced chunk of an XML document 13677 * called by the parser 
13678 * The allowed sequence for the Well Balanced Chunk is the one defined by 13679 * the content production in the XML grammar: 13680 * 13681 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 13682 * 13683 * Returns XML_ERR_OK if the chunk is well balanced, and the parser 13684 * error code otherwise 13685 * 13686 * In case recover is set to 1, the nodelist will not be empty even if 13687 * the parsed chunk is not well balanced. 13688 */ 13689 static xmlParserErrors 13690 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, 13691 const xmlChar *string, void *user_data, xmlNodePtr *lst) { 13692 xmlParserCtxtPtr ctxt; 13693 xmlDocPtr newDoc = NULL; 13694 xmlNodePtr newRoot; 13695 xmlSAXHandlerPtr oldsax = NULL; 13696 xmlNodePtr content = NULL; 13697 xmlNodePtr last = NULL; 13698 int size; 13699 xmlParserErrors ret = XML_ERR_OK; 13700 #ifdef SAX2 13701 int i; 13702 #endif 13703 13704 if (((oldctxt->depth > xmlEntityDecodingDepthMax) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) || 13705 (oldctxt->depth > xmlEntityDecodingDepthHugeMax)) { 13706 return(XML_ERR_ENTITY_LOOP); 13707 } 13708 13709 13710 if (lst != NULL) 13711 *lst = NULL; 13712 if (string == NULL) 13713 return(XML_ERR_INTERNAL_ERROR); 13714 13715 size = xmlStrlen(string); 13716 13717 ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 13718 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY); 13719 if (user_data != NULL) 13720 ctxt->userData = user_data; 13721 else 13722 ctxt->userData = ctxt; 13723 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 13724 ctxt->dict = oldctxt->dict; 13725 ctxt->input_id = oldctxt->input_id + 1; 13726 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 13727 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 13728 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 13729 13730 #ifdef SAX2 13731 /* propagate namespaces down the entity */ 13732 for (i = 0;i < oldctxt->nsNr;i += 2) { 13733 
nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]); 13734 } 13735 #endif 13736 13737 oldsax = ctxt->sax; 13738 ctxt->sax = oldctxt->sax; 13739 xmlDetectSAX2(ctxt); 13740 ctxt->replaceEntities = oldctxt->replaceEntities; 13741 ctxt->options = oldctxt->options; 13742 13743 ctxt->_private = oldctxt->_private; 13744 if (oldctxt->myDoc == NULL) { 13745 newDoc = xmlNewDoc(BAD_CAST "1.0"); 13746 if (newDoc == NULL) { 13747 ctxt->sax = oldsax; 13748 ctxt->dict = NULL; 13749 xmlFreeParserCtxt(ctxt); 13750 return(XML_ERR_INTERNAL_ERROR); 13751 } 13752 newDoc->properties = XML_DOC_INTERNAL; 13753 newDoc->dict = ctxt->dict; 13754 xmlDictReference(newDoc->dict); 13755 ctxt->myDoc = newDoc; 13756 } else { 13757 ctxt->myDoc = oldctxt->myDoc; 13758 content = ctxt->myDoc->children; 13759 last = ctxt->myDoc->last; 13760 } 13761 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL); 13762 if (newRoot == NULL) { 13763 ctxt->sax = oldsax; 13764 ctxt->dict = NULL; 13765 xmlFreeParserCtxt(ctxt); 13766 if (newDoc != NULL) { 13767 xmlFreeDoc(newDoc); 13768 } 13769 return(XML_ERR_INTERNAL_ERROR); 13770 } 13771 ctxt->myDoc->children = NULL; 13772 ctxt->myDoc->last = NULL; 13773 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot); 13774 nodePush(ctxt, ctxt->myDoc->children); 13775 ctxt->instate = XML_PARSER_CONTENT; 13776 ctxt->depth = oldctxt->depth + 1; 13777 13778 ctxt->validate = 0; 13779 ctxt->loadsubset = oldctxt->loadsubset; 13780 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) { 13781 /* 13782 * ID/IDREF registration will be done in xmlValidateElement below 13783 */ 13784 ctxt->loadsubset |= XML_SKIP_IDS; 13785 } 13786 ctxt->dictNames = oldctxt->dictNames; 13787 ctxt->attsDefault = oldctxt->attsDefault; 13788 ctxt->attsSpecial = oldctxt->attsSpecial; 13789 13790 xmlParseContent(ctxt); 13791 if ((RAW == '<') && (NXT(1) == '/')) { 13792 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13793 } else if (RAW != 0) { 13794 xmlFatalErr(ctxt, 
XML_ERR_EXTRA_CONTENT, NULL); 13795 } 13796 if (ctxt->node != ctxt->myDoc->children) { 13797 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13798 } 13799 13800 if (!ctxt->wellFormed) { 13801 if (ctxt->errNo == 0) 13802 ret = XML_ERR_INTERNAL_ERROR; 13803 else 13804 ret = (xmlParserErrors)ctxt->errNo; 13805 } else { 13806 ret = XML_ERR_OK; 13807 } 13808 13809 if ((lst != NULL) && (ret == XML_ERR_OK)) { 13810 xmlNodePtr cur; 13811 13812 /* 13813 * Return the newly created nodeset after unlinking it from 13814 * they pseudo parent. 13815 */ 13816 cur = ctxt->myDoc->children->children; 13817 *lst = cur; 13818 while (cur != NULL) { 13819 #ifdef LIBXML_VALID_ENABLED 13820 if ((oldctxt->validate) && (oldctxt->wellFormed) && 13821 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) && 13822 (cur->type == XML_ELEMENT_NODE)) { 13823 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt, 13824 oldctxt->myDoc, cur); 13825 } 13826 #endif /* LIBXML_VALID_ENABLED */ 13827 cur->parent = NULL; 13828 cur = cur->next; 13829 } 13830 ctxt->myDoc->children->children = NULL; 13831 } 13832 if (ctxt->myDoc != NULL) { 13833 xmlFreeNode(ctxt->myDoc->children); 13834 ctxt->myDoc->children = content; 13835 ctxt->myDoc->last = last; 13836 } 13837 13838 /* 13839 * Record in the parent context the number of entities replacement 13840 * done when parsing that reference. 
13841 */ 13842 if (oldctxt != NULL) 13843 oldctxt->nbentities += ctxt->nbentities; 13844 13845 /* 13846 * Also record the last error if any 13847 */ 13848 if (ctxt->lastError.code != XML_ERR_OK) 13849 xmlCopyError(&ctxt->lastError, &oldctxt->lastError); 13850 13851 ctxt->sax = oldsax; 13852 ctxt->dict = NULL; 13853 ctxt->attsDefault = NULL; 13854 ctxt->attsSpecial = NULL; 13855 xmlFreeParserCtxt(ctxt); 13856 if (newDoc != NULL) { 13857 xmlFreeDoc(newDoc); 13858 } 13859 13860 return(ret); 13861 } 13862 13863 /** 13864 * xmlParseInNodeContext: 13865 * @node: the context node 13866 * @data: the input string 13867 * @datalen: the input string length in bytes 13868 * @options: a combination of xmlParserOption 13869 * @lst: the return value for the set of parsed nodes 13870 * 13871 * Parse a well-balanced chunk of an XML document 13872 * within the context (DTD, namespaces, etc ...) of the given node. 13873 * 13874 * The allowed sequence for the data is a Well Balanced Chunk defined by 13875 * the content production in the XML grammar: 13876 * 13877 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 13878 * 13879 * Returns XML_ERR_OK if the chunk is well balanced, and the parser 13880 * error code otherwise 13881 */ 13882 xmlParserErrors 13883 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen, 13884 int options, xmlNodePtr *lst) { 13885 #ifdef SAX2 13886 xmlParserCtxtPtr ctxt; 13887 xmlDocPtr doc = NULL; 13888 xmlNodePtr fake, cur; 13889 int nsnr = 0; 13890 13891 xmlParserErrors ret = XML_ERR_OK; 13892 13893 /* 13894 * check all input parameters, grab the document 13895 */ 13896 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0)) 13897 return(XML_ERR_INTERNAL_ERROR); 13898 switch (node->type) { 13899 case XML_ELEMENT_NODE: 13900 case XML_ATTRIBUTE_NODE: 13901 case XML_TEXT_NODE: 13902 case XML_CDATA_SECTION_NODE: 13903 case XML_ENTITY_REF_NODE: 13904 case XML_PI_NODE: 13905 case XML_COMMENT_NODE: 13906 case 
XML_DOCUMENT_NODE: 13907 case XML_HTML_DOCUMENT_NODE: 13908 break; 13909 default: 13910 return(XML_ERR_INTERNAL_ERROR); 13911 13912 } 13913 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) && 13914 (node->type != XML_DOCUMENT_NODE) && 13915 (node->type != XML_HTML_DOCUMENT_NODE)) 13916 node = node->parent; 13917 if (node == NULL) 13918 return(XML_ERR_INTERNAL_ERROR); 13919 if (node->type == XML_ELEMENT_NODE) 13920 doc = node->doc; 13921 else 13922 doc = (xmlDocPtr) node; 13923 if (doc == NULL) 13924 return(XML_ERR_INTERNAL_ERROR); 13925 13926 /* 13927 * allocate a context and set-up everything not related to the 13928 * node position in the tree 13929 */ 13930 if (doc->type == XML_DOCUMENT_NODE) 13931 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen); 13932 #ifdef LIBXML_HTML_ENABLED 13933 else if (doc->type == XML_HTML_DOCUMENT_NODE) { 13934 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen); 13935 /* 13936 * When parsing in context, it makes no sense to add implied 13937 * elements like html/body/etc... 13938 */ 13939 options |= HTML_PARSE_NOIMPLIED; 13940 } 13941 #endif 13942 else 13943 return(XML_ERR_INTERNAL_ERROR); 13944 13945 if (ctxt == NULL) 13946 return(XML_ERR_NO_MEMORY); 13947 13948 /* 13949 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set. 13950 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict 13951 * we must wait until the last moment to free the original one. 
13952 */ 13953 if (doc->dict != NULL) { 13954 if (ctxt->dict != NULL) 13955 xmlDictFree(ctxt->dict); 13956 ctxt->dict = doc->dict; 13957 } else 13958 options |= XML_PARSE_NODICT; 13959 13960 if (doc->encoding != NULL) { 13961 xmlCharEncodingHandlerPtr hdlr; 13962 13963 if (ctxt->encoding != NULL) 13964 xmlFree((xmlChar *) ctxt->encoding); 13965 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding); 13966 13967 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding); 13968 if (hdlr != NULL) { 13969 xmlSwitchToEncoding(ctxt, hdlr); 13970 } else { 13971 return(XML_ERR_UNSUPPORTED_ENCODING); 13972 } 13973 } 13974 13975 xmlCtxtUseOptionsInternal(ctxt, options, NULL); 13976 xmlDetectSAX2(ctxt); 13977 ctxt->myDoc = doc; 13978 /* parsing in context, i.e. as within existing content */ 13979 ctxt->input_id = 2; 13980 ctxt->instate = XML_PARSER_CONTENT; 13981 13982 fake = xmlNewComment(NULL); 13983 if (fake == NULL) { 13984 xmlFreeParserCtxt(ctxt); 13985 return(XML_ERR_NO_MEMORY); 13986 } 13987 xmlAddChild(node, fake); 13988 13989 if (node->type == XML_ELEMENT_NODE) { 13990 nodePush(ctxt, node); 13991 /* 13992 * initialize the SAX2 namespaces stack 13993 */ 13994 cur = node; 13995 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) { 13996 xmlNsPtr ns = cur->nsDef; 13997 const xmlChar *iprefix, *ihref; 13998 13999 while (ns != NULL) { 14000 if (ctxt->dict) { 14001 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1); 14002 ihref = xmlDictLookup(ctxt->dict, ns->href, -1); 14003 } else { 14004 iprefix = ns->prefix; 14005 ihref = ns->href; 14006 } 14007 14008 if (xmlGetNamespace(ctxt, iprefix) == NULL) { 14009 nsPush(ctxt, iprefix, ihref); 14010 nsnr++; 14011 } 14012 ns = ns->next; 14013 } 14014 cur = cur->parent; 14015 } 14016 } 14017 14018 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) { 14019 /* 14020 * ID/IDREF registration will be done in xmlValidateElement below 14021 */ 14022 ctxt->loadsubset |= XML_SKIP_IDS; 14023 } 14024 14025 #ifdef 
LIBXML_HTML_ENABLED 14026 if (doc->type == XML_HTML_DOCUMENT_NODE) 14027 __htmlParseContent(ctxt); 14028 else 14029 #endif 14030 xmlParseContent(ctxt); 14031 14032 nsPop(ctxt, nsnr); 14033 if ((RAW == '<') && (NXT(1) == '/')) { 14034 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 14035 } else if (RAW != 0) { 14036 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 14037 } 14038 if ((ctxt->node != NULL) && (ctxt->node != node)) { 14039 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 14040 ctxt->wellFormed = 0; 14041 } 14042 14043 if (!ctxt->wellFormed) { 14044 if (ctxt->errNo == 0) 14045 ret = XML_ERR_INTERNAL_ERROR; 14046 else 14047 ret = (xmlParserErrors)ctxt->errNo; 14048 } else { 14049 ret = XML_ERR_OK; 14050 } 14051 14052 /* 14053 * Return the newly created nodeset after unlinking it from 14054 * the pseudo sibling. 14055 */ 14056 14057 cur = fake->next; 14058 fake->next = NULL; 14059 node->last = fake; 14060 14061 if (cur != NULL) { 14062 cur->prev = NULL; 14063 } 14064 14065 *lst = cur; 14066 14067 while (cur != NULL) { 14068 cur->parent = NULL; 14069 cur = cur->next; 14070 } 14071 14072 xmlUnlinkNode(fake); 14073 xmlFreeNode(fake); 14074 14075 14076 if (ret != XML_ERR_OK) { 14077 xmlFreeNodeList(*lst); 14078 *lst = NULL; 14079 } 14080 14081 if (doc->dict != NULL) 14082 ctxt->dict = NULL; 14083 xmlFreeParserCtxt(ctxt); 14084 14085 return(ret); 14086 #else /* !SAX2 */ 14087 return(XML_ERR_INTERNAL_ERROR); 14088 #endif 14089 } 14090 14091 #ifdef LIBXML_SAX1_ENABLED 14092 /** 14093 * xmlParseBalancedChunkMemoryRecover: 14094 * @doc: the document the chunk pertains to 14095 * @sax: the SAX handler bloc (possibly NULL) 14096 * @user_data: The user data returned on SAX callbacks (possibly NULL) 14097 * @depth: Used for loop detection, use 0 14098 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 14099 * @lst: the return value for the set of parsed nodes 14100 * @recover: return nodes even if the data is broken (use 0) 14101 * 14102 * 14103 * 
Parse a well-balanced chunk of an XML document 14104 * called by the parser 14105 * The allowed sequence for the Well Balanced Chunk is the one defined by 14106 * the content production in the XML grammar: 14107 * 14108 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 14109 * 14110 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 14111 * the parser error code otherwise 14112 * 14113 * In case recover is set to 1, the nodelist will not be empty even if 14114 * the parsed chunk is not well balanced, assuming the parsing succeeded to 14115 * some extent. 14116 */ 14117 int 14118 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax, 14119 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst, 14120 int recover) { 14121 xmlParserCtxtPtr ctxt; 14122 xmlDocPtr newDoc; 14123 xmlSAXHandlerPtr oldsax = NULL; 14124 xmlNodePtr content, newRoot; 14125 int size; 14126 int ret = 0; 14127 14128 if (depth > xmlEntityDecodingDepthMax) { 14129 return(XML_ERR_ENTITY_LOOP); 14130 } 14131 14132 14133 if (lst != NULL) 14134 *lst = NULL; 14135 if (string == NULL) 14136 return(-1); 14137 14138 size = xmlStrlen(string); 14139 14140 ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 14141 if (ctxt == NULL) return(-1); 14142 ctxt->userData = ctxt; 14143 if (sax != NULL) { 14144 oldsax = ctxt->sax; 14145 ctxt->sax = sax; 14146 if (user_data != NULL) 14147 ctxt->userData = user_data; 14148 } 14149 newDoc = xmlNewDoc(BAD_CAST "1.0"); 14150 if (newDoc == NULL) { 14151 xmlFreeParserCtxt(ctxt); 14152 return(-1); 14153 } 14154 newDoc->properties = XML_DOC_INTERNAL; 14155 if ((doc != NULL) && (doc->dict != NULL)) { 14156 xmlDictFree(ctxt->dict); 14157 ctxt->dict = doc->dict; 14158 xmlDictReference(ctxt->dict); 14159 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 14160 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 14161 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, 
XML_XML_NAMESPACE, 36); 14162 ctxt->dictNames = 1; 14163 } else { 14164 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL); 14165 } 14166 if (doc != NULL) { 14167 newDoc->intSubset = doc->intSubset; 14168 newDoc->extSubset = doc->extSubset; 14169 } 14170 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 14171 if (newRoot == NULL) { 14172 if (sax != NULL) 14173 ctxt->sax = oldsax; 14174 xmlFreeParserCtxt(ctxt); 14175 newDoc->intSubset = NULL; 14176 newDoc->extSubset = NULL; 14177 xmlFreeDoc(newDoc); 14178 return(-1); 14179 } 14180 xmlAddChild((xmlNodePtr) newDoc, newRoot); 14181 nodePush(ctxt, newRoot); 14182 if (doc == NULL) { 14183 ctxt->myDoc = newDoc; 14184 } else { 14185 ctxt->myDoc = newDoc; 14186 newDoc->children->doc = doc; 14187 /* Ensure that doc has XML spec namespace */ 14188 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE); 14189 newDoc->oldNs = doc->oldNs; 14190 } 14191 ctxt->instate = XML_PARSER_CONTENT; 14192 ctxt->input_id = 2; 14193 ctxt->depth = depth; 14194 14195 /* 14196 * Doing validity checking on chunk doesn't make sense 14197 */ 14198 ctxt->validate = 0; 14199 ctxt->loadsubset = 0; 14200 xmlDetectSAX2(ctxt); 14201 14202 if ( doc != NULL ){ 14203 content = doc->children; 14204 doc->children = NULL; 14205 xmlParseContent(ctxt); 14206 doc->children = content; 14207 } 14208 else { 14209 xmlParseContent(ctxt); 14210 } 14211 if ((RAW == '<') && (NXT(1) == '/')) { 14212 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 14213 } else if (RAW != 0) { 14214 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 14215 } 14216 if (ctxt->node != newDoc->children) { 14217 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 14218 } 14219 14220 if (!ctxt->wellFormed) { 14221 if (ctxt->errNo == 0) 14222 ret = 1; 14223 else 14224 ret = ctxt->errNo; 14225 } else { 14226 ret = 0; 14227 } 14228 14229 if ((lst != NULL) && ((ret == 0) || (recover == 1))) { 14230 xmlNodePtr cur; 14231 14232 /* 14233 * Return the newly created nodeset 
after unlinking it from 14234 * they pseudo parent. 14235 */ 14236 cur = newDoc->children->children; 14237 *lst = cur; 14238 while (cur != NULL) { 14239 xmlSetTreeDoc(cur, doc); 14240 cur->parent = NULL; 14241 cur = cur->next; 14242 } 14243 newDoc->children->children = NULL; 14244 } 14245 14246 if (sax != NULL) 14247 ctxt->sax = oldsax; 14248 xmlFreeParserCtxt(ctxt); 14249 newDoc->intSubset = NULL; 14250 newDoc->extSubset = NULL; 14251 newDoc->oldNs = NULL; 14252 xmlFreeDoc(newDoc); 14253 14254 return(ret); 14255 } 14256 14257 /** 14258 * xmlSAXParseEntity: 14259 * @sax: the SAX handler block 14260 * @filename: the filename 14261 * 14262 * parse an XML external entity out of context and build a tree. 14263 * It use the given SAX function block to handle the parsing callback. 14264 * If sax is NULL, fallback to the default DOM tree building routines. 14265 * 14266 * [78] extParsedEnt ::= TextDecl? content 14267 * 14268 * This correspond to a "Well Balanced" chunk 14269 * 14270 * Returns the resulting document tree 14271 */ 14272 14273 xmlDocPtr 14274 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) { 14275 xmlDocPtr ret; 14276 xmlParserCtxtPtr ctxt; 14277 14278 ctxt = xmlCreateFileParserCtxt(filename); 14279 if (ctxt == NULL) { 14280 return(NULL); 14281 } 14282 if (sax != NULL) { 14283 if (ctxt->sax != NULL) 14284 xmlFree(ctxt->sax); 14285 ctxt->sax = sax; 14286 ctxt->userData = NULL; 14287 } 14288 14289 xmlParseExtParsedEnt(ctxt); 14290 14291 if (ctxt->wellFormed) 14292 ret = ctxt->myDoc; 14293 else { 14294 ret = NULL; 14295 xmlFreeDoc(ctxt->myDoc); 14296 ctxt->myDoc = NULL; 14297 } 14298 if (sax != NULL) 14299 ctxt->sax = NULL; 14300 xmlFreeParserCtxt(ctxt); 14301 14302 return(ret); 14303 } 14304 14305 /** 14306 * xmlParseEntity: 14307 * @filename: the filename 14308 * 14309 * parse an XML external entity out of context and build a tree. 14310 * 14311 * [78] extParsedEnt ::= TextDecl? 
content 14312 * 14313 * This correspond to a "Well Balanced" chunk 14314 * 14315 * Returns the resulting document tree 14316 */ 14317 14318 xmlDocPtr 14319 xmlParseEntity(const char *filename) { 14320 return(xmlSAXParseEntity(NULL, filename)); 14321 } 14322 #endif /* LIBXML_SAX1_ENABLED */ 14323 14324 /** 14325 * xmlCreateEntityParserCtxtInternal: 14326 * @URL: the entity URL 14327 * @ID: the entity PUBLIC ID 14328 * @base: a possible base for the target URI 14329 * @pctx: parser context used to set options on new context 14330 * 14331 * Create a parser context for an external entity 14332 * Automatic support for ZLIB/Compress compressed document is provided 14333 * by default if found at compile-time. 14334 * 14335 * Returns the new parser context or NULL 14336 */ 14337 static xmlParserCtxtPtr 14338 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID, 14339 const xmlChar *base, xmlParserCtxtPtr pctx) { 14340 xmlParserCtxtPtr ctxt; 14341 xmlParserInputPtr inputStream; 14342 char *directory = NULL; 14343 xmlChar *uri; 14344 14345 ctxt = xmlNewParserCtxt(); 14346 if (ctxt == NULL) { 14347 return(NULL); 14348 } 14349 14350 if (pctx != NULL) { 14351 ctxt->options = pctx->options; 14352 ctxt->_private = pctx->_private; 14353 /* 14354 * this is a subparser of pctx, so the input_id should be 14355 * incremented to distinguish from main entity 14356 */ 14357 ctxt->input_id = pctx->input_id + 1; 14358 } 14359 14360 uri = xmlBuildURI(URL, base); 14361 14362 if (uri == NULL) { 14363 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt); 14364 if (inputStream == NULL) { 14365 xmlFreeParserCtxt(ctxt); 14366 return(NULL); 14367 } 14368 14369 inputPush(ctxt, inputStream); 14370 14371 if ((ctxt->directory == NULL) && (directory == NULL)) 14372 directory = xmlParserGetDirectory((char *)URL); 14373 if ((ctxt->directory == NULL) && (directory != NULL)) 14374 ctxt->directory = directory; 14375 } else { 14376 inputStream = xmlLoadExternalEntity((char 
*)uri, (char *)ID, ctxt); 14377 if (inputStream == NULL) { 14378 xmlFree(uri); 14379 xmlFreeParserCtxt(ctxt); 14380 return(NULL); 14381 } 14382 14383 inputPush(ctxt, inputStream); 14384 14385 if ((ctxt->directory == NULL) && (directory == NULL)) 14386 directory = xmlParserGetDirectory((char *)uri); 14387 if ((ctxt->directory == NULL) && (directory != NULL)) 14388 ctxt->directory = directory; 14389 xmlFree(uri); 14390 } 14391 return(ctxt); 14392 } 14393 14394 /** 14395 * xmlCreateEntityParserCtxt: 14396 * @URL: the entity URL 14397 * @ID: the entity PUBLIC ID 14398 * @base: a possible base for the target URI 14399 * 14400 * Create a parser context for an external entity 14401 * Automatic support for ZLIB/Compress compressed document is provided 14402 * by default if found at compile-time. 14403 * 14404 * Returns the new parser context or NULL 14405 */ 14406 xmlParserCtxtPtr 14407 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID, 14408 const xmlChar *base) { 14409 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL); 14410 14411 } 14412 14413 /************************************************************************ 14414 * * 14415 * Front ends when parsing from a file * 14416 * * 14417 ************************************************************************/ 14418 14419 /** 14420 * xmlCreateURLParserCtxt: 14421 * @filename: the filename or URL 14422 * @options: a combination of xmlParserOption 14423 * 14424 * Create a parser context for a file or URL content. 
14425 * Automatic support for ZLIB/Compress compressed document is provided 14426 * by default if found at compile-time and for file accesses 14427 * 14428 * Returns the new parser context or NULL 14429 */ 14430 xmlParserCtxtPtr 14431 xmlCreateURLParserCtxt(const char *filename, int options) 14432 { 14433 xmlParserCtxtPtr ctxt; 14434 xmlParserInputPtr inputStream; 14435 char *directory = NULL; 14436 14437 ctxt = xmlNewParserCtxt(); 14438 if (ctxt == NULL) { 14439 xmlErrMemory(NULL, "cannot allocate parser context"); 14440 return(NULL); 14441 } 14442 14443 if (options) 14444 xmlCtxtUseOptionsInternal(ctxt, options, NULL); 14445 ctxt->linenumbers = 1; 14446 14447 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt); 14448 if (inputStream == NULL) { 14449 xmlFreeParserCtxt(ctxt); 14450 return(NULL); 14451 } 14452 14453 inputPush(ctxt, inputStream); 14454 if ((ctxt->directory == NULL) && (directory == NULL)) 14455 directory = xmlParserGetDirectory(filename); 14456 if ((ctxt->directory == NULL) && (directory != NULL)) 14457 ctxt->directory = directory; 14458 14459 return(ctxt); 14460 } 14461 14462 /** 14463 * xmlCreateFileParserCtxt: 14464 * @filename: the filename 14465 * 14466 * Create a parser context for a file content. 14467 * Automatic support for ZLIB/Compress compressed document is provided 14468 * by default if found at compile-time. 14469 * 14470 * Returns the new parser context or NULL 14471 */ 14472 xmlParserCtxtPtr 14473 xmlCreateFileParserCtxt(const char *filename) 14474 { 14475 return(xmlCreateURLParserCtxt(filename, 0)); 14476 } 14477 14478 #ifdef LIBXML_SAX1_ENABLED 14479 /** 14480 * xmlSAXParseFileWithData: 14481 * @sax: the SAX handler block 14482 * @filename: the filename 14483 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14484 * documents 14485 * @data: the userdata 14486 * 14487 * parse an XML file and build a tree. 
Automatic support for ZLIB/Compress 14488 * compressed document is provided by default if found at compile-time. 14489 * It use the given SAX function block to handle the parsing callback. 14490 * If sax is NULL, fallback to the default DOM tree building routines. 14491 * 14492 * User data (void *) is stored within the parser context in the 14493 * context's _private member, so it is available nearly everywhere in libxml 14494 * 14495 * Returns the resulting document tree 14496 */ 14497 14498 xmlDocPtr 14499 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename, 14500 int recovery, void *data) { 14501 xmlDocPtr ret; 14502 xmlParserCtxtPtr ctxt; 14503 14504 xmlInitParser(); 14505 14506 ctxt = xmlCreateFileParserCtxt(filename); 14507 if (ctxt == NULL) { 14508 return(NULL); 14509 } 14510 if (sax != NULL) { 14511 if (ctxt->sax != NULL) 14512 xmlFree(ctxt->sax); 14513 ctxt->sax = sax; 14514 } 14515 xmlDetectSAX2(ctxt); 14516 if (data!=NULL) { 14517 ctxt->_private = data; 14518 } 14519 14520 if (ctxt->directory == NULL) 14521 ctxt->directory = xmlParserGetDirectory(filename); 14522 14523 ctxt->recovery = recovery; 14524 14525 xmlParseDocument(ctxt); 14526 14527 if ((ctxt->wellFormed) || recovery) { 14528 ret = ctxt->myDoc; 14529 if (ret != NULL) { 14530 if (ctxt->input->buf->compressed > 0) 14531 ret->compression = 9; 14532 else 14533 ret->compression = ctxt->input->buf->compressed; 14534 } 14535 } 14536 else { 14537 ret = NULL; 14538 xmlFreeDoc(ctxt->myDoc); 14539 ctxt->myDoc = NULL; 14540 } 14541 if (sax != NULL) 14542 ctxt->sax = NULL; 14543 xmlFreeParserCtxt(ctxt); 14544 14545 return(ret); 14546 } 14547 14548 /** 14549 * xmlSAXParseFile: 14550 * @sax: the SAX handler block 14551 * @filename: the filename 14552 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14553 * documents 14554 * 14555 * parse an XML file and build a tree. 
Automatic support for ZLIB/Compress 14556 * compressed document is provided by default if found at compile-time. 14557 * It use the given SAX function block to handle the parsing callback. 14558 * If sax is NULL, fallback to the default DOM tree building routines. 14559 * 14560 * Returns the resulting document tree 14561 */ 14562 14563 xmlDocPtr 14564 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename, 14565 int recovery) { 14566 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL)); 14567 } 14568 14569 /** 14570 * xmlRecoverDoc: 14571 * @cur: a pointer to an array of xmlChar 14572 * 14573 * parse an XML in-memory document and build a tree. 14574 * In the case the document is not Well Formed, a attempt to build a 14575 * tree is tried anyway 14576 * 14577 * Returns the resulting document tree or NULL in case of failure 14578 */ 14579 14580 xmlDocPtr 14581 xmlRecoverDoc(const xmlChar *cur) { 14582 return(xmlSAXParseDoc(NULL, cur, 1)); 14583 } 14584 14585 /** 14586 * xmlParseFile: 14587 * @filename: the filename 14588 * 14589 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 14590 * compressed document is provided by default if found at compile-time. 14591 * 14592 * Returns the resulting document tree if the file was wellformed, 14593 * NULL otherwise. 14594 */ 14595 14596 xmlDocPtr 14597 xmlParseFile(const char *filename) { 14598 return(xmlSAXParseFile(NULL, filename, 0)); 14599 } 14600 14601 /** 14602 * xmlRecoverFile: 14603 * @filename: the filename 14604 * 14605 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 14606 * compressed document is provided by default if found at compile-time. 
14607 * In the case the document is not Well Formed, it attempts to build 14608 * a tree anyway 14609 * 14610 * Returns the resulting document tree or NULL in case of failure 14611 */ 14612 14613 xmlDocPtr 14614 xmlRecoverFile(const char *filename) { 14615 return(xmlSAXParseFile(NULL, filename, 1)); 14616 } 14617 14618 14619 /** 14620 * xmlSetupParserForBuffer: 14621 * @ctxt: an XML parser context 14622 * @buffer: a xmlChar * buffer 14623 * @filename: a file name 14624 * 14625 * Setup the parser context to parse a new buffer; Clears any prior 14626 * contents from the parser context. The buffer parameter must not be 14627 * NULL, but the filename parameter can be 14628 */ 14629 void 14630 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer, 14631 const char* filename) 14632 { 14633 xmlParserInputPtr input; 14634 14635 if ((ctxt == NULL) || (buffer == NULL)) 14636 return; 14637 14638 input = xmlNewInputStream(ctxt); 14639 if (input == NULL) { 14640 xmlErrMemory(NULL, "parsing new buffer: out of memory\n"); 14641 xmlClearParserCtxt(ctxt); 14642 return; 14643 } 14644 14645 xmlClearParserCtxt(ctxt); 14646 if (filename != NULL) 14647 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename); 14648 input->base = buffer; 14649 input->cur = buffer; 14650 input->end = &buffer[xmlStrlen(buffer)]; 14651 inputPush(ctxt, input); 14652 } 14653 14654 /** 14655 * xmlSAXUserParseFile: 14656 * @sax: a SAX handler 14657 * @user_data: The user data returned on SAX callbacks 14658 * @filename: a file name 14659 * 14660 * parse an XML file and call the given SAX handler routines. 
14661 * Automatic support for ZLIB/Compress compressed document is provided 14662 * 14663 * Returns 0 in case of success or a error number otherwise 14664 */ 14665 int 14666 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data, 14667 const char *filename) { 14668 int ret = 0; 14669 xmlParserCtxtPtr ctxt; 14670 14671 ctxt = xmlCreateFileParserCtxt(filename); 14672 if (ctxt == NULL) return -1; 14673 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 14674 xmlFree(ctxt->sax); 14675 ctxt->sax = sax; 14676 xmlDetectSAX2(ctxt); 14677 14678 if (user_data != NULL) 14679 ctxt->userData = user_data; 14680 14681 xmlParseDocument(ctxt); 14682 14683 if (ctxt->wellFormed) 14684 ret = 0; 14685 else { 14686 if (ctxt->errNo != 0) 14687 ret = ctxt->errNo; 14688 else 14689 ret = -1; 14690 } 14691 if (sax != NULL) 14692 ctxt->sax = NULL; 14693 if (ctxt->myDoc != NULL) { 14694 xmlFreeDoc(ctxt->myDoc); 14695 ctxt->myDoc = NULL; 14696 } 14697 xmlFreeParserCtxt(ctxt); 14698 14699 return ret; 14700 } 14701 #endif /* LIBXML_SAX1_ENABLED */ 14702 14703 /************************************************************************ 14704 * * 14705 * Front ends when parsing from memory * 14706 * * 14707 ************************************************************************/ 14708 14709 /** 14710 * xmlCreateMemoryParserCtxt: 14711 * @buffer: a pointer to a char array 14712 * @size: the size of the array 14713 * 14714 * Create a parser context for an XML in-memory document. 
14715 * 14716 * Returns the new parser context or NULL 14717 */ 14718 xmlParserCtxtPtr 14719 xmlCreateMemoryParserCtxt(const char *buffer, int size) { 14720 xmlParserCtxtPtr ctxt; 14721 xmlParserInputPtr input; 14722 xmlParserInputBufferPtr buf; 14723 14724 if (buffer == NULL) 14725 return(NULL); 14726 if (size <= 0) 14727 return(NULL); 14728 14729 ctxt = xmlNewParserCtxt(); 14730 if (ctxt == NULL) 14731 return(NULL); 14732 14733 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */ 14734 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 14735 if (buf == NULL) { 14736 xmlFreeParserCtxt(ctxt); 14737 return(NULL); 14738 } 14739 14740 input = xmlNewInputStream(ctxt); 14741 if (input == NULL) { 14742 xmlFreeParserInputBuffer(buf); 14743 xmlFreeParserCtxt(ctxt); 14744 return(NULL); 14745 } 14746 14747 input->filename = NULL; 14748 input->buf = buf; 14749 xmlBufResetInput(input->buf->buffer, input); 14750 14751 inputPush(ctxt, input); 14752 return(ctxt); 14753 } 14754 14755 #ifdef LIBXML_SAX1_ENABLED 14756 /** 14757 * xmlSAXParseMemoryWithData: 14758 * @sax: the SAX handler block 14759 * @buffer: an pointer to a char array 14760 * @size: the size of the array 14761 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14762 * documents 14763 * @data: the userdata 14764 * 14765 * parse an XML in-memory block and use the given SAX function block 14766 * to handle the parsing callback. If sax is NULL, fallback to the default 14767 * DOM tree building routines. 
14768 * 14769 * User data (void *) is stored within the parser context in the 14770 * context's _private member, so it is available nearly everywhere in libxml 14771 * 14772 * Returns the resulting document tree 14773 */ 14774 14775 xmlDocPtr 14776 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer, 14777 int size, int recovery, void *data) { 14778 xmlDocPtr ret; 14779 xmlParserCtxtPtr ctxt; 14780 14781 xmlInitParser(); 14782 14783 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 14784 if (ctxt == NULL) return(NULL); 14785 if (sax != NULL) { 14786 if (ctxt->sax != NULL) 14787 xmlFree(ctxt->sax); 14788 ctxt->sax = sax; 14789 } 14790 xmlDetectSAX2(ctxt); 14791 if (data!=NULL) { 14792 ctxt->_private=data; 14793 } 14794 14795 ctxt->recovery = recovery; 14796 14797 xmlParseDocument(ctxt); 14798 14799 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 14800 else { 14801 ret = NULL; 14802 xmlFreeDoc(ctxt->myDoc); 14803 ctxt->myDoc = NULL; 14804 } 14805 if (sax != NULL) 14806 ctxt->sax = NULL; 14807 xmlFreeParserCtxt(ctxt); 14808 14809 return(ret); 14810 } 14811 14812 /** 14813 * xmlSAXParseMemory: 14814 * @sax: the SAX handler block 14815 * @buffer: an pointer to a char array 14816 * @size: the size of the array 14817 * @recovery: work in recovery mode, i.e. tries to read not Well Formed 14818 * documents 14819 * 14820 * parse an XML in-memory block and use the given SAX function block 14821 * to handle the parsing callback. If sax is NULL, fallback to the default 14822 * DOM tree building routines. 14823 * 14824 * Returns the resulting document tree 14825 */ 14826 xmlDocPtr 14827 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer, 14828 int size, int recovery) { 14829 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL); 14830 } 14831 14832 /** 14833 * xmlParseMemory: 14834 * @buffer: an pointer to a char array 14835 * @size: the size of the array 14836 * 14837 * parse an XML in-memory block and build a tree. 
14838 * 14839 * Returns the resulting document tree 14840 */ 14841 14842 xmlDocPtr xmlParseMemory(const char *buffer, int size) { 14843 return(xmlSAXParseMemory(NULL, buffer, size, 0)); 14844 } 14845 14846 /** 14847 * xmlRecoverMemory: 14848 * @buffer: an pointer to a char array 14849 * @size: the size of the array 14850 * 14851 * parse an XML in-memory block and build a tree. 14852 * In the case the document is not Well Formed, an attempt to 14853 * build a tree is tried anyway 14854 * 14855 * Returns the resulting document tree or NULL in case of error 14856 */ 14857 14858 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) { 14859 return(xmlSAXParseMemory(NULL, buffer, size, 1)); 14860 } 14861 14862 /** 14863 * xmlSAXUserParseMemory: 14864 * @sax: a SAX handler 14865 * @user_data: The user data returned on SAX callbacks 14866 * @buffer: an in-memory XML document input 14867 * @size: the length of the XML document in bytes 14868 * 14869 * A better SAX parsing routine. 14870 * parse an XML in-memory buffer and call the given SAX handler routines. 
14871 * 14872 * Returns 0 in case of success or a error number otherwise 14873 */ 14874 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data, 14875 const char *buffer, int size) { 14876 int ret = 0; 14877 xmlParserCtxtPtr ctxt; 14878 14879 xmlInitParser(); 14880 14881 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 14882 if (ctxt == NULL) return -1; 14883 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 14884 xmlFree(ctxt->sax); 14885 ctxt->sax = sax; 14886 xmlDetectSAX2(ctxt); 14887 14888 if (user_data != NULL) 14889 ctxt->userData = user_data; 14890 14891 xmlParseDocument(ctxt); 14892 14893 if (ctxt->wellFormed) 14894 ret = 0; 14895 else { 14896 if (ctxt->errNo != 0) 14897 ret = ctxt->errNo; 14898 else 14899 ret = -1; 14900 } 14901 if (sax != NULL) 14902 ctxt->sax = NULL; 14903 if (ctxt->myDoc != NULL) { 14904 xmlFreeDoc(ctxt->myDoc); 14905 ctxt->myDoc = NULL; 14906 } 14907 xmlFreeParserCtxt(ctxt); 14908 14909 return ret; 14910 } 14911 #endif /* LIBXML_SAX1_ENABLED */ 14912 14913 /** 14914 * xmlCreateDocParserCtxt: 14915 * @cur: a pointer to an array of xmlChar 14916 * 14917 * Creates a parser context for an XML in-memory document. 14918 * 14919 * Returns the new parser context or NULL 14920 */ 14921 xmlParserCtxtPtr 14922 xmlCreateDocParserCtxt(const xmlChar *cur) { 14923 int len; 14924 14925 if (cur == NULL) 14926 return(NULL); 14927 len = xmlStrlen(cur); 14928 return(xmlCreateMemoryParserCtxt((const char *)cur, len)); 14929 } 14930 14931 #ifdef LIBXML_SAX1_ENABLED 14932 /** 14933 * xmlSAXParseDoc: 14934 * @sax: the SAX handler block 14935 * @cur: a pointer to an array of xmlChar 14936 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14937 * documents 14938 * 14939 * parse an XML in-memory document and build a tree. 14940 * It use the given SAX function block to handle the parsing callback. 14941 * If sax is NULL, fallback to the default DOM tree building routines. 
14942 * 14943 * Returns the resulting document tree 14944 */ 14945 14946 xmlDocPtr 14947 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) { 14948 xmlDocPtr ret; 14949 xmlParserCtxtPtr ctxt; 14950 xmlSAXHandlerPtr oldsax = NULL; 14951 14952 if (cur == NULL) return(NULL); 14953 14954 14955 ctxt = xmlCreateDocParserCtxt(cur); 14956 if (ctxt == NULL) return(NULL); 14957 if (sax != NULL) { 14958 oldsax = ctxt->sax; 14959 ctxt->sax = sax; 14960 ctxt->userData = NULL; 14961 } 14962 xmlDetectSAX2(ctxt); 14963 14964 xmlParseDocument(ctxt); 14965 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 14966 else { 14967 ret = NULL; 14968 xmlFreeDoc(ctxt->myDoc); 14969 ctxt->myDoc = NULL; 14970 } 14971 if (sax != NULL) 14972 ctxt->sax = oldsax; 14973 xmlFreeParserCtxt(ctxt); 14974 14975 return(ret); 14976 } 14977 14978 /** 14979 * xmlParseDoc: 14980 * @cur: a pointer to an array of xmlChar 14981 * 14982 * parse an XML in-memory document and build a tree. 14983 * 14984 * Returns the resulting document tree 14985 */ 14986 14987 xmlDocPtr 14988 xmlParseDoc(const xmlChar *cur) { 14989 return(xmlSAXParseDoc(NULL, cur, 0)); 14990 } 14991 #endif /* LIBXML_SAX1_ENABLED */ 14992 14993 #ifdef LIBXML_LEGACY_ENABLED 14994 /************************************************************************ 14995 * * 14996 * Specific function to keep track of entities references * 14997 * and used by the XSLT debugger * 14998 * * 14999 ************************************************************************/ 15000 15001 static xmlEntityReferenceFunc xmlEntityRefFunc = NULL; 15002 15003 /** 15004 * xmlAddEntityReference: 15005 * @ent : A valid entity 15006 * @firstNode : A valid first node for children of entity 15007 * @lastNode : A valid last node of children entity 15008 * 15009 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY 15010 */ 15011 static void 15012 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 15013 xmlNodePtr lastNode) 
15014 { 15015 if (xmlEntityRefFunc != NULL) { 15016 (*xmlEntityRefFunc) (ent, firstNode, lastNode); 15017 } 15018 } 15019 15020 15021 /** 15022 * xmlSetEntityReferenceFunc: 15023 * @func: A valid function 15024 * 15025 * Set the function to call call back when a xml reference has been made 15026 */ 15027 void 15028 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func) 15029 { 15030 xmlEntityRefFunc = func; 15031 } 15032 #endif /* LIBXML_LEGACY_ENABLED */ 15033 15034 /************************************************************************ 15035 * * 15036 * Miscellaneous * 15037 * * 15038 ************************************************************************/ 15039 15040 #ifdef LIBXML_XPATH_ENABLED 15041 #include <libxml/xpath.h> 15042 #endif 15043 15044 extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...); 15045 static int xmlParserInitialized = 0; 15046 15047 /** 15048 * xmlInitParser: 15049 * 15050 * Initialization function for the XML parser. 15051 * This is not reentrant. Call once before processing in case of 15052 * use in multithreaded programs. 
15053 */ 15054 15055 void 15056 xmlInitParser(void) { 15057 if (xmlParserInitialized != 0) 15058 return; 15059 15060 #ifdef LIBXML_THREAD_ENABLED 15061 __xmlGlobalInitMutexLock(); 15062 if (xmlParserInitialized == 0) { 15063 #endif 15064 xmlInitThreads(); 15065 xmlInitGlobals(); 15066 if ((xmlGenericError == xmlGenericErrorDefaultFunc) || 15067 (xmlGenericError == NULL)) 15068 initGenericErrorDefaultFunc(NULL); 15069 xmlInitMemory(); 15070 xmlInitializeDict(); 15071 xmlInitCharEncodingHandlers(); 15072 xmlDefaultSAXHandlerInit(); 15073 xmlRegisterDefaultInputCallbacks(); 15074 #ifdef LIBXML_OUTPUT_ENABLED 15075 xmlRegisterDefaultOutputCallbacks(); 15076 #endif /* LIBXML_OUTPUT_ENABLED */ 15077 #ifdef LIBXML_HTML_ENABLED 15078 htmlInitAutoClose(); 15079 htmlDefaultSAXHandlerInit(); 15080 #endif 15081 #ifdef LIBXML_XPATH_ENABLED 15082 xmlXPathInit(); 15083 #endif 15084 xmlParserInitialized = 1; 15085 #ifdef LIBXML_THREAD_ENABLED 15086 } 15087 __xmlGlobalInitMutexUnlock(); 15088 #endif 15089 } 15090 15091 /** 15092 * xmlCleanupParser: 15093 * 15094 * This function name is somewhat misleading. It does not clean up 15095 * parser state, it cleans up memory allocated by the library itself. 15096 * It is a cleanup function for the XML library. It tries to reclaim all 15097 * related global memory allocated for the library processing. 15098 * It doesn't deallocate any document related memory. One should 15099 * call xmlCleanupParser() only when the process has finished using 15100 * the library and all XML/HTML documents built with it. 15101 * See also xmlInitParser() which has the opposite function of preparing 15102 * the library for operations. 15103 * 15104 * WARNING: if your application is multithreaded or has plugin support 15105 * calling this may crash the application if another thread or 15106 * a plugin is still using libxml2. 
It's sometimes very hard to 15107 * guess if libxml2 is in use in the application, some libraries 15108 * or plugins may use it without notice. In case of doubt abstain 15109 * from calling this function or do it just before calling exit() 15110 * to avoid leak reports from valgrind ! 15111 */ 15112 15113 void 15114 xmlCleanupParser(void) { 15115 if (!xmlParserInitialized) 15116 return; 15117 15118 xmlCleanupCharEncodingHandlers(); 15119 #ifdef LIBXML_CATALOG_ENABLED 15120 xmlCatalogCleanup(); 15121 #endif 15122 xmlDictCleanup(); 15123 xmlCleanupInputCallbacks(); 15124 #ifdef LIBXML_OUTPUT_ENABLED 15125 xmlCleanupOutputCallbacks(); 15126 #endif 15127 #ifdef LIBXML_SCHEMAS_ENABLED 15128 xmlSchemaCleanupTypes(); 15129 xmlRelaxNGCleanupTypes(); 15130 #endif 15131 xmlResetLastError(); 15132 xmlCleanupGlobals(); 15133 xmlCleanupThreads(); /* must be last if called not from the main thread */ 15134 xmlCleanupMemory(); 15135 xmlParserInitialized = 0; 15136 } 15137 15138 /************************************************************************ 15139 * * 15140 * New set (2.6.0) of simpler and more flexible APIs * 15141 * * 15142 ************************************************************************/ 15143 15144 /** 15145 * DICT_FREE: 15146 * @str: a string 15147 * 15148 * Free a string if it is not owned by the "dict" dictionary in the 15149 * current scope 15150 */ 15151 #define DICT_FREE(str) \ 15152 if ((str) && ((!dict) || \ 15153 (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \ 15154 xmlFree((char *)(str)); 15155 15156 /** 15157 * xmlCtxtReset: 15158 * @ctxt: an XML parser context 15159 * 15160 * Reset a parser context 15161 */ 15162 void 15163 xmlCtxtReset(xmlParserCtxtPtr ctxt) 15164 { 15165 xmlParserInputPtr input; 15166 xmlDictPtr dict; 15167 15168 if (ctxt == NULL) 15169 return; 15170 15171 dict = ctxt->dict; 15172 15173 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 15174 xmlFreeInputStream(input); 15175 } 15176 ctxt->inputNr = 0; 15177 
ctxt->input = NULL; 15178 15179 ctxt->spaceNr = 0; 15180 if (ctxt->spaceTab != NULL) { 15181 ctxt->spaceTab[0] = -1; 15182 ctxt->space = &ctxt->spaceTab[0]; 15183 } else { 15184 ctxt->space = NULL; 15185 } 15186 15187 15188 ctxt->nodeNr = 0; 15189 ctxt->node = NULL; 15190 15191 ctxt->nameNr = 0; 15192 ctxt->name = NULL; 15193 15194 DICT_FREE(ctxt->version); 15195 ctxt->version = NULL; 15196 DICT_FREE(ctxt->encoding); 15197 ctxt->encoding = NULL; 15198 DICT_FREE(ctxt->directory); 15199 ctxt->directory = NULL; 15200 DICT_FREE(ctxt->extSubURI); 15201 ctxt->extSubURI = NULL; 15202 DICT_FREE(ctxt->extSubSystem); 15203 ctxt->extSubSystem = NULL; 15204 if (ctxt->myDoc != NULL) 15205 xmlFreeDoc(ctxt->myDoc); 15206 ctxt->myDoc = NULL; 15207 15208 ctxt->standalone = -1; 15209 ctxt->hasExternalSubset = 0; 15210 ctxt->hasPErefs = 0; 15211 ctxt->html = 0; 15212 ctxt->external = 0; 15213 ctxt->instate = XML_PARSER_START; 15214 ctxt->token = 0; 15215 15216 ctxt->wellFormed = 1; 15217 ctxt->nsWellFormed = 1; 15218 ctxt->disableSAX = 0; 15219 ctxt->valid = 1; 15220 #if 0 15221 ctxt->vctxt.userData = ctxt; 15222 ctxt->vctxt.error = xmlParserValidityError; 15223 ctxt->vctxt.warning = xmlParserValidityWarning; 15224 #endif 15225 ctxt->record_info = 0; 15226 ctxt->nbChars = 0; 15227 ctxt->checkIndex = 0; 15228 ctxt->inSubset = 0; 15229 ctxt->errNo = XML_ERR_OK; 15230 ctxt->depth = 0; 15231 ctxt->charset = XML_CHAR_ENCODING_UTF8; 15232 ctxt->catalogs = NULL; 15233 ctxt->nbentities = 0; 15234 ctxt->sizeentities = 0; 15235 ctxt->sizeentcopy = 0; 15236 xmlInitNodeInfoSeq(&ctxt->node_seq); 15237 15238 if (ctxt->attsDefault != NULL) { 15239 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree); 15240 ctxt->attsDefault = NULL; 15241 } 15242 if (ctxt->attsSpecial != NULL) { 15243 xmlHashFree(ctxt->attsSpecial, NULL); 15244 ctxt->attsSpecial = NULL; 15245 } 15246 15247 #ifdef LIBXML_CATALOG_ENABLED 15248 if (ctxt->catalogs != NULL) 15249 xmlCatalogFreeLocal(ctxt->catalogs); 15250 #endif 
15251 if (ctxt->lastError.code != XML_ERR_OK) 15252 xmlResetError(&ctxt->lastError); 15253 } 15254 15255 /** 15256 * xmlCtxtResetPush: 15257 * @ctxt: an XML parser context 15258 * @chunk: a pointer to an array of chars 15259 * @size: number of chars in the array 15260 * @filename: an optional file name or URI 15261 * @encoding: the document encoding, or NULL 15262 * 15263 * Reset a push parser context 15264 * 15265 * Returns 0 in case of success and 1 in case of error 15266 */ 15267 int 15268 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk, 15269 int size, const char *filename, const char *encoding) 15270 { 15271 xmlParserInputPtr inputStream; 15272 xmlParserInputBufferPtr buf; 15273 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 15274 15275 if (ctxt == NULL) 15276 return(1); 15277 15278 if ((encoding == NULL) && (chunk != NULL) && (size >= 4)) 15279 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 15280 15281 buf = xmlAllocParserInputBuffer(enc); 15282 if (buf == NULL) 15283 return(1); 15284 15285 if (ctxt == NULL) { 15286 xmlFreeParserInputBuffer(buf); 15287 return(1); 15288 } 15289 15290 xmlCtxtReset(ctxt); 15291 15292 if (ctxt->pushTab == NULL) { 15293 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * 15294 sizeof(xmlChar *)); 15295 if (ctxt->pushTab == NULL) { 15296 xmlErrMemory(ctxt, NULL); 15297 xmlFreeParserInputBuffer(buf); 15298 return(1); 15299 } 15300 } 15301 15302 if (filename == NULL) { 15303 ctxt->directory = NULL; 15304 } else { 15305 ctxt->directory = xmlParserGetDirectory(filename); 15306 } 15307 15308 inputStream = xmlNewInputStream(ctxt); 15309 if (inputStream == NULL) { 15310 xmlFreeParserInputBuffer(buf); 15311 return(1); 15312 } 15313 15314 if (filename == NULL) 15315 inputStream->filename = NULL; 15316 else 15317 inputStream->filename = (char *) 15318 xmlCanonicPath((const xmlChar *) filename); 15319 inputStream->buf = buf; 15320 xmlBufResetInput(buf->buffer, inputStream); 15321 15322 inputPush(ctxt, inputStream); 
15323 15324 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 15325 (ctxt->input->buf != NULL)) { 15326 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); 15327 size_t cur = ctxt->input->cur - ctxt->input->base; 15328 15329 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 15330 15331 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); 15332 #ifdef DEBUG_PUSH 15333 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 15334 #endif 15335 } 15336 15337 if (encoding != NULL) { 15338 xmlCharEncodingHandlerPtr hdlr; 15339 15340 if (ctxt->encoding != NULL) 15341 xmlFree((xmlChar *) ctxt->encoding); 15342 ctxt->encoding = xmlStrdup((const xmlChar *) encoding); 15343 15344 hdlr = xmlFindCharEncodingHandler(encoding); 15345 if (hdlr != NULL) { 15346 xmlSwitchToEncoding(ctxt, hdlr); 15347 } else { 15348 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 15349 "Unsupported encoding %s\n", BAD_CAST encoding); 15350 } 15351 } else if (enc != XML_CHAR_ENCODING_NONE) { 15352 xmlSwitchEncoding(ctxt, enc); 15353 } 15354 15355 return(0); 15356 } 15357 15358 15359 /** 15360 * xmlCtxtUseOptionsInternal: 15361 * @ctxt: an XML parser context 15362 * @options: a combination of xmlParserOption 15363 * @encoding: the user provided encoding to use 15364 * 15365 * Applies the options to the parser context 15366 * 15367 * Returns 0 in case of success, the set of unknown or unimplemented options 15368 * in case of error. 
15369 */ 15370 static int 15371 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding) 15372 { 15373 if (ctxt == NULL) 15374 return(-1); 15375 if (encoding != NULL) { 15376 if (ctxt->encoding != NULL) 15377 xmlFree((xmlChar *) ctxt->encoding); 15378 ctxt->encoding = xmlStrdup((const xmlChar *) encoding); 15379 } 15380 if (options & XML_PARSE_RECOVER) { 15381 ctxt->recovery = 1; 15382 options -= XML_PARSE_RECOVER; 15383 ctxt->options |= XML_PARSE_RECOVER; 15384 } else 15385 ctxt->recovery = 0; 15386 if (options & XML_PARSE_DTDLOAD) { 15387 ctxt->loadsubset = XML_DETECT_IDS; 15388 options -= XML_PARSE_DTDLOAD; 15389 ctxt->options |= XML_PARSE_DTDLOAD; 15390 } else 15391 ctxt->loadsubset = 0; 15392 if (options & XML_PARSE_DTDATTR) { 15393 ctxt->loadsubset |= XML_COMPLETE_ATTRS; 15394 options -= XML_PARSE_DTDATTR; 15395 ctxt->options |= XML_PARSE_DTDATTR; 15396 } 15397 if (options & XML_PARSE_NOENT) { 15398 ctxt->replaceEntities = 1; 15399 /* ctxt->loadsubset |= XML_DETECT_IDS; */ 15400 options -= XML_PARSE_NOENT; 15401 ctxt->options |= XML_PARSE_NOENT; 15402 } else 15403 ctxt->replaceEntities = 0; 15404 if (options & XML_PARSE_PEDANTIC) { 15405 ctxt->pedantic = 1; 15406 options -= XML_PARSE_PEDANTIC; 15407 ctxt->options |= XML_PARSE_PEDANTIC; 15408 } else 15409 ctxt->pedantic = 0; 15410 if (options & XML_PARSE_NOBLANKS) { 15411 ctxt->keepBlanks = 0; 15412 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace; 15413 options -= XML_PARSE_NOBLANKS; 15414 ctxt->options |= XML_PARSE_NOBLANKS; 15415 } else 15416 ctxt->keepBlanks = 1; 15417 if (options & XML_PARSE_DTDVALID) { 15418 ctxt->validate = 1; 15419 if (options & XML_PARSE_NOWARNING) 15420 ctxt->vctxt.warning = NULL; 15421 if (options & XML_PARSE_NOERROR) 15422 ctxt->vctxt.error = NULL; 15423 options -= XML_PARSE_DTDVALID; 15424 ctxt->options |= XML_PARSE_DTDVALID; 15425 } else 15426 ctxt->validate = 0; 15427 if (options & XML_PARSE_NOWARNING) { 15428 ctxt->sax->warning = NULL; 
15429 options -= XML_PARSE_NOWARNING; 15430 } 15431 if (options & XML_PARSE_NOERROR) { 15432 ctxt->sax->error = NULL; 15433 ctxt->sax->fatalError = NULL; 15434 options -= XML_PARSE_NOERROR; 15435 } 15436 #ifdef LIBXML_SAX1_ENABLED 15437 if (options & XML_PARSE_SAX1) { 15438 ctxt->sax->startElement = xmlSAX2StartElement; 15439 ctxt->sax->endElement = xmlSAX2EndElement; 15440 ctxt->sax->startElementNs = NULL; 15441 ctxt->sax->endElementNs = NULL; 15442 ctxt->sax->initialized = 1; 15443 options -= XML_PARSE_SAX1; 15444 ctxt->options |= XML_PARSE_SAX1; 15445 } 15446 #endif /* LIBXML_SAX1_ENABLED */ 15447 if (options & XML_PARSE_NODICT) { 15448 ctxt->dictNames = 0; 15449 options -= XML_PARSE_NODICT; 15450 ctxt->options |= XML_PARSE_NODICT; 15451 } else { 15452 ctxt->dictNames = 1; 15453 } 15454 if (options & XML_PARSE_NOCDATA) { 15455 ctxt->sax->cdataBlock = NULL; 15456 options -= XML_PARSE_NOCDATA; 15457 ctxt->options |= XML_PARSE_NOCDATA; 15458 } 15459 if (options & XML_PARSE_NSCLEAN) { 15460 ctxt->options |= XML_PARSE_NSCLEAN; 15461 options -= XML_PARSE_NSCLEAN; 15462 } 15463 if (options & XML_PARSE_NONET) { 15464 ctxt->options |= XML_PARSE_NONET; 15465 options -= XML_PARSE_NONET; 15466 } 15467 if (options & XML_PARSE_COMPACT) { 15468 ctxt->options |= XML_PARSE_COMPACT; 15469 options -= XML_PARSE_COMPACT; 15470 } 15471 if (options & XML_PARSE_OLD10) { 15472 ctxt->options |= XML_PARSE_OLD10; 15473 options -= XML_PARSE_OLD10; 15474 } 15475 if (options & XML_PARSE_NOBASEFIX) { 15476 ctxt->options |= XML_PARSE_NOBASEFIX; 15477 options -= XML_PARSE_NOBASEFIX; 15478 } 15479 if (options & XML_PARSE_HUGE) { 15480 ctxt->options |= XML_PARSE_HUGE; 15481 options -= XML_PARSE_HUGE; 15482 if (ctxt->dict != NULL) 15483 xmlDictSetLimit(ctxt->dict, 0); 15484 } 15485 if (options & XML_PARSE_OLDSAX) { 15486 ctxt->options |= XML_PARSE_OLDSAX; 15487 options -= XML_PARSE_OLDSAX; 15488 } 15489 if (options & XML_PARSE_IGNORE_ENC) { 15490 ctxt->options |= XML_PARSE_IGNORE_ENC; 15491 options 
-= XML_PARSE_IGNORE_ENC; 15492 } 15493 if (options & XML_PARSE_BIG_LINES) { 15494 ctxt->options |= XML_PARSE_BIG_LINES; 15495 options -= XML_PARSE_BIG_LINES; 15496 } 15497 ctxt->linenumbers = 1; 15498 return (options); 15499 } 15500 15501 /** 15502 * xmlCtxtUseOptions: 15503 * @ctxt: an XML parser context 15504 * @options: a combination of xmlParserOption 15505 * 15506 * Applies the options to the parser context 15507 * 15508 * Returns 0 in case of success, the set of unknown or unimplemented options 15509 * in case of error. 15510 */ 15511 int 15512 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options) 15513 { 15514 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL)); 15515 } 15516 15517 /** 15518 * xmlDoRead: 15519 * @ctxt: an XML parser context 15520 * @URL: the base URL to use for the document 15521 * @encoding: the document encoding, or NULL 15522 * @options: a combination of xmlParserOption 15523 * @reuse: keep the context for reuse 15524 * 15525 * Common front-end for the xmlRead functions 15526 * 15527 * Returns the resulting document tree or NULL 15528 */ 15529 static xmlDocPtr 15530 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding, 15531 int options, int reuse) 15532 { 15533 xmlDocPtr ret; 15534 15535 xmlCtxtUseOptionsInternal(ctxt, options, encoding); 15536 if (encoding != NULL) { 15537 xmlCharEncodingHandlerPtr hdlr; 15538 15539 hdlr = xmlFindCharEncodingHandler(encoding); 15540 if (hdlr != NULL) 15541 xmlSwitchToEncoding(ctxt, hdlr); 15542 } 15543 if ((URL != NULL) && (ctxt->input != NULL) && 15544 (ctxt->input->filename == NULL)) 15545 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL); 15546 xmlParseDocument(ctxt); 15547 if ((ctxt->wellFormed) || ctxt->recovery) 15548 ret = ctxt->myDoc; 15549 else { 15550 ret = NULL; 15551 if (ctxt->myDoc != NULL) { 15552 xmlFreeDoc(ctxt->myDoc); 15553 } 15554 } 15555 ctxt->myDoc = NULL; 15556 if (!reuse) { 15557 xmlFreeParserCtxt(ctxt); 15558 } 15559 15560 return (ret); 15561 
} 15562 15563 /** 15564 * xmlReadDoc: 15565 * @cur: a pointer to a zero terminated string 15566 * @URL: the base URL to use for the document 15567 * @encoding: the document encoding, or NULL 15568 * @options: a combination of xmlParserOption 15569 * 15570 * parse an XML in-memory document and build a tree. 15571 * 15572 * Returns the resulting document tree 15573 */ 15574 xmlDocPtr 15575 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options) 15576 { 15577 xmlParserCtxtPtr ctxt; 15578 15579 if (cur == NULL) 15580 return (NULL); 15581 xmlInitParser(); 15582 15583 ctxt = xmlCreateDocParserCtxt(cur); 15584 if (ctxt == NULL) 15585 return (NULL); 15586 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15587 } 15588 15589 /** 15590 * xmlReadFile: 15591 * @filename: a file or URL 15592 * @encoding: the document encoding, or NULL 15593 * @options: a combination of xmlParserOption 15594 * 15595 * parse an XML file from the filesystem or the network. 15596 * 15597 * Returns the resulting document tree 15598 */ 15599 xmlDocPtr 15600 xmlReadFile(const char *filename, const char *encoding, int options) 15601 { 15602 xmlParserCtxtPtr ctxt; 15603 15604 xmlInitParser(); 15605 ctxt = xmlCreateURLParserCtxt(filename, options); 15606 if (ctxt == NULL) 15607 return (NULL); 15608 return (xmlDoRead(ctxt, NULL, encoding, options, 0)); 15609 } 15610 15611 /** 15612 * xmlReadMemory: 15613 * @buffer: a pointer to a char array 15614 * @size: the size of the array 15615 * @URL: the base URL to use for the document 15616 * @encoding: the document encoding, or NULL 15617 * @options: a combination of xmlParserOption 15618 * 15619 * parse an XML in-memory document and build a tree. 
15620 * 15621 * Returns the resulting document tree 15622 */ 15623 xmlDocPtr 15624 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options) 15625 { 15626 xmlParserCtxtPtr ctxt; 15627 15628 xmlInitParser(); 15629 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 15630 if (ctxt == NULL) 15631 return (NULL); 15632 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15633 } 15634 15635 /** 15636 * xmlReadFd: 15637 * @fd: an open file descriptor 15638 * @URL: the base URL to use for the document 15639 * @encoding: the document encoding, or NULL 15640 * @options: a combination of xmlParserOption 15641 * 15642 * parse an XML from a file descriptor and build a tree. 15643 * NOTE that the file descriptor will not be closed when the 15644 * reader is closed or reset. 15645 * 15646 * Returns the resulting document tree 15647 */ 15648 xmlDocPtr 15649 xmlReadFd(int fd, const char *URL, const char *encoding, int options) 15650 { 15651 xmlParserCtxtPtr ctxt; 15652 xmlParserInputBufferPtr input; 15653 xmlParserInputPtr stream; 15654 15655 if (fd < 0) 15656 return (NULL); 15657 xmlInitParser(); 15658 15659 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 15660 if (input == NULL) 15661 return (NULL); 15662 input->closecallback = NULL; 15663 ctxt = xmlNewParserCtxt(); 15664 if (ctxt == NULL) { 15665 xmlFreeParserInputBuffer(input); 15666 return (NULL); 15667 } 15668 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15669 if (stream == NULL) { 15670 xmlFreeParserInputBuffer(input); 15671 xmlFreeParserCtxt(ctxt); 15672 return (NULL); 15673 } 15674 inputPush(ctxt, stream); 15675 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15676 } 15677 15678 /** 15679 * xmlReadIO: 15680 * @ioread: an I/O read function 15681 * @ioclose: an I/O close function 15682 * @ioctx: an I/O handler 15683 * @URL: the base URL to use for the document 15684 * @encoding: the document encoding, or NULL 15685 * @options: a combination of 
xmlParserOption 15686 * 15687 * parse an XML document from I/O functions and source and build a tree. 15688 * 15689 * Returns the resulting document tree 15690 */ 15691 xmlDocPtr 15692 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 15693 void *ioctx, const char *URL, const char *encoding, int options) 15694 { 15695 xmlParserCtxtPtr ctxt; 15696 xmlParserInputBufferPtr input; 15697 xmlParserInputPtr stream; 15698 15699 if (ioread == NULL) 15700 return (NULL); 15701 xmlInitParser(); 15702 15703 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, 15704 XML_CHAR_ENCODING_NONE); 15705 if (input == NULL) { 15706 if (ioclose != NULL) 15707 ioclose(ioctx); 15708 return (NULL); 15709 } 15710 ctxt = xmlNewParserCtxt(); 15711 if (ctxt == NULL) { 15712 xmlFreeParserInputBuffer(input); 15713 return (NULL); 15714 } 15715 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15716 if (stream == NULL) { 15717 xmlFreeParserInputBuffer(input); 15718 xmlFreeParserCtxt(ctxt); 15719 return (NULL); 15720 } 15721 inputPush(ctxt, stream); 15722 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15723 } 15724 15725 /** 15726 * xmlCtxtReadDoc: 15727 * @ctxt: an XML parser context 15728 * @cur: a pointer to a zero terminated string 15729 * @URL: the base URL to use for the document 15730 * @encoding: the document encoding, or NULL 15731 * @options: a combination of xmlParserOption 15732 * 15733 * parse an XML in-memory document and build a tree. 
15734 * This reuses the existing @ctxt parser context 15735 * 15736 * Returns the resulting document tree 15737 */ 15738 xmlDocPtr 15739 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur, 15740 const char *URL, const char *encoding, int options) 15741 { 15742 xmlParserInputPtr stream; 15743 15744 if (cur == NULL) 15745 return (NULL); 15746 if (ctxt == NULL) 15747 return (NULL); 15748 xmlInitParser(); 15749 15750 xmlCtxtReset(ctxt); 15751 15752 stream = xmlNewStringInputStream(ctxt, cur); 15753 if (stream == NULL) { 15754 return (NULL); 15755 } 15756 inputPush(ctxt, stream); 15757 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15758 } 15759 15760 /** 15761 * xmlCtxtReadFile: 15762 * @ctxt: an XML parser context 15763 * @filename: a file or URL 15764 * @encoding: the document encoding, or NULL 15765 * @options: a combination of xmlParserOption 15766 * 15767 * parse an XML file from the filesystem or the network. 15768 * This reuses the existing @ctxt parser context 15769 * 15770 * Returns the resulting document tree 15771 */ 15772 xmlDocPtr 15773 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename, 15774 const char *encoding, int options) 15775 { 15776 xmlParserInputPtr stream; 15777 15778 if (filename == NULL) 15779 return (NULL); 15780 if (ctxt == NULL) 15781 return (NULL); 15782 xmlInitParser(); 15783 15784 xmlCtxtReset(ctxt); 15785 15786 stream = xmlLoadExternalEntity(filename, NULL, ctxt); 15787 if (stream == NULL) { 15788 return (NULL); 15789 } 15790 inputPush(ctxt, stream); 15791 return (xmlDoRead(ctxt, NULL, encoding, options, 1)); 15792 } 15793 15794 /** 15795 * xmlCtxtReadMemory: 15796 * @ctxt: an XML parser context 15797 * @buffer: a pointer to a char array 15798 * @size: the size of the array 15799 * @URL: the base URL to use for the document 15800 * @encoding: the document encoding, or NULL 15801 * @options: a combination of xmlParserOption 15802 * 15803 * parse an XML in-memory document and build a tree. 
15804 * This reuses the existing @ctxt parser context 15805 * 15806 * Returns the resulting document tree 15807 */ 15808 xmlDocPtr 15809 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size, 15810 const char *URL, const char *encoding, int options) 15811 { 15812 xmlParserInputBufferPtr input; 15813 xmlParserInputPtr stream; 15814 15815 if (ctxt == NULL) 15816 return (NULL); 15817 if (buffer == NULL) 15818 return (NULL); 15819 xmlInitParser(); 15820 15821 xmlCtxtReset(ctxt); 15822 15823 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 15824 if (input == NULL) { 15825 return(NULL); 15826 } 15827 15828 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15829 if (stream == NULL) { 15830 xmlFreeParserInputBuffer(input); 15831 return(NULL); 15832 } 15833 15834 inputPush(ctxt, stream); 15835 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15836 } 15837 15838 /** 15839 * xmlCtxtReadFd: 15840 * @ctxt: an XML parser context 15841 * @fd: an open file descriptor 15842 * @URL: the base URL to use for the document 15843 * @encoding: the document encoding, or NULL 15844 * @options: a combination of xmlParserOption 15845 * 15846 * parse an XML from a file descriptor and build a tree. 15847 * This reuses the existing @ctxt parser context 15848 * NOTE that the file descriptor will not be closed when the 15849 * reader is closed or reset. 
15850 * 15851 * Returns the resulting document tree 15852 */ 15853 xmlDocPtr 15854 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd, 15855 const char *URL, const char *encoding, int options) 15856 { 15857 xmlParserInputBufferPtr input; 15858 xmlParserInputPtr stream; 15859 15860 if (fd < 0) 15861 return (NULL); 15862 if (ctxt == NULL) 15863 return (NULL); 15864 xmlInitParser(); 15865 15866 xmlCtxtReset(ctxt); 15867 15868 15869 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 15870 if (input == NULL) 15871 return (NULL); 15872 input->closecallback = NULL; 15873 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15874 if (stream == NULL) { 15875 xmlFreeParserInputBuffer(input); 15876 return (NULL); 15877 } 15878 inputPush(ctxt, stream); 15879 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15880 } 15881 15882 /** 15883 * xmlCtxtReadIO: 15884 * @ctxt: an XML parser context 15885 * @ioread: an I/O read function 15886 * @ioclose: an I/O close function 15887 * @ioctx: an I/O handler 15888 * @URL: the base URL to use for the document 15889 * @encoding: the document encoding, or NULL 15890 * @options: a combination of xmlParserOption 15891 * 15892 * parse an XML document from I/O functions and source and build a tree. 
15893 * This reuses the existing @ctxt parser context 15894 * 15895 * Returns the resulting document tree 15896 */ 15897 xmlDocPtr 15898 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread, 15899 xmlInputCloseCallback ioclose, void *ioctx, 15900 const char *URL, 15901 const char *encoding, int options) 15902 { 15903 xmlParserInputBufferPtr input; 15904 xmlParserInputPtr stream; 15905 15906 if (ioread == NULL) 15907 return (NULL); 15908 if (ctxt == NULL) 15909 return (NULL); 15910 xmlInitParser(); 15911 15912 xmlCtxtReset(ctxt); 15913 15914 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, 15915 XML_CHAR_ENCODING_NONE); 15916 if (input == NULL) { 15917 if (ioclose != NULL) 15918 ioclose(ioctx); 15919 return (NULL); 15920 } 15921 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15922 if (stream == NULL) { 15923 xmlFreeParserInputBuffer(input); 15924 return (NULL); 15925 } 15926 inputPush(ctxt, stream); 15927 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15928 } 15929 15930 #define bottom_parser 15931 #include "elfgcchack.h"