parserInternals.c
1 /* 2 * parserInternals.c : Internal routines (and obsolete ones) needed for the 3 * XML and HTML parsers. 4 * 5 * See Copyright for the status of this software. 6 * 7 * daniel@veillard.com 8 */ 9 10 #define IN_LIBXML 11 #include "libxml.h" 12 13 #if defined(WIN32) && !defined (__CYGWIN__) 14 #define XML_DIR_SEP '\\' 15 #else 16 #define XML_DIR_SEP '/' 17 #endif 18 19 #include <string.h> 20 #ifdef HAVE_CTYPE_H 21 #include <ctype.h> 22 #endif 23 #ifdef HAVE_STDLIB_H 24 #include <stdlib.h> 25 #endif 26 #ifdef HAVE_SYS_STAT_H 27 #include <sys/stat.h> 28 #endif 29 #ifdef HAVE_FCNTL_H 30 #include <fcntl.h> 31 #endif 32 #ifdef HAVE_UNISTD_H 33 #include <unistd.h> 34 #endif 35 #ifdef HAVE_ZLIB_H 36 #include <zlib.h> 37 #endif 38 39 #include <libxml/xmlmemory.h> 40 #include <libxml/tree.h> 41 #include <libxml/parser.h> 42 #include <libxml/parserInternals.h> 43 #include <libxml/valid.h> 44 #include <libxml/entities.h> 45 #include <libxml/xmlerror.h> 46 #include <libxml/encoding.h> 47 #include <libxml/valid.h> 48 #include <libxml/xmlIO.h> 49 #include <libxml/uri.h> 50 #include <libxml/dict.h> 51 #include <libxml/SAX.h> 52 #ifdef LIBXML_CATALOG_ENABLED 53 #include <libxml/catalog.h> 54 #endif 55 #include <libxml/globals.h> 56 #include <libxml/chvalid.h> 57 58 #define CUR(ctxt) ctxt->input->cur 59 #define END(ctxt) ctxt->input->end 60 #define VALID_CTXT(ctxt) (CUR(ctxt) <= END(ctxt)) 61 62 #include "buf.h" 63 #include "enc.h" 64 65 /* 66 * Various global defaults for parsing 67 */ 68 69 /** 70 * xmlCheckVersion: 71 * @version: the include version number 72 * 73 * check the compiled lib version against the include one. 74 * This can warn or immediately kill the application 75 */ 76 void 77 xmlCheckVersion(int version) { 78 int myversion = (int) LIBXML_VERSION; 79 80 xmlInitParser(); 81 82 if ((myversion / 10000) != (version / 10000)) { 83 xmlGenericError(xmlGenericErrorContext, 84 "Fatal: program compiled against libxml %d using libxml %d\n", 85 (version / 10000), (myversion / 10000)); 86 fprintf(stderr, 87 "Fatal: program compiled against libxml %d using libxml %d\n", 88 (version / 10000), (myversion / 10000)); 89 } 90 if ((myversion / 100) < (version / 100)) { 91 xmlGenericError(xmlGenericErrorContext, 92 "Warning: program compiled against libxml %d using older %d\n", 93 (version / 100), (myversion / 100)); 94 } 95 } 96 97 98 /************************************************************************ 99 * * 100 * Some factorized error routines * 101 * * 102 ************************************************************************/ 103 104 105 /** 106 * xmlErrMemory: 107 * @ctxt: an XML parser context 108 * @extra: extra informations 109 * 110 * Handle a redefinition of attribute error 111 */ 112 void 113 xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra) 114 { 115 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 116 (ctxt->instate == XML_PARSER_EOF)) 117 return; 118 if (ctxt != NULL) { 119 ctxt->errNo = XML_ERR_NO_MEMORY; 120 ctxt->instate = XML_PARSER_EOF; 121 ctxt->disableSAX = 1; 122 } 123 if (extra) 124 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 125 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra, 126 NULL, NULL, 0, 0, 127 "Memory allocation failed : %s\n", extra); 128 else 129 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 130 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL, 131 NULL, NULL, 0, 0, "Memory allocation failed\n"); 132 } 133 134 /** 135 * __xmlErrEncoding: 136 * @ctxt: an XML parser context 137 * @xmlerr: the error number 138 * @msg: the error message 139 * @str1: an string info 140 * @str2: an string info 141 * 142 * Handle an encoding error 143 */ 144 void 145 __xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr, 146 const char *msg, const xmlChar * str1, const xmlChar * str2) 147 { 148 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 149 (ctxt->instate == XML_PARSER_EOF)) 150 return; 151 if (ctxt != NULL) 152 ctxt->errNo = xmlerr; 153 #pragma clang diagnostic push 154 #pragma clang diagnostic ignored "-Wformat-nonliteral" 155 __xmlRaiseError(NULL, NULL, NULL, 156 ctxt, NULL, XML_FROM_PARSER, xmlerr, XML_ERR_FATAL, 157 NULL, 0, (const char *) str1, (const char *) str2, 158 NULL, 0, 0, msg, str1, str2); 159 #pragma clang diagnostic pop 160 if (ctxt != NULL) { 161 ctxt->wellFormed = 0; 162 if (ctxt->recovery == 0) 163 ctxt->disableSAX = 1; 164 } 165 } 166 167 /** 168 * xmlErrInternal: 169 * @ctxt: an XML parser context 170 * @msg: the error message 171 * @str: error informations 172 * 173 * Handle an internal error 174 */ 175 static void LIBXML_ATTR_FORMAT(2,0) 176 xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str) 177 { 178 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 179 (ctxt->instate == XML_PARSER_EOF)) 180 return; 181 if (ctxt != NULL) 182 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 183 #pragma clang diagnostic push 184 #pragma clang diagnostic ignored "-Wformat-nonliteral" 185 __xmlRaiseError(NULL, NULL, NULL, 186 ctxt, NULL, XML_FROM_PARSER, XML_ERR_INTERNAL_ERROR, 187 XML_ERR_FATAL, NULL, 0, (const char *) str, NULL, NULL, 188 0, 0, msg, str); 189 #pragma clang diagnostic pop 190 if (ctxt != NULL) { 191 ctxt->wellFormed = 0; 192 if (ctxt->recovery == 0) 193 ctxt->disableSAX = 1; 194 } 195 } 196 197 /** 198 * xmlErrEncodingInt: 199 * @ctxt: an XML parser context 200 * @error: the error number 201 * @msg: the error message 202 * @val: an integer value 203 * 204 * n encoding error 205 */ 206 static void LIBXML_ATTR_FORMAT(3,0) 207 xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, 208 const char *msg, int val) 209 { 210 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 211 (ctxt->instate == XML_PARSER_EOF)) 212 return; 213 if (ctxt != NULL) 214 ctxt->errNo = error; 215 #pragma clang diagnostic push 216 #pragma clang diagnostic ignored "-Wformat-nonliteral" 217 __xmlRaiseError(NULL, NULL, NULL, 218 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 219 NULL, 0, NULL, NULL, NULL, val, 0, msg, val); 220 #pragma clang diagnostic pop 221 if (ctxt != NULL) { 222 ctxt->wellFormed = 0; 223 if (ctxt->recovery == 0) 224 ctxt->disableSAX = 1; 225 } 226 } 227 228 /** 229 * xmlIsLetter: 230 * @c: an unicode character (int) 231 * 232 * Check whether the character is allowed by the production 233 * [84] Letter ::= BaseChar | Ideographic 234 * 235 * Returns 0 if not, non-zero otherwise 236 */ 237 int 238 xmlIsLetter(int c) { 239 return(IS_BASECHAR(c) || IS_IDEOGRAPHIC(c)); 240 } 241 242 /************************************************************************ 243 * * 244 * Input handling functions for progressive parsing * 245 * * 246 ************************************************************************/ 247 248 /* #define DEBUG_INPUT */ 249 /* #define DEBUG_STACK */ 250 /* #define DEBUG_PUSH */ 251 252 253 /* we need to keep enough input to show errors in context */ 254 #define LINE_LEN 80 255 256 #ifdef DEBUG_INPUT 257 #define CHECK_BUFFER(in) check_buffer(in) 258 259 static 260 void check_buffer(xmlParserInputPtr in) { 261 if (in->base != xmlBufContent(in->buf->buffer)) { 262 xmlGenericError(xmlGenericErrorContext, 263 "xmlParserInput: base mismatch problem\n"); 264 } 265 if (in->cur < in->base) { 266 xmlGenericError(xmlGenericErrorContext, 267 "xmlParserInput: cur < base problem\n"); 268 } 269 if (in->cur > in->base + xmlBufUse(in->buf->buffer)) { 270 xmlGenericError(xmlGenericErrorContext, 271 "xmlParserInput: cur > base + use problem\n"); 272 } 273 xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d\n", 274 (int) in, (int) xmlBufContent(in->buf->buffer), in->cur - in->base, 275 xmlBufUse(in->buf->buffer)); 276 } 277 278 #else 279 #define CHECK_BUFFER(in) 280 #endif 281 282 283 /** 284 * xmlParserInputRead: 285 * @in: an XML parser input 286 * @len: an indicative size for the lookahead 287 * 288 * This function was internal and is deprecated. 289 * 290 * Returns -1 as this is an error to use it. 291 */ 292 int 293 xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) { 294 return(-1); 295 } 296 297 /** 298 * xmlParserInputGrow: 299 * @in: an XML parser input 300 * @len: an indicative size for the lookahead 301 * 302 * This function increase the input for the parser. It tries to 303 * preserve pointers to the input buffer, and keep already read data 304 * 305 * Returns the amount of char read, or -1 in case of error, 0 indicate the 306 * end of this entity 307 */ 308 int 309 xmlParserInputGrow(xmlParserInputPtr in, int len) { 310 int ret; 311 size_t indx; 312 const xmlChar *content; 313 314 if ((in == NULL) || (len < 0)) return(-1); 315 #ifdef DEBUG_INPUT 316 xmlGenericError(xmlGenericErrorContext, "Grow\n"); 317 #endif 318 if (in->buf == NULL) return(-1); 319 if (in->base == NULL) return(-1); 320 if (in->cur == NULL) return(-1); 321 if (in->buf->buffer == NULL) return(-1); 322 323 CHECK_BUFFER(in); 324 325 indx = in->cur - in->base; 326 if (xmlBufUse(in->buf->buffer) > (unsigned int) indx + INPUT_CHUNK) { 327 328 CHECK_BUFFER(in); 329 330 return(0); 331 } 332 if (in->buf->readcallback != NULL) { 333 ret = xmlParserInputBufferGrow(in->buf, len); 334 } else 335 return(0); 336 337 /* 338 * NOTE : in->base may be a "dangling" i.e. freed pointer in this 339 * block, but we use it really as an integer to do some 340 * pointer arithmetic. Insure will raise it as a bug but in 341 * that specific case, that's not ! 342 */ 343 344 content = xmlBufContent(in->buf->buffer); 345 if (in->base != content) { 346 /* 347 * the buffer has been reallocated 348 */ 349 indx = in->cur - in->base; 350 in->base = content; 351 in->cur = &content[indx]; 352 } 353 in->end = xmlBufEnd(in->buf->buffer); 354 355 CHECK_BUFFER(in); 356 357 return(ret); 358 } 359 360 /** 361 * xmlParserInputShrink: 362 * @in: an XML parser input 363 * 364 * This function removes used input for the parser. 365 */ 366 void 367 xmlParserInputShrink(xmlParserInputPtr in) { 368 size_t used; 369 size_t ret; 370 size_t indx; 371 const xmlChar *content; 372 373 #ifdef DEBUG_INPUT 374 xmlGenericError(xmlGenericErrorContext, "Shrink\n"); 375 #endif 376 if (in == NULL) return; 377 if (in->buf == NULL) return; 378 if (in->base == NULL) return; 379 if (in->cur == NULL) return; 380 if (in->buf->buffer == NULL) return; 381 382 CHECK_BUFFER(in); 383 384 used = in->cur - xmlBufContent(in->buf->buffer); 385 /* 386 * Do not shrink on large buffers whose only a tiny fraction 387 * was consumed 388 */ 389 if (used > INPUT_CHUNK) { 390 ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN); 391 if (ret > 0) { 392 in->cur -= ret; 393 in->consumed += ret; 394 } 395 in->end = xmlBufEnd(in->buf->buffer); 396 } 397 398 CHECK_BUFFER(in); 399 400 if (xmlBufUse(in->buf->buffer) > INPUT_CHUNK) { 401 return; 402 } 403 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK); 404 content = xmlBufContent(in->buf->buffer); 405 if (in->base != content) { 406 /* 407 * the buffer has been reallocated 408 */ 409 indx = in->cur - in->base; 410 in->base = content; 411 in->cur = &content[indx]; 412 } 413 in->end = xmlBufEnd(in->buf->buffer); 414 415 CHECK_BUFFER(in); 416 } 417 418 /************************************************************************ 419 * * 420 * UTF8 character input and related functions * 421 * * 422 ************************************************************************/ 423 424 /** 425 * xmlNextChar: 426 * @ctxt: the XML parser context 427 * 428 * Skip to the next char input char. 429 */ 430 431 void 432 xmlNextChar(xmlParserCtxtPtr ctxt) 433 { 434 if ((ctxt == NULL) || (ctxt->instate == XML_PARSER_EOF) || 435 (ctxt->input == NULL)) 436 return; 437 438 if (!(VALID_CTXT(ctxt))) { 439 xmlErrInternal(ctxt, "Parser input data memory error\n", NULL); 440 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 441 xmlStopParser(ctxt); 442 return; 443 } 444 445 if ((*ctxt->input->cur == 0) && 446 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) { 447 if ((ctxt->instate != XML_PARSER_COMMENT)) 448 xmlPopInput(ctxt); 449 return; 450 } 451 452 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { 453 const unsigned char *cur; 454 unsigned char c; 455 456 /* 457 * 2.11 End-of-Line Handling 458 * the literal two-character sequence "#xD#xA" or a standalone 459 * literal #xD, an XML processor must pass to the application 460 * the single character #xA. 461 */ 462 if (*(ctxt->input->cur) == '\n') { 463 ctxt->input->line++; ctxt->input->col = 1; 464 } else 465 ctxt->input->col++; 466 467 /* 468 * We are supposed to handle UTF8, check it's valid 469 * From rfc2044: encoding of the Unicode values on UTF-8: 470 * 471 * UCS-4 range (hex.) UTF-8 octet sequence (binary) 472 * 0000 0000-0000 007F 0xxxxxxx 473 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 474 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 475 * 476 * Check for the 0x110000 limit too 477 */ 478 cur = ctxt->input->cur; 479 480 c = *cur; 481 if (c & 0x80) { 482 if (c == 0xC0) 483 goto encoding_error; 484 if (cur[1] == 0) { 485 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 486 cur = ctxt->input->cur; 487 } 488 if ((cur[1] & 0xc0) != 0x80) 489 goto encoding_error; 490 if ((c & 0xe0) == 0xe0) { 491 unsigned int val; 492 493 if (cur[2] == 0) { 494 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 495 cur = ctxt->input->cur; 496 } 497 if ((cur[2] & 0xc0) != 0x80) 498 goto encoding_error; 499 if ((c & 0xf0) == 0xf0) { 500 if (cur[3] == 0) { 501 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 502 cur = ctxt->input->cur; 503 } 504 if (((c & 0xf8) != 0xf0) || 505 ((cur[3] & 0xc0) != 0x80)) 506 goto encoding_error; 507 /* 4-byte code */ 508 ctxt->input->cur += 4; 509 val = (cur[0] & 0x7) << 18; 510 val |= (cur[1] & 0x3f) << 12; 511 val |= (cur[2] & 0x3f) << 6; 512 val |= cur[3] & 0x3f; 513 } else { 514 /* 3-byte code */ 515 ctxt->input->cur += 3; 516 val = (cur[0] & 0xf) << 12; 517 val |= (cur[1] & 0x3f) << 6; 518 val |= cur[2] & 0x3f; 519 } 520 if (((val > 0xd7ff) && (val < 0xe000)) || 521 ((val > 0xfffd) && (val < 0x10000)) || 522 (val >= 0x110000)) { 523 xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, 524 "Char 0x%X out of allowed range\n", 525 val); 526 } 527 } else 528 /* 2-byte code */ 529 ctxt->input->cur += 2; 530 } else 531 /* 1-byte code */ 532 ctxt->input->cur++; 533 534 ctxt->nbChars++; 535 if (*ctxt->input->cur == 0) 536 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 537 } else { 538 /* 539 * Assume it's a fixed length encoding (1) with 540 * a compatible encoding for the ASCII set, since 541 * XML constructs only use < 128 chars 542 */ 543 544 if (*(ctxt->input->cur) == '\n') { 545 ctxt->input->line++; ctxt->input->col = 1; 546 } else 547 ctxt->input->col++; 548 ctxt->input->cur++; 549 ctxt->nbChars++; 550 if (*ctxt->input->cur == 0) 551 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 552 } 553 if ((*ctxt->input->cur == '%') && (!ctxt->html)) 554 xmlParserHandlePEReference(ctxt); 555 if ((*ctxt->input->cur == 0) && 556 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 557 xmlPopInput(ctxt); 558 return; 559 encoding_error: 560 /* 561 * If we detect an UTF8 error that probably mean that the 562 * input encoding didn't get properly advertised in the 563 * declaration header. Report the error and switch the encoding 564 * to ISO-Latin-1 (if you don't like this policy, just declare the 565 * encoding !) 566 */ 567 if ((ctxt == NULL) || (ctxt->input == NULL) || 568 (ctxt->input->end - ctxt->input->cur < 4)) { 569 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 570 "Input is not proper UTF-8, indicate encoding !\n", 571 NULL, NULL); 572 } else { 573 char buffer[150]; 574 575 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 576 ctxt->input->cur[0], ctxt->input->cur[1], 577 ctxt->input->cur[2], ctxt->input->cur[3]); 578 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 579 "Input is not proper UTF-8, indicate encoding !\n%s", 580 BAD_CAST buffer, NULL); 581 } 582 ctxt->charset = XML_CHAR_ENCODING_8859_1; 583 ctxt->input->cur++; 584 return; 585 } 586 587 /** 588 * xmlCurrentChar: 589 * @ctxt: the XML parser context 590 * @len: pointer to the length of the char read 591 * 592 * The current char value, if using UTF-8 this may actually span multiple 593 * bytes in the input buffer. Implement the end of line normalization: 594 * 2.11 End-of-Line Handling 595 * Wherever an external parsed entity or the literal entity value 596 * of an internal parsed entity contains either the literal two-character 597 * sequence "#xD#xA" or a standalone literal #xD, an XML processor 598 * must pass to the application the single character #xA. 599 * This behavior can conveniently be produced by normalizing all 600 * line breaks to #xA on input, before parsing.) 601 * 602 * Returns the current char value and its length 603 */ 604 605 int 606 xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { 607 if ((ctxt == NULL) || (len == NULL) || (ctxt->input == NULL)) return(0); 608 if (ctxt->instate == XML_PARSER_EOF) 609 return(0); 610 611 if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) { 612 *len = 1; 613 return((int) *ctxt->input->cur); 614 } 615 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { 616 /* 617 * We are supposed to handle UTF8, check it's valid 618 * From rfc2044: encoding of the Unicode values on UTF-8: 619 * 620 * UCS-4 range (hex.) UTF-8 octet sequence (binary) 621 * 0000 0000-0000 007F 0xxxxxxx 622 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 623 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 624 * 625 * Check for the 0x110000 limit too 626 */ 627 const unsigned char *cur = ctxt->input->cur; 628 unsigned char c; 629 unsigned int val; 630 631 c = *cur; 632 if (c & 0x80) { 633 if (((c & 0x40) == 0) || (c == 0xC0)) 634 goto encoding_error; 635 if (cur[1] == 0) { 636 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 637 cur = ctxt->input->cur; 638 } 639 if ((cur[1] & 0xc0) != 0x80) 640 goto encoding_error; 641 if ((c & 0xe0) == 0xe0) { 642 if (cur[2] == 0) { 643 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 644 cur = ctxt->input->cur; 645 } 646 if ((cur[2] & 0xc0) != 0x80) 647 goto encoding_error; 648 if ((c & 0xf0) == 0xf0) { 649 if (cur[3] == 0) { 650 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 651 cur = ctxt->input->cur; 652 } 653 if (((c & 0xf8) != 0xf0) || 654 ((cur[3] & 0xc0) != 0x80)) 655 goto encoding_error; 656 /* 4-byte code */ 657 *len = 4; 658 val = (cur[0] & 0x7) << 18; 659 val |= (cur[1] & 0x3f) << 12; 660 val |= (cur[2] & 0x3f) << 6; 661 val |= cur[3] & 0x3f; 662 if (val < 0x10000) 663 goto encoding_error; 664 } else { 665 /* 3-byte code */ 666 *len = 3; 667 val = (cur[0] & 0xf) << 12; 668 val |= (cur[1] & 0x3f) << 6; 669 val |= cur[2] & 0x3f; 670 if (val < 0x800) 671 goto encoding_error; 672 } 673 } else { 674 /* 2-byte code */ 675 *len = 2; 676 val = (cur[0] & 0x1f) << 6; 677 val |= cur[1] & 0x3f; 678 if (val < 0x80) 679 goto encoding_error; 680 } 681 if (!IS_CHAR(val)) { 682 xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, 683 "Char 0x%X out of allowed range\n", val); 684 if (ctxt->instate == XML_PARSER_EOF) 685 goto encoding_error; 686 } 687 return(val); 688 } else { 689 /* 1-byte code */ 690 *len = 1; 691 if (*ctxt->input->cur == 0) 692 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 693 if ((*ctxt->input->cur == 0) && 694 (ctxt->input->end > ctxt->input->cur)) { 695 xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, 696 "Char 0x0 out of allowed range\n", 0); 697 if (ctxt->instate == XML_PARSER_EOF) 698 goto encoding_error; 699 } 700 if (*ctxt->input->cur == 0xD) { 701 if (ctxt->input->cur[1] == 0xA) { 702 ctxt->nbChars++; 703 ctxt->input->cur++; 704 } 705 return(0xA); 706 } 707 return((int) *ctxt->input->cur); 708 } 709 } 710 /* 711 * Assume it's a fixed length encoding (1) with 712 * a compatible encoding for the ASCII set, since 713 * XML constructs only use < 128 chars 714 */ 715 *len = 1; 716 if (*ctxt->input->cur == 0xD) { 717 if (ctxt->input->cur[1] == 0xA) { 718 ctxt->nbChars++; 719 ctxt->input->cur++; 720 } 721 return(0xA); 722 } 723 return((int) *ctxt->input->cur); 724 encoding_error: 725 /* 726 * An encoding problem may arise from a truncated input buffer 727 * splitting a character in the middle. In that case do not raise 728 * an error but return 0 to endicate an end of stream problem 729 */ 730 if (ctxt->input->end - ctxt->input->cur < 4) { 731 *len = 0; 732 return(0); 733 } 734 735 /* 736 * If we detect an UTF8 error that probably mean that the 737 * input encoding didn't get properly advertised in the 738 * declaration header. Report the error and switch the encoding 739 * to ISO-Latin-1 (if you don't like this policy, just declare the 740 * encoding !) 741 */ 742 { 743 char buffer[150]; 744 745 snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 746 ctxt->input->cur[0], ctxt->input->cur[1], 747 ctxt->input->cur[2], ctxt->input->cur[3]); 748 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 749 "Input is not proper UTF-8, indicate encoding !\n%s", 750 BAD_CAST buffer, NULL); 751 } 752 ctxt->charset = XML_CHAR_ENCODING_8859_1; 753 *len = 1; 754 return((int) *ctxt->input->cur); 755 } 756 757 /** 758 * xmlStringCurrentChar: 759 * @ctxt: the XML parser context 760 * @cur: pointer to the beginning of the char 761 * @len: pointer to the length of the char read 762 * 763 * The current char value, if using UTF-8 this may actually span multiple 764 * bytes in the input buffer. 765 * 766 * Returns the current char value and its length 767 */ 768 769 int 770 xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len) 771 { 772 if ((len == NULL) || (cur == NULL)) return(0); 773 if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) { 774 /* 775 * We are supposed to handle UTF8, check it's valid 776 * From rfc2044: encoding of the Unicode values on UTF-8: 777 * 778 * UCS-4 range (hex.) UTF-8 octet sequence (binary) 779 * 0000 0000-0000 007F 0xxxxxxx 780 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 781 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 782 * 783 * Check for the 0x110000 limit too 784 */ 785 unsigned char c; 786 unsigned int val; 787 788 c = *cur; 789 if (c & 0x80) { 790 if ((cur[1] & 0xc0) != 0x80) 791 goto encoding_error; 792 if ((c & 0xe0) == 0xe0) { 793 794 if ((cur[2] & 0xc0) != 0x80) 795 goto encoding_error; 796 if ((c & 0xf0) == 0xf0) { 797 if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80)) 798 goto encoding_error; 799 /* 4-byte code */ 800 *len = 4; 801 val = (cur[0] & 0x7) << 18; 802 val |= (cur[1] & 0x3f) << 12; 803 val |= (cur[2] & 0x3f) << 6; 804 val |= cur[3] & 0x3f; 805 } else { 806 /* 3-byte code */ 807 *len = 3; 808 val = (cur[0] & 0xf) << 12; 809 val |= (cur[1] & 0x3f) << 6; 810 val |= cur[2] & 0x3f; 811 } 812 } else { 813 /* 2-byte code */ 814 *len = 2; 815 val = (cur[0] & 0x1f) << 6; 816 val |= cur[1] & 0x3f; 817 } 818 if (!IS_CHAR(val)) { 819 xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, 820 "Char 0x%X out of allowed range\n", val); 821 } 822 return (val); 823 } else { 824 /* 1-byte code */ 825 *len = 1; 826 return ((int) *cur); 827 } 828 } 829 /* 830 * Assume it's a fixed length encoding (1) with 831 * a compatible encoding for the ASCII set, since 832 * XML constructs only use < 128 chars 833 */ 834 *len = 1; 835 return ((int) *cur); 836 encoding_error: 837 838 /* 839 * An encoding problem may arise from a truncated input buffer 840 * splitting a character in the middle. In that case do not raise 841 * an error but return 0 to endicate an end of stream problem 842 */ 843 if ((ctxt == NULL) || (ctxt->input == NULL) || 844 (ctxt->input->end - ctxt->input->cur < 4)) { 845 *len = 0; 846 return(0); 847 } 848 /* 849 * If we detect an UTF8 error that probably mean that the 850 * input encoding didn't get properly advertised in the 851 * declaration header. Report the error and switch the encoding 852 * to ISO-Latin-1 (if you don't like this policy, just declare the 853 * encoding !) 854 */ 855 { 856 char buffer[150]; 857 858 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 859 ctxt->input->cur[0], ctxt->input->cur[1], 860 ctxt->input->cur[2], ctxt->input->cur[3]); 861 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 862 "Input is not proper UTF-8, indicate encoding !\n%s", 863 BAD_CAST buffer, NULL); 864 } 865 *len = 1; 866 return ((int) *cur); 867 } 868 869 /** 870 * xmlCopyCharMultiByte: 871 * @out: pointer to an array of xmlChar 872 * @val: the char value 873 * 874 * append the char value in the array 875 * 876 * Returns the number of xmlChar written 877 */ 878 int 879 xmlCopyCharMultiByte(xmlChar *out, int val) { 880 if (out == NULL) return(0); 881 /* 882 * We are supposed to handle UTF8, check it's valid 883 * From rfc2044: encoding of the Unicode values on UTF-8: 884 * 885 * UCS-4 range (hex.) UTF-8 octet sequence (binary) 886 * 0000 0000-0000 007F 0xxxxxxx 887 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 888 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 889 */ 890 if (val >= 0x80) { 891 xmlChar *savedout = out; 892 int bits; 893 if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; } 894 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;} 895 else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; } 896 else { 897 xmlErrEncodingInt(NULL, XML_ERR_INVALID_CHAR, 898 "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n", 899 val); 900 return(0); 901 } 902 for ( ; bits >= 0; bits-= 6) 903 *out++= ((val >> bits) & 0x3F) | 0x80 ; 904 return (out - savedout); 905 } 906 *out = (xmlChar) val; 907 return 1; 908 } 909 910 /** 911 * xmlCopyChar: 912 * @len: Ignored, compatibility 913 * @out: pointer to an array of xmlChar 914 * @val: the char value 915 * 916 * append the char value in the array 917 * 918 * Returns the number of xmlChar written 919 */ 920 921 int 922 xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) { 923 if (out == NULL) return(0); 924 /* the len parameter is ignored */ 925 if (val >= 0x80) { 926 return(xmlCopyCharMultiByte (out, val)); 927 } 928 *out = (xmlChar) val; 929 return 1; 930 } 931 932 /************************************************************************ 933 * * 934 * Commodity functions to switch encodings * 935 * * 936 ************************************************************************/ 937 938 static int 939 xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt, 940 xmlCharEncodingHandlerPtr handler, int len); 941 static int 942 xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, 943 xmlCharEncodingHandlerPtr handler, int len); 944 /** 945 * xmlSwitchEncoding: 946 * @ctxt: the parser context 947 * @enc: the encoding value (number) 948 * 949 * change the input functions when discovering the character encoding 950 * of a given entity. 951 * 952 * Returns 0 in case of success, -1 otherwise 953 */ 954 int 955 xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc) 956 { 957 xmlCharEncodingHandlerPtr handler; 958 int len = -1; 959 int ret; 960 961 if (ctxt == NULL) return(-1); 962 switch (enc) { 963 case XML_CHAR_ENCODING_ERROR: 964 __xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING, 965 "encoding unknown\n", NULL, NULL); 966 return(-1); 967 case XML_CHAR_ENCODING_NONE: 968 /* let's assume it's UTF-8 without the XML decl */ 969 ctxt->charset = XML_CHAR_ENCODING_UTF8; 970 return(0); 971 case XML_CHAR_ENCODING_UTF8: 972 /* default encoding, no conversion should be needed */ 973 ctxt->charset = XML_CHAR_ENCODING_UTF8; 974 975 /* 976 * Errata on XML-1.0 June 20 2001 977 * Specific handling of the Byte Order Mark for 978 * UTF-8 979 */ 980 if ((ctxt->input != NULL) && 981 (ctxt->input->cur[0] == 0xEF) && 982 (ctxt->input->cur[1] == 0xBB) && 983 (ctxt->input->cur[2] == 0xBF)) { 984 ctxt->input->cur += 3; 985 } 986 return(0); 987 case XML_CHAR_ENCODING_UTF16LE: 988 case XML_CHAR_ENCODING_UTF16BE: 989 /*The raw input characters are encoded 990 *in UTF-16. As we expect this function 991 *to be called after xmlCharEncInFunc, we expect 992 *ctxt->input->cur to contain UTF-8 encoded characters. 993 *So the raw UTF16 Byte Order Mark 994 *has also been converted into 995 *an UTF-8 BOM. Let's skip that BOM. 996 */ 997 if ((ctxt->input != NULL) && (ctxt->input->cur != NULL) && 998 (ctxt->input->cur[0] == 0xEF) && 999 (ctxt->input->cur[1] == 0xBB) && 1000 (ctxt->input->cur[2] == 0xBF)) { 1001 ctxt->input->cur += 3; 1002 } 1003 len = 90; 1004 break; 1005 case XML_CHAR_ENCODING_UCS2: 1006 len = 90; 1007 break; 1008 case XML_CHAR_ENCODING_UCS4BE: 1009 case XML_CHAR_ENCODING_UCS4LE: 1010 case XML_CHAR_ENCODING_UCS4_2143: 1011 case XML_CHAR_ENCODING_UCS4_3412: 1012 len = 180; 1013 break; 1014 case XML_CHAR_ENCODING_EBCDIC: 1015 case XML_CHAR_ENCODING_8859_1: 1016 case XML_CHAR_ENCODING_8859_2: 1017 case XML_CHAR_ENCODING_8859_3: 1018 case XML_CHAR_ENCODING_8859_4: 1019 case XML_CHAR_ENCODING_8859_5: 1020 case XML_CHAR_ENCODING_8859_6: 1021 case XML_CHAR_ENCODING_8859_7: 1022 case XML_CHAR_ENCODING_8859_8: 1023 case XML_CHAR_ENCODING_8859_9: 1024 case XML_CHAR_ENCODING_ASCII: 1025 case XML_CHAR_ENCODING_2022_JP: 1026 case XML_CHAR_ENCODING_SHIFT_JIS: 1027 case XML_CHAR_ENCODING_EUC_JP: 1028 len = 45; 1029 break; 1030 } 1031 handler = xmlGetCharEncodingHandler(enc); 1032 if (handler == NULL) { 1033 /* 1034 * Default handlers. 1035 */ 1036 switch (enc) { 1037 case XML_CHAR_ENCODING_ASCII: 1038 /* default encoding, no conversion should be needed */ 1039 ctxt->charset = XML_CHAR_ENCODING_UTF8; 1040 return(0); 1041 case XML_CHAR_ENCODING_UTF16LE: 1042 break; 1043 case XML_CHAR_ENCODING_UTF16BE: 1044 break; 1045 case XML_CHAR_ENCODING_UCS4LE: 1046 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1047 "encoding not supported %s\n", 1048 BAD_CAST "USC4 little endian", NULL); 1049 break; 1050 case XML_CHAR_ENCODING_UCS4BE: 1051 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1052 "encoding not supported %s\n", 1053 BAD_CAST "USC4 big endian", NULL); 1054 break; 1055 case XML_CHAR_ENCODING_EBCDIC: 1056 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1057 "encoding not supported %s\n", 1058 BAD_CAST "EBCDIC", NULL); 1059 break; 1060 case XML_CHAR_ENCODING_UCS4_2143: 1061 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1062 "encoding not supported %s\n", 1063 BAD_CAST "UCS4 2143", NULL); 1064 break; 1065 case XML_CHAR_ENCODING_UCS4_3412: 1066 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1067 "encoding not supported %s\n", 1068 BAD_CAST "UCS4 3412", NULL); 1069 break; 1070 case XML_CHAR_ENCODING_UCS2: 1071 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1072 "encoding not supported %s\n", 1073 BAD_CAST "UCS2", NULL); 1074 break; 1075 case XML_CHAR_ENCODING_8859_1: 1076 case XML_CHAR_ENCODING_8859_2: 1077 case XML_CHAR_ENCODING_8859_3: 1078 case XML_CHAR_ENCODING_8859_4: 1079 case XML_CHAR_ENCODING_8859_5: 1080 case XML_CHAR_ENCODING_8859_6: 1081 case XML_CHAR_ENCODING_8859_7: 1082 case XML_CHAR_ENCODING_8859_8: 1083 case XML_CHAR_ENCODING_8859_9: 1084 /* 1085 * We used to keep the internal content in the 1086 * document encoding however this turns being unmaintainable 1087 * So xmlGetCharEncodingHandler() will return non-null 1088 * values for this now. 1089 */ 1090 if ((ctxt->inputNr == 1) && 1091 (ctxt->encoding == NULL) && 1092 (ctxt->input != NULL) && 1093 (ctxt->input->encoding != NULL)) { 1094 ctxt->encoding = xmlStrdup(ctxt->input->encoding); 1095 } 1096 ctxt->charset = enc; 1097 return(0); 1098 case XML_CHAR_ENCODING_2022_JP: 1099 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1100 "encoding not supported %s\n", 1101 BAD_CAST "ISO-2022-JP", NULL); 1102 break; 1103 case XML_CHAR_ENCODING_SHIFT_JIS: 1104 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1105 "encoding not supported %s\n", 1106 BAD_CAST "Shift_JIS", NULL); 1107 break; 1108 case XML_CHAR_ENCODING_EUC_JP: 1109 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1110 "encoding not supported %s\n", 1111 BAD_CAST "EUC-JP", NULL); 1112 break; 1113 default: 1114 break; 1115 } 1116 } 1117 if (handler == NULL) { 1118 xmlStopParser(ctxt); 1119 return(-1); 1120 } 1121 ctxt->charset = XML_CHAR_ENCODING_UTF8; 1122 ret = xmlSwitchToEncodingInt(ctxt, handler, len); 1123 if (((ret < 0) || (ctxt->errNo == XML_I18N_CONV_FAILED)) && !(ctxt->html)) { 1124 /* 1125 * on XML encoding conversion errors, stop the parser 1126 */ 1127 xmlStopParser(ctxt); 1128 ctxt->errNo = XML_I18N_CONV_FAILED; 1129 } 1130 return(ret); 1131 } 1132 1133 /** 1134 * xmlSwitchInputEncoding: 1135 * @ctxt: the parser context 1136 * @input: the input stream 1137 * @handler: the encoding handler 1138 * @len: the number of bytes to convert for the first line or -1 1139 * 1140 * change the input functions when discovering the character encoding 1141 * of a given entity. 1142 * 1143 * Returns 0 in case of success, -1 otherwise 1144 */ 1145 static int 1146 xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, 1147 xmlCharEncodingHandlerPtr handler, int len) 1148 { 1149 int nbchars; 1150 1151 if (handler == NULL) 1152 return (-1); 1153 if (input == NULL) 1154 return (-1); 1155 if (input->buf != NULL) { 1156 if (input->buf->encoder != NULL) { 1157 /* 1158 * Check in case the auto encoding detetection triggered 1159 * in already. 1160 */ 1161 if (input->buf->encoder == handler) 1162 return (0); 1163 1164 /* 1165 * "UTF-16" can be used for both LE and BE 1166 if ((!xmlStrncmp(BAD_CAST input->buf->encoder->name, 1167 BAD_CAST "UTF-16", 6)) && 1168 (!xmlStrncmp(BAD_CAST handler->name, 1169 BAD_CAST "UTF-16", 6))) { 1170 return(0); 1171 } 1172 */ 1173 1174 /* 1175 * Note: this is a bit dangerous, but that's what it 1176 * takes to use nearly compatible signature for different 1177 * encodings. 1178 */ 1179 xmlCharEncCloseFunc(input->buf->encoder); 1180 input->buf->encoder = handler; 1181 return (0); 1182 } 1183 input->buf->encoder = handler; 1184 1185 /* 1186 * Is there already some content down the pipe to convert ? 1187 */ 1188 if (xmlBufIsEmpty(input->buf->buffer) == 0) { 1189 int processed; 1190 unsigned int use; 1191 1192 /* 1193 * Specific handling of the Byte Order Mark for 1194 * UTF-16 1195 */ 1196 if ((handler->name != NULL) && 1197 (!strcmp(handler->name, "UTF-16LE") || 1198 !strcmp(handler->name, "UTF-16")) && 1199 (input->cur[0] == 0xFF) && (input->cur[1] == 0xFE)) { 1200 input->cur += 2; 1201 } 1202 if ((handler->name != NULL) && 1203 (!strcmp(handler->name, "UTF-16BE")) && 1204 (input->cur[0] == 0xFE) && (input->cur[1] == 0xFF)) { 1205 input->cur += 2; 1206 } 1207 /* 1208 * Errata on XML-1.0 June 20 2001 1209 * Specific handling of the Byte Order Mark for 1210 * UTF-8 1211 */ 1212 if ((handler->name != NULL) && 1213 (!strcmp(handler->name, "UTF-8")) && 1214 (input->cur[0] == 0xEF) && 1215 (input->cur[1] == 0xBB) && (input->cur[2] == 0xBF)) { 1216 input->cur += 3; 1217 } 1218 1219 /* 1220 * Shrink the current input buffer. 1221 * Move it as the raw buffer and create a new input buffer 1222 */ 1223 processed = input->cur - input->base; 1224 xmlBufShrink(input->buf->buffer, processed); 1225 input->buf->raw = input->buf->buffer; 1226 input->buf->buffer = xmlBufCreate(); 1227 input->buf->rawconsumed = processed; 1228 use = xmlBufUse(input->buf->raw); 1229 1230 if (ctxt->html) { 1231 /* 1232 * convert as much as possible of the buffer 1233 */ 1234 nbchars = xmlCharEncInput(input->buf, 1); 1235 } else { 1236 /* 1237 * convert just enough to get 1238 * '<?xml version="1.0" encoding="xxx"?>' 1239 * parsed with the autodetected encoding 1240 * into the parser reading buffer. 1241 */ 1242 nbchars = xmlCharEncFirstLineInput(input->buf, len); 1243 } 1244 if (nbchars < 0) { 1245 xmlBufFree(input->buf->buffer); 1246 input->buf->buffer = input->buf->raw; 1247 input->buf->raw = NULL; 1248 input->buf->rawconsumed = 0; 1249 } else { 1250 input->buf->rawconsumed += use - xmlBufUse(input->buf->raw); 1251 } 1252 xmlBufResetInput(input->buf->buffer, input); 1253 if (nbchars < 0) { 1254 if (!ctxt->html) { 1255 xmlErrInternal(ctxt, 1256 "switching encoding: encoder error\n", 1257 NULL); 1258 } 1259 return (-1); 1260 } 1261 } 1262 return (0); 1263 } else if (input->length == 0) { 1264 /* 1265 * When parsing a static memory array one must know the 1266 * size to be able to convert the buffer. 1267 */ 1268 xmlErrInternal(ctxt, "switching encoding : no input\n", NULL); 1269 return (-1); 1270 } 1271 return (0); 1272 } 1273 1274 /** 1275 * xmlSwitchInputEncoding: 1276 * @ctxt: the parser context 1277 * @input: the input stream 1278 * @handler: the encoding handler 1279 * 1280 * change the input functions when discovering the character encoding 1281 * of a given entity. 1282 * 1283 * Returns 0 in case of success, -1 otherwise 1284 */ 1285 int 1286 xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, 1287 xmlCharEncodingHandlerPtr handler) { 1288 return(xmlSwitchInputEncodingInt(ctxt, input, handler, -1)); 1289 } 1290 1291 /** 1292 * xmlSwitchToEncodingInt: 1293 * @ctxt: the parser context 1294 * @handler: the encoding handler 1295 * @len: the length to convert or -1 1296 * 1297 * change the input functions when discovering the character encoding 1298 * of a given entity, and convert only @len bytes of the output, this 1299 * is needed on auto detect to allows any declared encoding later to 1300 * convert the actual content after the xmlDecl 1301 * 1302 * Returns 0 in case of success, -1 otherwise 1303 */ 1304 static int 1305 xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt, 1306 xmlCharEncodingHandlerPtr handler, int len) { 1307 int ret = 0; 1308 1309 if (handler != NULL) { 1310 if (ctxt->input != NULL) { 1311 ret = xmlSwitchInputEncodingInt(ctxt, ctxt->input, handler, len); 1312 } else { 1313 xmlErrInternal(ctxt, "xmlSwitchToEncoding : no input\n", 1314 NULL); 1315 return(-1); 1316 } 1317 /* 1318 * The parsing is now done in UTF8 natively 1319 */ 1320 ctxt->charset = XML_CHAR_ENCODING_UTF8; 1321 } else 1322 return(-1); 1323 return(ret); 1324 } 1325 1326 /** 1327 * xmlSwitchToEncoding: 1328 * @ctxt: the parser context 1329 * @handler: the encoding handler 1330 * 1331 * change the input functions when discovering the character encoding 1332 * of a given entity. 1333 * 1334 * Returns 0 in case of success, -1 otherwise 1335 */ 1336 int 1337 xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler) 1338 { 1339 return (xmlSwitchToEncodingInt(ctxt, handler, -1)); 1340 } 1341 1342 /************************************************************************ 1343 * * 1344 * Commodity functions to handle entities processing * 1345 * * 1346 ************************************************************************/ 1347 1348 /** 1349 * xmlFreeInputStream: 1350 * @input: an xmlParserInputPtr 1351 * 1352 * Free up an input stream. 1353 */ 1354 void 1355 xmlFreeInputStream(xmlParserInputPtr input) { 1356 if (input == NULL) return; 1357 1358 if (input->filename != NULL) xmlFree((char *) input->filename); 1359 if (input->directory != NULL) xmlFree((char *) input->directory); 1360 if (input->encoding != NULL) xmlFree((char *) input->encoding); 1361 if (input->version != NULL) xmlFree((char *) input->version); 1362 if ((input->free != NULL) && (input->base != NULL)) 1363 input->free((xmlChar *) input->base); 1364 if (input->buf != NULL) 1365 xmlFreeParserInputBuffer(input->buf); 1366 xmlFree(input); 1367 } 1368 1369 /** 1370 * xmlNewInputStream: 1371 * @ctxt: an XML parser context 1372 * 1373 * Create a new input stream structure. 1374 * 1375 * Returns the new input stream or NULL 1376 */ 1377 xmlParserInputPtr 1378 xmlNewInputStream(xmlParserCtxtPtr ctxt) { 1379 xmlParserInputPtr input; 1380 1381 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput)); 1382 if (input == NULL) { 1383 xmlErrMemory(ctxt, "couldn't allocate a new input stream\n"); 1384 return(NULL); 1385 } 1386 memset(input, 0, sizeof(xmlParserInput)); 1387 input->line = 1; 1388 input->col = 1; 1389 input->standalone = -1; 1390 1391 /* 1392 * If the context is NULL the id cannot be initialized, but that 1393 * should not happen while parsing which is the situation where 1394 * the id is actually needed. 1395 */ 1396 if (ctxt != NULL) 1397 input->id = ctxt->input_id++; 1398 1399 return(input); 1400 } 1401 1402 /** 1403 * xmlNewIOInputStream: 1404 * @ctxt: an XML parser context 1405 * @input: an I/O Input 1406 * @enc: the charset encoding if known 1407 * 1408 * Create a new input stream structure encapsulating the @input into 1409 * a stream suitable for the parser. 1410 * 1411 * Returns the new input stream or NULL 1412 */ 1413 xmlParserInputPtr 1414 xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input, 1415 xmlCharEncoding enc) { 1416 xmlParserInputPtr inputStream; 1417 1418 if (input == NULL) return(NULL); 1419 if (xmlParserDebugEntities) 1420 xmlGenericError(xmlGenericErrorContext, "new input from I/O\n"); 1421 inputStream = xmlNewInputStream(ctxt); 1422 if (inputStream == NULL) { 1423 return(NULL); 1424 } 1425 inputStream->filename = NULL; 1426 inputStream->buf = input; 1427 xmlBufResetInput(inputStream->buf->buffer, inputStream); 1428 1429 if (enc != XML_CHAR_ENCODING_NONE) { 1430 xmlSwitchEncoding(ctxt, enc); 1431 } 1432 1433 return(inputStream); 1434 } 1435 1436 /** 1437 * xmlNewEntityInputStream: 1438 * @ctxt: an XML parser context 1439 * @entity: an Entity pointer 1440 * 1441 * Create a new input stream based on an xmlEntityPtr 1442 * 1443 * Returns the new input stream or NULL 1444 */ 1445 xmlParserInputPtr 1446 xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 1447 xmlParserInputPtr input; 1448 1449 if (entity == NULL) { 1450 xmlErrInternal(ctxt, "xmlNewEntityInputStream entity = NULL\n", 1451 NULL); 1452 return(NULL); 1453 } 1454 if (xmlParserDebugEntities) 1455 xmlGenericError(xmlGenericErrorContext, 1456 "new input from entity: %s\n", entity->name); 1457 if (entity->content == NULL) { 1458 switch (entity->etype) { 1459 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY: 1460 xmlErrInternal(ctxt, "Cannot parse entity %s\n", 1461 entity->name); 1462 break; 1463 case XML_EXTERNAL_GENERAL_PARSED_ENTITY: 1464 case XML_EXTERNAL_PARAMETER_ENTITY: 1465 return(xmlLoadExternalEntity((char *) entity->URI, 1466 (char *) entity->ExternalID, ctxt)); 1467 case XML_INTERNAL_GENERAL_ENTITY: 1468 xmlErrInternal(ctxt, 1469 "Internal entity %s without content !\n", 1470 entity->name); 1471 break; 1472 case XML_INTERNAL_PARAMETER_ENTITY: 1473 xmlErrInternal(ctxt, 1474 "Internal parameter entity %s without content !\n", 1475 entity->name); 1476 break; 1477 case XML_INTERNAL_PREDEFINED_ENTITY: 1478 xmlErrInternal(ctxt, 1479 "Predefined entity %s without content !\n", 1480 entity->name); 1481 break; 1482 } 1483 return(NULL); 1484 } 1485 input = xmlNewInputStream(ctxt); 1486 if (input == NULL) { 1487 return(NULL); 1488 } 1489 if (entity->URI != NULL) 1490 input->filename = (char *) xmlStrdup((xmlChar *) entity->URI); 1491 input->base = entity->content; 1492 if (entity->length == 0) 1493 entity->length = xmlStrlen(entity->content); 1494 input->cur = entity->content; 1495 input->length = entity->length; 1496 input->end = &entity->content[input->length]; 1497 return(input); 1498 } 1499 1500 /** 1501 * xmlNewStringInputStream: 1502 * @ctxt: an XML parser context 1503 * @buffer: an memory buffer 1504 * 1505 * Create a new input stream based on a memory buffer. 1506 * Returns the new input stream 1507 */ 1508 xmlParserInputPtr 1509 xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) { 1510 xmlParserInputPtr input; 1511 1512 if (buffer == NULL) { 1513 xmlErrInternal(ctxt, "xmlNewStringInputStream string = NULL\n", 1514 NULL); 1515 return(NULL); 1516 } 1517 if (xmlParserDebugEntities) 1518 xmlGenericError(xmlGenericErrorContext, 1519 "new fixed input: %.30s\n", buffer); 1520 input = xmlNewInputStream(ctxt); 1521 if (input == NULL) { 1522 xmlErrMemory(ctxt, "couldn't allocate a new input stream\n"); 1523 return(NULL); 1524 } 1525 input->base = buffer; 1526 input->cur = buffer; 1527 input->length = xmlStrlen(buffer); 1528 input->end = &buffer[input->length]; 1529 return(input); 1530 } 1531 1532 /** 1533 * xmlNewInputFromFile: 1534 * @ctxt: an XML parser context 1535 * @filename: the filename to use as entity 1536 * 1537 * Create a new input stream based on a file or an URL. 1538 * 1539 * Returns the new input stream or NULL in case of error 1540 */ 1541 xmlParserInputPtr 1542 xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) { 1543 xmlParserInputBufferPtr buf; 1544 xmlParserInputPtr inputStream; 1545 char *directory = NULL; 1546 xmlChar *URI = NULL; 1547 1548 if (xmlParserDebugEntities) 1549 xmlGenericError(xmlGenericErrorContext, 1550 "new input from file: %s\n", filename); 1551 if (ctxt == NULL) return(NULL); 1552 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE); 1553 if (buf == NULL) { 1554 if (filename == NULL) 1555 __xmlLoaderErr(ctxt, 1556 "failed to load external entity: NULL filename \n", 1557 NULL); 1558 else 1559 __xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n", 1560 (const char *) filename); 1561 return(NULL); 1562 } 1563 1564 inputStream = xmlNewInputStream(ctxt); 1565 if (inputStream == NULL) 1566 return(NULL); 1567 1568 inputStream->buf = buf; 1569 inputStream = xmlCheckHTTPInput(ctxt, inputStream); 1570 if (inputStream == NULL) 1571 return(NULL); 1572 1573 if (inputStream->filename == NULL) 1574 URI = xmlStrdup((xmlChar *) filename); 1575 else 1576 URI = xmlStrdup((xmlChar *) inputStream->filename); 1577 directory = xmlParserGetDirectory((const char *) URI); 1578 if (inputStream->filename != NULL) xmlFree((char *)inputStream->filename); 1579 inputStream->filename = (char *) xmlCanonicPath((const xmlChar *) URI); 1580 if (URI != NULL) xmlFree((char *) URI); 1581 inputStream->directory = directory; 1582 1583 xmlBufResetInput(inputStream->buf->buffer, inputStream); 1584 if ((ctxt->directory == NULL) && (directory != NULL)) 1585 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory); 1586 return(inputStream); 1587 } 1588 1589 /************************************************************************ 1590 * * 1591 * Commodity functions to handle parser contexts * 1592 * * 1593 ************************************************************************/ 1594 1595 /** 1596 * xmlInitParserCtxt: 1597 * @ctxt: an XML parser context 1598 * 1599 * Initialize a parser context 1600 * 1601 * Returns 0 in case of success and -1 in case of error 1602 */ 1603 1604 int 1605 xmlInitParserCtxt(xmlParserCtxtPtr ctxt) 1606 { 1607 xmlParserInputPtr input; 1608 1609 if(ctxt==NULL) { 1610 xmlErrInternal(NULL, "Got NULL parser context\n", NULL); 1611 return(-1); 1612 } 1613 1614 xmlDefaultSAXHandlerInit(); 1615 1616 if (ctxt->dict == NULL) 1617 ctxt->dict = xmlDictCreate(); 1618 if (ctxt->dict == NULL) { 1619 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1620 return(-1); 1621 } 1622 xmlDictSetLimit(ctxt->dict, XML_MAX_DICTIONARY_LIMIT); 1623 1624 if (ctxt->sax == NULL) 1625 ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler)); 1626 if (ctxt->sax == NULL) { 1627 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1628 return(-1); 1629 } 1630 else 1631 xmlSAXVersion(ctxt->sax, 2); 1632 1633 ctxt->maxatts = 0; 1634 ctxt->atts = NULL; 1635 /* Allocate the Input stack */ 1636 if (ctxt->inputTab == NULL) { 1637 ctxt->inputTab = (xmlParserInputPtr *) 1638 xmlMalloc(5 * sizeof(xmlParserInputPtr)); 1639 ctxt->inputMax = 5; 1640 } 1641 if (ctxt->inputTab == NULL) { 1642 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1643 ctxt->inputNr = 0; 1644 ctxt->inputMax = 0; 1645 ctxt->input = NULL; 1646 return(-1); 1647 } 1648 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 1649 xmlFreeInputStream(input); 1650 } 1651 ctxt->inputNr = 0; 1652 ctxt->input = NULL; 1653 1654 ctxt->version = NULL; 1655 ctxt->encoding = NULL; 1656 ctxt->standalone = -1; 1657 ctxt->hasExternalSubset = 0; 1658 ctxt->hasPErefs = 0; 1659 ctxt->html = 0; 1660 ctxt->external = 0; 1661 ctxt->instate = XML_PARSER_START; 1662 ctxt->token = 0; 1663 ctxt->directory = NULL; 1664 1665 /* Allocate the Node stack */ 1666 if (ctxt->nodeTab == NULL) { 1667 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr)); 1668 ctxt->nodeMax = 10; 1669 } 1670 if (ctxt->nodeTab == NULL) { 1671 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1672 ctxt->nodeNr = 0; 1673 ctxt->nodeMax = 0; 1674 ctxt->node = NULL; 1675 ctxt->inputNr = 0; 1676 ctxt->inputMax = 0; 1677 ctxt->input = NULL; 1678 return(-1); 1679 } 1680 ctxt->nodeNr = 0; 1681 ctxt->node = NULL; 1682 1683 /* Allocate the Name stack */ 1684 if (ctxt->nameTab == NULL) { 1685 ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *)); 1686 ctxt->nameMax = 10; 1687 } 1688 if (ctxt->nameTab == NULL) { 1689 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1690 ctxt->nodeNr = 0; 1691 ctxt->nodeMax = 0; 1692 ctxt->node = NULL; 1693 ctxt->inputNr = 0; 1694 ctxt->inputMax = 0; 1695 ctxt->input = NULL; 1696 ctxt->nameNr = 0; 1697 ctxt->nameMax = 0; 1698 ctxt->name = NULL; 1699 return(-1); 1700 } 1701 ctxt->nameNr = 0; 1702 ctxt->name = NULL; 1703 1704 /* Allocate the space stack */ 1705 if (ctxt->spaceTab == NULL) { 1706 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int)); 1707 ctxt->spaceMax = 10; 1708 } 1709 if (ctxt->spaceTab == NULL) { 1710 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1711 ctxt->nodeNr = 0; 1712 ctxt->nodeMax = 0; 1713 ctxt->node = NULL; 1714 ctxt->inputNr = 0; 1715 ctxt->inputMax = 0; 1716 ctxt->input = NULL; 1717 ctxt->nameNr = 0; 1718 ctxt->nameMax = 0; 1719 ctxt->name = NULL; 1720 ctxt->spaceNr = 0; 1721 ctxt->spaceMax = 0; 1722 ctxt->space = NULL; 1723 return(-1); 1724 } 1725 ctxt->spaceNr = 1; 1726 ctxt->spaceMax = 10; 1727 ctxt->spaceTab[0] = -1; 1728 ctxt->space = &ctxt->spaceTab[0]; 1729 ctxt->userData = ctxt; 1730 ctxt->myDoc = NULL; 1731 ctxt->wellFormed = 1; 1732 ctxt->nsWellFormed = 1; 1733 ctxt->valid = 1; 1734 ctxt->loadsubset = xmlLoadExtDtdDefaultValue; 1735 if (ctxt->loadsubset) { 1736 ctxt->options |= XML_PARSE_DTDLOAD; 1737 } 1738 ctxt->validate = xmlDoValidityCheckingDefaultValue; 1739 ctxt->pedantic = xmlPedanticParserDefaultValue; 1740 if (ctxt->pedantic) { 1741 ctxt->options |= XML_PARSE_PEDANTIC; 1742 } 1743 ctxt->linenumbers = xmlLineNumbersDefaultValue; 1744 ctxt->keepBlanks = xmlKeepBlanksDefaultValue; 1745 if (ctxt->keepBlanks == 0) { 1746 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace; 1747 ctxt->options |= XML_PARSE_NOBLANKS; 1748 } 1749 1750 ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0; 1751 ctxt->vctxt.userData = ctxt; 1752 ctxt->vctxt.error = xmlParserValidityError; 1753 ctxt->vctxt.warning = xmlParserValidityWarning; 1754 if (ctxt->validate) { 1755 if (xmlGetWarningsDefaultValue == 0) 1756 ctxt->vctxt.warning = NULL; 1757 else 1758 ctxt->vctxt.warning = xmlParserValidityWarning; 1759 ctxt->vctxt.nodeMax = 0; 1760 ctxt->options |= XML_PARSE_DTDVALID; 1761 } 1762 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue; 1763 if (ctxt->replaceEntities) { 1764 ctxt->options |= XML_PARSE_NOENT; 1765 } 1766 ctxt->record_info = 0; 1767 ctxt->nbChars = 0; 1768 ctxt->checkIndex = 0; 1769 ctxt->inSubset = 0; 1770 ctxt->errNo = XML_ERR_OK; 1771 ctxt->depth = 0; 1772 ctxt->charset = XML_CHAR_ENCODING_UTF8; 1773 ctxt->catalogs = NULL; 1774 ctxt->nbentities = 0; 1775 ctxt->sizeentities = 0; 1776 ctxt->sizeentcopy = 0; 1777 ctxt->input_id = 1; 1778 xmlInitNodeInfoSeq(&ctxt->node_seq); 1779 return(0); 1780 } 1781 1782 /** 1783 * xmlFreeParserCtxt: 1784 * @ctxt: an XML parser context 1785 * 1786 * Free all the memory used by a parser context. However the parsed 1787 * document in ctxt->myDoc is not freed. 1788 */ 1789 1790 void 1791 xmlFreeParserCtxt(xmlParserCtxtPtr ctxt) 1792 { 1793 xmlParserInputPtr input; 1794 1795 if (ctxt == NULL) return; 1796 1797 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 1798 xmlFreeInputStream(input); 1799 } 1800 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab); 1801 if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab); 1802 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab); 1803 if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab); 1804 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab); 1805 if (ctxt->version != NULL) xmlFree((char *) ctxt->version); 1806 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding); 1807 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI); 1808 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem); 1809 #ifdef LIBXML_SAX1_ENABLED 1810 if ((ctxt->sax != NULL) && 1811 (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)) 1812 #else 1813 if (ctxt->sax != NULL) 1814 #endif /* LIBXML_SAX1_ENABLED */ 1815 xmlFree(ctxt->sax); 1816 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory); 1817 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab); 1818 if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts); 1819 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 1820 if (ctxt->nsTab != NULL) xmlFree((char *) ctxt->nsTab); 1821 if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab); 1822 if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs); 1823 if (ctxt->attsDefault != NULL) 1824 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree); 1825 if (ctxt->attsSpecial != NULL) 1826 xmlHashFree(ctxt->attsSpecial, NULL); 1827 if (ctxt->freeElems != NULL) { 1828 xmlNodePtr cur, next; 1829 1830 cur = ctxt->freeElems; 1831 while (cur != NULL) { 1832 next = cur->next; 1833 xmlFree(cur); 1834 cur = next; 1835 } 1836 } 1837 if (ctxt->freeAttrs != NULL) { 1838 xmlAttrPtr cur, next; 1839 1840 cur = ctxt->freeAttrs; 1841 while (cur != NULL) { 1842 next = cur->next; 1843 xmlFree(cur); 1844 cur = next; 1845 } 1846 } 1847 /* 1848 * cleanup the error strings 1849 */ 1850 if (ctxt->lastError.message != NULL) 1851 xmlFree(ctxt->lastError.message); 1852 if (ctxt->lastError.file != NULL) 1853 xmlFree(ctxt->lastError.file); 1854 if (ctxt->lastError.str1 != NULL) 1855 xmlFree(ctxt->lastError.str1); 1856 if (ctxt->lastError.str2 != NULL) 1857 xmlFree(ctxt->lastError.str2); 1858 if (ctxt->lastError.str3 != NULL) 1859 xmlFree(ctxt->lastError.str3); 1860 1861 #ifdef LIBXML_CATALOG_ENABLED 1862 if (ctxt->catalogs != NULL) 1863 xmlCatalogFreeLocal(ctxt->catalogs); 1864 #endif 1865 xmlFree(ctxt); 1866 } 1867 1868 /** 1869 * xmlNewParserCtxt: 1870 * 1871 * Allocate and initialize a new parser context. 1872 * 1873 * Returns the xmlParserCtxtPtr or NULL 1874 */ 1875 1876 xmlParserCtxtPtr 1877 xmlNewParserCtxt(void) 1878 { 1879 xmlParserCtxtPtr ctxt; 1880 1881 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt)); 1882 if (ctxt == NULL) { 1883 xmlErrMemory(NULL, "cannot allocate parser context\n"); 1884 return(NULL); 1885 } 1886 memset(ctxt, 0, sizeof(xmlParserCtxt)); 1887 if (xmlInitParserCtxt(ctxt) < 0) { 1888 xmlFreeParserCtxt(ctxt); 1889 return(NULL); 1890 } 1891 return(ctxt); 1892 } 1893 1894 /************************************************************************ 1895 * * 1896 * Handling of node informations * 1897 * * 1898 ************************************************************************/ 1899 1900 /** 1901 * xmlClearParserCtxt: 1902 * @ctxt: an XML parser context 1903 * 1904 * Clear (release owned resources) and reinitialize a parser context 1905 */ 1906 1907 void 1908 xmlClearParserCtxt(xmlParserCtxtPtr ctxt) 1909 { 1910 if (ctxt==NULL) 1911 return; 1912 xmlClearNodeInfoSeq(&ctxt->node_seq); 1913 xmlCtxtReset(ctxt); 1914 } 1915 1916 1917 /** 1918 * xmlParserFindNodeInfo: 1919 * @ctx: an XML parser context 1920 * @node: an XML node within the tree 1921 * 1922 * Find the parser node info struct for a given node 1923 * 1924 * Returns an xmlParserNodeInfo block pointer or NULL 1925 */ 1926 const xmlParserNodeInfo * 1927 xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx, const xmlNodePtr node) 1928 { 1929 unsigned long pos; 1930 1931 if ((ctx == NULL) || (node == NULL)) 1932 return (NULL); 1933 /* Find position where node should be at */ 1934 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node); 1935 if (pos < ctx->node_seq.length 1936 && ctx->node_seq.buffer[pos].node == node) 1937 return &ctx->node_seq.buffer[pos]; 1938 else 1939 return NULL; 1940 } 1941 1942 1943 /** 1944 * xmlInitNodeInfoSeq: 1945 * @seq: a node info sequence pointer 1946 * 1947 * -- Initialize (set to initial state) node info sequence 1948 */ 1949 void 1950 xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq) 1951 { 1952 if (seq == NULL) 1953 return; 1954 seq->length = 0; 1955 seq->maximum = 0; 1956 seq->buffer = NULL; 1957 } 1958 1959 /** 1960 * xmlClearNodeInfoSeq: 1961 * @seq: a node info sequence pointer 1962 * 1963 * -- Clear (release memory and reinitialize) node 1964 * info sequence 1965 */ 1966 void 1967 xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq) 1968 { 1969 if (seq == NULL) 1970 return; 1971 if (seq->buffer != NULL) 1972 xmlFree(seq->buffer); 1973 xmlInitNodeInfoSeq(seq); 1974 } 1975 1976 /** 1977 * xmlParserFindNodeInfoIndex: 1978 * @seq: a node info sequence pointer 1979 * @node: an XML node pointer 1980 * 1981 * 1982 * xmlParserFindNodeInfoIndex : Find the index that the info record for 1983 * the given node is or should be at in a sorted sequence 1984 * 1985 * Returns a long indicating the position of the record 1986 */ 1987 unsigned long 1988 xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq, 1989 const xmlNodePtr node) 1990 { 1991 unsigned long upper, lower, middle; 1992 int found = 0; 1993 1994 if ((seq == NULL) || (node == NULL)) 1995 return ((unsigned long) -1); 1996 1997 /* Do a binary search for the key */ 1998 lower = 1; 1999 upper = seq->length; 2000 middle = 0; 2001 while (lower <= upper && !found) { 2002 middle = lower + (upper - lower) / 2; 2003 if (node == seq->buffer[middle - 1].node) 2004 found = 1; 2005 else if (node < seq->buffer[middle - 1].node) 2006 upper = middle - 1; 2007 else 2008 lower = middle + 1; 2009 } 2010 2011 /* Return position */ 2012 if (middle == 0 || seq->buffer[middle - 1].node < node) 2013 return middle; 2014 else 2015 return middle - 1; 2016 } 2017 2018 2019 /** 2020 * xmlParserAddNodeInfo: 2021 * @ctxt: an XML parser context 2022 * @info: a node info sequence pointer 2023 * 2024 * Insert node info record into the sorted sequence 2025 */ 2026 void 2027 xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt, 2028 const xmlParserNodeInfoPtr info) 2029 { 2030 unsigned long pos; 2031 2032 if ((ctxt == NULL) || (info == NULL)) return; 2033 2034 /* Find pos and check to see if node is already in the sequence */ 2035 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr) 2036 info->node); 2037 2038 if ((pos < ctxt->node_seq.length) && 2039 (ctxt->node_seq.buffer != NULL) && 2040 (ctxt->node_seq.buffer[pos].node == info->node)) { 2041 ctxt->node_seq.buffer[pos] = *info; 2042 } 2043 2044 /* Otherwise, we need to add new node to buffer */ 2045 else { 2046 if ((ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) || 2047 (ctxt->node_seq.buffer == NULL)) { 2048 xmlParserNodeInfo *tmp_buffer; 2049 unsigned int byte_size; 2050 2051 if (ctxt->node_seq.maximum == 0) 2052 ctxt->node_seq.maximum = 2; 2053 byte_size = (sizeof(*ctxt->node_seq.buffer) * 2054 (2 * ctxt->node_seq.maximum)); 2055 2056 if (ctxt->node_seq.buffer == NULL) 2057 tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size); 2058 else 2059 tmp_buffer = 2060 (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer, 2061 byte_size); 2062 2063 if (tmp_buffer == NULL) { 2064 xmlErrMemory(ctxt, "failed to allocate buffer\n"); 2065 return; 2066 } 2067 ctxt->node_seq.buffer = tmp_buffer; 2068 ctxt->node_seq.maximum *= 2; 2069 } 2070 2071 /* If position is not at end, move elements out of the way */ 2072 if (pos != ctxt->node_seq.length) { 2073 unsigned long i; 2074 2075 for (i = ctxt->node_seq.length; i > pos; i--) 2076 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1]; 2077 } 2078 2079 /* Copy element and increase length */ 2080 ctxt->node_seq.buffer[pos] = *info; 2081 ctxt->node_seq.length++; 2082 } 2083 } 2084 2085 /************************************************************************ 2086 * * 2087 * Defaults settings * 2088 * * 2089 ************************************************************************/ 2090 /** 2091 * xmlPedanticParserDefault: 2092 * @val: int 0 or 1 2093 * 2094 * Set and return the previous value for enabling pedantic warnings. 2095 * 2096 * Returns the last value for 0 for no substitution, 1 for substitution. 2097 */ 2098 2099 int 2100 xmlPedanticParserDefault(int val) { 2101 int old = xmlPedanticParserDefaultValue; 2102 2103 xmlPedanticParserDefaultValue = val; 2104 return(old); 2105 } 2106 2107 /** 2108 * xmlLineNumbersDefault: 2109 * @val: int 0 or 1 2110 * 2111 * Set and return the previous value for enabling line numbers in elements 2112 * contents. This may break on old application and is turned off by default. 2113 * 2114 * Returns the last value for 0 for no substitution, 1 for substitution. 2115 */ 2116 2117 int 2118 xmlLineNumbersDefault(int val) { 2119 int old = xmlLineNumbersDefaultValue; 2120 2121 xmlLineNumbersDefaultValue = val; 2122 return(old); 2123 } 2124 2125 /** 2126 * xmlSubstituteEntitiesDefault: 2127 * @val: int 0 or 1 2128 * 2129 * Set and return the previous value for default entity support. 2130 * Initially the parser always keep entity references instead of substituting 2131 * entity values in the output. This function has to be used to change the 2132 * default parser behavior 2133 * SAX::substituteEntities() has to be used for changing that on a file by 2134 * file basis. 2135 * 2136 * Returns the last value for 0 for no substitution, 1 for substitution. 2137 */ 2138 2139 int 2140 xmlSubstituteEntitiesDefault(int val) { 2141 int old = xmlSubstituteEntitiesDefaultValue; 2142 2143 xmlSubstituteEntitiesDefaultValue = val; 2144 return(old); 2145 } 2146 2147 /** 2148 * xmlKeepBlanksDefault: 2149 * @val: int 0 or 1 2150 * 2151 * Set and return the previous value for default blanks text nodes support. 2152 * The 1.x version of the parser used an heuristic to try to detect 2153 * ignorable white spaces. As a result the SAX callback was generating 2154 * xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when 2155 * using the DOM output text nodes containing those blanks were not generated. 2156 * The 2.x and later version will switch to the XML standard way and 2157 * ignorableWhitespace() are only generated when running the parser in 2158 * validating mode and when the current element doesn't allow CDATA or 2159 * mixed content. 2160 * This function is provided as a way to force the standard behavior 2161 * on 1.X libs and to switch back to the old mode for compatibility when 2162 * running 1.X client code on 2.X . Upgrade of 1.X code should be done 2163 * by using xmlIsBlankNode() commodity function to detect the "empty" 2164 * nodes generated. 2165 * This value also affect autogeneration of indentation when saving code 2166 * if blanks sections are kept, indentation is not generated. 2167 * 2168 * Returns the last value for 0 for no substitution, 1 for substitution. 2169 */ 2170 2171 int 2172 xmlKeepBlanksDefault(int val) { 2173 int old = xmlKeepBlanksDefaultValue; 2174 2175 xmlKeepBlanksDefaultValue = val; 2176 if (!val) xmlIndentTreeOutput = 1; 2177 return(old); 2178 } 2179 2180 #define bottom_parserInternals 2181 #include "elfgcchack.h"